[Erp5-report] r30105 - /erp5/trunk/utils/erp5diff/ERP5Diff.py
nobody at svn.erp5.org
nobody at svn.erp5.org
Thu Oct 29 14:20:00 CET 2009
Author: nicolas
Date: Thu Oct 29 14:19:58 2009
New Revision: 30105
URL: http://svn.erp5.org?rev=30105&view=rev
Log:
- Performance improvement: use an iterator to compare files;
this avoids reading the files entirely if they differ during reading.
- Use a parser with the remove_blank_text flag to normalise output and c14n.
- Do not append tail to xupdate elements if not needed.
Modified:
erp5/trunk/utils/erp5diff/ERP5Diff.py
Modified: erp5/trunk/utils/erp5diff/ERP5Diff.py
URL: http://svn.erp5.org/erp5/trunk/utils/erp5diff/ERP5Diff.py?rev=30105&r1=30104&r2=30105&view=diff
==============================================================================
--- erp5/trunk/utils/erp5diff/ERP5Diff.py [utf8] (original)
+++ erp5/trunk/utils/erp5diff/ERP5Diff.py [utf8] Thu Oct 29 14:19:58 2009
@@ -22,16 +22,31 @@
##############################################################################
from lxml import etree
+parser = etree.XMLParser(remove_blank_text=True)
import sys
import getopt
import os
-from StringIO import StringIO
+try:
+ from cStringIO import StringIO
+except ImportError:
+ from StringIO import StringIO
import re
import codecs
from copy import deepcopy
from interfaces.erp5diff import IERP5Diff
import zope.interface
+
+class FileContentNotEqual(Exception):
+ pass
+
+def fileComparisonIterator(file1, file2):
+ value1 = file1.next()
+ value2 = file2.next()
+ if value1 == value2:
+ yield value1, value2
+ else:
+ raise FileContentNotEqual
class ERP5Diff:
"""
@@ -52,7 +67,7 @@
# Declarative interfaces
zope.interface.implements(IERP5Diff,)
- __version__ = 0.4
+ __version__ = 0.5
def __init__(self):
"""
@@ -82,9 +97,9 @@
doc_list = []
for a in args:
if isinstance(a, str):
- doc_list.append(etree.fromstring(a))
+ doc_list.append(etree.fromstring(a, parser))
else:
- element_tree = etree.parse(a)
+ element_tree = etree.parse(a, parser)
doc_list.append(element_tree.getroot())
return doc_list
@@ -252,7 +267,7 @@
for child in element:
clone_node = deepcopy(child)
child_element.append(clone_node)
- if self._hasChildren(child_element):
+ if self._hasChildren(child_element) and element.text is not None:
child_element[-1].tail = element.text
else:
child_element.text = element.text
@@ -393,17 +408,25 @@
new_candidate_list = new_list[:]
for old_element in old_list:
old_tree = etree.fromstring(etree.tostring(old_element)).getroottree()
- f = StringIO()
- old_tree.write_c14n(f)
- old_C14n = f.getvalue()
+ old_c14n = StringIO()
+ old_tree.write_c14n(old_c14n)
+ old_c14n.seek(0)
for new_element in new_list:
if new_element not in new_candidate_list:
continue
new_tree = etree.fromstring(etree.tostring(new_element)).getroottree()
- f = StringIO()
- new_tree.write_c14n(f)
- new_C14n = f.getvalue()
- if old_C14n == new_C14n:
+ new_c14n = StringIO()
+ new_tree.write_c14n(new_c14n)
+ new_c14n.seek(0)
+ file_equality = True
+ try:
+ #Use generator to avoid reading file entirely
+ #Stop iteration at first difference
+ list(fileComparisonIterator(old_c14n, new_c14n))
+ except FileContentNotEqual:
+ file_equality = False
+ old_c14n.seek(0)
+ if file_equality:
if new_element in new_candidate_list:
new_candidate_list.remove(new_element)
if old_element in old_candidate_list:
More information about the Erp5-report
mailing list