[Erp5-report] r30105 - /erp5/trunk/utils/erp5diff/ERP5Diff.py

nobody at svn.erp5.org nobody at svn.erp5.org
Thu Oct 29 14:20:00 CET 2009


Author: nicolas
Date: Thu Oct 29 14:19:58 2009
New Revision: 30105

URL: http://svn.erp5.org?rev=30105&view=rev
Log:
- Performance improvement: use an iterator to compare files;
this avoids reading the files entirely if they differ partway through.
- Use a parser with the remove_blank_text flag to normalise output and c14n.
- Do not append tail to xupdate elements if not needed.

Modified:
    erp5/trunk/utils/erp5diff/ERP5Diff.py

Modified: erp5/trunk/utils/erp5diff/ERP5Diff.py
URL: http://svn.erp5.org/erp5/trunk/utils/erp5diff/ERP5Diff.py?rev=30105&r1=30104&r2=30105&view=diff
==============================================================================
--- erp5/trunk/utils/erp5diff/ERP5Diff.py [utf8] (original)
+++ erp5/trunk/utils/erp5diff/ERP5Diff.py [utf8] Thu Oct 29 14:19:58 2009
@@ -22,16 +22,31 @@
 ##############################################################################
 
 from lxml import etree
+parser = etree.XMLParser(remove_blank_text=True)
 
 import sys
 import getopt
 import os
-from StringIO import StringIO
+try:
+  from cStringIO import StringIO
+except ImportError:
+  from StringIO import StringIO
 import re
 import codecs
 from copy import deepcopy
 from interfaces.erp5diff import IERP5Diff
 import zope.interface
+
+class FileContentNotEqual(Exception):
+  pass
+
+def fileComparisonIterator(file1, file2):
+  value1 = file1.next()
+  value2 = file2.next()
+  if value1 == value2:
+    yield value1, value2
+  else:
+    raise FileContentNotEqual
 
 class ERP5Diff:
   """
@@ -52,7 +67,7 @@
   # Declarative interfaces
   zope.interface.implements(IERP5Diff,)
 
-  __version__ = 0.4
+  __version__ = 0.5
 
   def __init__(self):
     """
@@ -82,9 +97,9 @@
     doc_list = []
     for a in args:
       if isinstance(a, str):
-        doc_list.append(etree.fromstring(a))
+        doc_list.append(etree.fromstring(a, parser))
       else:
-        element_tree = etree.parse(a)
+        element_tree = etree.parse(a, parser)
         doc_list.append(element_tree.getroot())
     return doc_list
 
@@ -252,7 +267,7 @@
       for child in element:
         clone_node = deepcopy(child)
         child_element.append(clone_node)
-      if self._hasChildren(child_element):
+      if self._hasChildren(child_element) and element.text is not None:
         child_element[-1].tail = element.text
       else:
         child_element.text = element.text
@@ -393,17 +408,25 @@
     new_candidate_list = new_list[:]
     for old_element in old_list:
       old_tree = etree.fromstring(etree.tostring(old_element)).getroottree()
-      f = StringIO()
-      old_tree.write_c14n(f)
-      old_C14n = f.getvalue()
+      old_c14n = StringIO()
+      old_tree.write_c14n(old_c14n)
+      old_c14n.seek(0)
       for new_element in new_list:
         if new_element not in new_candidate_list:
           continue 
         new_tree = etree.fromstring(etree.tostring(new_element)).getroottree()
-        f = StringIO()
-        new_tree.write_c14n(f)
-        new_C14n = f.getvalue()
-        if old_C14n == new_C14n:
+        new_c14n = StringIO()
+        new_tree.write_c14n(new_c14n)
+        new_c14n.seek(0)
+        file_equality = True
+        try:
+          #Use generator to avoid reading file entirely
+          #Stop iteration at first difference
+          list(fileComparisonIterator(old_c14n, new_c14n))
+        except FileContentNotEqual:
+          file_equality = False
+        old_c14n.seek(0)
+        if file_equality:
           if new_element in new_candidate_list:
             new_candidate_list.remove(new_element)
           if old_element in old_candidate_list:




More information about the Erp5-report mailing list