[Erp5-report] r45422 nicolas - /erp5/trunk/products/ERP5OOo/transforms/

nobody at svn.erp5.org nobody at svn.erp5.org
Thu Apr 14 11:50:42 CEST 2011


Author: nicolas
Date: Thu Apr 14 11:50:42 2011
New Revision: 45422

URL: http://svn.erp5.org?rev=45422&view=rev
Log:
Work around a bug in lxml where the include_meta_content_type parameter is not honoured.
This patch will be followed by a ticket addressed to the lxml maintainers.

As soon as a clean patch is released, this commit must be reverted.


Modified:
    erp5/trunk/products/ERP5OOo/transforms/html_to_odt.py
    erp5/trunk/products/ERP5OOo/transforms/oood_commandtransform.py

Modified: erp5/trunk/products/ERP5OOo/transforms/html_to_odt.py
URL: http://svn.erp5.org/erp5/trunk/products/ERP5OOo/transforms/html_to_odt.py?rev=45422&r1=45421&r2=45422&view=diff
==============================================================================
--- erp5/trunk/products/ERP5OOo/transforms/html_to_odt.py [utf8] (original)
+++ erp5/trunk/products/ERP5OOo/transforms/html_to_odt.py [utf8] Thu Apr 14 11:50:42 2011
@@ -2,6 +2,7 @@
 from Products.PortalTransforms.interfaces import itransform
 from zope.interface import implements
 from oood_commandtransform import OOOdCommandTransform, OOoDocumentDataStream
+from  oood_commandtransform import includeMetaContentType
 from zLOG import LOG
 from lxml import etree, html
 from lxml.etree import Element, SubElement
@@ -32,6 +33,7 @@ class HTMLToOdt:
   def convert(self, orig, data, cache=None, filename=None, context=None, **kwargs):
     # Try to recover broken HTML documents, specially regarding encoding used
     html_node = etree.XML(orig, parser=html_parser)
+    includeMetaContentType(html_node)
     orig = html.tostring(html_node, encoding='utf-8', method='xml',
                          include_meta_content_type=True)
 

Modified: erp5/trunk/products/ERP5OOo/transforms/oood_commandtransform.py
URL: http://svn.erp5.org/erp5/trunk/products/ERP5OOo/transforms/oood_commandtransform.py?rev=45422&r1=45421&r2=45422&view=diff
==============================================================================
--- erp5/trunk/products/ERP5OOo/transforms/oood_commandtransform.py [utf8] (original)
+++ erp5/trunk/products/ERP5OOo/transforms/oood_commandtransform.py [utf8] Thu Apr 14 11:50:42 2011
@@ -14,6 +14,7 @@ import re
 from lxml import etree
 from lxml import html
 from lxml.etree import ParseError, Element
+from lxml.etree import SubElement
 
 from urllib import unquote
 from urlparse import urlparse
@@ -29,6 +30,20 @@ from Products.ERP5OOo.Document.OOoDocume
 from Products.ERP5OOo.Document.OOoDocument import enc
 from Products.ERP5OOo.Document.OOoDocument import dec
 
+def includeMetaContentType(html_node):
+  """XXX Temporary workaround until the issue is fixed
+  in lxml, where include_meta_content_type is not honoured.
+  Force encoding into utf-8
+  """
+  head = html_node.find('head')
+  if head is None:
+    head = SubElement(html_node, 'head')
+  meta_content_type_node_list = head.xpath('meta[translate('\
+               'attribute::http-equiv, "CONTEYP", "conteyp") = "content-type"]')
+  for meta_content_type_node in meta_content_type_node_list:
+    head.remove(meta_content_type_node)
+  SubElement(head, 'meta', **{'http-equiv': 'Content-Type',
+                              'content': 'application/xhtml+xml; charset=utf-8'})
 
 CLEAN_RELATIVE_PATH = re.compile('^../')
 
@@ -194,8 +209,11 @@ class OOOdCommandTransform(commandtransf
           parent_node.append(style_node)
           style_node.attrib.update({'type': 'text/css'})
           parent_node.remove(css_link_tag)
+
+    includeMetaContentType(xml_doc)
     xml_output = html.tostring(xml_doc, encoding='utf-8', method='xml',
                                include_meta_content_type=True)
+
     xml_output = xml_output.replace('<title/>', '<title></title>')
     return xml_output
 



More information about the Erp5-report mailing list