[Erp5-report] r45422 nicolas - /erp5/trunk/products/ERP5OOo/transforms/
nobody at svn.erp5.org
nobody at svn.erp5.org
Thu Apr 14 11:50:42 CEST 2011
Author: nicolas
Date: Thu Apr 14 11:50:42 2011
New Revision: 45422
URL: http://svn.erp5.org?rev=45422&view=rev
Log:
Work around a bug in lxml where the include_meta_content_type parameter is not honoured.
A ticket will be filed with the lxml maintainers as a follow-up to this patch.
As soon as a clean fix is released, this commit must be reverted.
Modified:
erp5/trunk/products/ERP5OOo/transforms/html_to_odt.py
erp5/trunk/products/ERP5OOo/transforms/oood_commandtransform.py
Modified: erp5/trunk/products/ERP5OOo/transforms/html_to_odt.py
URL: http://svn.erp5.org/erp5/trunk/products/ERP5OOo/transforms/html_to_odt.py?rev=45422&r1=45421&r2=45422&view=diff
==============================================================================
--- erp5/trunk/products/ERP5OOo/transforms/html_to_odt.py [utf8] (original)
+++ erp5/trunk/products/ERP5OOo/transforms/html_to_odt.py [utf8] Thu Apr 14 11:50:42 2011
@@ -2,6 +2,7 @@
from Products.PortalTransforms.interfaces import itransform
from zope.interface import implements
from oood_commandtransform import OOOdCommandTransform, OOoDocumentDataStream
+from oood_commandtransform import includeMetaContentType
from zLOG import LOG
from lxml import etree, html
from lxml.etree import Element, SubElement
@@ -32,6 +33,7 @@ class HTMLToOdt:
def convert(self, orig, data, cache=None, filename=None, context=None, **kwargs):
# Try to recover broken HTML documents, especially regarding the encoding used
html_node = etree.XML(orig, parser=html_parser)
+ includeMetaContentType(html_node)
orig = html.tostring(html_node, encoding='utf-8', method='xml',
include_meta_content_type=True)
Modified: erp5/trunk/products/ERP5OOo/transforms/oood_commandtransform.py
URL: http://svn.erp5.org/erp5/trunk/products/ERP5OOo/transforms/oood_commandtransform.py?rev=45422&r1=45421&r2=45422&view=diff
==============================================================================
--- erp5/trunk/products/ERP5OOo/transforms/oood_commandtransform.py [utf8] (original)
+++ erp5/trunk/products/ERP5OOo/transforms/oood_commandtransform.py [utf8] Thu Apr 14 11:50:42 2011
@@ -14,6 +14,7 @@ import re
from lxml import etree
from lxml import html
from lxml.etree import ParseError, Element
+from lxml.etree import SubElement
from urllib import unquote
from urlparse import urlparse
@@ -29,6 +30,20 @@ from Products.ERP5OOo.Document.OOoDocume
from Products.ERP5OOo.Document.OOoDocument import enc
from Products.ERP5OOo.Document.OOoDocument import dec
+def includeMetaContentType(html_node):
+ """XXX Temporary workaround, to be removed once lxml is fixed:
+ the include_meta_content_type parameter is not honoured by lxml.
+
+ Look up the 'head' child of html_node (creating it when missing),
+ remove its existing Content-Type 'meta' children and add a 'meta'
+ element that forces the encoding to utf-8.
+ html_node is modified in place; nothing is returned.
+ """
+ head = html_node.find('head')
+ if head is None:
+ head = SubElement(html_node, 'head')
+ # translate() lowercases the letters of "Content-Type" so that the
+ # http-equiv attribute is matched case-insensitively.
+ meta_content_type_node_list = head.xpath('meta[translate('\
+ 'attribute::http-equiv, "CONTEYP", "conteyp") = "content-type"]')
+ for meta_content_type_node in meta_content_type_node_list:
+ head.remove(meta_content_type_node)
+ # Declare the utf-8 charset explicitly through a Content-Type meta element.
+ SubElement(head, 'meta', **{'http-equiv': 'Content-Type',
+ 'content': 'application/xhtml+xml; charset=utf-8'})
CLEAN_RELATIVE_PATH = re.compile('^../')
@@ -194,8 +209,11 @@ class OOOdCommandTransform(commandtransf
parent_node.append(style_node)
style_node.attrib.update({'type': 'text/css'})
parent_node.remove(css_link_tag)
+
+ includeMetaContentType(xml_doc)
xml_output = html.tostring(xml_doc, encoding='utf-8', method='xml',
include_meta_content_type=True)
+
xml_output = xml_output.replace('<title/>', '<title></title>')
return xml_output
More information about the Erp5-report
mailing list