[Erp5-report] r41898 kazuhiko - /erp5/trunk/products/ERP5/Document/TextDocument.py
nobody at svn.erp5.org
nobody at svn.erp5.org
Fri Dec 31 00:33:26 CET 2010
Author: kazuhiko
Date: Fri Dec 31 00:33:26 2010
New Revision: 41898
URL: http://svn.erp5.org?rev=41898&view=rev
Log:
Since an XML document may declare its own encoding, changing only the byte encoding is not enough. Instead, we try to parse the document and re-output it as UTF-8 XML with a matching, valid encoding declaration.
Modified:
erp5/trunk/products/ERP5/Document/TextDocument.py
Modified: erp5/trunk/products/ERP5/Document/TextDocument.py
URL: http://svn.erp5.org/erp5/trunk/products/ERP5/Document/TextDocument.py?rev=41898&r1=41897&r2=41898&view=diff
==============================================================================
--- erp5/trunk/products/ERP5/Document/TextDocument.py [utf8] (original)
+++ erp5/trunk/products/ERP5/Document/TextDocument.py [utf8] Fri Dec 31 00:33:26 2010
@@ -47,6 +47,7 @@ except ImportError:
from Products.ERP5Type.Utils import guessEncodingFromText
from lxml import html as etree_html
+from lxml import etree
class TextDocument(CachedConvertableMixin, BaseConvertableFileMixin,
TextContent, File):
@@ -306,7 +307,15 @@ class TextDocument(CachedConvertableMixi
content_type = self.getContentType() or DEFAULT_CONTENT_TYPE
text_content = self.getData()
- if content_type == 'text/html':
+ if content_type.endswith('xml'):
+ try:
+ tree = etree.fromstring(text_content)
+ text_content = etree.tostring(tree, encoding='utf-8', xml_declaration=True)
+ content_type = 'application/xml'
+ message = 'Conversion to base format succeeds'
+ except etree.XMLSyntaxError:
+ message = 'Conversion to base format without codec fails'
+ elif content_type == 'text/html':
re_match = self.charset_parser.search(text_content)
message = 'Conversion to base format succeeds'
if re_match is not None:
More information about the Erp5-report
mailing list