[Erp5-report] r25557 - /erp5/trunk/products/ERP5/Document/Document.py
nobody at svn.erp5.org
nobody at svn.erp5.org
Sat Feb 14 10:28:11 CET 2009
Author: jp
Date: Sat Feb 14 10:28:10 2009
New Revision: 25557
URL: http://svn.erp5.org?rev=25557&view=rev
Log:
Make _stripHTML a reusable private method for all subclasses.
Modified:
erp5/trunk/products/ERP5/Document/Document.py
Modified: erp5/trunk/products/ERP5/Document/Document.py
URL: http://svn.erp5.org/erp5/trunk/products/ERP5/Document/Document.py?rev=25557&r1=25556&r2=25557&view=diff
==============================================================================
--- erp5/trunk/products/ERP5/Document/Document.py [utf8] (original)
+++ erp5/trunk/products/ERP5/Document/Document.py [utf8] Sat Feb 14 10:28:10 2009
@@ -1271,6 +1271,13 @@
return data
kw['format'] = 'html'
mime, html = self.convert(**kw)
+ return self._stripHTML(str(html))
+
+ def _stripHTML(self, html, charset=None):
+ """
+ A private method which can be reused by subclasses
+ to strip HTML content
+ """
body_list = re.findall(self.body_parser, str(html))
if len(body_list):
stripped_html = body_list[0]
@@ -1279,6 +1286,9 @@
# find charset and convert to utf-8
charset_list = self.charset_parser.findall(str(html)) # XXX - Not efficient if datastream
# instance but hard to do better
+ if charset and not charset_list:
+ # Use optional parameter if we cannot find encoding in HTML
+ charset_list = [charset]
if charset_list and charset_list[0] not in ('utf-8', 'UTF-8'):
try:
stripped_html = unicode(str(stripped_html),
More information about the Erp5-report
mailing list