[Erp5-report] r25565 - in /erp5/trunk/products/ERP5/Document: Document.py EmailDocument.py
nobody at svn.erp5.org
nobody at svn.erp5.org
Sat Feb 14 11:05:02 CET 2009
Author: jp
Date: Sat Feb 14 11:05:01 2009
New Revision: 25565
URL: http://svn.erp5.org?rev=25565&view=rev
Log:
Move generic HTML processing to where it belongs (i.e. the conversion-handling superclass for now, a mixin some day)
Modified:
erp5/trunk/products/ERP5/Document/Document.py
erp5/trunk/products/ERP5/Document/EmailDocument.py
Modified: erp5/trunk/products/ERP5/Document/Document.py
URL: http://svn.erp5.org/erp5/trunk/products/ERP5/Document/Document.py?rev=25565&r1=25564&r2=25565&view=diff
==============================================================================
--- erp5/trunk/products/ERP5/Document/Document.py [utf8] (original)
+++ erp5/trunk/products/ERP5/Document/Document.py [utf8] Sat Feb 14 11:05:01 2009
@@ -1278,14 +1278,25 @@
A private method which can be reused by subclasses
to strip HTML content
"""
+ def _guessEncoding(self, string):
+ """
+ Some Email Clients indicate wrong encoding
+ This method try to guess which encoding is used.
+ """
+ try:
+ import chardet
+ except ImportError:
+ return None
+ return chardet.detect(string).get('encoding', None)
+
body_list = re.findall(self.body_parser, str(html))
if len(body_list):
stripped_html = body_list[0]
else:
stripped_html = html
# find charset and convert to utf-8
- charset_list = self.charset_parser.findall(str(html)) # XXX - Not efficient is datastream
- # instance but hard to do better
+ charset_list = self.charset_parser.findall(str(html)) # XXX - Not efficient if this
+ # is datastream instance but hard to do better
if charset and not charset_list:
# Use optional parameter is we can not find encoding in HTML
charset_list = [charset]
@@ -1297,6 +1308,7 @@
return str(stripped_html)
return stripped_html
+
security.declareProtected(Permissions.AccessContentsInformation, 'getContentInformation')
def getContentInformation(self):
"""
Modified: erp5/trunk/products/ERP5/Document/EmailDocument.py
URL: http://svn.erp5.org/erp5/trunk/products/ERP5/Document/EmailDocument.py?rev=25565&r1=25564&r2=25565&view=diff
==============================================================================
--- erp5/trunk/products/ERP5/Document/EmailDocument.py [utf8] (original)
+++ erp5/trunk/products/ERP5/Document/EmailDocument.py [utf8] Sat Feb 14 11:05:01 2009
@@ -39,6 +39,8 @@
from Products.ERP5.Document.File import File
from Products.ERP5.Document.Document import ConversionError
from Products.ERP5.Tool.NotificationTool import buildEmailMessage
+
+from zLOG import LOG, INFO
try:
from Products.MimetypesRegistry.common import MimeTypeException
@@ -300,7 +302,9 @@
text_result = message_text.decode(part_encoding).encode('utf-8')
else:
text_result = message_text.decode().encode('utf-8')
- except (UnicodeDecodeError, LookupError):
+ except (UnicodeDecodeError, LookupError), error_message:
+ LOG('EmailDocument.getTextContent', INFO,
+ 'Failed to decode %s TEXT message with error: %s' % (part_encoding, error_message))
codec = self._guessEncoding(message_text)
if codec is not None:
try:
@@ -313,24 +317,12 @@
text_result = message_text
elif part.get_content_type() == 'text/html' and not html_result and not part.is_multipart():
part_encoding = part.get_content_charset()
- message_text = part.get_payload(decode=1)
- if part_encoding != 'utf-8':
- try:
- if part_encoding is not None:
- text_result = message_text.decode(part_encoding).encode('utf-8')
- else:
- text_result = message_text.decode().encode('utf-8')
- except (UnicodeDecodeError, LookupError):
- codec = self._guessEncoding(message_text)
- if codec is not None:
- try:
- text_result = message_text.decode(codec).encode('utf-8')
- except (UnicodeDecodeError, LookupError):
- text_result = repr(message_text)
- else:
- text_result = repr(message_text)
- else:
- text_result = message_text
+ part_html = part.get_payload(decode=1)
+ # Invoke Document class HTML stripper
+ html_result = self._stripHTML(part_html, charset=part_encoding)
+ if html_result:
+ # Give priority to HTML
+ text_result = html_result
if default is _MARKER:
return text_result
return text_result or default
@@ -399,6 +391,8 @@
"""
For FCKEditor Compatibility, we should remove DTD,
blank lines and some tags in html document
+
+ XXX - What is this SHIT !!!!!!!!!!!!!!!!!!!!!!!!!!
"""
if html_text is None:
html_text = self.getTextContent()
@@ -626,17 +620,6 @@
"""
self.MailHost.send(message)
- def _guessEncoding(self, string):
- """
- Some Email Clients indicate wrong encoding
- This method try to guess which encoding is used.
- """
- try:
- import chardet
- except ImportError:
- return None
- return chardet.detect(string).get('encoding', None)
-
## Compatibility layer
#from Products.ERP5Type import Document
#Document.MailMessage = EmailDocument
More information about the Erp5-report
mailing list