[Erp5-report] r34397 nicolas - /erp5/trunk/products/ERP5/Document/EmailDocument.py
nobody at svn.erp5.org
nobody at svn.erp5.org
Thu Apr 8 16:48:44 CEST 2010
Author: nicolas
Date: Thu Apr 8 16:48:44 2010
New Revision: 34397
URL: http://svn.erp5.org?rev=34397&view=rev
Log:
Change ingested message introspection for getTextContent
According to the RFC (http://tools.ietf.org/html/rfc2046#section-5.1.4),
getTextContent should return the HTML part of a multipart/alternative couple.
If multipart/mixed, the HTML part is an attachment, not the message. So return the
main content (best match in multipart/alternative couple).
Modified:
erp5/trunk/products/ERP5/Document/EmailDocument.py
Modified: erp5/trunk/products/ERP5/Document/EmailDocument.py
URL: http://svn.erp5.org/erp5/trunk/products/ERP5/Document/EmailDocument.py?rev=34397&r1=34396&r2=34397&view=diff
==============================================================================
--- erp5/trunk/products/ERP5/Document/EmailDocument.py [utf8] (original)
+++ erp5/trunk/products/ERP5/Document/EmailDocument.py [utf8] Thu Apr 8 16:48:44 2010
@@ -415,7 +415,11 @@
"""
Returns the content of the email as text. This is useful
to display the content of an email.
-
+
+ According to rfc, (http://tools.ietf.org/html/rfc2046#section-5.1.4)
+ getTextContent should return html part of multipart/alternative couple
+ If multipart/mixed, the html part is an attachement. So return the
+ main content (text/plain).
TODO: add support for legacy objects
"""
if not self.hasFile() or self._baseGetTextContent() is not None:
@@ -429,8 +433,14 @@
# find from mail message
text_result = None
html_result = None
+ is_alternative = False
for part in self._getMessage().walk():
- if part.get_content_type() == 'text/plain' and not text_result and not part.is_multipart():
+ if part.is_multipart():
+ if part.get_content_type() == 'multipart/alternative':
+ is_alternative = True
+ else:
+ is_alternative = False
+ elif part.get_content_type() == 'text/plain' and not is_alternative:
part_encoding = part.get_content_charset()
message_text = part.get_payload(decode=1)
if part_encoding != 'utf-8':
@@ -453,16 +463,16 @@
text_result = repr(message_text)
else:
text_result = message_text
- elif part.get_content_type() == 'text/html' and not html_result and not part.is_multipart():
+ break
+ elif part.get_content_type() == 'text/html' and is_alternative:
part_encoding = part.get_content_charset()
part_html = part.get_payload(decode=1)
# Invoke Document class HTML stripper
- mime, html_result = self.convert(format='html',
+ mime, text_result = self.convert(format='html',
text_content=part_html,
charset=part_encoding)
- if html_result:
- # Give priority to HTML
- text_result = html_result
+ break
+
if default is _MARKER:
return text_result
return text_result or default
@@ -480,8 +490,14 @@
return self._baseGetTextFormat()
else:
return self._baseGetTextFormat(default)
+ is_alternative = False
for part in self._getMessage().walk():
- if part.get_content_type() == 'text/html' and not part.is_multipart():
+ if part.is_multipart():
+ if part.get_content_type() == 'multipart/alternative':
+ is_alternative = True
+ else:
+ is_alternative = False
+ elif part.get_content_type() == 'text/html' and is_alternative:
return 'text/html'
return 'text/plain'
More information about the Erp5-report
mailing list