[Erp5-report] r34397 nicolas - /erp5/trunk/products/ERP5/Document/EmailDocument.py

nobody at svn.erp5.org nobody at svn.erp5.org
Thu Apr 8 16:48:44 CEST 2010


Author: nicolas
Date: Thu Apr  8 16:48:44 2010
New Revision: 34397

URL: http://svn.erp5.org?rev=34397&view=rev
Log:
Change ingested message introspection for getTextContent
  According to RFC 2046 (http://tools.ietf.org/html/rfc2046#section-5.1.4),
  getTextContent should return the HTML part of a multipart/alternative pair.
  In a multipart/mixed message, the HTML part is an attachment, not the message
  itself, so return the main content (the best match in the multipart/alternative pair).

Modified:
    erp5/trunk/products/ERP5/Document/EmailDocument.py

Modified: erp5/trunk/products/ERP5/Document/EmailDocument.py
URL: http://svn.erp5.org/erp5/trunk/products/ERP5/Document/EmailDocument.py?rev=34397&r1=34396&r2=34397&view=diff
==============================================================================
--- erp5/trunk/products/ERP5/Document/EmailDocument.py [utf8] (original)
+++ erp5/trunk/products/ERP5/Document/EmailDocument.py [utf8] Thu Apr  8 16:48:44 2010
@@ -415,7 +415,11 @@
     """
     Returns the content of the email as text. This is useful
     to display the content of an email.
-    
+
+    According to rfc, (http://tools.ietf.org/html/rfc2046#section-5.1.4)
+    getTextContent should return html part of multipart/alternative couple
+    If multipart/mixed, the html part is an attachement. So return the
+    main content (text/plain).
     TODO: add support for legacy objects
     """
     if not self.hasFile() or self._baseGetTextContent() is not None:
@@ -429,8 +433,14 @@
     # find from mail message
     text_result = None
     html_result = None
+    is_alternative = False
     for part in self._getMessage().walk():
-      if part.get_content_type() == 'text/plain' and not text_result and not part.is_multipart():
+      if part.is_multipart():
+        if part.get_content_type() == 'multipart/alternative':
+          is_alternative = True
+        else:
+          is_alternative = False
+      elif part.get_content_type() == 'text/plain' and not is_alternative:
         part_encoding = part.get_content_charset()
         message_text = part.get_payload(decode=1)
         if part_encoding != 'utf-8':
@@ -453,16 +463,16 @@
               text_result = repr(message_text)
         else:
           text_result = message_text
-      elif part.get_content_type() == 'text/html' and not html_result and not part.is_multipart():
+        break
+      elif part.get_content_type() == 'text/html' and is_alternative:
         part_encoding = part.get_content_charset()
         part_html = part.get_payload(decode=1)
         # Invoke Document class HTML stripper
-        mime, html_result = self.convert(format='html',
+        mime, text_result = self.convert(format='html',
                                          text_content=part_html,
                                          charset=part_encoding)
-    if html_result:
-      # Give priority to HTML
-      text_result = html_result
+        break
+
     if default is _MARKER:
       return text_result
     return text_result or default
@@ -480,8 +490,14 @@
         return self._baseGetTextFormat()
       else:
         return self._baseGetTextFormat(default)
+    is_alternative = False
     for part in self._getMessage().walk():
-      if part.get_content_type() == 'text/html' and not part.is_multipart():
+      if part.is_multipart():
+        if part.get_content_type() == 'multipart/alternative':
+          is_alternative = True
+        else:
+          is_alternative = False
+      elif part.get_content_type() == 'text/html' and is_alternative:
         return 'text/html'
     return 'text/plain'
 




More information about the Erp5-report mailing list