[Erp5-report] r6332 - /erp5/trunk/products/ERP5/Document/MailMessage.py

nobody at svn.erp5.org nobody at svn.erp5.org
Tue Mar 28 13:30:17 CEST 2006


Author: kevin
Date: Tue Mar 28 13:30:14 2006
New Revision: 6332

URL: http://svn.erp5.org?rev=6332&view=rev
Log:
Differentiate "no-encoded" body (= "8bit", "7bit" and "binary" encoding) and not-supported endoding by introducing "None" statement in the encoding dict.
Add support for 'quoted-printable' encoding in python 2.3 environments.
Don't modify attachments on edit.
Always save a clean mail body when edit.
Make sure charset declaration and body encoding are consistent.
Refactor API and add lots of usefull methods.
Add some generic methods to edit and get the header and body.

Modified:
    erp5/trunk/products/ERP5/Document/MailMessage.py

Modified: erp5/trunk/products/ERP5/Document/MailMessage.py
URL: http://svn.erp5.org/erp5/trunk/products/ERP5/Document/MailMessage.py?rev=6332&r1=6331&r2=6332&view=diff
==============================================================================
--- erp5/trunk/products/ERP5/Document/MailMessage.py (original)
+++ erp5/trunk/products/ERP5/Document/MailMessage.py Tue Mar 28 13:30:14 2006
@@ -40,20 +40,38 @@
 
 from zLOG import LOG
 
-# API of base64 has changed between python v2.3 and v2.4
+# TODO: support "from"/"to" field header QP decoding
+
+# Support mail decoding in both python v2.3 and v2.4.
+# See http://www.freesoft.org/CIE/RFC/1521/5.htm for 'content-transfer-encoding' explaination.
 import base64
-global supported_encoding
-supported_encoding = {}
+global supported_decoding
+supported_decoding = {}
 try:
   # python v2.4 API
-  supported_encoding = { 'base64': base64.b64decode
-                       , 'base32': base64.b32decode
-                       , 'base16': base64.b16decode
-                       }
+  supported_decoding = {
+      'base64'          : base64.b64decode
+    , 'base32'          : base64.b32decode
+    , 'base16'          : base64.b16decode
+#    , 'quoted-printable': None
+#    , 'uuencode'        : None
+    # "8bit", "7bit", and "binary" values all mean that NO encoding has been performed
+    , '8bit'            : None
+    , '7bit'            : None
+    , 'binary'          : None
+    }
 except AttributeError:
   # python v2.3 API
-  supported_encoding = { 'base64': base64.decodestring
-                       }
+  import binascii
+  supported_decoding = {
+      'base64'          : base64.decodestring
+    , 'quoted-printable': binascii.a2b_qp
+#    , 'uuencode'        : None
+    # "8bit", "7bit", and "binary" values all mean that NO encoding has been performed
+    , '8bit'            : None
+    , '7bit'            : None
+    , 'binary'          : None
+    }
 
 
 class MailMessage(XMLObject, Event, CMFMailInMessage):
@@ -61,11 +79,11 @@
     MailMessage subclasses Event objects to implement Email Events.
   """
 
-  meta_type = 'ERP5 Mail Message'
-  portal_type = 'Mail Message'
-  add_permission = Permissions.AddPortalContent
+  meta_type       = 'ERP5 Mail Message'
+  portal_type     = 'Mail Message'
+  add_permission  = Permissions.AddPortalContent
   isPortalContent = 1
-  isRADContent = 1
+  isRADContent    = 1
 
   # Declarative security
   security = ClassSecurityInfo()
@@ -81,38 +99,81 @@
                     )
 
   def __init__(self, *args, **kw):
-    kw = self.cleanIncomingMessage(**kw)
     XMLObject.__init__(self, *args, **kw)
-
-  def _edit(self, *args, **kw):
-    #LOG('MailMessage._edit', 0, str(kw))
-    kw = self.cleanIncomingMessage(**kw)
-    XMLObject._edit(self, *args, **kw)
-
-  def cleanIncomingMessage(self, **kw):
-    """
-      Clean up the the message data that came from the portal_mailin tool.
-    """
-    # Delete attachments
+    # Save attachments in a special variable
     attachments = kw.get('attachments', {})
     if kw.has_key('attachments'):
       del kw['attachments']
     self.attachments = attachments
-    # Decode MIME base64/32/16 data
-    if kw.has_key('header') and kw['header'].has_key('content-transfer-encoding'):
-      content_encoding = kw['header']['content-transfer-encoding']
-      if content_encoding in supported_encoding.keys():
-        method = supported_encoding[content_encoding]
-        kw['body'] = method(kw['body'])
-        del kw['header']['content-transfer-encoding']
-    return kw
-
-  def getBodyEncoding(self):
-    """
-      Extract the charset encoding from mail header.
-    """
-    charset = None
+    # Clean up the the message data that came from the portal_mailin tool.
+    self.cleanMessage(**kw)
+
+  def _edit(self, *args, **kw):
+    XMLObject._edit(self, *args, **kw)
+    # Input body is already clean because it came from ERP5 GUI
+    self.cleanMessage(clean_body=True, **kw)
+
+  def cleanMessage(self, clean_body=False, **kw):
+    """
+      Clean up the the message data to have UTF-8 encoded body and a clean header.
+    """
+    # Get a decoded body in UTF-8
+    if clean_body == True:
+      # Assume that the inputted charset is always UTF-8 and decoded
+      new_body = kw['body']
+    else:
+      # Autodetect the charset encoding via header and get a clean body
+      new_body = self.getBody()
+    # Update the body to the clean one
+    self.body = new_body
+    # Update the charset and the encoding since the body is known has 'cleaned'
     header = self.getHeader()
+    if header != None:
+      header = self.setBodyCharsetFromDict(header, charset="utf-8")
+      header['content-transfer-encoding'] = "binary"
+    self.header = header
+
+  def getDecodedBody(self, raw_body, encoding):
+    """
+      This method return a decoded body according the given parameter.
+      This method use the global "supported_decoding" dict which contain decoded
+        methods supported by the current python environnment.
+    """
+    decoded_body = raw_body
+    if encoding in supported_decoding.keys():
+      method = supported_decoding[encoding]
+      # Is the body encoded ?
+      if method != None:
+        decoded_body = method(raw_body)
+    elif encoding not in (None, ''):
+      raise 'MailMessage Body Decoding Error', "Body encoding '%s' is not supported" % (encoding)
+    return decoded_body
+
+  def getEncodedBody(self, body, output_charset="utf-8"):
+    """
+      Return the entire body message encoded in the given charset.
+    """
+    header       = self.getHeader()
+    body_charset = self.getBodyCharsetFromDict(header)
+    if body_charset != None and body_charset.lower() != output_charset.lower():
+      unicode_body = unicode(body, body_charset)
+      return unicode_body.encode(output_charset)
+    return body
+
+  def getBodyEncodingFromDict(self, header={}):
+    """
+      Extract the encoding of the body from header metadatas.
+    """
+    encoding = None
+    if type(header) == type({}) and header.has_key('content-transfer-encoding'):
+      encoding = header['content-transfer-encoding']
+    return encoding
+
+  def getBodyCharsetFromDict(self, header):
+    """
+      Extract the charset from the header.
+    """
+    charset = "utf-8"
     if header != None and header.has_key('content-type'):
       content_type = header['content-type'].replace('\n', ' ')
       content_type_info = content_type.split(';')
@@ -126,16 +187,37 @@
           break
     return charset
 
-  def getEncodedBody(self, output_charset="utf-8"):
-    """
-      Return the entire body message encoded in the given charset.
-    """
-    body_charset = self.getBodyEncoding()
-    if body_charset == None or body_charset.lower() == output_charset.lower():
-      return self.getBody()
-    else:
-      unicode_body = unicode(self.getBody(), body_charset)
-      return unicode_body.encode(output_charset)
+  def setBodyCharsetFromDict(self, header, charset):
+    """
+      This method update charset info of the body.
+    """
+    if header != None:
+      # Update content-type where charset is stored
+      content_type_info = []
+      if header.has_key('content-type'):
+        content_type = header['content-type'].replace('\n', ' ')
+        content_type_info = content_type.split(';')
+      # Force content-type charset to UTF-8
+      new_content_type_metadata = []
+      # Get previous info
+      for ct_info in content_type_info:
+        info = ct_info.strip().lower()
+        # Bypass previous charset info
+        if not info.startswith('charset='):
+          new_content_type_metadata.append(ct_info.strip())
+      # Add a new charset info consistent with the actual body charset encoding
+      new_content_type_metadata.append("charset='%s'" % (charset))
+      # Inject new content-type in the header
+      header['content-type'] = ";\n ".join(new_content_type_metadata)
+    return header
+
+  def updateCharset(self, charset="utf-8"):
+    """
+      This method update charset info stored in the header.
+      Usefull to manually debug bad emails.
+    """
+    header = self.getHeader()
+    self.header = self.setBodyCharsetFromDict(header, charset)
 
   def getHeader(self):
     """
@@ -145,10 +227,39 @@
     if header == None or type(header) == type({}):
       return header
     elif type(header) == type(''):
-      # Must do an 'eval' because the header is a dict stored as a text (see ERP5/PropertySheet/MailMessage.py)i
+      # Must do an 'eval' because the header is a dict stored as a text (see ERP5/PropertySheet/MailMessage.py)
       return eval(header)
     else:
-      raise 'TypeError', "Type of 'header' property can't be determined."
+      raise 'TypeError', "Type of 'header' property can't be guessed."
+
+  def getBody(self):
+    """
+      Get a clean decoded body.
+    """
+    encoding = self.getBodyEncodingFromDict(self.getHeader())
+    body_string = self.getDecodedBody(self.body, encoding)
+    return self.getEncodedBody(body_string, output_charset="utf-8")
+
+  def getReplyBody(self):
+    """
+      This is used in order to respond to a mail,
+      this put a '> ' before each line of the body
+    """
+    reply_body = ''
+    body = self.getBody()
+    if type(body) is type('a'):
+      reply_body = '> ' + body.replace('\n', '\n> ')
+    return reply_body
+
+  def getReplySubject(self):
+    """
+      This is used in order to respond to a mail,
+      this put a 'Re: ' before the orignal subject
+    """
+    reply_subject = self.getTitle()
+    if reply_subject.find('Re: ') != 0:
+      reply_subject = 'Re: ' + reply_subject
+    return reply_subject
 
   def send(self, from_url=None, to_url=None, msg=None, subject=None):
     """
@@ -160,29 +271,9 @@
     if to_url == None:
       to_url = self.getSender()
     if msg is not None and subject is not None:
-      header = "From: %s\n" % from_url
-      header += "To: %s\n\n" % to_url
+      header  = "From: %s\n"    % from_url
+      header += "To: %s\n\n"    % to_url
       header += "Subject: %s\n" % subject
       header += "\n"
       msg = header + msg
       self.MailHost.send( msg )
-
-  def getReplyBody(self):
-    """
-      This is used in order to respond to a mail,
-      this put a '> ' before each line of the body
-    """
-    reply_body = ''
-    if type(self.body) is type('a'):
-      reply_body = '> ' + self.body.replace('\n','\n> ')
-    return reply_body
-
-  def getReplySubject(self):
-    """
-      This is used in order to respond to a mail,
-      this put a 'Re: ' before the orignal subject
-    """
-    reply_subject = self.getTitle()
-    if reply_subject.find('Re: ')!=0:
-      reply_subject = 'Re: ' + reply_subject
-    return reply_subject




More information about the Erp5-report mailing list