[Erp5-report] r16997 - /erp5/trunk/products/ERP5/Document/EmailDocument.py
nobody at svn.erp5.org
nobody at svn.erp5.org
Sun Oct 14 20:35:54 CEST 2007
Author: jp
Date: Sun Oct 14 20:35:54 2007
New Revision: 16997
URL: http://svn.erp5.org?rev=16997&view=rev
Log:
Improved metadata support (file name, content type) based if possible on regular expressions.
Modified:
erp5/trunk/products/ERP5/Document/EmailDocument.py
Modified: erp5/trunk/products/ERP5/Document/EmailDocument.py
URL: http://svn.erp5.org/erp5/trunk/products/ERP5/Document/EmailDocument.py?rev=16997&r1=16996&r2=16997&view=diff
==============================================================================
--- erp5/trunk/products/ERP5/Document/EmailDocument.py (original)
+++ erp5/trunk/products/ERP5/Document/EmailDocument.py Sun Oct 14 20:35:54 2007
@@ -41,6 +41,7 @@
from Products.CMFDefault.utils import isHTMLSafe
from email import message_from_string
+from email.Header import decode_header
from email.Utils import parsedate
from email import Encoders
from email.Message import Message
@@ -53,6 +54,8 @@
DEFAULT_TEXT_FORMAT = 'text/html'
COMMASPACE = ', '
_MARKER = []
+
+file_name_regexp = 'name="([^"]*)"'
class EmailDocument(File, TextDocument):
"""
@@ -110,10 +113,17 @@
"""
Returns the content information from the header information.
This is used by the metadata discovery system.
+
+ Header information is converted in UTF-8 since this is the standard
+ way of representing strings in ERP5.
"""
result = {}
for (name, value) in self._getMessage().items():
- result[name] = value
+ for text, encoding in decode_header(value):
+ if encoding is not None:
+ result[name] = result.get(name, '') + text.decode(encoding).encode('utf-8')
+ else:
+ result[name] = result.get(name, '') + text
return result
security.declareProtected(Permissions.AccessContentsInformation, 'getAttachmentInformationList')
@@ -134,25 +144,65 @@
if kw.has_key('Content-Disposition'):
content_disposition = kw['Content-Disposition']
if content_disposition.split(';')[0] == 'attachment':
- kw['file_name'] = content_disposition.split(';')[1].split('=')[1] # Quick hack - make this better with re
+ file_name = re.findall(file_name_regexp, content_disposition, re.MULTILINE)
+ if file_name:
+ kw['file_name'] = file_name[0]
+ else:
+ kw['file_name'] = 'attachment_%s' % i
elif content_disposition.split(';')[0] == 'inline':
- kw['file_name'] = 'inline_%s' % i
+ file_name = re.findall(file_name_regexp, content_disposition, re.MULTILINE)
+ if file_name:
+ kw['file_name'] = file_name[0]
+ else:
+ kw['file_name'] = 'inline_%s' % i
else:
kw['file_name'] = 'part_%s' % i
+ if kw.has_key('Content-Type'):
+ content_type = kw['Content-Type']
+ file_name = re.findall(file_name_regexp, content_type, re.MULTILINE)
+ if file_name: kw['file_name'] = file_name[0]
+ kw['content_type'] = content_type.split(';')[0]
result.append(kw)
i += 1
return result
security.declareProtected(Permissions.AccessContentsInformation, 'getAttachmentData')
- def getAttachmentData(self, index):
+ def getAttachmentData(self, index, REQUEST=None):
"""
Returns the decoded data of an attachment.
-
- TODO: add support for format in RESPONSE if defined
"""
i = 0
for part in self._getMessage().walk():
if index == i:
+ # This part should be handled in skin script
+ # but it was a bit easier to access items here
+ if REQUEST is not None:
+ kw = dict(part.items())
+ RESPONSE = REQUEST.RESPONSE
+ RESPONSE.setHeader('Accept-Ranges', 'bytes')
+ if kw.has_key('Content-Type'):
+ RESPONSE.setHeader('Content-Type', kw['Content-Type'])
+ content_type = kw['Content-Type']
+ elif kw.has_key('Content-type'):
+ RESPONSE.setHeader('Content-Type', kw['Content-type'])
+ content_type = kw['Content-type']
+ else:
+ content_type = None
+ if kw.has_key('Content-Disposition'):
+ content_disposition = kw['Content-Disposition']
+ elif kw.has_key('Content-disposition'):
+ content_disposition = kw['Content-disposition']
+ else:
+ content_disposition = None
+ file_name = None
+ if content_type:
+ file_name = re.findall(file_name_regexp, content_type, re.MULTILINE)
+ if content_disposition:
+ if not file_name:
+ file_name = re.findall(file_name_regexp, content_disposition, re.MULTILINE)
+ if file_name:
+ file_name = file_name[0]
+ RESPONSE.setHeader('Content-disposition', 'attachment;; filename="%s"' % file_name)
return part.get_payload(decode=1)
i += 1
return KeyError, "No attachment with index %s" % index
More information about the Erp5-report
mailing list