[Erp5-report] r38375 nicolas - /erp5/trunk/products/ERP5/Tool/ContributionTool.py

Tue Sep 14 22:25:25 CEST 2010

Author: nicolas
Date: Tue Sep 14 22:25:22 2010
New Revision: 38375

URL: http://svn.erp5.org?rev=38375&view=rev
Log:
Improve filename detection from http requests.
  - First read content-disposition header
  - If it is not found, read the real URL used to download the file,
    taking HTTP redirections into account.
Small refactoring to use dedicated utilities to parse URLs and extract the filename.

Modified:
    erp5/trunk/products/ERP5/Tool/ContributionTool.py

Modified: erp5/trunk/products/ERP5/Tool/ContributionTool.py
URL: http://svn.erp5.org/erp5/trunk/products/ERP5/Tool/ContributionTool.py?rev=38375&r1=38374&r2=38375&view=diff
==============================================================================

--- erp5/trunk/products/ERP5/Tool/ContributionTool.py [utf8] (original)
+++ erp5/trunk/products/ERP5/Tool/ContributionTool.py [utf8] Tue Sep 14 22:25:22 2010
@@ -33,6 +33,9 @@ import string
 import socket
 import md5
 import urllib2, urllib
+import urlparse
+from cgi import parse_header
+import os
 
 from AccessControl import ClassSecurityInfo, getSecurityManager
 from Products.ERP5Type.Globals import InitializeClass, DTMLFile
@@ -166,17 +169,29 @@ class ContributionTool(BaseTool):
       file = cStringIO.StringIO()
       file.write(data)
       file.seek(0)
-      # Create a file name based on the URL and quote it
-      file_name = url.split('/')[-1] or url.split('/')[-2]
-      file_name = urllib.quote(file_name, safe='')
-      file_name = file_name.replace('%', '')
+      # if a content-disposition header is present,
+      # try first to read the suggested filename from it.
+      header_info = url_file.info()
+      content_disposition = header_info.getheader('content-disposition', '')
+      file_name = parse_header(content_disposition)[1].get('filename')
+      if not file_name:
+        # Now read the filename from url.
+        # In case of http redirection, the real url must be read
+        # from file object returned by urllib2.urlopen.
+        # This can happen when the 'Location' header is present in the response.
+        # See http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.30
+        url = url_file.geturl()
+        # Create a file name based on the URL and quote it
+        file_name = urlparse.urlsplit(url)[-3]
+        file_name = os.path.basename(file_name)
+        file_name = urllib.quote(file_name, safe='')
+        file_name = file_name.replace('%', '')
       # For URLs, we want an id by default equal to the encoded URL
-      if id is None: id = self.encodeURL(url)
-      if hasattr(url_file, 'headers'):
-        headers = url_file.headers
-        if hasattr(headers, 'type'):
-          mime_type = headers.type
-          kw['content_type'] = mime_type
+      if id is None:
+        id = self.encodeURL(url)
+      content_type = header_info.gettype()
+      if content_type:
+        kw['content_type'] = content_type
       kw['file'] = file
 
     # If the portal_type was provided, we can go faster