[Erp5-report] r34092 mame - /erp5/trunk/products/ERP5/Document/

Thu Mar 25 13:18:50 CET 2010

Author: mame
Date: Thu Mar 25 13:18:44 2010
New Revision: 34092

URL: http://svn.erp5.org?rev=34092&view=rev
Log:
-Remove Conversion API in Document.py as it is now in mixin/convertable
-Modified to include methods that return the allowed target item list for
conversion

Modified:
    erp5/trunk/products/ERP5/Document/Document.py
    erp5/trunk/products/ERP5/Document/Image.py
    erp5/trunk/products/ERP5/Document/PDFDocument.py
    erp5/trunk/products/ERP5/Document/TextDocument.py

Modified: erp5/trunk/products/ERP5/Document/Document.py
URL: http://svn.erp5.org/erp5/trunk/products/ERP5/Document/Document.py?rev=34092&r1=34091&r2=34092&view=diff
==============================================================================

--- erp5/trunk/products/ERP5/Document/Document.py [utf8] (original)
+++ erp5/trunk/products/ERP5/Document/Document.py [utf8] Thu Mar 25 13:18:44 2010
@@ -56,6 +56,12 @@
 
 # Mixin Import
 from Products.ERP5.mixin.cached_convertable import CachedConvertableMixin
+from Products.ERP5.mixin.convertable import ConvertableMixin
+from Products.ERP5.mixin.text_convertable import TextConvertableMixin
+from Products.ERP5.mixin.base_convertable import BaseConvertableMixin
+from Products.ERP5.mixin.html_convertable import HTMLConvertableMixin
+from Products.ERP5.mixin.metadata_discoverable import MetadataDiscoverableMixin
+from Products.ERP5.mixin.document import DocumentMixin
 
 _MARKER = []
 VALID_ORDER_KEY_LIST = ('user_login', 'content', 'file_name', 'input')
@@ -306,7 +312,8 @@
     return method()
 
 
-class Document(PermanentURLMixIn, XMLObject, UrlMixIn, CachedConvertableMixin, SnapshotMixin, UpdateMixIn):
+class Document(PermanentURLMixIn, XMLObject, UrlMixIn, ConvertableMixin, TextConvertableMixin,HTMLConvertableMixin,
+          DocumentMixin, BaseConvertableMixin, MetadataDiscoverableMixin, CachedConvertableMixin, SnapshotMixin, UpdateMixIn):
   """Document is an abstract class with all methods related to document
   management in ERP5. This includes searchable text, explicit relations,
   implicit relations, metadata, versions, languages, etc.
@@ -1077,267 +1084,6 @@
     method = self._getTypeBasedMethod('finishIngestion', fallback_script_id='Document_finishIngestion')
     return method()
 
-  # Conversion methods
-  security.declareProtected(Permissions.AccessContentsInformation, 'convert')
-  def convert(self, format, **kw):
-    """
-      Main content conversion function, returns result which should
-      be returned and stored in cache.
-      format - the format specied in the form of an extension
-      string (ex. jpeg, html, text, txt, etc.)
-      **kw can be various things - e.g. resolution
-
-      Default implementation returns an empty string (html, text)
-      or raises an error.
-
-      TODO:
-      - implement guards API so that conversion to certain
-        formats require certain permission
-    """
-    if format == 'html':
-      return 'text/html', '' # XXX - Why ?
-    if format in ('text', 'txt'):
-      return 'text/plain', '' # XXX - Why ?
-    raise NotImplementedError
-
-  security.declareProtected(Permissions.View, 'asSubjectText')
-  def asSubjectText(self, **kw):
-    """
-      Converts the subject of the document to a textual representation.
-    """
-    subject = self.getSubject()
-    if not subject:
-      # XXX not sure if this fallback is a good idea.
-      subject = self.getTitle()
-    if subject is None:
-      subject = ''
-    return str(subject)
-
-  security.declareProtected(Permissions.View, 'asText')
-  def asText(self, **kw):
-    """
-      Converts the content of the document to a textual representation.
-    """
-    kw['format'] = 'txt'
-    mime, data = self.convert(**kw)
-    return str(data)
-
-  security.declareProtected(Permissions.View, 'asEntireHTML')
-  def asEntireHTML(self, **kw):
-    """
-      Returns a complete HTML representation of the document
-      (with body tags, etc.). Adds if necessary a base
-      tag so that the document can be displayed in an iframe
-      or standalone.
-
-      Actual conversion is delegated to _asHTML
-    """
-    html = self._asHTML(**kw)
-    if self.getUrlString():
-      # If a URL is defined, add the base tag
-      # if base is defined yet.
-      html = str(html)
-      if not html.find('<base') >= 0:
-        base = '<base href="%s">' % self.getContentBaseURL()
-        html = html.replace('<head>', '<head>%s' % base)
-      self.setConversion(html, mime='text/html', format='base-html')
-    return html
-
-  security.declarePrivate('_asHTML')
-  def _asHTML(self, **kw):
-    """
-      A private method which converts to HTML. This method
-      is the one to override in subclasses.
-    """
-    if not self.hasBaseData():
-      raise ConversionError('This document has not been processed yet.')
-    try:
-      # FIXME: no substitution may occur in this case.
-      mime, data = self.getConversion(format='base-html')
-      return data
-    except KeyError:
-      kw['format'] = 'html'
-      mime, html = self.convert(**kw)
-      return html
-
-  security.declareProtected(Permissions.View, 'asStrippedHTML')
-  def asStrippedHTML(self, **kw):
-    """
-      Returns a stripped HTML representation of the document
-      (without html and body tags, etc.) which can be used to inline
-      a preview of the document.
-    """
-    if not self.hasBaseData():
-      return ''
-    try:
-      # FIXME: no substitution may occur in this case.
-      mime, data = self.getConversion(format='stripped-html')
-      return data
-    except KeyError:
-      kw['format'] = 'html'
-      mime, html = self.convert(**kw)
-      return self._stripHTML(str(html))
-
-  def _guessEncoding(self, string):
-    """
-      Try to guess the encoding for this string.
-      Returns None if no encoding can be guessed.
-    """
-    try:
-      import chardet
-    except ImportError:
-      return None
-    return chardet.detect(string).get('encoding', None)
-
-  def _stripHTML(self, html, charset=None):
-    """
-      A private method which can be reused by subclasses
-      to strip HTML content
-    """
-    body_list = re.findall(self.body_parser, str(html))
-    if len(body_list):
-      stripped_html = body_list[0]
-    else:
-      stripped_html = html
-    # find charset and convert to utf-8
-    charset_list = self.charset_parser.findall(str(html)) # XXX - Not efficient if this
-                                         # is datastream instance but hard to do better
-    if charset and not charset_list:
-      # Use optional parameter is we can not find encoding in HTML
-      charset_list = [charset]
-    if charset_list and charset_list[0] not in ('utf-8', 'UTF-8'):
-      try:
-        stripped_html = unicode(str(stripped_html),
-                                charset_list[0]).encode('utf-8')
-      except (UnicodeDecodeError, LookupError):
-        return str(stripped_html)
-    return stripped_html
-
-  def _safeHTML(self, html, format='text/x-html-safe', charset=None):
-    """
-      A private method to strip HTML content in safe mode,
-      w/o emmbed javascript, forms and any external plugins imports.
-      This should be used when we do not trust the user (Anonymous)
-      who push data into database.
-      - html: content to strip
-      - format: destination format
-      - charset: charset used to encode string. Take precedence
-      on charset values found in html string
-    """
-    portal = self.getPortalObject()
-    if charset is None:
-      # find charset
-      charset_list = self.charset_parser.findall(html)
-      if charset_list:
-        charset = charset_list[0]
-    if charset and charset not in ('utf-8', 'UTF-8'):
-      try:
-        safe_html_string = html.decode(charset).encode('utf-8')
-      except (UnicodeDecodeError, LookupError):
-        pass
-      else:
-        charset = 'utf-8' # Override charset if convertion succeeds
-    transform_tool = getToolByName(portal, 'portal_transforms')
-    safe_html_string = transform_tool.convertToData(format, html,
-                                                    encoding=charset)
-    return safe_html_string
-
-  security.declareProtected(Permissions.AccessContentsInformation, 'getContentInformation')
-  def getContentInformation(self):
-    """
-    Returns the content information from the HTML conversion.
-    The default implementation tries to build a dictionnary
-    from the HTML conversion of the document and extract
-    the document title.
-    """
-    result = {}
-    html = self.asEntireHTML()
-    if not html: return result
-    title_list = re.findall(self.title_parser, str(html))
-    if title_list:
-      result['title'] = title_list[0]
-    return result
-
-  # Base format support
-  security.declareProtected(Permissions.ModifyPortalContent, 'convertToBaseFormat')
-  def convertToBaseFormat(self, **kw):
-    """
-      Converts the content of the document to a base format
-      which is later used for all conversions. This method
-      is common to all kinds of documents and handles
-      exceptions in a unified way.
-
-      Implementation is delegated to _convertToBaseFormat which
-      must be overloaded by subclasses of Document which
-      need a base format.
-
-      convertToBaseFormat is called upon file upload, document
-      ingestion by the processing_status_workflow.
-
-      NOTE: the data of the base format conversion should be stored
-      using the base_data property. Refer to Document.py propertysheet.
-      Use accessors (getBaseData, setBaseData, hasBaseData, etc.)
-    """
-    if getattr(self, 'hasData', None) is not None and not self.hasData():
-      # Empty document cannot be converted
-      return
-    try:
-      message = self._convertToBaseFormat() # Call implemetation method
-      self.clearConversionCache() # Conversion cache is now invalid
-      if message is None:
-        # XXX Need to translate.
-        message = 'Converted to %s.' % self.getBaseContentType()
-      self.convertFile(comment=message) # Invoke workflow method
-    except NotImplementedError:
-      message = ''
-    return message
-
-  def _convertToBaseFormat(self):
-    """
-    """
-    raise NotImplementedError
-
-  security.declareProtected(Permissions.AccessContentsInformation,
-                            'isSupportBaseDataConversion')
-  def isSupportBaseDataConversion(self):
-    """
-    """
-    return False
-
-  def convertFile(self, **kw): # XXX - It it really useful to explicitly define ?
-    """
-    Workflow transition invoked when conversion occurs.
-    """
-  convertFile = WorkflowMethod(convertFile)
-
-  security.declareProtected(Permissions.AccessContentsInformation,
-                            'getMetadataMappingDict')
-  def getMetadataMappingDict(self):
-    """
-    Return a dict of metadata mapping used to update base metadata of the
-    document
-    """
-    try:
-      method = self._getTypeBasedMethod('getMetadataMappingDict')
-    except KeyError, AttributeError:
-      method = None
-    if method is not None:
-      return method()
-    else:
-      return {}
-
-  security.declareProtected(Permissions.ModifyPortalContent, 'updateBaseMetadata')
-  def updateBaseMetadata(self, **kw):
-    """
-    Update the base format data with the latest properties entered
-    by the user. For example, if title is changed in ERP5 interface,
-    the base format file should be updated accordingly.
-
-    Default implementation does nothing. Refer to OOoDocument class
-    for an example of implementation.
-    """
-    pass
-
   # Transformation API
   security.declareProtected(Permissions.ModifyPortalContent, 'populateContent')
   def populateContent(self):

Modified: erp5/trunk/products/ERP5/Document/Image.py
URL: http://svn.erp5.org/erp5/trunk/products/ERP5/Document/Image.py?rev=34092&r1=34091&r2=34092&view=diff
==============================================================================
--- erp5/trunk/products/ERP5/Document/Image.py [utf8] (original)
+++ erp5/trunk/products/ERP5/Document/Image.py [utf8] Thu Mar 25 13:18:44 2010
@@ -53,6 +53,8 @@
 from zLOG import LOG, WARNING
 
 from Products.CMFCore.utils import getToolByName
+#Mixin import
+from Products.ERP5.mixin.convertable import ConvertableMixin
 
 default_displays_id_list = ('nano', 'micro', 'thumbnail',
                             'xsmall', 'small', 'medium',
@@ -60,7 +62,7 @@
 
 default_formats = ['jpg', 'jpeg', 'png', 'gif', 'pnm', 'ppm']
 
-class Image(File, OFSImage):
+class Image(File, OFSImage, ConvertableMixin):
   """
     An Image is a File which contains image data. It supports
     various conversions of format, size, resolution through
@@ -322,11 +324,36 @@
     return mime_type, result
 
   # Conversion API
+  security.declareProtected(Permissions.View, 'getAllowedTargetItemList')
+  def getAllowedTargetItemList(self):
+    import commands
+    import re
+    import os
+    new_result = []
+    filename = os.path.abspath(self.getSourceReference())
+    result = commands.getstatusoutput('convert -list format %s ' % self.getSourceReference())
+    new_list = re.split('\n',result[1])
+    allowed = []
+    for new_str in new_list:
+      test_str = new_str.lstrip()
+      pattern = re.compile(r'''([A-z]+[*]?\s+[A-z]+\s+[rw+-]+\s+[A-z]+\s+[A-z]+\D+[A-z]+)''',re.VERBOSE)
+      if re.match(pattern,test_str):
+        new_result.append(test_str)
+    
+    len_new_result = len(new_result)
+    for i in range(0,len_new_result):
+      allowed.append(list((new_result[i].split()[1].lower(),' '.join(new_result[i].split()[3:])))) 
+    return [(y, x) for x, y in allowed]
+
   security.declareProtected(Permissions.AccessContentsInformation, 'convert')
   def convert(self, format, display=None, quality=75, resolution=None, frame=None, **kw):
     """
     Implementation of conversion for Image files
     """
+    # Raise an error if the format is not permitted
+    if not self.isTargetFormatPermitted(format):
+      raise Unauthorized("User does not have enough permission to access document"
+				     " in %s format" % (format or 'original'))
     if format in ('text', 'txt', 'html', 'base_html', 'stripped-html'):
       try:
         return self.getConversion(format=format)
@@ -339,7 +366,7 @@
     if (display is not None or resolution is not None or quality != 75 or format != ''\
                             or frame is not None) and image_size:
       kw = dict(display=display, format=format, quality=quality,
-                resolution=resolution, frame=frame, image_size=image_size)
+               resolution=resolution, frame=frame, image_size=image_size)
       try:
         mime, image = self.getConversion(**kw)
       except KeyError:
@@ -369,7 +396,7 @@
     # display may be set from a cookie (?)
     image_size = self.getSizeFromImageDisplay(display)
     kw = dict(display=display, format=format, quality=quality,
-              resolution=resolution, frame=frame, image_size=image_size)
+		  resolution=resolution, frame=frame, image_size=image_size)
     _setCacheHeaders(_ViewEmulator().__of__(self), kw)
 
     if (display is not None or resolution is not None or quality != 75 or format != ''\

Modified: erp5/trunk/products/ERP5/Document/PDFDocument.py
URL: http://svn.erp5.org/erp5/trunk/products/ERP5/Document/PDFDocument.py?rev=34092&r1=34091&r2=34092&view=diff
==============================================================================
--- erp5/trunk/products/ERP5/Document/PDFDocument.py [utf8] (original)
+++ erp5/trunk/products/ERP5/Document/PDFDocument.py [utf8] Thu Mar 25 13:18:44 2010
@@ -37,7 +37,13 @@
 from Products.ERP5.Document.Document import ConversionError
 from Products.ERP5.mixin.cached_convertable import CachedConvertableMixin
 
-class PDFDocument(Image, CachedConvertableMixin):
+
+from zLOG import LOG, WARNING
+# Mixin import
+from Products.ERP5.mixin.convertable import ConvertableMixin
+
+
+class PDFDocument(Image, ConvertableMixin, CachedConvertableMixin):
   """
   PDFDocument is a subclass of Image which is able to
   extract text content from a PDF file either as text
@@ -98,6 +104,11 @@
                             resolution=resolution, frame=frame)
 
   # Conversion API
+  security.declareProtected(Permissions.View, 'getAllowedTargetItemList')
+  def getAllowedTargetItemList(self):
+    return Image.getAllowedTargetItemList(self) + \
+       [('Text', 'txt'),('Plain Text','text'), ('HTML Document', 'html')]
+  
   security.declareProtected(Permissions.AccessContentsInformation, 'convert')
   def convert(self, format, **kw):
     """

Modified: erp5/trunk/products/ERP5/Document/TextDocument.py
URL: http://svn.erp5.org/erp5/trunk/products/ERP5/Document/TextDocument.py?rev=34092&r1=34091&r2=34092&view=diff
==============================================================================
--- erp5/trunk/products/ERP5/Document/TextDocument.py [utf8] (original)
+++ erp5/trunk/products/ERP5/Document/TextDocument.py [utf8] Thu Mar 25 13:18:44 2010
@@ -43,9 +43,12 @@
 except ImportError:
   from Products.ERP5Type.patches.string import Template
 
+# Mixin import
+from Products.ERP5.mixin.convertable import ConvertableMixin
+
 DEFAULT_TEXT_FORMAT = 'text/html'
 
-class TextDocument(Document, TextContent):
+class TextDocument(Document, TextContent, ConvertableMixin):
     """
         A Document contains text which can be formatted using
         *Structured Text* or *HTML*. Text can be automatically translated
@@ -146,6 +149,10 @@
       if format is None:
         # The default is to use ERP5 Forms to render the page
         return self.view()
+      # Raise an error if the format is not permitted
+      if not self.isTargetFormatPermitted(format):
+	raise Unauthorized("User does not have enough permission to access document"
+				" in %s format" % (format or 'original'))
       mime, data = self.convert(format=format) 
       RESPONSE.setHeader('Content-Length', len(str(data))) # XXX - Not efficient 
                                                            # if datastream instance
@@ -200,12 +207,25 @@
         substitution_method_parameter_dict = {}
       return self._substituteTextContent(subject, safe_substitute=safe_substitute,
                                          **substitution_method_parameter_dict)
-
+    
+    security.declareProtected(Permissions.View, 'getAllowedTargetItemList')
+    def getAllowedTargetItemList(self):
+      mime_type = getToolByName(self, 'mimetypes_registry')
+      allowed=[]
+      for extension in mime_type.extensions:
+        allowed.append((mime_type.extensions[extension].name(),extension))
+
+      return [(y, x) for x, y in allowed]
+    
     security.declareProtected(Permissions.AccessContentsInformation, 'convert')
     def convert(self, format, substitution_method_parameter_dict=None, safe_substitute=True, **kw):
       """
         Convert text using portal_transforms or oood
       """
+      # Raise an error if the format is not permitted
+      if not self.isTargetFormatPermitted(format):
+	raise Unauthorized("User does not have enough permission to access document"
+					 " in %s format" % (format or 'original'))
       # Accelerate rendering in Web mode
       _setCacheHeaders(_ViewEmulator().__of__(self), {'format' : format})
       # Return the raw content