[Erp5-report] r27932 - /erp5/trunk/products/ERP5/Document/Document.py

Fri Jul 3 19:08:22 CEST 2009

Author: nicolas
Date: Fri Jul  3 19:08:22 2009
New Revision: 27932

URL: http://svn.erp5.org?rev=27932&view=rev
Log:
* Clean docstrings as they have moved to the IDocument interface
* Conversion cache data is no longer stored on the Document itself.
  A Cache Plugin is now used to handle storage of conversions.
  Flare is used by default as the persistent storage backend.

Modified:
    erp5/trunk/products/ERP5/Document/Document.py

Modified: erp5/trunk/products/ERP5/Document/Document.py
URL: http://svn.erp5.org/erp5/trunk/products/ERP5/Document/Document.py?rev=27932&r1=27931&r2=27932&view=diff
==============================================================================

--- erp5/trunk/products/ERP5/Document/Document.py [utf8] (original)
+++ erp5/trunk/products/ERP5/Document/Document.py [utf8] Fri Jul  3 19:08:22 2009
@@ -1,3 +1,4 @@
+# -*- coding: utf-8 -*-
 ##############################################################################
 #
 # Copyright (c) 2002 Nexedi SARL and Contributors. All Rights Reserved.
@@ -44,11 +45,13 @@
 from Products.ERP5Type.Base import WorkflowMethod
 from Products.ERP5Type.TransactionalVariable import getTransactionalVariable
 from Products.ERP5Type.ExtensibleTraversable import ExtensibleTraversableMixIn
-from Products.ERP5Type.Cache import getReadOnlyTransactionCache
+from Products.ERP5Type.Cache import getReadOnlyTransactionCache, DEFAULT_CACHE_SCOPE
 from Products.ERP5.Document.Url import UrlMixIn
 from Products.ERP5.Tool.ContributionTool import MAX_REPEAT
 from Products.ERP5Type.UnrestrictedMethod import UnrestrictedMethod
 from AccessControl import Unauthorized
+import zope.interface
+import string
 
 _MARKER = []
 VALID_ORDER_KEY_LIST = ('user_login', 'content', 'file_name', 'input')
@@ -59,6 +62,12 @@
   items = kw.items()
   items.sort()
   return tuple(items)
+
+def generateCacheId(**kw):
+  """Generate proper cache id based on **kw.
+  Function inspired from ERP5Type.Cache
+  """
+  return str(makeSortedTuple(kw)).translate(string.maketrans('', ''), """[]()<>'", """)
 
 class SnapshotMixin:
   """
@@ -115,134 +124,94 @@
     Versions are stored in dictionaries; the class stores also
     generation time of every format and its mime-type string.
     Format can be a string or a tuple (e.g. format, resolution).
-
-    TODO:
-    * Implement ZODB BLOB
   """
-  # time of generation of various formats
-  _cached_time = None # Defensive programming - prevent caching to RAM
-  # generated files (cache)
-  _cached_data = None # Defensive programming - prevent caching to RAM
-  # mime types for cached formats XXX to be refactored
-  _cached_mime = None # Defensive programming - prevent caching to RAM
 
   # Declarative security
   security = ClassSecurityInfo()
   security.declareObjectProtected(Permissions.AccessContentsInformation)
 
+  def _getCacheFactory(self):
+    """
+    """
+    cache_tool = getToolByName(self, 'portal_caches')
+    preference_tool = getToolByName(self, 'portal_preferences')
+    cache_factory_name = preference_tool.getPreferredConversionCacheFactory()
+    return cache_tool.getRamCacheRoot().get(cache_factory_name)
+
   security.declareProtected(Permissions.ModifyPortalContent, 'clearConversionCache')
   def clearConversionCache(self):
     """
-    Clear cache (invoked by interaction workflow upon file upload
-    needed here to overwrite class attribute with instance attrs
-    """
-    self._cached_time = PersistentMapping()
-    self._cached_data = PersistentMapping()
-    self._cached_size = PersistentMapping()
-    self._cached_mime = PersistentMapping()
+    """
+    for cache_plugin in self._getCacheFactory().getCachePluginList():
+      cache_plugin.delete(self.getPath(), DEFAULT_CACHE_SCOPE)
 
   security.declareProtected(Permissions.View, 'updateConversionCache')
   def updateConversionCache(self):
-    aself = aq_base(self)
-    if getattr(aself, '_cached_time', None) is None or self._cached_time is None:
-      self._cached_time = PersistentMapping()
-    if getattr(aself, '_cached_data', None) is None or self._cached_data is None:
-      self._cached_data = PersistentMapping()
-    if getattr(aself, '_cached_size', None) is None or self._cached_size is None:
-      self._cached_size = PersistentMapping()
-    if getattr(aself, '_cached_mime', None) is None or self._cached_mime is None:
-      self._cached_mime = PersistentMapping()
+    """
+    """
+    cache_factory = self._getCacheFactory()
+    cache_duration = cache_factory.cache_duration
+    for cache_plugin in cache_factory.getCachePluginList():
+      cache_plugin.initCacheStorage()
+      cache_dict = cache_plugin.get(self.getPath(), DEFAULT_CACHE_SCOPE)
+      if cache_dict is None:
+        cache_dict = {}
+        cache_plugin.set(self.getPath(), DEFAULT_CACHE_SCOPE, cache_dict, cache_duration=cache_duration)
 
   security.declareProtected(Permissions.View, 'hasConversion')
-  def hasConversion(self, **format):
-    """
-      Checks whether we have a version in this format
+  def hasConversion(self, **kw):
+    """
     """
     self.updateConversionCache()
-    return self._cached_data.has_key(makeSortedTuple(format))
-
-  security.declareProtected(Permissions.View, 'getCacheTime')
-  def getCacheTime(self, **format):
-    """
-      Checks when if ever was the file produced
+    cache_id = generateCacheId(**kw)
+    plugin_list = self._getCacheFactory().getCachePluginList()
+    # Return False if there is no plugin list, or if any plugin does not
+    # contain cache_id for the given scope.
+    if not plugin_list:
+      return False
+    for cache_plugin in plugin_list:
+      cache_entry = cache_plugin.get(self.getPath(), DEFAULT_CACHE_SCOPE)
+      if not cache_entry.getValue().has_key(cache_id):
+        return False
+    return True
+
+  security.declareProtected(Permissions.ModifyPortalContent, 'setConversion')
+  def setConversion(self, data, mime=None, calculation_time=None, **kw):
+    """
     """
     self.updateConversionCache()
-    return self._cached_time.get(makeSortedTuple(format), 0)
-
-  security.declareProtected(Permissions.ModifyPortalContent, 'updateConversion')
-  def updateConversion(self, **format):
+    cache_id = generateCacheId(**kw)
+    cache_factory = self._getCacheFactory()
+    cache_duration = cache_factory.cache_duration
+    if data is not None:
+      for cache_plugin in cache_factory.getCachePluginList():
+        cache_entry = cache_plugin.get(self.getPath(), DEFAULT_CACHE_SCOPE)
+        cache_dict = cache_entry.getValue()
+        cache_dict.update({cache_id: (mime, aq_base(data))})
+        cache_plugin.set(self.getPath(), DEFAULT_CACHE_SCOPE,
+                         cache_dict, calculation_time=calculation_time,
+                         cache_duration=cache_duration)
+
+  security.declareProtected(Permissions.View, 'getConversion')
+  def getConversion(self, **kw):
+    """
+    """
     self.updateConversionCache()
-    self._cached_time[makeSortedTuple(format)] = DateTime()
-
-  security.declareProtected(Permissions.ModifyPortalContent, 'setConversion')
-  def setConversion(self, data, mime=None, **format):
-    """
-    Saves a version of the document in a given format; records mime type
-    and conversion time (which is right now).
-    """
-    self.updateConversionCache()
-    tformat = makeSortedTuple(format)
-    if mime is not None:
-      self._cached_mime[tformat] = mime
-    if data is not None:
-      self._cached_data[tformat] = aq_base(data) # Use of aq_base 
-        # is useful to remove the wrapper from a temp object
-        # which may have been used to generate data
-      self.updateConversion(**format)
-      self._cached_size[tformat] = len(data)
-    else:
-      self._cached_size[tformat] = 0
-    self._p_changed = 1
-
-  security.declareProtected(Permissions.View, 'getConversion')
-  def getConversion(self, **format):
-    """
-    Returns version of the document in a given format, if it has it; otherwise
-    returns empty string (the caller should check hasConversion before calling
-    this function.
-
-    (we could be much cooler here - pass testing and updating methods to this function
-    so that it does it all by itself; this'd eliminate the need for setConversion public method)
-    XXX-BG: I'm not sure now what I meant by this...
-    """
-    self.updateConversionCache()
-    tformat = makeSortedTuple(format)
-    return self._cached_mime[tformat], self._cached_data[tformat]
+    cache_id = generateCacheId(**kw)
+    for cache_plugin in self._getCacheFactory().getCachePluginList():
+      cache_entry = cache_plugin.get(self.getPath(), DEFAULT_CACHE_SCOPE)
+      data = cache_entry.getValue().get(cache_id)
+      if data:
+        return data
+    raise KeyError, 'Conversion cache key does not exists for %r' % cache_id
 
   security.declareProtected(Permissions.View, 'getConversionSize')
-  def getConversionSize(self, **format):
-    """
-    Returns the size of the converted document.
-    """
-    self.updateConversionCache()
-    tformat = makeSortedTuple(format)
-    if not self._cached_size.has_key(tformat):
-      self._cached_size[tformat] = len(self._cached_data[tformat])
-    return self._cached_size[tformat]
-
-  security.declareProtected(Permissions.ViewManagementScreens, 'getConversionCacheInfo')
-  def getConversionCacheInfo(self):
-    """
-    Get cache details as string (for debugging)
-    """
-    self.updateConversionCache()
-    s = 'CACHE INFO:<br/><table><tr><td>format</td><td>size</td><td>time</td><td>is changed</td></tr>'
-    for f in self._cached_time.keys():
-      t = self._cached_time[f]
-      data = self._cached_data.get(f)
-      if data:
-        if isinstance(data, str):
-          ln = len(data)
-        else:
-          ln = 0
-          while data is not None:
-            ln += len(data.data)
-            data = data.next
-      else:
-        ln = 'no data!!!'
-      s += '<tr><td>%s</td><td>%s</td><td>%s</td><td>%s</td></tr>' % (f, str(ln), str(t), '-')
-    s += '</table>'
-    return s
+  def getConversionSize(self, **kw):
+    """
+    """
+    if self.hasConversion(**kw):
+      return len(self.getConversion(**kw))
+    return 0
 
 class PermanentURLMixIn(ExtensibleTraversableMixIn):
   """
@@ -458,167 +427,6 @@
 
 class Document(PermanentURLMixIn, XMLObject, UrlMixIn, ConversionCacheMixin, SnapshotMixin, UpdateMixIn):
   """
-      Document is an abstract class with all methods
-      related to document management in ERP5. This includes
-      searchable text, explicit relations, implicit relations,
-      metadata, versions, languages, etc.
-
-      Documents may either store their content directly or
-      cache content which is retrieved from a specified URL.
-      The second case if often referred as "External Document".
-      Standalone "External Documents" may be created by specifying
-      a URL to the contribution tool which is in charge of initiating
-      the download process and selecting the appropriate document type.
-      Groups of "External Documents" may also be generated from
-      so-called "External Source" (refer to ExternalSource class
-      for more information).
-
-      External Documents may be downloaded once or updated at
-      regular interval. The later can be useful to update the content
-      of an external source. Previous versions may be stored
-      in place or kept in a separate file. This feature
-      is known as the crawling API. It is mostly implemented
-      in ContributionTool with wrappers in the Document class.
-      It can be useful for create a small search engine.
-
-      There are currently two types of Document subclasses:
-
-      * File for binary file based documents. File
-        has subclasses such as Image, OOoDocument,
-        PDFDocument, etc. to implement specific conversion
-        methods.
-
-      * TextDocument for text based documents. TextDocument
-        has subclasses such as Wiki to implement specific
-        methods. TextDocument itself has a subclass
-        (XSLTDocument) which provides XSLT based analysis
-        and transformation of XML content based on XSLT
-        templates. 
-
-      Document classes which implement conversion should use
-      the ConversionCacheMixin class so that converted values are
-      stored inside ZODB and do not need to be recalculated.
-      More generally, conversion should be achieved through
-      the convert method and other methods of the conversion
-      API (convertToBaseFormat, etc.). Moreover, any Document
-      subclass must ne able to convert documents to text
-      (asText method) and HTML (asHTML method). Text is required
-      for full text indexing. HTML is required for crawling.
-
-      Instances can be created directly, or via portal_contributions tool
-      which manages document ingestion process whereby a file can be uploaded
-      by http or sent in by email or dropped in by webdav or in some other
-      way as yet unknown. The ingestion process has the following steps:
-
-      (1) portal type detection
-      (2) object creation and upload of data
-      (3) metadata discovery (optionally with conversion of data to another format)
-      (4) other possible actions to finalise the ingestion (ex. by assigning
-          a reference)
-
-      This class handles (3) and calls a ZMI script to do (4).
-
-      Metadata can be drawn from various sources:
-
-      input      -   data supplied with http request or set on the object during (2) (e.g.
-                     discovered from email text)
-      file_name  -   data which might be encoded in file name
-      user_login -   information about user who is contributing the file
-      content    -   data which might be derived from document content
-
-      If a certain property is defined in more than one source, it is set according to
-      preference order returned by a script 
-         Document_getPreferredDocumentMetadataDiscoveryOrderList
-         (or any type-based version since discovery is type dependent)
-
-      Methods for discovering metadata are:
-
-        getPropertyDictFromInput
-        getPropertyDictFromFileName
-        getPropertyDictFromUserLogin
-        getPropertyDictFromContent
-
-      Methods for processing content are implemented either in 
-      Document class or in Base class:
-
-        getSearchableReferenceList (Base)
-        getSearchableText (Base)
-        index_html (overriden in Document subclasses)
-
-      Methods for handling relations are implemented either in 
-      Document class or in Base class:
-
-        getImplicitSuccessorValueList (Base)
-        getImplicitPredecessorValueList (Base)
-        getImplicitSimilarValueList (Base)
-        getSimilarCloudValueList (Document)
-
-      Implicit relations consist in finding document references inside
-      searchable text (ex. INV-23456) and deducting relations from that.
-      Two customisable methods required. One to find a list of implicit references
-      inside the content (getSearchableReferenceList) and one to convert a given
-      document reference into a list of reference strings which could
-      be present in other content (asSearchableReferenceList).
-
-      document.getSearchableReferenceList() returns
-        [
-         {'reference':' INV-12367'},
-         {'reference': 'INV-1112', 'version':'012}', 
-         {'reference': 'AB-CC-DRK', 'version':'011', 'language': 'en'}
-        ]
-
-      The Document class behaviour can be extended / customized through scripts
-      (which are type-based so can be adjusted per portal type).
-
-      * Document_getPropertyDictFromUserLogin - finds a user (by user_login or from session)
-        and returns properties which should be set on the document
-
-      * Document_getPropertyDictFromContent - analyzes document content and returns
-        properties which should be set on the document
-
-      * Base_getImplicitSuccessorValueList - finds appropriate all documents
-        referenced in the current content
-
-      * Base_getImplicitPredecessorValueList - finds document predecessors based on
-        the document coordinates (can use only complete coordinates, or also partial)
-
-      * Document_getPreferredDocumentMetadataDiscoveryOrderList - returns an order
-        in which metadata should be set/overwritten
-
-      * Document_finishIngestion - called by portal_activities after all the ingestion
-        is completed (and after document has been converted, so text_content
-        is available if the document has it)
-
-      * Document_getNewRevision - calculates revision number which should be set
-        on this document. Implementation depends on revision numbering policy which
-        can be very different. Interaction workflow should call setNewRevision method.
-
-      * Document_populateContent - analyses the document content and produces
-        subcontent based on it (ex. images, news, etc.). This scripts can
-        involve for example an XSLT transformation to process XML.
-
-      Subcontent: documents may include subcontent (files, images, etc.)
-      so that publication of rich content can be path independent. Subcontent
-      can also be used to help the rendering in HTML of complex documents
-      such as ODF documents.
-
-    Consistency checking:
-      Default implementation uses DocumentReferenceConstraint to check if the 
-      reference/language/version triplet is unique. Additional constraints
-      can be added if necessary.
-
-    NOTE: Document.py supports a notion of revision which is very specific.
-    The underlying concept is that, as soon as a document has a reference,
-    the association of (reference, version, language) must be unique
-    accross the whole system. This means that a given document in a given
-    version in a given language is unique. The underlying idea is similar
-    to the one in a Wiki system in which each page is unique and acts
-    the the atom of collaboration. In the case of ERP5, if a team collaborates
-    on a Text document written with an offline word processor, all
-    updates should be placed inside the same object. A Contribution
-    will thus modify an existing document, if allowed from security
-    point of view, and increase the revision number. Same goes for
-    properties (title). Each change generates a new revision.
   """
 
   meta_type = 'ERP5 Document'
@@ -628,6 +436,8 @@
   isRADContent = 1
   isDocument = 1
   __dav_collection__=0
+
+  zope.interface.implements( interfaces.IDocument, )
 
   # Regular expressions
   href_parser = re.compile('<a[^>]*href=[\'"](.*?)[\'"]',re.IGNORECASE)
@@ -1300,7 +1110,7 @@
         html = html.replace('<head>', '<head>%s' % base)
       # We do not implement cache yet since it increases ZODB
       # for probably no reason. More research needed
-      # self.setConversion(html, mime='text/html', format='base-html')
+      self.setConversion(html, mime='text/html', format='base-html')
     return html
 
   security.declarePrivate('_asHTML')
@@ -1421,19 +1231,6 @@
 
   def _convertToBaseFormat(self):
     """
-      Placeholder method. Must be subclassed by classes
-      which need a base format. Refer to OOoDocument
-      for an example of ODF base format which is used
-      as a way to convert about any file format into
-      about any file format.
-
-      Other possible applications: conversion of HTML
-      text to tiddy HTML such as described here:
-      http://www.xml.com/pub/a/2004/09/08/pyxml.html
-      so that resulting text can be processed more
-      easily by XSLT parsers. Conversion of internal
-      links to images of an HTML document to local
-      links (in combindation with populate).
     """
     raise NotImplementedError
 
@@ -1441,8 +1238,6 @@
                             'isSupportBaseDataConversion')
   def isSupportBaseDataConversion(self):
     """
-    This is a public interface to check a document that is support conversion
-    to base format and can be overridden in subclasses.
     """
     return False