[Erp5-report] r15682 - /erp5/trunk/products/ERP5/Document/Document.py

Thu Aug 16 02:39:42 CEST 2007

Author: jp
Date: Thu Aug 16 02:39:42 2007
New Revision: 15682

URL: http://svn.erp5.org?rev=15682&view=rev
Log:
Support for the new crawling API as well as detection of index pages.

Modified:
    erp5/trunk/products/ERP5/Document/Document.py

Modified: erp5/trunk/products/ERP5/Document/Document.py
URL: http://svn.erp5.org/erp5/trunk/products/ERP5/Document/Document.py?rev=15682&r1=15681&r2=15682&view=diff
==============================================================================

--- erp5/trunk/products/ERP5/Document/Document.py (original)
+++ erp5/trunk/products/ERP5/Document/Document.py Thu Aug 16 02:39:42 2007
@@ -37,8 +37,7 @@
 from Products.CMFCore.utils import getToolByName, _checkPermission
 from Products.ERP5Type import Permissions, PropertySheet, Constraint, Interface
 from Products.ERP5Type.XMLObject import XMLObject
-from Products.ERP5Type.WebDAVSupport import TextContent
-from Products.ERP5Type.Message import Message
+from Products.ERP5Type.DateUtils import convertDateToHour, number_of_hours_in_day, number_of_hours_in_year
 from Products.ERP5Type.Utils import convertToUpperCase, convertToMixedCase
 from Products.ERP5.Document.Url import UrlMixIn
 from Products.ERP5.Tool.ContributionTool import MAX_REPEAT
@@ -477,11 +476,11 @@
       if val is None:
         val = self.getProperty(property)
         if val is not None and val != '':
-          val = [val]
+          val = [str(val)]
         else:
           val = []
       else:
-        val = list(val)
+        val = [str(v) for v in list(val) if v is not None]
       return val
 
     searchable_text = reduce(add, map(lambda x: getPropertyListOrValue(x),
@@ -1240,6 +1239,22 @@
     """
     self.portal_contributions.crawlContent(self)
 
+  security.declareProtected(Permissions.View, 'isIndexContent')
+  def isIndexContent(self, container=None):
+    """
+      Ask the container whether we are an index or a content.
+      In the vast majority of cases we are content.
+      This method is required in a crawling process to
+      distinguish between URLs which return an index (ex. the
+      list of files on a remote server which is accessed through HTTP)
+      and the files themselves.
+    """
+    if container is None:
+      container = self.getParentValue()
+    if hasattr(aq_base(container), 'isIndexContent'):
+      return container.isIndexContent(self)
+    return False
+
   security.declareProtected(Permissions.AccessContentsInformation, 'getContentBaseURL')
   def getContentBaseURL(self):
     """
@@ -1255,12 +1270,46 @@
         base_url = '/'.join(base_url_list[:-1])
     return base_url
 
-  # Alarm date calculation - this method should be moved out ASAP
-  security.declareProtected(Permissions.AccessContentsInformation, 'getNextAlarmDate')
-  def getNextAlarmDate(self):
-    """
-    This method is only there to have something to test.
-    Serious refactoring of Alarm, Periodicity and CalendarPeriod
-    classes is needed.
-    """
-    return DateTime() + 10
+  security.declareProtected(Permissions.AccessContentsInformation, 'getFrequencyIndex')
+  def getFrequencyIndex(self):
+    """
+      Returns the document update frequency as an integer
+      which is used by alarms to decide which documents
+      must be updated at which time. The index represents
+      a time slot (ex. all days in a month, all hours in a week).
+    """
+    try:
+      return self.getUpdateFrequencyValue().getIntIndex()
+    except AttributeError:
+      # Catch AttributeError only (KeyError is not caught here) - XXX not beautiful
+      return 0
+
+  security.declareProtected(Permissions.AccessContentsInformation, 'getCreationDateIndex')
+  def getCreationDateIndex(self, at_date = None):
+    """
+    Returns the document Creation Date Index which is the creation 
+    date converted into hours modulo the Frequency Index.
+    """
+    frequency_index = self.getFrequencyIndex()
+    if not frequency_index: return -1 # If no update frequency is provided, make sure we never update
+    hour = convertDateToHour(date=self.getCreationDate())
+    creation_date_index = hour % frequency_index
+    # In the case of a leap year, we subtract 24 hours from the creation date
+    # index; otherwise updates of documents (frequency=yearly update) created
+    # during the last 24 hours of a leap year would be launched only once every 4 years.
+    if creation_date_index >= number_of_hours_in_year:
+      creation_date_index = creation_date_index - number_of_hours_in_day
+
+    return creation_date_index
+
+  security.declareProtected(Permissions.AccessContentsInformation, 'isUpdatable')
+  def isUpdatable(self):
+    """
+      This method is used to decide which document can be updated
+      in the crawling process. This can depend for example on
+      workflow states (publication state,
+      validation state) or on roles on the document.
+    """
+    method = self._getTypeBasedMethod('isUpdatable', 
+        fallback_script_id = 'Document_isUpdatable')
+    return method()