[Erp5-report] r9006 - /erp5/trunk/products/ERP5OOo/Document/OOoDocument.py

nobody at svn.erp5.org nobody at svn.erp5.org
Wed Aug 2 20:16:59 CEST 2006


Author: bartek
Date: Wed Aug  2 20:16:57 2006
New Revision: 9006

URL: http://svn.erp5.org?rev=9006&view=rev
Log:
Add support for document-type attributes, including SearchableText; automatically extract plain-text content (used for searching) for documents of the Text type

Modified:
    erp5/trunk/products/ERP5OOo/Document/OOoDocument.py

Modified: erp5/trunk/products/ERP5OOo/Document/OOoDocument.py
URL: http://svn.erp5.org/erp5/trunk/products/ERP5OOo/Document/OOoDocument.py?rev=9006&r1=9005&r2=9006&view=diff
==============================================================================
--- erp5/trunk/products/ERP5OOo/Document/OOoDocument.py (original)
+++ erp5/trunk/products/ERP5OOo/Document/OOoDocument.py Wed Aug  2 20:16:57 2006
@@ -100,6 +100,7 @@
                     , PropertySheet.DublinCore
                     , PropertySheet.Version
                     , PropertySheet.Reference
+                    , PropertySheet.Document
                     , PropertySheet.OOoDocument
                     )
 
@@ -116,6 +117,21 @@
     #XMLObject.__init__(self,*args,**kwargs)
     #File.__init__(self,*args,**kwargs)
     #self.__dav_collection__=0
+
+  ### Content indexing methods
+  security.declareProtected(Permissions.View, 'getSearchableText')
+  def getSearchableText(self, md=None):
+    """\
+    Used by the catalog for basic full text indexing: returns the properties listed below joined by spaces ('md' is not used here).
+    And so we end up with a strange hybrid of File and Document
+    """
+    searchable_attrs=('title','description','id','text_content','reference','version',
+        'short_title','keywords','subject','original_filename','source_project_title')  # NOTE(review): join() below needs strings - confirm 'subject'/'keywords' are never lists
+    searchable_text = ' '.join(map(lambda x: self.getProperty(x) or ' ',searchable_attrs))  # a falsy (empty/missing) value is replaced by a single space
+    return searchable_text
+
+  SearchableText=getSearchableText
+
 
   security.declareProtected(Permissions.ModifyPortalContent,'clearCache')
   def clearCache(self):
@@ -235,8 +251,12 @@
     self.log('_convert',enc(self._unpackData(self.data))[:500])
     meta,oo_data=sp.run_convert(self.getOriginalFilename(),enc(self._unpackData(self.data)))
     self.oo_data=Pdata(dec(oo_data))
+    # now we get text content (for now, only for Text type)
+    # converting spreadsheet and presentations into plain text is less trivial
+    if self.getPortalType()=='Text':
+      nic,text_data=sp.run_generate(self.getOriginalFilename(),enc(self._unpackData(self.oo_data)),'txt')
+      self.setTextContent(dec(text_data))
     self._setMetaData(meta)
-    #self.refreshAllowedTargets()
 
   security.declarePrivate('_setMetaData')
   def _setMetaData(self,meta):




More information about the Erp5-report mailing list