[Erp5-report] r9205 - in /erp5/trunk/utils/ooodoc_server: serw.py worker.py

nobody at svn.erp5.org nobody at svn.erp5.org
Mon Aug 14 22:06:53 CEST 2006


Author: bartek
Date: Mon Aug 14 22:06:51 2006
New Revision: 9205

URL: http://svn.erp5.org?rev=9205&view=rev
Log:
Plain-text extraction moved out of the ooodoc server (it does not require OpenOffice)

Modified:
    erp5/trunk/utils/ooodoc_server/serw.py
    erp5/trunk/utils/ooodoc_server/worker.py

Modified: erp5/trunk/utils/ooodoc_server/serw.py
URL: http://svn.erp5.org/erp5/trunk/utils/ooodoc_server/serw.py?rev=9205&r1=9204&r2=9205&view=diff
==============================================================================
--- erp5/trunk/utils/ooodoc_server/serw.py (original)
+++ erp5/trunk/utils/ooodoc_server/serw.py Mon Aug 14 22:06:51 2006
@@ -50,7 +50,7 @@
   one of the **kwargs can be format (target format, default=appropriate
   OOo format)'''
   
-  public_funcs=('convert','getmetadata','setmetadata','generate','getmetadatatoolong','getplaintext')
+  public_funcs=('convert','getmetadata','setmetadata','generate','getmetadatatoolong')
 
   def __init__(self):
     self._generateFuncs()
@@ -74,10 +74,6 @@
     meta,newfname=w.run_convert(fname,**kwargs)
     return meta,newfname
   
-  def getplaintext(self,w,fname,*args,**kwargs):
-    meta,newfname=w.run_getplaintext(fname)
-    return meta,newfname
-
   def getmetadatatoolong(self,w,fname,*args,**kwargs):
     meta=w.run_getmetadatatoolong(fname)
     return meta,None
@@ -127,7 +123,7 @@
     for f in os.listdir(os.path.join(config.basedir,'tmp')):
       if f.startswith(os.path.basename(fname)): # html formats generate a number of files
         lib.log('removing %s' % f)
-        os.remove(self._mkName(f))
+        #os.remove(self._mkName(f))
         any=True
     if not any:
       lib.log('no file beginning with %s' % fname,1)

Modified: erp5/trunk/utils/ooodoc_server/worker.py
URL: http://svn.erp5.org/erp5/trunk/utils/ooodoc_server/worker.py?rev=9205&r1=9204&r2=9205&view=diff
==============================================================================
--- erp5/trunk/utils/ooodoc_server/worker.py (original)
+++ erp5/trunk/utils/ooodoc_server/worker.py Mon Aug 14 22:06:51 2006
@@ -40,41 +40,8 @@
 
 import lib
 
-from mimemapper import mimemapper
-
-class PlainWriter(formatter.DumbWriter):
-  '''Writer stripping horizontal rules'''
-
-  def send_hor_rule(self,*args,**kwargs):
-    pass
-
-class CustomHTMLParser(htmllib.HTMLParser):
-  '''parser that would pass on content of basic meta tags from web pages'''
-
-  def do_meta(self,tag):
-    t=dict(tag)
-    if t.get('name','x').lower() in ('keywords','description'):
-      self.formatter.add_flowing_data(t['content'])
-
-  def feed(self,s):
-    htmllib.HTMLParser.feed(self,s)
-    self.formatter.add_flowing_data(self.title)
-
-
-class HTMLStripper(object):
-
-  def strip_html(self,source_file,dest_file):
-    fileob=open(source_file)
-    filestring=fileob.read()
-    fileob.close()
-    fileob=open(dest_file,'w')
-    w=PlainWriter(fileob)
-    f=formatter.AbstractFormatter(w)
-    p=CustomHTMLParser(f)
-    p.feed(filestring)
-    fileob.close()
-
-class Worker(HTMLStripper):
+
+class Worker(object):
 
   '''
   This class encapsulates an OOo instance providing interface
@@ -84,7 +51,7 @@
   '''
 
   # for automatic interface generation
-  public_funcs=('convert','generate','setmetadata','getmetadata','getmetadatatoolong','getplaintext')
+  public_funcs=('convert','generate','setmetadata','getmetadata','getmetadatatoolong')
 
   inProps = PropertyValue( "Hidden" , 0 , True, 0 ),
 
@@ -136,30 +103,6 @@
     self._loadFile(fname)
     self._generate(format)
     return mimemapper.getMimeFor(format),fileUrlToSystemPath(self.destUrl)
-
-  def getplaintext(self,fname):
-    '''get plain text out of the document (to be used in SearchableText)'''
-    self._loadFile(fname)
-    self._checkMimeType(1)
-    mime=self.metadata['MIMEType']
-    if mime=='application/vnd.oasis.opendocument.text':
-      self._generate('txt')
-      return None,fileUrlToSystemPath(self.destUrl)
-    if mime=='application/vnd.oasis.opendocument.spreadsheet':
-      # for want of a cleaner solution, we save as html (to save all sheets) and 
-      # convert to plain text
-      self._generate('html-calc')
-      filename=fileUrlToSystemPath(self.destUrl)
-      self.strip_html(filename,filename)
-      return None, filename
-    if mime=='application/vnd.oasis.opendocument.presentation':
-      # we do not know what to do
-      pass
-    if mime=='application/vnd.oasis.opendocument.graphics':
-      # no plain text here
-      pass
-    # we should never get to this point, anyway...
-    return None,None
 
   def setmetadata(self,fname,meta):
     '''set metadata on OOo file (fname does not change)'''




More information about the Erp5-report mailing list