[Erp5-report] r9205 - in /erp5/trunk/utils/ooodoc_server: serw.py worker.py
nobody at svn.erp5.org
nobody at svn.erp5.org
Mon Aug 14 22:06:53 CEST 2006
Author: bartek
Date: Mon Aug 14 22:06:51 2006
New Revision: 9205
URL: http://svn.erp5.org?rev=9205&view=rev
Log:
Plain-text extraction moved out of the ooodoc server (it does not require OpenOffice).
Modified:
erp5/trunk/utils/ooodoc_server/serw.py
erp5/trunk/utils/ooodoc_server/worker.py
Modified: erp5/trunk/utils/ooodoc_server/serw.py
URL: http://svn.erp5.org/erp5/trunk/utils/ooodoc_server/serw.py?rev=9205&r1=9204&r2=9205&view=diff
==============================================================================
--- erp5/trunk/utils/ooodoc_server/serw.py (original)
+++ erp5/trunk/utils/ooodoc_server/serw.py Mon Aug 14 22:06:51 2006
@@ -50,7 +50,7 @@
one of the **kwargs can be format (target format, default=appropriate
OOo format)'''
- public_funcs=('convert','getmetadata','setmetadata','generate','getmetadatatoolong','getplaintext')
+ public_funcs=('convert','getmetadata','setmetadata','generate','getmetadatatoolong')
def __init__(self):
self._generateFuncs()
@@ -74,10 +74,6 @@
meta,newfname=w.run_convert(fname,**kwargs)
return meta,newfname
- def getplaintext(self,w,fname,*args,**kwargs):
- meta,newfname=w.run_getplaintext(fname)
- return meta,newfname
-
def getmetadatatoolong(self,w,fname,*args,**kwargs):
meta=w.run_getmetadatatoolong(fname)
return meta,None
@@ -127,7 +123,7 @@
for f in os.listdir(os.path.join(config.basedir,'tmp')):
if f.startswith(os.path.basename(fname)): # html formats generate a number of files
lib.log('removing %s' % f)
- os.remove(self._mkName(f))
+ #os.remove(self._mkName(f))
any=True
if not any:
lib.log('no file beginning with %s' % fname,1)
Modified: erp5/trunk/utils/ooodoc_server/worker.py
URL: http://svn.erp5.org/erp5/trunk/utils/ooodoc_server/worker.py?rev=9205&r1=9204&r2=9205&view=diff
==============================================================================
--- erp5/trunk/utils/ooodoc_server/worker.py (original)
+++ erp5/trunk/utils/ooodoc_server/worker.py Mon Aug 14 22:06:51 2006
@@ -40,41 +40,8 @@
import lib
-from mimemapper import mimemapper
-
-class PlainWriter(formatter.DumbWriter):
- '''Writer stripping horizontal rules'''
-
- def send_hor_rule(self,*args,**kwargs):
- pass
-
-class CustomHTMLParser(htmllib.HTMLParser):
- '''parser that would pass on content of basic meta tags from web pages'''
-
- def do_meta(self,tag):
- t=dict(tag)
- if t.get('name','x').lower() in ('keywords','description'):
- self.formatter.add_flowing_data(t['content'])
-
- def feed(self,s):
- htmllib.HTMLParser.feed(self,s)
- self.formatter.add_flowing_data(self.title)
-
-
-class HTMLStripper(object):
-
- def strip_html(self,source_file,dest_file):
- fileob=open(source_file)
- filestring=fileob.read()
- fileob.close()
- fileob=open(dest_file,'w')
- w=PlainWriter(fileob)
- f=formatter.AbstractFormatter(w)
- p=CustomHTMLParser(f)
- p.feed(filestring)
- fileob.close()
-
-class Worker(HTMLStripper):
+
+class Worker(object):
'''
This class encapsulates an OOo instance providing interface
@@ -84,7 +51,7 @@
'''
# for automatic interface generation
- public_funcs=('convert','generate','setmetadata','getmetadata','getmetadatatoolong','getplaintext')
+ public_funcs=('convert','generate','setmetadata','getmetadata','getmetadatatoolong')
inProps = PropertyValue( "Hidden" , 0 , True, 0 ),
@@ -136,30 +103,6 @@
self._loadFile(fname)
self._generate(format)
return mimemapper.getMimeFor(format),fileUrlToSystemPath(self.destUrl)
-
- def getplaintext(self,fname):
- '''get plain text out of the document (to be used in SearchableText)'''
- self._loadFile(fname)
- self._checkMimeType(1)
- mime=self.metadata['MIMEType']
- if mime=='application/vnd.oasis.opendocument.text':
- self._generate('txt')
- return None,fileUrlToSystemPath(self.destUrl)
- if mime=='application/vnd.oasis.opendocument.spreadsheet':
- # for want of a cleaner solution, we save as html (to save all sheets) and
- # convert to plain text
- self._generate('html-calc')
- filename=fileUrlToSystemPath(self.destUrl)
- self.strip_html(filename,filename)
- return None, filename
- if mime=='application/vnd.oasis.opendocument.presentation':
- # we do not know what to do
- pass
- if mime=='application/vnd.oasis.opendocument.graphics':
- # no plain text here
- pass
- # we should never get to this point, anyway...
- return None,None
def setmetadata(self,fname,meta):
'''set metadata on OOo file (fname does not change)'''
More information about the Erp5-report mailing list