[Erp5-report] r24286 - /erp5/trunk/products/ERP5Type/patches/transforms/png_to_text.py
nobody at svn.erp5.org
nobody at svn.erp5.org
Wed Oct 22 16:17:51 CEST 2008
Author: romain
Date: Wed Oct 22 16:17:42 2008
New Revision: 24286
URL: http://svn.erp5.org?rev=24286&view=rev
Log:
Add new transformation from png to text using ocropus.
This requires installing Ocropus (http://sites.google.com/site/ocropus/), which
is not yet an ERP5 dependency.
Added:
erp5/trunk/products/ERP5Type/patches/transforms/png_to_text.py
Added: erp5/trunk/products/ERP5Type/patches/transforms/png_to_text.py
URL: http://svn.erp5.org/erp5/trunk/products/ERP5Type/patches/transforms/png_to_text.py?rev=24286&view=auto
==============================================================================
--- erp5/trunk/products/ERP5Type/patches/transforms/png_to_text.py (added)
+++ erp5/trunk/products/ERP5Type/patches/transforms/png_to_text.py Wed Oct 22 16:17:42 2008
@@ -1,0 +1,54 @@
+from Products.PortalTransforms.interfaces import itransform
+from StringIO import StringIO
+import PIL.Image
+from Products.PortalTransforms.libtransforms.commandtransform \
+ import popentransform
+
+import os
+import sys
+import tempfile
+
+class png_to_text(popentransform): # transform turning PNG image data into plain text by running ocrocmd
+ __implements__ = itransform
+ __name__ = "png_to_text"
+
+ inputs = ('image/png',) # accepted input MIME type
+ output = 'text/plain' # produced MIME type
+ output_encoding = 'utf-8'
+
+ __version__ = '2008-10-07.01'
+
+ binaryName = "ocrocmd" # presumably resolved into self.binary by the base class -- verify
+ binaryArgs = "%(infile)s " # %(infile)s is replaced by the temporary file name in convert()
+ useStdin = False # input is handed over through a temporary file, not through stdin
+
+ def convert(self, data, cache, **kwargs):
+ # XXX Override of the commandtransform implementation: ocrocmd does not
+ # accept command-line parameters, only environment variables.
+ # Overriding here avoids having to set those variables in the zope.conf file.
+ command = "%s %s" % (self.binary, self.binaryArgs)
+ if not self.useStdin:
+ tmpfile, tmpname = tempfile.mkstemp(text=False) # create a temporary file in binary mode
+ os.write(tmpfile, data) # write the input data through the file descriptor
+ os.close(tmpfile) # close it so the other process can read it
+ command = command % { 'infile' : tmpname } # substitute the temporary file name into the command
+
+ cin, couterr = os.popen4('quiet=1 hocr=0 %s' % command, 'b') # quiet/hocr are set as environment assignments in front of the command
+
+ if self.useStdin:
+ cin.write(str(data)) # feed the input data to the child's stdin
+
+ status = cin.close() # NOTE(review): status is never used afterwards
+
+ out = self.getData(couterr) # getData is defined outside this file; presumably reads the whole stream -- verify
+ couterr.close()
+
+ if not self.useStdin:
+ # remove the temporary file (NOTE(review): skipped if an earlier call raises; there is no try/finally)
+ os.unlink(tmpname)
+
+ cache.setData(out) # store the command output in the cache object handed in by the caller
+ return cache
+
+def register(): # module-level factory: returns a new png_to_text transform instance
+ return png_to_text()
More information about the Erp5-report
mailing list