[Erp5-report] r24286 - /erp5/trunk/products/ERP5Type/patches/transforms/png_to_text.py

nobody at svn.erp5.org nobody at svn.erp5.org
Wed Oct 22 16:17:51 CEST 2008


Author: romain
Date: Wed Oct 22 16:17:42 2008
New Revision: 24286

URL: http://svn.erp5.org?rev=24286&view=rev
Log:
Add new transformation from png to text using ocropus.
This requires installing Ocropus (http://sites.google.com/site/ocropus/), which
is not yet an ERP5 dependency.

Added:
    erp5/trunk/products/ERP5Type/patches/transforms/png_to_text.py

Added: erp5/trunk/products/ERP5Type/patches/transforms/png_to_text.py
URL: http://svn.erp5.org/erp5/trunk/products/ERP5Type/patches/transforms/png_to_text.py?rev=24286&view=auto
==============================================================================
--- erp5/trunk/products/ERP5Type/patches/transforms/png_to_text.py (added)
+++ erp5/trunk/products/ERP5Type/patches/transforms/png_to_text.py Wed Oct 22 16:17:42 2008
@@ -1,0 +1,54 @@
+from Products.PortalTransforms.interfaces import itransform
+from StringIO import StringIO
+import PIL.Image
+from Products.PortalTransforms.libtransforms.commandtransform \
+    import popentransform
+
+import os
+import sys
+import tempfile
+
+class png_to_text(popentransform):
+    __implements__ = itransform
+    __name__  = "png_to_text"
+
+    inputs   = ('image/png',)
+    output  = 'text/plain'
+    output_encoding = 'utf-8'
+    
+    __version__ = '2008-10-07.01'
+
+    binaryName = "ocrocmd"
+    binaryArgs = "%(infile)s "
+    useStdin = False
+
+    def convert(self, data, cache, **kwargs):
+        # XXX Surcharge from commandtransform, as ocrocmd do not accept 
+        # parameters but environnement variable.
+        # Surcharging prevent to put the variable in the zope.conf file
+        command = "%s %s" % (self.binary, self.binaryArgs)
+        if not self.useStdin:
+            tmpfile, tmpname = tempfile.mkstemp(text=False) # create tmp
+            os.write(tmpfile, data) # write data to tmp using a file descriptor
+            os.close(tmpfile)       # close it so the other process can read it
+            command = command % { 'infile' : tmpname } # apply tmp name to command
+
+        cin, couterr = os.popen4('quiet=1 hocr=0 %s' % command, 'b')
+
+        if self.useStdin:
+            cin.write(str(data))
+
+        status = cin.close()
+
+        out = self.getData(couterr)
+        couterr.close()
+
+        if not self.useStdin:
+            # remove tmp file
+            os.unlink(tmpname)
+                     
+        cache.setData(out)                                                                                                                             
+        return cache       
+
+def register():
+    return png_to_text()




More information about the Erp5-report mailing list