[Erp5-report] r31563 jerome - in /erp5/trunk/products: ERP5/Document/ ERP5OOo/tests/ ERP5OOo/tests/test_document/
nobody at svn.erp5.org
nobody at svn.erp5.org
Mon Jan 4 16:42:17 CET 2010
Author: jerome
Date: Mon Jan 4 16:42:13 2010
New Revision: 31563
URL: http://svn.erp5.org?rev=31563&view=rev
Log:
Also try pdftk to extract extra metadata, because pdfinfo only returns the standard PDF metadata.
Added:
erp5/trunk/products/ERP5OOo/tests/test_document/metadata.pdf (with props)
Modified:
erp5/trunk/products/ERP5/Document/PDFDocument.py
erp5/trunk/products/ERP5OOo/tests/testDms.py
Modified: erp5/trunk/products/ERP5/Document/PDFDocument.py
URL: http://svn.erp5.org/erp5/trunk/products/ERP5/Document/PDFDocument.py?rev=31563&r1=31562&r2=31563&view=diff
==============================================================================
--- erp5/trunk/products/ERP5/Document/PDFDocument.py [utf8] (original)
+++ erp5/trunk/products/ERP5/Document/PDFDocument.py [utf8] Mon Jan 4 16:42:13 2010
@@ -249,17 +249,40 @@
tmp = tempfile.NamedTemporaryFile()
tmp.write(str(self.data))
tmp.seek(0)
- cmd = 'pdfinfo -meta -box %s' % tmp.name
- r = os.popen(cmd)
- h = r.read()
- tmp.close()
- r.close()
- result = {}
- for line in h.splitlines():
- item_list = line.split(':')
- key = item_list[0].strip()
- value = ':'.join(item_list[1:]).strip()
- result[key] = value
+ try:
+ # First, we use pdfinfo to get standard metadata
+ cmd = 'pdfinfo -meta -box %s' % tmp.name
+ r = os.popen(cmd)
+ h = r.read()
+ r.close()
+ result = {}
+ for line in h.splitlines():
+ item_list = line.split(':')
+ key = item_list[0].strip()
+ value = ':'.join(item_list[1:]).strip()
+ result[key] = value
+
+ # Then we use pdftk to get extra metadata
+ cmd = 'pdftk %s dump_data output' % tmp.name
+ r = os.popen(cmd)
+ h = r.read()
+ r.close()
+ line_list = (line for line in h.splitlines())
+ while True:
+ try:
+ line = line_list.next()
+ except StopIteration:
+ break
+ if line.startswith('InfoKey'):
+ key = line[len('InfoKey: '):]
+ line = line_list.next()
+ assert line.startswith('InfoValue: '),\
+ "Wrong format returned by pdftk dump_data"
+ value = line[len('InfoValue: '):]
+ result.setdefault(key, value)
+ finally:
+ tmp.close()
+
self._content_information = result
return result.copy()
Modified: erp5/trunk/products/ERP5OOo/tests/testDms.py
URL: http://svn.erp5.org/erp5/trunk/products/ERP5OOo/tests/testDms.py?rev=31563&r1=31562&r2=31563&view=diff
==============================================================================
--- erp5/trunk/products/ERP5OOo/tests/testDms.py [utf8] (original)
+++ erp5/trunk/products/ERP5OOo/tests/testDms.py [utf8] Mon Jan 4 16:42:13 2010
@@ -792,6 +792,15 @@
self.assertEquals('title', content_information['Title'])
self.assertEquals('application/pdf', document.getContentType())
+ def test_PDF_content_information_extra_metadata(self):
+ # Extra metadata, such as those stored by pdftk update_info are also
+ # available in document.getContentInformation()
+ upload_file = makeFileUpload('metadata.pdf')
+ document = self.portal.portal_contributions.newContent(file=upload_file)
+ self.assertEquals('PDF', document.getPortalType())
+ content_information = document.getContentInformation()
+ self.assertEquals('the value', content_information['NonStandardMetadata'])
+
def test_PDF_content_content_type(self):
upload_file = makeFileUpload('REF-en-001.pdf')
document = self.portal.document_module.newContent(portal_type='PDF')
Added: erp5/trunk/products/ERP5OOo/tests/test_document/metadata.pdf
URL: http://svn.erp5.org/erp5/trunk/products/ERP5OOo/tests/test_document/metadata.pdf?rev=31563&view=auto
==============================================================================
Binary file - no diff available.
Propchange: erp5/trunk/products/ERP5OOo/tests/test_document/metadata.pdf
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
More information about the Erp5-report mailing list