[Erp5-report] r34146 mame - /erp5/trunk/products/ERP5/Document/
nobody at svn.erp5.org
nobody at svn.erp5.org
Fri Mar 26 13:57:38 CET 2010
Author: mame
Date: Fri Mar 26 13:57:37 2010
New Revision: 34146
URL: http://svn.erp5.org?rev=34146&view=rev
Log:
Modified to revert back to revision before 34090
Modified:
erp5/trunk/products/ERP5/Document/Document.py
erp5/trunk/products/ERP5/Document/Image.py
erp5/trunk/products/ERP5/Document/PDFDocument.py
erp5/trunk/products/ERP5/Document/TextDocument.py
Modified: erp5/trunk/products/ERP5/Document/Document.py
URL: http://svn.erp5.org/erp5/trunk/products/ERP5/Document/Document.py?rev=34146&r1=34145&r2=34146&view=diff
==============================================================================
--- erp5/trunk/products/ERP5/Document/Document.py [utf8] (original)
+++ erp5/trunk/products/ERP5/Document/Document.py [utf8] Fri Mar 26 13:57:37 2010
@@ -56,12 +56,6 @@
# Mixin Import
from Products.ERP5.mixin.cached_convertable import CachedConvertableMixin
-from Products.ERP5.mixin.convertable import ConvertableMixin
-from Products.ERP5.mixin.text_convertable import TextConvertableMixin
-from Products.ERP5.mixin.base_convertable import BaseConvertableMixin
-from Products.ERP5.mixin.html_convertable import HTMLConvertableMixin
-from Products.ERP5.mixin.metadata_discoverable import MetadataDiscoverableMixin
-from Products.ERP5.mixin.document import DocumentMixin
_MARKER = []
VALID_ORDER_KEY_LIST = ('user_login', 'content', 'file_name', 'input')
@@ -312,8 +306,7 @@
return method()
-class Document(PermanentURLMixIn, XMLObject, UrlMixIn, ConvertableMixin, TextConvertableMixin,HTMLConvertableMixin,
- DocumentMixin, BaseConvertableMixin, MetadataDiscoverableMixin, CachedConvertableMixin, SnapshotMixin, UpdateMixIn):
+class Document(PermanentURLMixIn, XMLObject, UrlMixIn, CachedConvertableMixin, SnapshotMixin, UpdateMixIn):
"""Document is an abstract class with all methods related to document
management in ERP5. This includes searchable text, explicit relations,
implicit relations, metadata, versions, languages, etc.
@@ -1084,6 +1077,267 @@
method = self._getTypeBasedMethod('finishIngestion', fallback_script_id='Document_finishIngestion')
return method()
+ # Conversion methods
+ security.declareProtected(Permissions.AccessContentsInformation, 'convert')
+ def convert(self, format, **kw):
+ """
+ Main content conversion function, returns result which should
+ be returned and stored in cache.
+ format - the format specified in the form of an extension
+ string (ex. jpeg, html, text, txt, etc.)
+ **kw can be various things - e.g. resolution
+
+ Default implementation returns an empty string (html, text)
+ or raises an error.
+
+ TODO:
+ - implement guards API so that conversion to certain
+ formats require certain permission
+ """
+ if format == 'html':
+ return 'text/html', '' # XXX - Why ?
+ if format in ('text', 'txt'):
+ return 'text/plain', '' # XXX - Why ?
+ raise NotImplementedError
+
+ security.declareProtected(Permissions.View, 'asSubjectText')
+ def asSubjectText(self, **kw):
+ """
+ Converts the subject of the document to a textual representation.
+ """
+ subject = self.getSubject()
+ if not subject:
+ # XXX not sure if this fallback is a good idea.
+ subject = self.getTitle()
+ if subject is None:
+ subject = ''
+ return str(subject)
+
+ security.declareProtected(Permissions.View, 'asText')
+ def asText(self, **kw):
+ """
+ Converts the content of the document to a textual representation.
+ """
+ kw['format'] = 'txt'
+ mime, data = self.convert(**kw)
+ return str(data)
+
+ security.declareProtected(Permissions.View, 'asEntireHTML')
+ def asEntireHTML(self, **kw):
+ """
+ Returns a complete HTML representation of the document
+ (with body tags, etc.). Adds if necessary a base
+ tag so that the document can be displayed in an iframe
+ or standalone.
+
+ Actual conversion is delegated to _asHTML
+ """
+ html = self._asHTML(**kw)
+ if self.getUrlString():
+ # If a URL is defined, add the base tag
+ # if no base tag is defined yet.
+ html = str(html)
+ if not html.find('<base') >= 0:
+ base = '<base href="%s">' % self.getContentBaseURL()
+ html = html.replace('<head>', '<head>%s' % base)
+ self.setConversion(html, mime='text/html', format='base-html')
+ return html
+
+ security.declarePrivate('_asHTML')
+ def _asHTML(self, **kw):
+ """
+ A private method which converts to HTML. This method
+ is the one to override in subclasses.
+ """
+ if not self.hasBaseData():
+ raise ConversionError('This document has not been processed yet.')
+ try:
+ # FIXME: no substitution may occur in this case.
+ mime, data = self.getConversion(format='base-html')
+ return data
+ except KeyError:
+ kw['format'] = 'html'
+ mime, html = self.convert(**kw)
+ return html
+
+ security.declareProtected(Permissions.View, 'asStrippedHTML')
+ def asStrippedHTML(self, **kw):
+ """
+ Returns a stripped HTML representation of the document
+ (without html and body tags, etc.) which can be used to inline
+ a preview of the document.
+ """
+ if not self.hasBaseData():
+ return ''
+ try:
+ # FIXME: no substitution may occur in this case.
+ mime, data = self.getConversion(format='stripped-html')
+ return data
+ except KeyError:
+ kw['format'] = 'html'
+ mime, html = self.convert(**kw)
+ return self._stripHTML(str(html))
+
+ def _guessEncoding(self, string):
+ """
+ Try to guess the encoding for this string.
+ Returns None if no encoding can be guessed.
+ """
+ try:
+ import chardet
+ except ImportError:
+ return None
+ return chardet.detect(string).get('encoding', None)
+
+ def _stripHTML(self, html, charset=None):
+ """
+ A private method which can be reused by subclasses
+ to strip HTML content
+ """
+ body_list = re.findall(self.body_parser, str(html))
+ if len(body_list):
+ stripped_html = body_list[0]
+ else:
+ stripped_html = html
+ # find charset and convert to utf-8
+ charset_list = self.charset_parser.findall(str(html)) # XXX - Not efficient if this
+ # is datastream instance but hard to do better
+ if charset and not charset_list:
+ # Use optional parameter if we cannot find encoding in HTML
+ charset_list = [charset]
+ if charset_list and charset_list[0] not in ('utf-8', 'UTF-8'):
+ try:
+ stripped_html = unicode(str(stripped_html),
+ charset_list[0]).encode('utf-8')
+ except (UnicodeDecodeError, LookupError):
+ return str(stripped_html)
+ return stripped_html
+
+ def _safeHTML(self, html, format='text/x-html-safe', charset=None):
+ """
+ A private method to strip HTML content in safe mode,
+ without embedded JavaScript, forms, or any external plugin imports.
+ This should be used when we do not trust the user (Anonymous)
+ who pushes data into the database.
+ - html: content to strip
+ - format: destination format
+ - charset: charset used to encode string. Takes precedence
+ over charset values found in html string
+ """
+ portal = self.getPortalObject()
+ if charset is None:
+ # find charset
+ charset_list = self.charset_parser.findall(html)
+ if charset_list:
+ charset = charset_list[0]
+ if charset and charset not in ('utf-8', 'UTF-8'):
+ try:
+ safe_html_string = html.decode(charset).encode('utf-8')
+ except (UnicodeDecodeError, LookupError):
+ pass
+ else:
+ charset = 'utf-8' # Override charset if convertion succeeds
+ transform_tool = getToolByName(portal, 'portal_transforms')
+ safe_html_string = transform_tool.convertToData(format, html,
+ encoding=charset)
+ return safe_html_string
+
+ security.declareProtected(Permissions.AccessContentsInformation, 'getContentInformation')
+ def getContentInformation(self):
+ """
+ Returns the content information from the HTML conversion.
+ The default implementation tries to build a dictionary
+ from the HTML conversion of the document and extracts
+ the document title.
+ """
+ result = {}
+ html = self.asEntireHTML()
+ if not html: return result
+ title_list = re.findall(self.title_parser, str(html))
+ if title_list:
+ result['title'] = title_list[0]
+ return result
+
+ # Base format support
+ security.declareProtected(Permissions.ModifyPortalContent, 'convertToBaseFormat')
+ def convertToBaseFormat(self, **kw):
+ """
+ Converts the content of the document to a base format
+ which is later used for all conversions. This method
+ is common to all kinds of documents and handles
+ exceptions in a unified way.
+
+ Implementation is delegated to _convertToBaseFormat which
+ must be overloaded by subclasses of Document which
+ need a base format.
+
+ convertToBaseFormat is called upon file upload, document
+ ingestion by the processing_status_workflow.
+
+ NOTE: the data of the base format conversion should be stored
+ using the base_data property. Refer to Document.py propertysheet.
+ Use accessors (getBaseData, setBaseData, hasBaseData, etc.)
+ """
+ if getattr(self, 'hasData', None) is not None and not self.hasData():
+ # Empty document cannot be converted
+ return
+ try:
+ message = self._convertToBaseFormat() # Call implemetation method
+ self.clearConversionCache() # Conversion cache is now invalid
+ if message is None:
+ # XXX Need to translate.
+ message = 'Converted to %s.' % self.getBaseContentType()
+ self.convertFile(comment=message) # Invoke workflow method
+ except NotImplementedError:
+ message = ''
+ return message
+
+ def _convertToBaseFormat(self):
+ """
+ """
+ raise NotImplementedError
+
+ security.declareProtected(Permissions.AccessContentsInformation,
+ 'isSupportBaseDataConversion')
+ def isSupportBaseDataConversion(self):
+ """
+ """
+ return False
+
+ def convertFile(self, **kw): # XXX - It it really useful to explicitly define ?
+ """
+ Workflow transition invoked when conversion occurs.
+ """
+ convertFile = WorkflowMethod(convertFile)
+
+ security.declareProtected(Permissions.AccessContentsInformation,
+ 'getMetadataMappingDict')
+ def getMetadataMappingDict(self):
+ """
+ Return a dict of metadata mapping used to update base metadata of the
+ document
+ """
+ try:
+ method = self._getTypeBasedMethod('getMetadataMappingDict')
+ except KeyError, AttributeError:
+ method = None
+ if method is not None:
+ return method()
+ else:
+ return {}
+
+ security.declareProtected(Permissions.ModifyPortalContent, 'updateBaseMetadata')
+ def updateBaseMetadata(self, **kw):
+ """
+ Update the base format data with the latest properties entered
+ by the user. For example, if title is changed in ERP5 interface,
+ the base format file should be updated accordingly.
+
+ Default implementation does nothing. Refer to OOoDocument class
+ for an example of implementation.
+ """
+ pass
+
# Transformation API
security.declareProtected(Permissions.ModifyPortalContent, 'populateContent')
def populateContent(self):
Modified: erp5/trunk/products/ERP5/Document/Image.py
URL: http://svn.erp5.org/erp5/trunk/products/ERP5/Document/Image.py?rev=34146&r1=34145&r2=34146&view=diff
==============================================================================
--- erp5/trunk/products/ERP5/Document/Image.py [utf8] (original)
+++ erp5/trunk/products/ERP5/Document/Image.py [utf8] Fri Mar 26 13:57:37 2010
@@ -53,8 +53,6 @@
from zLOG import LOG, WARNING
from Products.CMFCore.utils import getToolByName
-#Mixin import
-from Products.ERP5.mixin.convertable import ConvertableMixin
default_displays_id_list = ('nano', 'micro', 'thumbnail',
'xsmall', 'small', 'medium',
@@ -62,7 +60,7 @@
default_formats = ['jpg', 'jpeg', 'png', 'gif', 'pnm', 'ppm']
-class Image(File, OFSImage, ConvertableMixin):
+class Image(File, OFSImage):
"""
An Image is a File which contains image data. It supports
various conversions of format, size, resolution through
@@ -324,36 +322,11 @@
return mime_type, result
# Conversion API
- security.declareProtected(Permissions.View, 'getAllowedTargetItemList')
- def getAllowedTargetItemList(self):
- import commands
- import re
- import os
- new_result = []
- filename = os.path.abspath(self.getSourceReference())
- result = commands.getstatusoutput('convert -list format %s ' % self.getSourceReference())
- new_list = re.split('\n',result[1])
- allowed = []
- for new_str in new_list:
- test_str = new_str.lstrip()
- pattern = re.compile(r'''([A-z]+[*]?\s+[A-z]+\s+[rw+-]+\s+[A-z]+\s+[A-z]+\D+[A-z]+)''',re.VERBOSE)
- if re.match(pattern,test_str):
- new_result.append(test_str)
-
- len_new_result = len(new_result)
- for i in range(0,len_new_result):
- allowed.append(list((new_result[i].split()[1].lower(),' '.join(new_result[i].split()[3:]))))
- return [(y, x) for x, y in allowed]
-
security.declareProtected(Permissions.AccessContentsInformation, 'convert')
def convert(self, format, display=None, quality=75, resolution=None, frame=None, **kw):
"""
Implementation of conversion for Image files
"""
- # Raise an error if the format is not permitted
- if not self.isTargetFormatPermitted(format):
- raise Unauthorized("User does not have enough permission to access document"
- " in %s format" % (format or 'original'))
if format in ('text', 'txt', 'html', 'base_html', 'stripped-html'):
try:
return self.getConversion(format=format)
@@ -366,7 +339,7 @@
if (display is not None or resolution is not None or quality != 75 or format != ''\
or frame is not None) and image_size:
kw = dict(display=display, format=format, quality=quality,
- resolution=resolution, frame=frame, image_size=image_size)
+ resolution=resolution, frame=frame, image_size=image_size)
try:
mime, image = self.getConversion(**kw)
except KeyError:
@@ -396,7 +369,7 @@
# display may be set from a cookie (?)
image_size = self.getSizeFromImageDisplay(display)
kw = dict(display=display, format=format, quality=quality,
- resolution=resolution, frame=frame, image_size=image_size)
+ resolution=resolution, frame=frame, image_size=image_size)
_setCacheHeaders(_ViewEmulator().__of__(self), kw)
if (display is not None or resolution is not None or quality != 75 or format != ''\
Modified: erp5/trunk/products/ERP5/Document/PDFDocument.py
URL: http://svn.erp5.org/erp5/trunk/products/ERP5/Document/PDFDocument.py?rev=34146&r1=34145&r2=34146&view=diff
==============================================================================
--- erp5/trunk/products/ERP5/Document/PDFDocument.py [utf8] (original)
+++ erp5/trunk/products/ERP5/Document/PDFDocument.py [utf8] Fri Mar 26 13:57:37 2010
@@ -37,13 +37,7 @@
from Products.ERP5.Document.Document import ConversionError
from Products.ERP5.mixin.cached_convertable import CachedConvertableMixin
-
-from zLOG import LOG, WARNING
-# Mixin import
-from Products.ERP5.mixin.convertable import ConvertableMixin
-
-
-class PDFDocument(Image, ConvertableMixin, CachedConvertableMixin):
+class PDFDocument(Image, CachedConvertableMixin):
"""
PDFDocument is a subclass of Image which is able to
extract text content from a PDF file either as text
@@ -104,11 +98,6 @@
resolution=resolution, frame=frame)
# Conversion API
- security.declareProtected(Permissions.View, 'getAllowedTargetItemList')
- def getAllowedTargetItemList(self):
- return Image.getAllowedTargetItemList(self) + \
- [('Text', 'txt'),('Plain Text','text'), ('HTML Document', 'html')]
-
security.declareProtected(Permissions.AccessContentsInformation, 'convert')
def convert(self, format, **kw):
"""
Modified: erp5/trunk/products/ERP5/Document/TextDocument.py
URL: http://svn.erp5.org/erp5/trunk/products/ERP5/Document/TextDocument.py?rev=34146&r1=34145&r2=34146&view=diff
==============================================================================
--- erp5/trunk/products/ERP5/Document/TextDocument.py [utf8] (original)
+++ erp5/trunk/products/ERP5/Document/TextDocument.py [utf8] Fri Mar 26 13:57:37 2010
@@ -43,12 +43,9 @@
except ImportError:
from Products.ERP5Type.patches.string import Template
-# Mixin import
-from Products.ERP5.mixin.convertable import ConvertableMixin
-
DEFAULT_TEXT_FORMAT = 'text/html'
-class TextDocument(Document, TextContent, ConvertableMixin):
+class TextDocument(Document, TextContent):
"""
A Document contains text which can be formatted using
*Structured Text* or *HTML*. Text can be automatically translated
@@ -149,10 +146,6 @@
if format is None:
# The default is to use ERP5 Forms to render the page
return self.view()
- # Raise an error if the format is not permitted
- if not self.isTargetFormatPermitted(format):
- raise Unauthorized("User does not have enough permission to access document"
- " in %s format" % (format or 'original'))
mime, data = self.convert(format=format)
RESPONSE.setHeader('Content-Length', len(str(data))) # XXX - Not efficient
# if datastream instance
@@ -207,25 +200,12 @@
substitution_method_parameter_dict = {}
return self._substituteTextContent(subject, safe_substitute=safe_substitute,
**substitution_method_parameter_dict)
-
- security.declareProtected(Permissions.View, 'getAllowedTargetItemList')
- def getAllowedTargetItemList(self):
- mime_type = getToolByName(self, 'mimetypes_registry')
- allowed=[]
- for extension in mime_type.extensions:
- allowed.append((mime_type.extensions[extension].name(),extension))
-
- return [(y, x) for x, y in allowed]
-
+
security.declareProtected(Permissions.AccessContentsInformation, 'convert')
def convert(self, format, substitution_method_parameter_dict=None, safe_substitute=True, **kw):
"""
Convert text using portal_transforms or oood
"""
- # Raise an error if the format is not permitted
- if not self.isTargetFormatPermitted(format):
- raise Unauthorized("User does not have enough permission to access document"
- " in %s format" % (format or 'original'))
# Accelerate rendering in Web mode
_setCacheHeaders(_ViewEmulator().__of__(self), {'format' : format})
# Return the raw content
More information about the Erp5-report mailing list