[Erp5-report] r34092 mame - /erp5/trunk/products/ERP5/Document/
nobody at svn.erp5.org
nobody at svn.erp5.org
Thu Mar 25 13:18:50 CET 2010
Author: mame
Date: Thu Mar 25 13:18:44 2010
New Revision: 34092
URL: http://svn.erp5.org?rev=34092&view=rev
Log:
- Remove the Conversion API from Document.py, as it now lives in mixin/convertable
- Add methods that return the allowed target item list for
conversion
Modified:
erp5/trunk/products/ERP5/Document/Document.py
erp5/trunk/products/ERP5/Document/Image.py
erp5/trunk/products/ERP5/Document/PDFDocument.py
erp5/trunk/products/ERP5/Document/TextDocument.py
Modified: erp5/trunk/products/ERP5/Document/Document.py
URL: http://svn.erp5.org/erp5/trunk/products/ERP5/Document/Document.py?rev=34092&r1=34091&r2=34092&view=diff
==============================================================================
--- erp5/trunk/products/ERP5/Document/Document.py [utf8] (original)
+++ erp5/trunk/products/ERP5/Document/Document.py [utf8] Thu Mar 25 13:18:44 2010
@@ -56,6 +56,12 @@
# Mixin Import
from Products.ERP5.mixin.cached_convertable import CachedConvertableMixin
+from Products.ERP5.mixin.convertable import ConvertableMixin
+from Products.ERP5.mixin.text_convertable import TextConvertableMixin
+from Products.ERP5.mixin.base_convertable import BaseConvertableMixin
+from Products.ERP5.mixin.html_convertable import HTMLConvertableMixin
+from Products.ERP5.mixin.metadata_discoverable import MetadataDiscoverableMixin
+from Products.ERP5.mixin.document import DocumentMixin
_MARKER = []
VALID_ORDER_KEY_LIST = ('user_login', 'content', 'file_name', 'input')
@@ -306,7 +312,8 @@
return method()
-class Document(PermanentURLMixIn, XMLObject, UrlMixIn, CachedConvertableMixin, SnapshotMixin, UpdateMixIn):
+class Document(PermanentURLMixIn, XMLObject, UrlMixIn, ConvertableMixin, TextConvertableMixin,HTMLConvertableMixin,
+ DocumentMixin, BaseConvertableMixin, MetadataDiscoverableMixin, CachedConvertableMixin, SnapshotMixin, UpdateMixIn):
"""Document is an abstract class with all methods related to document
management in ERP5. This includes searchable text, explicit relations,
implicit relations, metadata, versions, languages, etc.
@@ -1077,267 +1084,6 @@
method = self._getTypeBasedMethod('finishIngestion', fallback_script_id='Document_finishIngestion')
return method()
- # Conversion methods
- security.declareProtected(Permissions.AccessContentsInformation, 'convert')
- def convert(self, format, **kw):
- """
- Main content conversion function, returns result which should
- be returned and stored in cache.
- format - the format specied in the form of an extension
- string (ex. jpeg, html, text, txt, etc.)
- **kw can be various things - e.g. resolution
-
- Default implementation returns an empty string (html, text)
- or raises an error.
-
- TODO:
- - implement guards API so that conversion to certain
- formats require certain permission
- """
- if format == 'html':
- return 'text/html', '' # XXX - Why ?
- if format in ('text', 'txt'):
- return 'text/plain', '' # XXX - Why ?
- raise NotImplementedError
-
- security.declareProtected(Permissions.View, 'asSubjectText')
- def asSubjectText(self, **kw):
- """
- Converts the subject of the document to a textual representation.
- """
- subject = self.getSubject()
- if not subject:
- # XXX not sure if this fallback is a good idea.
- subject = self.getTitle()
- if subject is None:
- subject = ''
- return str(subject)
-
- security.declareProtected(Permissions.View, 'asText')
- def asText(self, **kw):
- """
- Converts the content of the document to a textual representation.
- """
- kw['format'] = 'txt'
- mime, data = self.convert(**kw)
- return str(data)
-
- security.declareProtected(Permissions.View, 'asEntireHTML')
- def asEntireHTML(self, **kw):
- """
- Returns a complete HTML representation of the document
- (with body tags, etc.). Adds if necessary a base
- tag so that the document can be displayed in an iframe
- or standalone.
-
- Actual conversion is delegated to _asHTML
- """
- html = self._asHTML(**kw)
- if self.getUrlString():
- # If a URL is defined, add the base tag
- # if base is defined yet.
- html = str(html)
- if not html.find('<base') >= 0:
- base = '<base href="%s">' % self.getContentBaseURL()
- html = html.replace('<head>', '<head>%s' % base)
- self.setConversion(html, mime='text/html', format='base-html')
- return html
-
- security.declarePrivate('_asHTML')
- def _asHTML(self, **kw):
- """
- A private method which converts to HTML. This method
- is the one to override in subclasses.
- """
- if not self.hasBaseData():
- raise ConversionError('This document has not been processed yet.')
- try:
- # FIXME: no substitution may occur in this case.
- mime, data = self.getConversion(format='base-html')
- return data
- except KeyError:
- kw['format'] = 'html'
- mime, html = self.convert(**kw)
- return html
-
- security.declareProtected(Permissions.View, 'asStrippedHTML')
- def asStrippedHTML(self, **kw):
- """
- Returns a stripped HTML representation of the document
- (without html and body tags, etc.) which can be used to inline
- a preview of the document.
- """
- if not self.hasBaseData():
- return ''
- try:
- # FIXME: no substitution may occur in this case.
- mime, data = self.getConversion(format='stripped-html')
- return data
- except KeyError:
- kw['format'] = 'html'
- mime, html = self.convert(**kw)
- return self._stripHTML(str(html))
-
- def _guessEncoding(self, string):
- """
- Try to guess the encoding for this string.
- Returns None if no encoding can be guessed.
- """
- try:
- import chardet
- except ImportError:
- return None
- return chardet.detect(string).get('encoding', None)
-
- def _stripHTML(self, html, charset=None):
- """
- A private method which can be reused by subclasses
- to strip HTML content
- """
- body_list = re.findall(self.body_parser, str(html))
- if len(body_list):
- stripped_html = body_list[0]
- else:
- stripped_html = html
- # find charset and convert to utf-8
- charset_list = self.charset_parser.findall(str(html)) # XXX - Not efficient if this
- # is datastream instance but hard to do better
- if charset and not charset_list:
- # Use optional parameter is we can not find encoding in HTML
- charset_list = [charset]
- if charset_list and charset_list[0] not in ('utf-8', 'UTF-8'):
- try:
- stripped_html = unicode(str(stripped_html),
- charset_list[0]).encode('utf-8')
- except (UnicodeDecodeError, LookupError):
- return str(stripped_html)
- return stripped_html
-
- def _safeHTML(self, html, format='text/x-html-safe', charset=None):
- """
- A private method to strip HTML content in safe mode,
- w/o emmbed javascript, forms and any external plugins imports.
- This should be used when we do not trust the user (Anonymous)
- who push data into database.
- - html: content to strip
- - format: destination format
- - charset: charset used to encode string. Take precedence
- on charset values found in html string
- """
- portal = self.getPortalObject()
- if charset is None:
- # find charset
- charset_list = self.charset_parser.findall(html)
- if charset_list:
- charset = charset_list[0]
- if charset and charset not in ('utf-8', 'UTF-8'):
- try:
- safe_html_string = html.decode(charset).encode('utf-8')
- except (UnicodeDecodeError, LookupError):
- pass
- else:
- charset = 'utf-8' # Override charset if convertion succeeds
- transform_tool = getToolByName(portal, 'portal_transforms')
- safe_html_string = transform_tool.convertToData(format, html,
- encoding=charset)
- return safe_html_string
-
- security.declareProtected(Permissions.AccessContentsInformation, 'getContentInformation')
- def getContentInformation(self):
- """
- Returns the content information from the HTML conversion.
- The default implementation tries to build a dictionnary
- from the HTML conversion of the document and extract
- the document title.
- """
- result = {}
- html = self.asEntireHTML()
- if not html: return result
- title_list = re.findall(self.title_parser, str(html))
- if title_list:
- result['title'] = title_list[0]
- return result
-
- # Base format support
- security.declareProtected(Permissions.ModifyPortalContent, 'convertToBaseFormat')
- def convertToBaseFormat(self, **kw):
- """
- Converts the content of the document to a base format
- which is later used for all conversions. This method
- is common to all kinds of documents and handles
- exceptions in a unified way.
-
- Implementation is delegated to _convertToBaseFormat which
- must be overloaded by subclasses of Document which
- need a base format.
-
- convertToBaseFormat is called upon file upload, document
- ingestion by the processing_status_workflow.
-
- NOTE: the data of the base format conversion should be stored
- using the base_data property. Refer to Document.py propertysheet.
- Use accessors (getBaseData, setBaseData, hasBaseData, etc.)
- """
- if getattr(self, 'hasData', None) is not None and not self.hasData():
- # Empty document cannot be converted
- return
- try:
- message = self._convertToBaseFormat() # Call implemetation method
- self.clearConversionCache() # Conversion cache is now invalid
- if message is None:
- # XXX Need to translate.
- message = 'Converted to %s.' % self.getBaseContentType()
- self.convertFile(comment=message) # Invoke workflow method
- except NotImplementedError:
- message = ''
- return message
-
- def _convertToBaseFormat(self):
- """
- """
- raise NotImplementedError
-
- security.declareProtected(Permissions.AccessContentsInformation,
- 'isSupportBaseDataConversion')
- def isSupportBaseDataConversion(self):
- """
- """
- return False
-
- def convertFile(self, **kw): # XXX - It it really useful to explicitly define ?
- """
- Workflow transition invoked when conversion occurs.
- """
- convertFile = WorkflowMethod(convertFile)
-
- security.declareProtected(Permissions.AccessContentsInformation,
- 'getMetadataMappingDict')
- def getMetadataMappingDict(self):
- """
- Return a dict of metadata mapping used to update base metadata of the
- document
- """
- try:
- method = self._getTypeBasedMethod('getMetadataMappingDict')
- except KeyError, AttributeError:
- method = None
- if method is not None:
- return method()
- else:
- return {}
-
- security.declareProtected(Permissions.ModifyPortalContent, 'updateBaseMetadata')
- def updateBaseMetadata(self, **kw):
- """
- Update the base format data with the latest properties entered
- by the user. For example, if title is changed in ERP5 interface,
- the base format file should be updated accordingly.
-
- Default implementation does nothing. Refer to OOoDocument class
- for an example of implementation.
- """
- pass
-
# Transformation API
security.declareProtected(Permissions.ModifyPortalContent, 'populateContent')
def populateContent(self):
Modified: erp5/trunk/products/ERP5/Document/Image.py
URL: http://svn.erp5.org/erp5/trunk/products/ERP5/Document/Image.py?rev=34092&r1=34091&r2=34092&view=diff
==============================================================================
--- erp5/trunk/products/ERP5/Document/Image.py [utf8] (original)
+++ erp5/trunk/products/ERP5/Document/Image.py [utf8] Thu Mar 25 13:18:44 2010
@@ -53,6 +53,8 @@
from zLOG import LOG, WARNING
from Products.CMFCore.utils import getToolByName
+#Mixin import
+from Products.ERP5.mixin.convertable import ConvertableMixin
default_displays_id_list = ('nano', 'micro', 'thumbnail',
'xsmall', 'small', 'medium',
@@ -60,7 +62,7 @@
default_formats = ['jpg', 'jpeg', 'png', 'gif', 'pnm', 'ppm']
-class Image(File, OFSImage):
+class Image(File, OFSImage, ConvertableMixin):
"""
An Image is a File which contains image data. It supports
various conversions of format, size, resolution through
@@ -322,11 +324,36 @@
return mime_type, result
# Conversion API
+ security.declareProtected(Permissions.View, 'getAllowedTargetItemList')
+ def getAllowedTargetItemList(self):
+ import commands
+ import re
+ import os
+ new_result = []
+ filename = os.path.abspath(self.getSourceReference())
+ result = commands.getstatusoutput('convert -list format %s ' % self.getSourceReference())
+ new_list = re.split('\n',result[1])
+ allowed = []
+ for new_str in new_list:
+ test_str = new_str.lstrip()
+ pattern = re.compile(r'''([A-z]+[*]?\s+[A-z]+\s+[rw+-]+\s+[A-z]+\s+[A-z]+\D+[A-z]+)''',re.VERBOSE)
+ if re.match(pattern,test_str):
+ new_result.append(test_str)
+
+ len_new_result = len(new_result)
+ for i in range(0,len_new_result):
+ allowed.append(list((new_result[i].split()[1].lower(),' '.join(new_result[i].split()[3:]))))
+ return [(y, x) for x, y in allowed]
+
security.declareProtected(Permissions.AccessContentsInformation, 'convert')
def convert(self, format, display=None, quality=75, resolution=None, frame=None, **kw):
"""
Implementation of conversion for Image files
"""
+ # Raise an error if the format is not permitted
+ if not self.isTargetFormatPermitted(format):
+ raise Unauthorized("User does not have enough permission to access document"
+ " in %s format" % (format or 'original'))
if format in ('text', 'txt', 'html', 'base_html', 'stripped-html'):
try:
return self.getConversion(format=format)
@@ -339,7 +366,7 @@
if (display is not None or resolution is not None or quality != 75 or format != ''\
or frame is not None) and image_size:
kw = dict(display=display, format=format, quality=quality,
- resolution=resolution, frame=frame, image_size=image_size)
+ resolution=resolution, frame=frame, image_size=image_size)
try:
mime, image = self.getConversion(**kw)
except KeyError:
@@ -369,7 +396,7 @@
# display may be set from a cookie (?)
image_size = self.getSizeFromImageDisplay(display)
kw = dict(display=display, format=format, quality=quality,
- resolution=resolution, frame=frame, image_size=image_size)
+ resolution=resolution, frame=frame, image_size=image_size)
_setCacheHeaders(_ViewEmulator().__of__(self), kw)
if (display is not None or resolution is not None or quality != 75 or format != ''\
Modified: erp5/trunk/products/ERP5/Document/PDFDocument.py
URL: http://svn.erp5.org/erp5/trunk/products/ERP5/Document/PDFDocument.py?rev=34092&r1=34091&r2=34092&view=diff
==============================================================================
--- erp5/trunk/products/ERP5/Document/PDFDocument.py [utf8] (original)
+++ erp5/trunk/products/ERP5/Document/PDFDocument.py [utf8] Thu Mar 25 13:18:44 2010
@@ -37,7 +37,13 @@
from Products.ERP5.Document.Document import ConversionError
from Products.ERP5.mixin.cached_convertable import CachedConvertableMixin
-class PDFDocument(Image, CachedConvertableMixin):
+
+from zLOG import LOG, WARNING
+# Mixin import
+from Products.ERP5.mixin.convertable import ConvertableMixin
+
+
+class PDFDocument(Image, ConvertableMixin, CachedConvertableMixin):
"""
PDFDocument is a subclass of Image which is able to
extract text content from a PDF file either as text
@@ -98,6 +104,11 @@
resolution=resolution, frame=frame)
# Conversion API
+ security.declareProtected(Permissions.View, 'getAllowedTargetItemList')
+ def getAllowedTargetItemList(self):
+ return Image.getAllowedTargetItemList(self) + \
+ [('Text', 'txt'),('Plain Text','text'), ('HTML Document', 'html')]
+
security.declareProtected(Permissions.AccessContentsInformation, 'convert')
def convert(self, format, **kw):
"""
Modified: erp5/trunk/products/ERP5/Document/TextDocument.py
URL: http://svn.erp5.org/erp5/trunk/products/ERP5/Document/TextDocument.py?rev=34092&r1=34091&r2=34092&view=diff
==============================================================================
--- erp5/trunk/products/ERP5/Document/TextDocument.py [utf8] (original)
+++ erp5/trunk/products/ERP5/Document/TextDocument.py [utf8] Thu Mar 25 13:18:44 2010
@@ -43,9 +43,12 @@
except ImportError:
from Products.ERP5Type.patches.string import Template
+# Mixin import
+from Products.ERP5.mixin.convertable import ConvertableMixin
+
DEFAULT_TEXT_FORMAT = 'text/html'
-class TextDocument(Document, TextContent):
+class TextDocument(Document, TextContent, ConvertableMixin):
"""
A Document contains text which can be formatted using
*Structured Text* or *HTML*. Text can be automatically translated
@@ -146,6 +149,10 @@
if format is None:
# The default is to use ERP5 Forms to render the page
return self.view()
+ # Raise an error if the format is not permitted
+ if not self.isTargetFormatPermitted(format):
+ raise Unauthorized("User does not have enough permission to access document"
+ " in %s format" % (format or 'original'))
mime, data = self.convert(format=format)
RESPONSE.setHeader('Content-Length', len(str(data))) # XXX - Not efficient
# if datastream instance
@@ -200,12 +207,25 @@
substitution_method_parameter_dict = {}
return self._substituteTextContent(subject, safe_substitute=safe_substitute,
**substitution_method_parameter_dict)
-
+
+ security.declareProtected(Permissions.View, 'getAllowedTargetItemList')
+ def getAllowedTargetItemList(self):
+ mime_type = getToolByName(self, 'mimetypes_registry')
+ allowed=[]
+ for extension in mime_type.extensions:
+ allowed.append((mime_type.extensions[extension].name(),extension))
+
+ return [(y, x) for x, y in allowed]
+
security.declareProtected(Permissions.AccessContentsInformation, 'convert')
def convert(self, format, substitution_method_parameter_dict=None, safe_substitute=True, **kw):
"""
Convert text using portal_transforms or oood
"""
+ # Raise an error if the format is not permitted
+ if not self.isTargetFormatPermitted(format):
+ raise Unauthorized("User does not have enough permission to access document"
+ " in %s format" % (format or 'original'))
# Accelerate rendering in Web mode
_setCacheHeaders(_ViewEmulator().__of__(self), {'format' : format})
# Return the raw content
More information about the Erp5-report
mailing list