[Erp5-report] r13637 - /erp5/trunk/products/ERP5OOo/Document/OOoDocument.py
nobody at svn.erp5.org
nobody at svn.erp5.org
Mon Mar 26 13:56:41 CEST 2007
Author: jp
Date: Mon Mar 26 13:56:40 2007
New Revision: 13637
URL: http://svn.erp5.org?rev=13637&view=rev
Log:
Code review and refactoring based on Document API.
Modified:
erp5/trunk/products/ERP5OOo/Document/OOoDocument.py
Modified: erp5/trunk/products/ERP5OOo/Document/OOoDocument.py
URL: http://svn.erp5.org/erp5/trunk/products/ERP5OOo/Document/OOoDocument.py?rev=13637&r1=13636&r2=13637&view=diff
==============================================================================
--- erp5/trunk/products/ERP5OOo/Document/OOoDocument.py (original)
+++ erp5/trunk/products/ERP5OOo/Document/OOoDocument.py Mon Mar 26 13:56:40 2007
@@ -37,14 +37,17 @@
from AccessControl import ClassSecurityInfo
from OFS.Image import Pdata
from Products.CMFCore.utils import getToolByName
+from Products.CMFCore.utils import _setCacheHeaders
from Products.ERP5Type import Permissions, PropertySheet, Constraint, Interface
from Products.ERP5Type.Message import Message
from Products.ERP5Type.Cache import CachingMethod
from Products.ERP5Type.XMLObject import XMLObject
-from Products.ERP5.Document.File import File, stripHtml
+from Products.ERP5.Document.File import File
from Products.ERP5.Document.Document import ConversionCacheMixin, ConversionError
from Products.CMFCore.utils import getToolByName
from Products.DCWorkflow.DCWorkflow import ValidationFailed
+
+from zLOG import LOG
enc=base64.encodestring
dec=base64.decodestring
@@ -95,9 +98,9 @@
isPortalContent = 1
isRADContent = 1
- # Global variables
- snapshot = None
- oo_data = None
+ searchable_property_list = ('asTextContent', 'title', 'description', 'id', 'reference',
+ 'version', 'short_title',
+ 'subject', 'source_reference', 'source_project_title',)
# Declarative security
security = ClassSecurityInfo()
@@ -105,94 +108,73 @@
# Default Properties
property_sheets = ( PropertySheet.Base
+ , PropertySheet.XMLObject
+ , PropertySheet.Reference
, PropertySheet.CategoryCore
, PropertySheet.DublinCore
, PropertySheet.Version
- , PropertySheet.Reference
- , PropertySheet.TextDocument
, PropertySheet.Document
+ , PropertySheet.Snapshot
+ , PropertySheet.ExternalDocument
+ , PropertySheet.Url
+ , PropertySheet.Periodicity
+ , PropertySheet.Snapshot
)
- _properties = (
- # XXX-JPS mime_type should be guessed is possible for the stored file
- # In any case, it should be named differently because the name
- # is too unclear. Moreover, the usefulness of this property is
- # doubtful besides download of converted file. It would be acceptable
- # for me that this property is stored as an internal property
- # or, better, in the conversion workflow attributes.
- #
- # Properties are meant for "orginal document" information,
- # not for calculated attributes.
- { 'id' : 'mime_type',
- 'description' : 'mime type of the converted OOo file stored',
- 'type' : 'string',
- 'mode' : ''},
- )
-
- base_format = 'Open Document Format'
-
- # regexps for stripping xml from docs
+ # regular expressions for stripping xml from ODF documents
rx_strip = re.compile('<[^>]*?>', re.DOTALL|re.MULTILINE)
rx_compr = re.compile('\s+')
- searchable_property_list = File.searchable_property_list + ('text_content', ) # XXX - good idea - should'n this be made more general ?
-
- def index_html(self, REQUEST, RESPONSE, format=None, force=0):
- """
- Standard function - gets converted version (from cache or new)
- sets headers and returns converted data.
-
- Format can be only one string (because we are OOoDocument and do not
- accept more formatting arguments).
-
- Force can force conversion.
- """
- self.log(format, force)
- if (not self.hasOOFile()) or force:
+ def _setFile(self, data, precondition=None):
+ File._setFile(self, data, precondition=precondition)
+ if self.hasBaseData():
+ # This is a hack - XXX - new accessor needed to delete properties
+ delattr(self, 'base_data')
+
+ security.declareProtected(Permissions.View, 'index_html')
+ def index_html(self, REQUEST, RESPONSE, format=None, **kw):
+ """
+ Default renderer with conversion support. Format is
+ a string. The list of available formats can be obtained
+ by calling getTargetFormatItemList.
+ """
+ # Accelerate rendering in Web mode
+ _setCacheHeaders(self, {'format' : format})
+ # Return the original file by default
+ if format is None:
+ return File.index_html(self, REQUEST, RESPONSE)
+ # Make sure file is converted to base format
+ if not self.hasBaseData():
self.convertToBaseFormat()
- if format is None:
- result = self.getOOFile()
- mime = self.getMimeType()
- self.log(mime)
- else:
- try:
- mime, result = self.convert(format=format, force=force)
- except ConversionError, e:
- raise # should we do something here?
- #RESPONSE.setHeader('Last-Modified', rfc1123_date(self._p_mtime)) XXX to be implemented
+ # Else try to convert the document and return it
+ result = self.convert(format=format)
+ mime = getToolByName(self, 'mimetypes_registry').lookupExtension('name.%s' % format) # XXX Sad we can not get the mimetype from the conversion cache
+ RESPONSE.setHeader('Content-Length', len(result))
RESPONSE.setHeader('Content-Type', mime)
- #RESPONSE.setHeader('Content-Length', self.size) XXX to be implemented
RESPONSE.setHeader('Accept-Ranges', 'bytes')
- # XXX here we should find out extension for this mime type and append to filename
- RESPONSE.setBase(None)
return result
+ # Format conversion implementation
def _getServerCoordinate(self):
"""
- Returns OOo conversion server data from
- preferences
- """
- pref = getToolByName(self, 'portal_preferences')
- adr = pref.getPreferredOoodocServerAddress()
- nr = pref.getPreferredOoodocServerPortNumber()
- if adr is None or nr is None:
- raise ConversionError('You should set conversion server coordinates in preferences')
- return adr, nr
+ Returns the oood conversion server coordinates
+ as defined in preferences.
+ """
+ preference_tool = getToolByName(self, 'portal_preferences')
+ address = preference_tool.getPreferredOoodocServerAddress()
+ port = preference_tool.getPreferredOoodocServerPortNumber()
+ if not address or not port:
+ raise ConversionError('Can not proceed with conversion: '
+ 'conversion server host and port is not defined in preferences')
+ return address, port
def _mkProxy(self):
- sp=xmlrpclib.ServerProxy('http://%s:%d' % self._getServerCoordinate(), allow_none=True)
- return sp
-
- def returnMessage(self, msg, code=0):
- """
- code > 0 indicates a problem
- we distinguish data return from message by checking if it is a tuple
-
- XXX - This is an error. UI translation is the responsability
- of skins (scripts of page templates).
- """
- m = Message(domain='ui', message=msg)
- return (code, m)
+ """
+ Create an XML-RPC proxy to access the conversion server.
+ """
+ server_proxy = xmlrpclib.ServerProxy('http://%s:%d' % self._getServerCoordinate(),
+ allow_none=True)
+ return server_proxy
security.declareProtected(Permissions.AccessContentsInformation,'getTargetFormatList')
def getTargetFormatItemList(self):
@@ -200,314 +182,184 @@
Returns a list of acceptable formats for conversion
in the form of tuples (for listfield in ERP5Form)
- XXX - to be implemented better (with extended API to conversion server)
- XXX - what does this mean? I don't understand
- """
- # Caching method implementation
+ NOTE: it is the responsibility of the conversion server
+ to provide an extensive list of conversion formats.
+ """
def cached_getTargetFormatItemList(content_type):
- sp=self._mkProxy()
- allowed=sp.getAllowedTargets(content_type)
- return [[y,x] for x,y in allowed] # have to reverse tuple order
-
+ server_proxy = self._mkProxy()
+ allowed = server_proxy.getAllowedTargets(content_type) # oood API needs naming convention update
+ return [(y, x) for x, y in allowed] # tuple order is reversed to be compatible with ERP5 Form
+
+ # Cache valid format list
cached_getTargetFormatItemList = CachingMethod(cached_getTargetFormatItemList,
id = "OOoDocument_getTargetFormatItemList",
- cache_factory='erp5_ui_short')
- return cached_getTargetFormatItemList(self.getContentType())
+ cache_factory='erp5_ui_medium')
+
+ return cached_getTargetFormatItemList(self.getBaseContentType())
+
+ security.declareProtected(Permissions.AccessContentsInformation, 'getTargetFormatTitleList')
+ def getTargetFormatTitleList(self):
+ """
+ Returns a list of acceptable formats for conversion
+ """
+ return map(lambda x: x[0], self.getTargetFormatItemList())
security.declareProtected(Permissions.AccessContentsInformation, 'getTargetFormatList')
def getTargetFormatList(self):
"""
Returns a list of acceptable formats for conversion
"""
- return map(lambda x: x[0], self.getTargetFormatItemList())
-
- security.declareProtected(Permissions.ModifyPortalContent, 'reset')
- def reset(self):
- """
- make the object a non-converted one, as if it was brand new
-
- XXX-JPS more explicit name needed
- """
- self.clearConversionCache()
- self.oo_data = None
- m = self.returnMessage('new')
- msg = str(m[1])
- portal_workflow = getToolByName(self, 'portal_workflow')
- portal_workflow.doActionFor(self, 'process', comment=msg)
-
- security.declareProtected(Permissions.ModifyPortalContent,'isAllowed')
- def isAllowed(self, format):
+ return map(lambda x: x[1], self.getTargetFormatItemList())
+
+ security.declareProtected(Permissions.ModifyPortalContent,'isTargetFormatAllowed')
+ def isTargetFormatAllowed(self, format):
"""
Checks if the current document can be converted
- into the specified format.
-
- XXX-JPS more explicit name needed
- """
- allowed = self.getTargetFormatItemList()
- if allowed is None: return False
- return (format in [x[1] for x in allowed])
-
- security.declareProtected(Permissions.ModifyPortalContent,'editMetadata')
- def editMetadata(self, newmeta):
+ into the specified target format.
+ """
+ return format in self.getTargetFormatList()
+
+ security.declarePrivate('_convert')
+ def _convert(self, format):
+ """
+ Communicates with server to convert a file
+ """
+ if format == 'text-content':
+ # Extract text from the ODF file
+ cs = cStringIO.StringIO()
+ cs.write(self._unpackData(self.oo_data))
+ z = zipfile.ZipFile(cs)
+ s = z.read('content.xml')
+ s = self.rx_strip.sub(" ", s) # strip xml
+ s = self.rx_compr.sub(" ", s) # compress multiple spaces
+ cs.close()
+ z.close()
+ return 'text/text', s
+ server_proxy = self._mkProxy()
+ kw = server_proxy.run_generate(self.getId(),
+ enc(self._unpackData(self.getBaseData())),
+ None, format)
+ return kw['mime'], Pdata(dec(kw['data']))
+
+ # Conversion API
+ security.declareProtected(Permissions.View, 'convert')
+ def convert(self, format, **kw):
+ """
+ Get file in a given format.
+ Runs makeFile to make sure we have the requested version cached,
+ then returns from cache.
+ """
+ # Make sure we can support html and pdf by default
+ is_html = 0
+ if format == 'pdf':
+ format_list = [x for x in self.getTargetFormatList() if x.endswith('pdf')]
+ format = format_list[0]
+ elif format == 'html':
+ format_list = [x for x in self.getTargetFormatList() if x.startswith('html')]
+ format = format_list[0]
+ is_html = 1
+ # Raise an error if the format is not supported
+ if not self.isTargetFormatAllowed(format):
+ raise ConversionError, 'Target format %s is not supported' % format
+ # Check if we have already a base conversion
+ if not self.hasBaseData():
+ self.convertToBaseFormat()
+ # Return converted file
+ if not self.hasConversion(format=format):
+ # Do real conversion
+ mime, data = self._convert(format)
+ if is_html:
+ # Extra processing required since
+ # we receive a zip file
+ cs = cStringIO.StringIO()
+ cs.write(self._unpackData(data))
+ z = zipfile.ZipFile(cs)
+ for f in z.infolist():
+ fn = f.filename
+ if fn.endswith('html'):
+ data = z.read(fn)
+ break
+ mime = 'text/html'
+ self.populateContent(zip_file=z)
+ z.close()
+ cs.close()
+ self.setConversion(data, mime, format=format)
+ return self.getConversion(format=format)
+
+ security.declareProtected(Permissions.View, 'asTextContent')
+ def asTextContent(self):
+ """
+ Extract plain text from ooo docs by stripping the XML file.
+ This is the simplest way, the most universal and it is compatible
+ with all formats.
+ """
+ return self.convert(format='text-content')
+
+ security.declareProtected(Permissions.ModifyPortalContent, 'populateContent')
+ def populateContent(self, zip_file=None):
+ """
+ Extract content from the ODF zip file and populate the document.
+ Optional parameter zip_file prevents from converting content twice.
+ """
+ if zip_file is None:
+ format_list = [x for x in self.getTargetFormatList() if x.startswith('html')]
+ format = format_list[0]
+ mime, data = self._convert(format)
+ archive_file = cStringIO.StringIO()
+ archive_file.write(self._unpackData(data))
+ zip_file = zipfile.ZipFile(archive_file)
+ must_close = 1
+ else:
+ must_close = 0
+ for f in zip_file.infolist():
+ file_name = f.filename
+ if not file_name.endswith('html'):
+ document = self.get(file_name, None)
+ if document is not None:
+ self.manage_delObjects([file_name])
+ self.portal_contributions.newContent(id=file_name, container=self,
+ file_name=file_name,
+ data=zip_file.read(file_name))
+ if must_close:
+ zip_file.close()
+ archive_file.close()
+
+ # Base format implementation
+ security.declarePrivate('_convertToBaseFormat')
+ def _convertToBaseFormat(self):
+ """
+ Converts the original document into ODF
+ by invoking the conversion server. Store the result
+ on the object. Update metadata information.
+ """
+ # LOG('in _convertToBaseFormat', 0, self.getRelativeUrl())
+ server_proxy = self._mkProxy()
+ kw = server_proxy.run_convert(self.getSourceReference() or self.getId(),
+ enc(self._unpackData(self.getData())))
+ self._setBaseData(dec(kw['data']))
+ metadata = kw['meta']
+ self._base_metadata = metadata
+ if metadata.get('MIMEType', None):
+ self._setBaseContentType(metadata['MIMEType'])
+
+ security.declareProtected(Permissions.AccessContentsInformation, 'getContentInformation')
+ def getContentInformation(self):
+ """
+ Returns the metadata extracted by the conversion
+ server.
+ """
+ # LOG('in getContentInformation', 0, self.getRelativeUrl())
+ return self._base_metadata
+
+ security.declareProtected(Permissions.ModifyPortalContent, 'updateBaseMetadata')
+ def updateBaseMetadata(self, **kw):
"""
Updates metadata information in the converted OOo document
based on the values provided by the user. This is implemented
through the invocation of the conversion server.
"""
- sp = self._mkProxy()
- kw = sp.run_setmetadata(self.getTitle(), enc(self._unpackData(self.oo_data)), newmeta)
- self.oo_data = Pdata(dec(kw['data']))
- self._setMetaData(kw['meta'])
- return True # XXX why return ? - why not?
-
- security.declarePrivate('_convertToBase')
- def _convertToBaseFormat(self):
- """
- Converts the original document into ODF
- by invoking the conversion server. Store the result
- on the object. Update metadata information.
- """
- sp = self._mkProxy()
- kw = sp.run_convert(self.getSourceReference(), enc(self._unpackData(self.data)))
- self.oo_data = Pdata(dec(kw['data']))
- # now we get text content
- text_data = self.extractTextContent()
- self.setTextContent(text_data)
- self._setMetaData(kw['meta'])
-
- security.declareProtected(Permissions.View,'extractTextContent')
- def extractTextContent(self):
- """
- extract plain text from ooo docs - the simplest way possible, works for all ODF formats
- """
- cs = cStringIO.StringIO()
- cs.write(self._unpackData(self.oo_data))
- z = zipfile.ZipFile(cs)
- s = z.read('content.xml')
- s = self.rx_strip.sub(" ", s) # strip xml
- s = self.rx_compr.sub(" ", s) # compress multiple spaces
- cs.close()
- z.close()
- return s
-
-
- security.declarePrivate('_setMetaData')
- def _setMetaData(self,meta):
- """
- Sets metadata properties of the ERP5 object.
-
- XXX - please double check that some properties
- are not already defined in the Document class (which is used
- for Web Page in ERP5)
-
- XXX - it would be quite nice if the metadata structure
- could also support user fields in OOo
- (user fields are so useful actually...)
- XXX - I think it does (BG)
- """
- for k,v in meta.items():
- meta[k] = v.encode('utf-8')
- self.setTitle(meta.get('title', ''))
- self.setSubject(meta.get('keywords', '').split())
- self.setDescription(meta.get('description', ''))
- #self.setLanguage(meta.get('language',''))
- if meta.get('MIMEType', False):
- self.setContentType(meta['MIMEType'])
- #self.setReference(meta.get('reference',''))
-
- security.declareProtected(Permissions.View, 'getOOFile')
- def getOOFile(self):
- """
- Return the converted OOo document.
-
- XXX - use a propertysheet for this instead. We have a type
- called data in property sheet. Look at File implementation
- XXX - doesn't seem to be there...
- """
- data = self.oo_data
- return data
-
- security.declareProtected(Permissions.View, 'hasOOFile')
- def hasOOFile(self):
- """
- Checks whether we have an OOo converted file
- """
- _marker = []
- if getattr(self, 'oo_data',_marker) is not _marker: # XXX - use propertysheet accessors
- return getattr(self, 'oo_data') is not None
- return False
-
- security.declareProtected(Permissions.View, 'hasSnapshot')
- def hasSnapshot(self):
- """
- Checks whether we have a snapshot.
- """
- _marker = []
- if getattr(self, 'snapshot', _marker) is not _marker: # XXX - use propertysheet accessors
- return getattr(self, 'snapshot') is not None
- return False
-
- security.declareProtected(Permissions.ModifyPortalContent,'createSnapshot')
- def createSnapshot(self,REQUEST=None):
- """
- Create a PDF snapshot
-
- XXX - we should not create a snapshot if some error happened at conversion
- is this checked ?
- XXX - error at conversion raises an exception, so it should be ok
- """
- if self.hasSnapshot():
- if REQUEST is not None:
- return self.returnMessage('already has a snapshot', 1)
- raise ConversionError('already has a snapshot')
- # making snapshot
- # we have to figure out which pdf format to use
- tgts = [x[1] for x in self.getTargetFormatItemList() if x[1].endswith('pdf')]
- if len(tgts) > 1:
- return self.returnMessage('multiple pdf formats found - this shouldnt happen', 2)
- if len(tgts)==0:
- return self.returnMessage('no pdf format found',1)
- fmt = tgts[0]
- self.makeFile(fmt)
- self.snapshot = Pdata(self._unpackData(self.getConversion(format = fmt)[1]))
- return self.returnMessage('snapshot created')
-
- security.declareProtected(Permissions.View,'getSnapshot')
- def getSnapshot(self, REQUEST=None):
- """
- Returns the snapshot.
- """
- if not self.hasSnapshot():
- self.createSnapshot()
- return self.snapshot
-
- security.declareProtected(Permissions.ManagePortal,'deleteSnapshot')
- def deleteSnapshot(self):
- """
- Deletes the snapshot - in theory this should never be done
- """
- try:
- del(self.snapshot)
- except AttributeError:
- pass
-
- def getHtmlRepresentation(self):
- """
- get simplified html version to display
- """
- # we have to figure out which html format to use
- tgts = [x[1] for x in self.getTargetFormatItemList() if x[1].startswith('html')]
- if len(tgts) == 0:
- return 'no html representation available'
- fmt = tgts[0]
- fmt, data = self.convert(fmt)
- cs = cStringIO.StringIO()
- cs.write(self._unpackData(data))
- z = zipfile.ZipFile(cs)
- h = 'could not extract anything'
- for f in z.infolist():
- fn = f.filename
- if fn.endswith('html'):
- h = z.read(fn)
- break
- z.close()
- cs.close()
- return stripHtml(h)
-
- security.declareProtected(Permissions.View, 'convert')
- def convert(self, format, REQUEST=None, force=0):
- """
- Get file in a given format.
- Runs makeFile to make sure we have the requested version cached,
- then returns from cache.
- """
- # first check if we have base
- if not self.hasOOFile():
- self.convertToBaseFormat()
- if not self.isAllowed(format):
- if REQUEST is not None:
- return self.returnMessage('can not convert to ' + format + ' for some reason',1)
- else:
- raise ConversionError, 'can not convert to ' + format + ' for some reason'
- try:
- # make if necessary, return from cache
- self.makeFile(format, force)
- return self.getConversion(format = format)
- except ConversionError,e:
- if REQUEST is not None:
- return self.returnMessage(str(e), 2)
- raise
-
- security.declareProtected(Permissions.View, 'isFileChanged')
- def isFileChanged(self, format):
- """
- Checks whether the file was converted (or uploaded) after last generation of
- the target format
- """
- return not self.hasConversion(format=format)
-
- security.declareProtected(Permissions.ModifyPortalContent, 'makeFile')
- def makeFile(self, format, force=0, REQUEST=None, **kw):
- """
- This method implement the file conversion cache:
- * check if the format is supported
- * check date of last conversion to OOo, compare with date of last
- * if necessary, create new file and cache
- * update file generation time
-
- Fails silently if we have an up to date version.
-
- TODO:
- * support of images in html conversion (as subobjects for example)
-
- XXX-JPS more explicit name needed for method
- """
- if not self.isAllowed(format):
- errstr = '%s format is not supported' % format
- if REQUEST is not None:
- return self.returnMessage(errstr, 2)
- raise ConversionError(errstr)
- if not self.hasOOFile():
- if REQUEST is not None:
- return self.returnMessage('needs conversion', 1)
- raise ConversionError('needs conversion')
- if self.isFileChanged(format) or force:
- try:
- mime, data = self._makeFile(format)
- self.setConversion(data, mime, format = format)
- #self._p_changed = 1 # XXX not sure it is necessary
- except xmlrpclib.Fault, e:
- if REQUEST is not None:
- return self.returnMessage('Problem: %s' % str(e), 2)
- else:
- raise ConversionError(str(e))
- self.updateConversion(format = format)
- if REQUEST is not None:
- return self.returnMessage('%s created' % format)
- else:
- if REQUEST is not None:
- return self.returnMessage('%s file is up to date' % format, 1)
-
- security.declarePrivate('_makeFile')
- def _makeFile(self,format):
- """
- Communicates with server to convert a file
- """
- # real version:
- sp = self._mkProxy()
- kw = sp.run_generate(self.getSourceReference(), enc(self._unpackData(self.oo_data)), None, format)
- return kw['mime'], Pdata(dec(kw['data']))
-
- # make sure to call the right edit methods
- _edit = File._edit
- edit = File.edit
-
- # BG copied from File in case
- security.declareProtected('FTP access', 'manage_FTPget', 'manage_FTPstat', 'manage_FTPlist')
- manage_FTPget = File.manage_FTPget
- manage_FTPlist = File.manage_FTPlist
- manage_FTPstat = File.manage_FTPstat
-
-
-# vim: syntax=python shiftwidth=2
-
+ server_proxy = self._mkProxy()
+ kw = server_proxy.run_setmetadata(self.getId(),
+ enc(self._unpackData(self.getBaseData())),
+ kw)
+ self._setBaseData(dec(kw['data']))
More information about the Erp5-report
mailing list