[Erp5-report] r13439 - in /erp5/trunk/products/ERP5: Document/ Tool/
nobody at svn.erp5.org
nobody at svn.erp5.org
Thu Mar 15 21:33:57 CET 2007
Author: jp
Date: Thu Mar 15 21:33:54 2007
New Revision: 13439
URL: http://svn.erp5.org?rev=13439&view=rev
Log:
The changes enclosed fixe various issues in the previous implementation. It breaks webdav though (wait for next commit to get it back).
Modified:
erp5/trunk/products/ERP5/Document/Document.py
erp5/trunk/products/ERP5/Tool/ContributionTool.py
Modified: erp5/trunk/products/ERP5/Document/Document.py
URL: http://svn.erp5.org/erp5/trunk/products/ERP5/Document/Document.py?rev=13439&r1=13438&r2=13439&view=diff
==============================================================================
--- erp5/trunk/products/ERP5/Document/Document.py (original)
+++ erp5/trunk/products/ERP5/Document/Document.py Thu Mar 15 21:33:54 2007
@@ -179,6 +179,21 @@
searchable text, explicit relations, implicit relations,
metadata, versions, languages, etc.
+ Documents may either store their content directly or
+ cache content which is retrieved from a specified URL.
+ The second case if often referred as "External Document".
+ Standalone "External Documents" may be created by specifying
+ a URL to the contribution tool which is in charge of initiating
+ the download process and selecting the appropriate document type.
+ Groups of "External Documents" may also be generated from
+ so-called "External Source" (refer to ExternalSource class
+ for more information).
+
+ External Documents may be downloaded once or at
+ regular interval. The later can be useful to update the content
+ of an external source. Previous versions may be stored
+ in place or kept in a separate file.
+
There are currently two types of Document subclasses:
* File for binary file based documents. File
@@ -188,7 +203,10 @@
* TextDocument for text based documents. TextDocument
has subclasses such as Wiki to implement specific
- methods.
+ methods. TextDocument itself has a subclass
+ (XSLTDocument) which provides XSLT based analysis
+ and transformation of XML content based on XSLT
+ templates.
Document classes which implement conversion should use
the ConversionCacheMixin class so that converted values are
@@ -371,6 +389,13 @@
formats require certain permission
"""
pass
+
+ security.declareProtected(Permissions.View, 'asText')
+ def asText(self):
+ """
+ Converts the content of the document to a textual representation.
+ """
+ return self.convert('text')
security.declareProtected(Permissions.View, 'getSearchableText')
def getSearchableText(self, md=None):
@@ -787,7 +812,8 @@
kw = {}
for id in self.propertyIds():
# We should not consider file data
- if id is not 'data' and self.hasProperty(id):
+ if id not in ('data', 'categories_list', 'uid', 'id', 'text_content', ) \
+ and self.hasProperty(id):
kw[id] = self.getProperty(id)
self._backup_input = kw # We could use volatile and pass kw in activate
# if we are garanteed that _backup_input does not
@@ -843,6 +869,7 @@
del(kw['portal_type'])
except KeyError:
pass
+
self.edit(**kw)
# Finish in second stage
Modified: erp5/trunk/products/ERP5/Tool/ContributionTool.py
URL: http://svn.erp5.org/erp5/trunk/products/ERP5/Tool/ContributionTool.py?rev=13439&r1=13438&r2=13439&view=diff
==============================================================================
--- erp5/trunk/products/ERP5/Tool/ContributionTool.py (original)
+++ erp5/trunk/products/ERP5/Tool/ContributionTool.py Thu Mar 15 21:33:54 2007
@@ -27,9 +27,9 @@
##############################################################################
import cStringIO
-import pdb
import re
import string
+import urllib2
from AccessControl import ClassSecurityInfo, getSecurityManager
from Globals import InitializeClass, DTMLFile
@@ -41,8 +41,6 @@
from DateTime import DateTime
from Acquisition import aq_base
-NO_DISCOVER_METADATA_KEY = '_v_no_discover_metadata'
-USER_NAME_KEY = '_v_document_user_login'
TEMP_NEW_OBJECT_KEY = '_v_new_object'
_marker = [] # Create a new marker object.
@@ -50,16 +48,17 @@
class ContributionTool(BaseTool):
"""
ContributionTool provides an abstraction layer to unify the contribution
- of documents into an ERP5Site.
-
- ContributionTool is configured in portal_types in
- such way that it can store Text, Spreadsheet, PDF, etc.
-
- The method to use is portal_contributions.newContent, which should receive
- either a portal type or a file name from which type can be derived or a file from which
- content type can be derived, otherwise it will fail.
+ of documents into an ERP5 Site.
+
+ ContributionTool needs to be configured in portal_types (allowed contents) so
+ that it can store Text, Spreadsheet, PDF, etc.
+
+ The main method of ContributionTool is newContent. This method can
+ be provided various parameters from which the portal type and document
+ metadata can be derived.
Configuration Scripts:
+
- ContributionTool_getPropertyDictFromFileName: receives file name and a
dict derived from filename by regular expression, and does any necesary
operations (e.g. mapping document type id onto a real portal_type).
@@ -68,7 +67,6 @@
id = 'portal_contributions'
meta_type = 'ERP5 Contribution Tool'
portal_type = 'Contribution Tool'
- allowed_types = ('File', 'Image', 'Text') # XXX Is this really needed ?
# Declarative Security
security = ClassSecurityInfo()
@@ -77,17 +75,17 @@
manage_overview = DTMLFile( 'explainContributionTool', _dtmldir )
security.declarePrivate('findTypeName')
- def findTypeName(self, file_name, ob):
+ def findTypeName(self, file_name, document):
"""
Finds the appropriate portal type based on the file name
- or if necessary the content of ob
+ or if necessary the content of the document.
"""
portal_type = None
# We should only consider those portal_types which share the
# same meta_type with the current object
valid_portal_type_list = []
for pt in self.portal_types.objectValues():
- if pt.meta_type == ob.meta_type:
+ if pt.meta_type == document.meta_type:
valid_portal_type_list.append(pt.id)
# Check if the filename tells which portal_type this is
@@ -100,7 +98,7 @@
# to check which of the candidates is suitable
if portal_type is None:
# The document is now responsible of telling all its properties
- portal_type = ob.getPropertyDictFromContent().get('portal_type', None)
+ portal_type = document.getPropertyDictFromContent().get('portal_type', None)
if portal_type is not None:
# we check if it matches the candidate list, if there were any
if len(portal_type_list)>1 and portal_type not in portal_type_list:
@@ -113,8 +111,8 @@
if portal_type is None:
# We can not do anything anymore
- return ob.portal_type
- #return None
+ #return document.portal_type # XXX Wrong
+ return None
if portal_type not in valid_portal_type_list:
# We will not be able to migrate ob to portal_type
@@ -124,7 +122,7 @@
return portal_type
security.declareProtected(Permissions.AddPortalContent, 'newContent')
- def newContent(self, id=None, portal_type=None,
+ def newContent(self, id=None, portal_type=None, url=None,
discover_metadata=1, temp_object=0,
user_login=None, **kw):
"""
@@ -134,7 +132,8 @@
the content.
user_login is the name under which the content will be created
- XXX - Is this a security hole ?
+ XXX - this is a security hole which needs to be fixed by
+ making sure only Manager can use this parameter
NOTE:
We always generate ID. So, we must prevent using the one
@@ -147,20 +146,32 @@
# Try to find the file_name
file_name = None
- # check if file was provided
- file = kw.get('file', None)
- if file is not None:
- file_name = file.filename
+ mime_type = None
+ if url is None:
+ # check if file was provided
+ file = kw.get('file', None)
+ if file is not None:
+ file_name = file.filename
+ else:
+ # some channels supply data and file-name separately
+ # this is the case for example for email ingestion
+ # in this case, we build a file wrapper for it
+ data = kw.get('data', None)
+ if data is not None:
+ file_name = kw.get('file_name', None)
+ if file_name is not None:
+ file = cStringIO.StringIO()
+ file.write(data)
+ file.seek(0)
else:
- # some channels supply data and file name separately
- # we have to build an object
- data = kw.get('data', None)
- if data is not None:
- file_name = kw.get('file_name', None)
- if file_name is not None:
- file = cStringIO.StringIO()
- file.write(data)
- file.seek(0)
+ # build a new file from the url
+ file = urllib2.urlopen(url)
+ file_name = url.split('/')[-1]
+ if hasattr(file, 'headers'):
+ headers = file.headers
+ if hasattr(headers, 'type'):
+ mime_type = headers.type
+ kw['file'] = file
# If the portal_type was provided, we can go faster
if portal_type is not None and portal_type != '':
@@ -178,11 +189,11 @@
raise ValueError, "could not determine portal type"
# So we will simulate WebDAV to get an empty object
- # with PUT_factory
- ob = self.PUT_factory( file_name, None, None )
+ # with PUT_factory - we provide the mime_type as
+ # parameter
+ ob = self.PUT_factory( file_name, mime_type, None )
# Raise an error if we could not guess the portal type
- # XXX Maybe we should try to pass the typ param
if ob is None:
raise ValueError, "Could not determine the document type"
@@ -197,9 +208,7 @@
BaseTool._delObject(self, file_name)
# Move the document to where it belongs
- if not discover_metadata: setattr(self, NO_DISCOVER_METADATA_KEY, 1)
- setattr(ob, USER_NAME_KEY, user_login)
- document = self._setObject(file_name, ob)
+ document = self._setObject(file_name, ob, user_login=user_login)
# Time to empty the cache
if hasattr(self, '_v_document_cache'):
@@ -209,7 +218,7 @@
# Reindex it and return the document
# XXX seems we have to commit now, otherwise it is not reindexed properly later
# dunno why
- get_transaction().commit()
+ get_transaction().commit() # XXX-JPS - WHAT IS THIS ?????????????????????
document.reindexObject()
return document
@@ -241,9 +250,9 @@
method = self._getTypeBasedMethod('getPropertyDictFromFileName',
fallback_script_id = 'ContributionTool_getPropertyDictFromFileName')
property_dict = method(file_name, property_dict)
- if property_dict.has_key('portal_type'):
+ if property_dict.has_key('portal_type') and property_dict['portal_type']:
# we have to return portal_type as a tuple
- # because we can allow for having multiple types (candidates)
+ # because we should allow for having multiple candidate types
property_dict['portal_type'] = (property_dict['portal_type'],)
else:
# we have to find candidates by file extenstion
@@ -279,30 +288,41 @@
Refer to: NullResource.PUT
"""
- # Find the portal type based on file name and content
- # We provide ob in the context of self to make sure scripting is possible
- portal_type = self.findTypeName(name, ob.__of__(self))
- if portal_type is None:
- raise TypeError, "Unable to determine portal type"
-
- # We know the portal_type, let us find the module
- module = self.getDefaultModule(portal_type)
-
- # Set the object on the module and fix the portal_type and id
- new_id = module.generateNewId()
- ob.portal_type = portal_type
- ob.id = new_id
- module._setObject(new_id, ob)
-
- # We can now discover metadata unless NO_DISCOVER_METADATA_KEY was set on ob
- document = module[new_id]
- user_login = getattr(self, USER_NAME_KEY, None)
- if not getattr(ob, NO_DISCOVER_METADATA_KEY, 0): document.discoverMetadata(file_name=name, user_login=user_login)
-
- # Keep the document close to us
- if not hasattr(self, '_v_document_cache'):
- self._v_document_cache = {}
- self._v_document_cache[name] = document.getRelativeUrl()
+ # _setObject is called by constructInstance at a time
+ # when the object has no portal_type defined yet. It
+ # will be removed later on. We can safely store the
+ # document inside us at this stage. Else we
+ # must find out where to store it.
+ if not ob.__dict__.has_key('portal_type'):
+ BaseTool._setObject(self, name, ob)
+ document = self[name]
+ else:
+ # We give the system a last chance to analyse the
+ # portal_type based on the document content
+ # (ex. a Memo is a kind of Text which can be identified
+ # by the fact it includes some specific content)
+ portal_type = self.findTypeName(name, ob.__of__(self))
+ if portal_type is None: portal_type = ob.portal_type
+ ob._setPortalTypeName(portal_type) # This is redundant with finishConstruction
+ # but necessary to move objects to appropriate
+ # location based on their content. Since the
+ # object is already constructed here, we
+ # can safely change its portal_type
+ # Now we know the portal_type, let us find the module
+ # to which we should move the document to
+ module = self.getDefaultModule(ob.portal_type)
+ new_id = module.generateNewId()
+ ob.id = new_id
+ module._setObject(new_id, ob)
+
+ # We can now discover metadata
+ document = module[new_id]
+ document.discoverMetadata(file_name=name, user_login=user_login)
+
+ # Keep the document close to us
+ if not hasattr(self, '_v_document_cache'):
+ self._v_document_cache = {}
+ self._v_document_cache[name] = document.getRelativeUrl()
# Return document to newContent method
return document
More information about the Erp5-report
mailing list