[Erp5-report] r42766 jp - in /experimental/bt5/erp5_email_reader: DocumentTemplateItem/ PathTemplateItem/portal_alarms/ bt/
nobody at svn.erp5.org
nobody at svn.erp5.org
Sat Jan 29 18:42:22 CET 2011
Author: jp
Date: Sat Jan 29 18:42:22 2011
New Revision: 42766
URL: http://svn.erp5.org?rev=42766&view=rev
Log:
Brute force commit with working EmailReader code for UNG
Modified:
experimental/bt5/erp5_email_reader/DocumentTemplateItem/EmailReader.py
experimental/bt5/erp5_email_reader/PathTemplateItem/portal_alarms/email_reader_upload_pending.xml
experimental/bt5/erp5_email_reader/bt/revision
Modified: experimental/bt5/erp5_email_reader/DocumentTemplateItem/EmailReader.py
URL: http://svn.erp5.org/experimental/bt5/erp5_email_reader/DocumentTemplateItem/EmailReader.py?rev=42766&r1=42765&r2=42766&view=diff
==============================================================================
--- experimental/bt5/erp5_email_reader/DocumentTemplateItem/EmailReader.py [utf8] (original)
+++ experimental/bt5/erp5_email_reader/DocumentTemplateItem/EmailReader.py [utf8] Sat Jan 29 18:42:22 2011
@@ -1,3 +1,4 @@
+# -*- coding: utf-8 -*-
##############################################################################
#
# Copyright (c) 2008 Nexedi SA and Contributors. All Rights Reserved.
@@ -28,8 +29,10 @@
# ERP5 imports
from AccessControl import ClassSecurityInfo
from Products.CMFCore.utils import getToolByName
-from Products.ERP5Type import Permissions, PropertySheet
+from Products.CMFCore.WorkflowCore import WorkflowMethod
+from Products.ERP5Type import Permissions, PropertySheet, Constraint, Interface
from Products.ERP5.Document.ExternalSource import ExternalSource
+from Products.ERP5Type.XMLObject import XMLObject
from Products.ERP5Type.Cache import getReadOnlyTransactionCache, enableReadOnlyTransactionCache
# IMAP imports
@@ -131,6 +134,7 @@ class IMAPSServer(MailServer):
if self.message_folder is _MARKER or self.message_folder != message_folder:
self.message_folder = message_folder
if message_folder:
+ LOG('message_folder',0, message_folder)
response, message_count = self.server.select(message_folder) # XXX response not taken into account
else:
response, message_count = self.server.select() # XXX response not taken into account
@@ -168,13 +172,14 @@ class IMAPSServer(MailServer):
result = []
response, folder_list = self.server.list() # XXX - reponse not taken into account
for folder in folder_list:
- folder_definition_list = folder.split(' "." ')
- # XXX - Here we consider that
- # strings are of the form '(\\HasChildren) "." "INBOX.Business.OpenBrick.Prospects"'
- # but is this really the generic case - review IMAP protocol
- folder_type, folder_name = folder_definition_list
- folder_name = folder_name[1:-1]
- result.append(folder_name)
+ if folder is not None:
+ folder_definition_list = folder.split(' "." ')
+ # XXX - Here we consider that
+ # strings are of the form '(\\HasChildren) "." "INBOX.Business.OpenBrick.Prospects"'
+ # but is this really the generic case - review IMAP protocol
+ folder_type, folder_name = folder_definition_list
+ folder_name = folder_name[1:-1]
+ result.append(folder_name)
return result
def getMessageData(self, uid, message_folder=None):
@@ -270,10 +275,11 @@ class EmailReader(ExternalSource):
, PropertySheet.DublinCore
, PropertySheet.Url
, PropertySheet.Login
+ , PropertySheet.ExternalDocument
)
# Global values
- MAX_UID_LIST_SIZE = 20 # The number of messages to ingest at once
+ MAX_UID_LIST_SIZE = 100 # The number of messages to ingest at once
### Implementation - generic part - IMAP independent
security.declareProtected(Permissions.ModifyPortalContent, 'crawlContent')
@@ -291,6 +297,15 @@ class EmailReader(ExternalSource):
else:
# A scope is defined, so we only need to filter
folder_list = filter(lambda x: x in available_folder_list, folder_list)
+ # Interleave default folder for better reactivity
+ default_folder = self.getDefaultCrawlingScope()
+ if default_folder and default_folder in available_folder_list:
+ interleaved_list = []
+ for folder in folder_list:
+ interleaved_list.append(default_folder)
+ interleaved_list.append(folder)
+ folder_list = interleaved_list
+ # And trigger activities
self.activate(activity='SQLQueue', priority=2,
after_tag=list_activity_tag).crawlMessageFolderList(folder_list)
# XXX - Start with default one and only use filtered mailboxes if defined
@@ -299,11 +314,15 @@ class EmailReader(ExternalSource):
def crawlMessageFolderList(self, message_folder_list):
"""
Take the first folder in the message_folder_list and start
- ingesting messaged. Then postpone ingestion for the rest of
+ ingesting messages. Then postpone ingestion for the rest of
folders.
XXX - TODO: crawl 10 folders at once
"""
+ # Init dict
+ if getattr(self, '_latest_uid', None) is None:
+ self._latest_uid = {} # Keeps track of latest ingested
+
# Take a single folder at once
message_folder = message_folder_list[0]
message_folder_list = message_folder_list[1:]
@@ -316,6 +335,14 @@ class EmailReader(ExternalSource):
message_uid_list = self._getMailServer().getMessageUIDList(message_folder=message_folder)
except ValueError, error_message: # Use a better exception here XXX
message_uid_list = []
+ # Reduce list size based on asumption of growing sequence of uids
+ latest_uid = self._latest_uid.get(message_folder, 0)
+ message_uid_list = filter(lambda uid: int(uid) > latest_uid, message_uid_list)
+ # And update biggest uid - for next time
+ for uid in message_uid_list:
+ # Cache lastest UID (only works if uid is increasing) - XXX-JPS
+ if int(uid) > self._latest_uid.get(message_folder, 0):
+ self._latest_uid[message_folder] = int(uid)
# Do not retrieve existing messages - XXX maybe there is a faster way to compute this
# This should probably be handled within ingestMessageList and splitted among
# activities
@@ -353,8 +380,8 @@ class EmailReader(ExternalSource):
message_id = self.getMessageID(uid, message_folder)
file_name = '%s.eml' % message_id
contribution_tool.newContent(container=self, data=message_data,
- file_name=file_name, id=message_id,
- portal_type='Email Thread') # It would be good to make this implicit
+ filename=file_name, id=message_id,
+ portal_type='Email Thread') # It would be good to make this implicit
LOG('ingestMessage in folder: %s' % message_folder, INFO, str(uid))
security.declareProtected(Permissions.ModifyPortalContent, 'ingestMessageList')
@@ -393,6 +420,13 @@ class EmailReader(ExternalSource):
tag=list_activity_tag).ingestMessageList(
uid_list[self.MAX_UID_LIST_SIZE:], message_folder=message_folder)
+ security.declareProtected(Permissions.ModifyPortalContent, 'resetMessageIngestionCache')
+ def resetMessageIngestionCache(self):
+ """
+ Reset the caches related to message ingestion.
+ """
+ self._latest_uid = {} # Keeps track of latest ingested
+
security.declareProtected(Permissions.AccessContentsInformation, 'getMessageID')
def getMessageID(self, uid, message_folder=None):
"""
@@ -422,7 +456,9 @@ class EmailReader(ExternalSource):
else:
raise "No Conversion Cache" # XXX - Implement this better
- result = self._getMailServer().getMessageFolderList()
+ server = self._getMailServer()
+ if server is None: return ()
+ result = server.getMessageFolderList()
if cache is not None:
cache[key] = result
@@ -451,8 +487,11 @@ class EmailReader(ExternalSource):
else:
raise "No Conversion Cache" # XXX - Implement this better
+ # No server defined
+ if not self.getURLServer(): return None
+
# XXX - Here we need to add a switch (POP vs. IMAP vs. IMAPS etc.)
- url_protocol = self.getUrlProtocol()
+ url_protocol = self.getUrlProtocol('imaps') # Default to IMAP
if url_protocol == 'imaps':
result = IMAPSServer(self.getURLServer(), self.getUserId(), self.getPassword(), port=self.getURLPort())
elif url_protocol == 'imap':
@@ -468,3 +507,4 @@ class EmailReader(ExternalSource):
cache[key] = result
return result
+
Modified: experimental/bt5/erp5_email_reader/PathTemplateItem/portal_alarms/email_reader_upload_pending.xml
URL: http://svn.erp5.org/experimental/bt5/erp5_email_reader/PathTemplateItem/portal_alarms/email_reader_upload_pending.xml?rev=42766&r1=42765&r2=42766&view=diff
==============================================================================
--- experimental/bt5/erp5_email_reader/PathTemplateItem/portal_alarms/email_reader_upload_pending.xml [utf8] (original)
+++ experimental/bt5/erp5_email_reader/PathTemplateItem/portal_alarms/email_reader_upload_pending.xml [utf8] Sat Jan 29 18:42:22 2011
@@ -156,25 +156,13 @@
</record>
<record id="2" aka="AAAAAAAAAAI=">
<pickle>
- <tuple>
- <tuple>
- <string>BTrees.Length</string>
- <string>Length</string>
- </tuple>
- <none/>
- </tuple>
+ <global name="Length" module="BTrees.Length"/>
</pickle>
<pickle> <int>0</int> </pickle>
</record>
<record id="3" aka="AAAAAAAAAAM=">
<pickle>
- <tuple>
- <tuple>
- <string>BTrees.OOBTree</string>
- <string>OOBTree</string>
- </tuple>
- <none/>
- </tuple>
+ <global name="OOBTree" module="BTrees.OOBTree"/>
</pickle>
<pickle>
<none/>
@@ -182,13 +170,7 @@
</record>
<record id="4" aka="AAAAAAAAAAQ=">
<pickle>
- <tuple>
- <tuple>
- <string>BTrees.OOBTree</string>
- <string>OOBTree</string>
- </tuple>
- <none/>
- </tuple>
+ <global name="OOBTree" module="BTrees.OOBTree"/>
</pickle>
<pickle>
<none/>
Modified: experimental/bt5/erp5_email_reader/bt/revision
URL: http://svn.erp5.org/experimental/bt5/erp5_email_reader/bt/revision?rev=42766&r1=42765&r2=42766&view=diff
==============================================================================
--- experimental/bt5/erp5_email_reader/bt/revision [utf8] (original)
+++ experimental/bt5/erp5_email_reader/bt/revision [utf8] Sat Jan 29 18:42:22 2011
@@ -1 +1 @@
-19
+20
\ No newline at end of file
More information about the Erp5-report
mailing list