[Erp5-report] r14369 - /erp5/trunk/bt5/erp5_dms/ExtensionTemplateItem/
nobody at svn.erp5.org
nobody at svn.erp5.org
Fri May 4 15:54:43 CEST 2007
Author: jp
Date: Fri May 4 15:54:43 2007
New Revision: 14369
URL: http://svn.erp5.org?rev=14369&view=rev
Log:
Renamed extension files. Fixed bug in document extraction (whenever text is a data stream rather than a string)
Removed:
erp5/trunk/bt5/erp5_dms/ExtensionTemplateItem/asSecurityGroupId.py
erp5/trunk/bt5/erp5_dms/ExtensionTemplateItem/cutFound.py
erp5/trunk/bt5/erp5_dms/ExtensionTemplateItem/documentUtils.py
erp5/trunk/bt5/erp5_dms/ExtensionTemplateItem/mailUtils.py
erp5/trunk/bt5/erp5_dms/ExtensionTemplateItem/searchUtils.py
Removed: erp5/trunk/bt5/erp5_dms/ExtensionTemplateItem/asSecurityGroupId.py
URL: http://svn.erp5.org/erp5/trunk/bt5/erp5_dms/ExtensionTemplateItem/asSecurityGroupId.py?rev=14368&view=auto
==============================================================================
--- erp5/trunk/bt5/erp5_dms/ExtensionTemplateItem/asSecurityGroupId.py (original)
+++ erp5/trunk/bt5/erp5_dms/ExtensionTemplateItem/asSecurityGroupId.py (removed)
@@ -1,65 +1,0 @@
-def asSecurityGroupId(self,**kw):
- ## Script (Python) "xERP5Type_asSecurityGroupId"
- ##bind container=container
- ##bind self=self
- ##bind namespace=
- ##bind script=script
- ##bind subpath=traverse_subpath
- ##parameters=category_order, **kw
- ##title=
- ##
- # category_order : list of base_categories we want to use to generate the group id
- # kw : keys should be base categories,
- # values should be value of corresponding relative urls (obtained by getBaseCategory())
- #
- # Example call : self.ERP5TypeSecurity_asGroupId(category_order=('site', 'group', 'function'),
- # site='france/lille', group='nexedi', function='accounting/accountant')
- # This will generate a string like 'LIL_NXD_ACT' where "LIL", "NXD" and "ACT" are the codification
- # of respectively "france/lille", "nexedi" and "accounting/accountant" categories
- #
- # ERP5Type_asSecurityGroupId can also return a list of users whenever a category points
- # to a Person instance. This is useful to implement user based local role assignments
-
-
- code_list = []
- user_list = []
-
- # sort the category list lexicographically
- # this prevents us from choosing the exact order we want,
- # but also prevents a human mistake from breaking everything by creating site_function instead of function_site
- category_order=kw.get('category_order',None)
- if category_order not in (None, ''):
- category_order = list(category_order)
- category_order.sort()
- else:
- category_order = []
-
- for base_category in category_order:
- if kw.has_key(base_category):
- category_list = kw[base_category]
- if type(category_list)==type(''):
- category_list = [category_list]
- for category in category_list:
- category_path = '%s/%s' % (base_category, category)
- category_object = self.portal_categories.getCategoryValue(category_path)
- if category_object in (None, ''):
- raise "SecurityRoleDefinitionError", "Category '%s' doesn't exist" % (category_path)
- if category_object.getPortalType() == 'Person':
- # We define a person here
- user_name = category_object.getReference()
- if user_name is not None: user_list.append(user_name)
- elif category_object.getPortalType() == 'Project':
- # We use the project reference as a group
- category_code = category_object.getReference(category_object.getTitle())
- code_list.append(category_code)
- else:
- # We define a group item here
- category_code = category_object.getCodification() or category_object.getId()
- code_list.append(category_code)
-
- # Return a list of users or a single group
- if user_list:
- #self.log('user_list',user_list)
- return user_list
- #self.log('code_list',code_list)
- return '_'.join(code_list)
Removed: erp5/trunk/bt5/erp5_dms/ExtensionTemplateItem/cutFound.py
URL: http://svn.erp5.org/erp5/trunk/bt5/erp5_dms/ExtensionTemplateItem/cutFound.py?rev=14368&view=auto
==============================================================================
--- erp5/trunk/bt5/erp5_dms/ExtensionTemplateItem/cutFound.py (original)
+++ erp5/trunk/bt5/erp5_dms/ExtensionTemplateItem/cutFound.py (removed)
@@ -1,96 +1,0 @@
-import string, re
-
-redundant_chars='"\'.:;,-+<>()*~' # chars we need to strip from a word before we see if it matches, and from the searchwords to eliminate boolean mode chars
-tr=string.maketrans(redundant_chars,' '*len(redundant_chars))
-
-class Done(Exception):
- pass
-
-class Word(str):pass
-
-class FoundWord(str):
-
- def __str__(self):
- return self.tags[0]+self+self.tags[1]
-
-class Part:
-
- def __init__(self,tags,trail):
- self.chain=[]
- self.limit=trail
- self.trail=trail
- self.has=False
- self.tags=tags
-
- def push(self,w):
- self.chain.insert(0,Word(w))
- if len(self.chain)>self.limit:
- if self.has:
- self.chain.reverse()
- raise Done()
- self.chain.pop()
-
- def add(self,w):
- self.chain.insert(0,FoundWord(w))
- self.limit+=self.trail+1
- self.has=True
-
- def __str__(self):
- return '...%s...' % ' '.join(map(str,self.chain))
-
-
-
-def generateParts(context,text,sw,tags,trail,maxlines):
- par=Part(tags,trail)
- sw=sw.translate(tr).strip().lower().split()
- test=lambda w:w.translate(tr).strip().lower() in sw
- i=0
- length=len(text)
- for counter,aw in enumerate(text):
- if i==maxlines:
- raise StopIteration
- if test(aw):
- par.add(aw)
- else:
- try:
- par.push(aw)
- except Done:
- i+=1
- yield par
- par=Part(tags,trail)
- if counter==length-1:
- if par.has:
- par.chain.reverse()
- yield par # return the last marked part
-
-
-def cutFound(context,txt,sw,tags,trail,maxlines):
- # initialize class
- FoundWord.tags=tags
- # strip html tags (in case it is a web page - we show result without formatting)
- r=re.compile('<script>.*?</script>',re.DOTALL|re.IGNORECASE)
- r=re.compile('<head>.*?</head>',re.DOTALL|re.IGNORECASE)
- txt=re.sub(r,'',txt)
- r=re.compile('<([^>]+)>',re.DOTALL|re.IGNORECASE)
- txt=re.sub(r,'',txt)
- r=re.compile('\s+')
- txt=re.sub(r,' ',txt)
- txt=txt.replace('-',' - ') # to find hyphenated occurrences
- text = ' '.join(txt.split('\n')).split(' ') # very rough tokenization
- return [p for p in generateParts(context,text,sw,tags,trail,maxlines)]
-
-
-if __name__=='__main__':
- sw='pricing priority right acting proportion'
- txt=' '.join([l.strip() for l in open('offer.txt').readlines()])
-
- # configuration
-
- tags=('<b>','</b>')
- trail=5
- maxlines=5
- for p in cutFound(None,txt,sw,tags,trail,maxlines):
- print p
-
-
-# vim: filetype=python syntax=python shiftwidth=2
Removed: erp5/trunk/bt5/erp5_dms/ExtensionTemplateItem/documentUtils.py
URL: http://svn.erp5.org/erp5/trunk/bt5/erp5_dms/ExtensionTemplateItem/documentUtils.py?rev=14368&view=auto
==============================================================================
--- erp5/trunk/bt5/erp5_dms/ExtensionTemplateItem/documentUtils.py (original)
+++ erp5/trunk/bt5/erp5_dms/ExtensionTemplateItem/documentUtils.py (removed)
@@ -1,77 +1,0 @@
-import zipfile, cStringIO, re
-import xmlrpclib, base64
-from Products.CMFCore.utils import getToolByName
-
-def extractContent(data):
- """
- extract text content from ODF data
- directly by unzipping (no need for oood here)
- """
- # XXX probably not used - to really get text content it should
- # strip xml too
- cs = cStringIO.StringIO()
- cs.write(data)
- try:
- z = zipfile.ZipFile(cs)
- except zipfile.BadZipfile:
- cs.close()
- return ''
- s = z.read('content.xml')
- cs.close()
- z.close()
- return s
-
-###### XXX these methods repeat what is in OOoDocument class
-# maybe redundant, but we need to access them from Script (Python)
-
-def convertToOdf(self, name, data):
- """
- convert data into ODF format
- to be used in ingestion when we don't yet have an ERP5 object
- to work with (and we for example have to figure out portal_type)
- """
- sp = mkProxy(self)
- kw = sp.run_convert(name,base64.encodestring(data))
- odf = base64.decodestring(kw['data'])
- return odf
-
-def mkProxy(self):
- pref = getToolByName(self,'portal_preferences')
- adr = pref.getPreferredDmsOoodocServerAddress()
- nr = pref.getPreferredDmsOoodocServerPortNumber()
- if adr is None or nr is None:
- raise Exception('you should set conversion server coordinates in preferences')
- sp = xmlrpclib.ServerProxy('http://%s:%d' % (adr,nr), allow_none=True)
- return sp
-
-def generateFile(self, name, data, format):
- sp = mkProxy(self)
- kw = sp.run_generate(name, data, None, format)
- res = base64.decodestring(kw['data'])
- return res
-
-def getAttrFromFilename(self, fname):
- """
- parse file name using regexp specified in preferences
- """
- rx_parse = re.compile(self.portal_preferences.getPreferredDmsFilenameRegexp())
- m = rx_parse.match(fname)
- if m is None:
- return {}
- return m.groupdict()
-
-def getLastWorkflowDate(self, state_name='simulation_state', state=('released','public')):
- '''we can make something more generic out of it
- or JP says "there is an API for it" and we trash this one'''
- for name,wflow in self.workflow_history.items():
- if len(wflow) == 0: continue # empty history
- if wflow[0].get(state_name) is None: continue # not the right one
- for i in range(len(wflow)):
- ch = wflow[-1-i]
- act = ch.get('action', '')
- if act is not None and act.endswith('action'):
- if ch.get(state_name, '') in state:
- return ch['time']
- return 0
-
-# vim: syntax=python shiftwidth=2
Removed: erp5/trunk/bt5/erp5_dms/ExtensionTemplateItem/mailUtils.py
URL: http://svn.erp5.org/erp5/trunk/bt5/erp5_dms/ExtensionTemplateItem/mailUtils.py?rev=14368&view=auto
==============================================================================
--- erp5/trunk/bt5/erp5_dms/ExtensionTemplateItem/mailUtils.py (original)
+++ erp5/trunk/bt5/erp5_dms/ExtensionTemplateItem/mailUtils.py (removed)
@@ -1,27 +1,0 @@
-import re
-
-def findAddress(txt):
- """
- find email address in a string
- """
- validchars='0-9A-Za-z.\-_'
- r=re.compile('[%s]+@[%s]+' % (validchars,validchars))
- m=r.search(txt)
- return m and m.group()
-
-def extractParams(txt):
- """
- extract parameters given in mail body
- We assume that parameters are given as lines of the format:
- name:value
- """
- r=re.compile('^([\w_]+):([\w_/]+)$')
- res=[]
- for line in txt.split():
- found=r.findall(line.strip())
- if len(found)==1:
- res.append(found[0])
- return dict(res)
-
-
-# vim: shiftwidth=2
Removed: erp5/trunk/bt5/erp5_dms/ExtensionTemplateItem/searchUtils.py
URL: http://svn.erp5.org/erp5/trunk/bt5/erp5_dms/ExtensionTemplateItem/searchUtils.py?rev=14368&view=auto
==============================================================================
--- erp5/trunk/bt5/erp5_dms/ExtensionTemplateItem/searchUtils.py (original)
+++ erp5/trunk/bt5/erp5_dms/ExtensionTemplateItem/searchUtils.py (removed)
@@ -1,99 +1,0 @@
-"""
-RULES
-
-Single arguments:
- - arg:value translates into arg='value' in query
- - quotes are cleared
- - if value contains spaces, punctuation or anything else it has to be put in quotes
- - file is source_reference (original file name)
- - language, version, reference
-
-Multiple arguments:
- - arg:xxx works the same way
- - arg:(xxx,yyy) ORs both
- - arg:all translates into empty tuple, which implies all available values
- - state (simulation_state), type (portal_type)
-
-Everything else is treated as SearchableText
-"""
-
-# XXX score:
-# pythonicity: high
-# obfuscation level: brain-twisting
-
-# how to customize:
-# (1) think for two hours
-# (2) type for 20 seconds
-
-import re
-import sys
-sys.path.append('/usr/lib/zope/lib/python/')
-from DateTime import DateTime
-
-def dateRangeProc(s):
- """
- process date range (can be given in weeks, months or years)
- """
- m=re.match('(\d)([wmy]).*',s)
- try:
- dif=0
- gr=m.groups()
- if gr[1]=='w':dif=int(gr[0])*7
- if gr[1]=='m':dif=int(gr[0])*30
- if gr[1]=='y':dif=int(gr[0])*365
- return ('creation_from',DateTime()-dif)
- except AttributeError, IndexError:
- return ()
-
-# parsing defined here
-simulation_states=()
-r=re.compile('(\w+:"[^"]+"|\w+:\([^)]+\)|\w+:[\(\),\w/\-.]+)')
-filetyper=lambda s:('source_reference','%%.%s' % s)
-filestripper=lambda s: ('source_reference',s.replace('"',''))
-#addarchived=lambda s: ('simulation_state',simulation_states+('archived',))
-state=lambda s:('simulation_state',parsestates(s))
-type=lambda s:('portal_type',parsestates(s))
-paramsmap=dict(file=filestripper,type=type,reference='reference',filetype=filetyper,state=state,\
- language='language',version='version',created=dateRangeProc)
-
-def parsestates(s):
- print s
- if s=='all':
- return ()
- if s[0]=='(' and s[-1]==')':
- return [i.replace('"','').replace("'","") for i in s[1:-1].split(',') if i!='']
- return s.replace('"','').replace("'","")
-
-def analyze(params):
- params['SearchableText']=''
- params['simulation_state']=simulation_states
- def cutter(s):
- ss=s.split(':')
- if len(ss)==1:
- params['SearchableText']+=ss[0]
- if len(ss)==2:
- try:
- ps=paramsmap.get(ss[0])(ss[1])
- params[ps[0]]=ps[1]
- except TypeError:
- if paramsmap.has_key(ss[0]):
- params[paramsmap.get(ss[0])]=ss[1]
- else:
- params[ss[0]]=ss[1]
- except IndexError:
- return
- return cutter
-
-def parseSearchString(searchstring):
- params={}
- l=r.split(searchstring)
- print l
- map(analyze(params),l)
- params['SearchableText']=params['SearchableText'].strip()
- return params
-
-if __name__=='__main__':
- #searchstring='byle cisnie zego file:"ble ble.doc" filetype:doc type:Text poza tym reference:abc-def'
- #searchstring='byle "cisnie zego" state:draft file:"ble ble.doc" type:("Site","Text") poza tym reference:abc-def dupa:kwas/zbita'
- searchstring='byleco created:3mth'
- print parseSearchString(searchstring)
More information about the Erp5-report
mailing list