[Erp5-report] r10538 - /erp5/trunk/products/ERP5OOo/Document/ExternalWebPage.py

nobody at svn.erp5.org nobody at svn.erp5.org
Wed Oct 4 17:35:31 CEST 2006


Author: bartek
Date: Wed Oct  4 17:35:29 2006
New Revision: 10538

URL: http://svn.erp5.org?rev=10538&view=rev
Log:
Fixed URL recording; cleaned multiple slashes from URLs.

Modified:
    erp5/trunk/products/ERP5OOo/Document/ExternalWebPage.py

Modified: erp5/trunk/products/ERP5OOo/Document/ExternalWebPage.py
URL: http://svn.erp5.org/erp5/trunk/products/ERP5OOo/Document/ExternalWebPage.py?rev=10538&r1=10537&r2=10538&view=diff
==============================================================================
--- erp5/trunk/products/ERP5OOo/Document/ExternalWebPage.py (original)
+++ erp5/trunk/products/ERP5OOo/Document/ExternalWebPage.py Wed Oct  4 17:35:29 2006
@@ -166,8 +166,6 @@
     if (inf.getmaintype(),inf.getsubtype())!=('text','html'):
       raise SpiderException(100,'this is %s/%s' % (inf.getmaintype(),inf.getsubtype()))
     top=self._findTopObject()
-    # record my url in top object
-    top.addUrl(self.getQualifiedUrl())
     # remove current subobjects
     self.manage_delObjects([i.getId() for i in self.searchFolder(portal_type='External Web Page')])
     if self.getOptionRecursively()>0 and self.getRecursionDepth()>0:
@@ -179,12 +177,18 @@
           continue
         ref=re.sub('#.*','',ref)
         if ref=='':continue
-        baseref='/'.join(self.getQualifiedUrl().split('/')[:-1])
+        #baseref='/'.join(self.getQualifiedUrl().split('/'))
+        baseref=self.getQualifiedUrl()
         if not ref.startswith('http'):
           # complete relative paths
           ref=baseref+'/'+ref
+        # eliminate multiple slashes
+        rx=re.compile('([^:]{1})\/{2,}')
+        ref=re.sub(rx,'\1/',ref)
         # create subobjects
         if ref.startswith(baseref) and not top.checkUrl(ref):
+          # record my url in top object
+          top.addUrl(ref)
           n=self.newContent(portal_type='External Web Page')
           # set coordinates
           n.setUrlProtocol('http')




More information about the Erp5-report mailing list