[Erp5-report] r13679 - /erp5/trunk/products/ERP5/Document/Document.py
nobody at svn.erp5.org
nobody at svn.erp5.org
Mon Mar 26 20:45:40 CEST 2007
Author: jp
Date: Mon Mar 26 20:45:39 2007
New Revision: 13679
URL: http://svn.erp5.org?rev=13679&view=rev
Log:
Added base URL support based on HTML content.
Modified:
erp5/trunk/products/ERP5/Document/Document.py
Modified: erp5/trunk/products/ERP5/Document/Document.py
URL: http://svn.erp5.org/erp5/trunk/products/ERP5/Document/Document.py?rev=13679&r1=13678&r2=13679&view=diff
==============================================================================
--- erp5/trunk/products/ERP5/Document/Document.py (original)
+++ erp5/trunk/products/ERP5/Document/Document.py Mon Mar 26 20:45:39 2007
@@ -380,6 +380,7 @@
href_parser = re.compile('<a[^>]*href=[\'"](.*?)[\'"]',re.IGNORECASE)
body_parser = re.compile('<body[^>]*>(.*?)</body>', re.IGNORECASE + re.DOTALL)
title_parser = re.compile('<title[^>]*>(.*?)</title>', re.IGNORECASE + re.DOTALL)
+ base_parser = re.compile('<base[^>]*href=[\'"](.*?)[\'"][^>]*>', re.IGNORECASE + re.DOTALL)
# Declarative security
security = ClassSecurityInfo()
@@ -1134,13 +1135,12 @@
Returns the content base URL based on the actual content or
on its URL.
"""
- # XXX TODO - try to retrieve base URL from content
- # If no base_url defined, define the base URL from our URL
base_url = self.asURL()
base_url_list = base_url.split('/')
if len(base_url_list):
- if base_url_list[-1]:
+ if base_url_list[-1] and base_url_list[-1].find('.') > 0:
# Cut the trailing part in http://www.some.site/at/trailing.html
+ # but not in http://www.some.site/at
base_url = '/'.join(base_url_list[:-1])
return base_url
More information about the Erp5-report
mailing list