[Erp5-report] r33438 nicolas - in /erp5/trunk/products/ERP5: Document/ interfaces/
nobody at svn.erp5.org
nobody at svn.erp5.org
Fri Mar 5 12:04:36 CET 2010
Author: nicolas
Date: Fri Mar 5 12:04:35 2010
New Revision: 33438
URL: http://svn.erp5.org?rev=33438&view=rev
Log:
Implement asSafeHTML output for documents:
- It aims to strip HTML documents and remove
unsafe content such as embedded javascript, forms,
imports of external multimedia content, ...
- useful to display HTML attachments of ingested events
- Use portal_transforms as conversion engine (and its transform safe_html).
reviewed by Kazuhiko
Modified:
erp5/trunk/products/ERP5/Document/Document.py
erp5/trunk/products/ERP5/Document/EmailDocument.py
erp5/trunk/products/ERP5/interfaces/html_convertable.py
Modified: erp5/trunk/products/ERP5/Document/Document.py
URL: http://svn.erp5.org/erp5/trunk/products/ERP5/Document/Document.py?rev=33438&r1=33437&r2=33438&view=diff
==============================================================================
--- erp5/trunk/products/ERP5/Document/Document.py [utf8] (original)
+++ erp5/trunk/products/ERP5/Document/Document.py [utf8] Fri Mar 5 12:04:35 2010
@@ -1204,6 +1204,25 @@
mime, html = self.convert(**kw)
return self._stripHTML(str(html))
+ security.declareProtected(Permissions.View, 'asSafeHTML')
+ def asSafeHTML(self, **kw):
+ """
+ Convert the current document to HTML and return it stripped of embedded
+ javascript, forms and external plugin imports ('' without base data).
+ """
+ format = 'text/x-html-safe' # key of the stored conversion and target of _safeHTML
+ if not self.hasBaseData():
+ return ''
+ try:
+ mime, data = self.getConversion(format=format)
+ return data
+ except KeyError: # presumably: no conversion stored for this format yet (TODO confirm)
+ kw['format'] = 'html' # convert to plain HTML first, sanitize afterwards
+ mime, html = self.convert(**kw)
+ safe_html = self._safeHTML(str(html), format=format)
+ self.setConversion(safe_html, mime=mime, format=format) # stored under the safe format
+ return safe_html
+
def _guessEncoding(self, string):
"""
Try to guess the encoding for this string.
@@ -1239,6 +1258,34 @@
return str(stripped_html)
return stripped_html
+ def _safeHTML(self, html, format='text/x-html-safe', charset=None):
+ """
+ Sanitize HTML content through the portal_transforms tool, i.e.
+ w/o embedded javascript, forms and any external plugin imports.
+ Use it when we do not trust the user (Anonymous) who pushes data.
+ - html: content to sanitize; re-encoded to UTF-8 when its charset is known
+ - format: destination format handed to the transform tool
+ - charset: charset of html. Takes precedence over charset
+ values found in the html string itself
+ Returns the output of portal_transforms.convertToData().
+ """
+ portal = self.getPortalObject()
+ if charset is None:
+ # no explicit charset: fall back to the first match of charset_parser, if any
+ charset_list = self.charset_parser.findall(html)
+ if charset_list:
+ charset = charset_list[0]
+ if charset and charset not in ('utf-8', 'UTF-8'):
+ try:
+ html = html.decode(charset).encode('utf-8') # re-encode the input itself
+ except (UnicodeDecodeError, LookupError):
+ pass # undecodable or unknown charset: hand the original string over as is
+ else:
+ charset = 'utf-8' # html is now UTF-8, so announce that encoding
+ transform_tool = getToolByName(portal, 'portal_transforms')
+ safe_html_string = transform_tool.convertToData(format, html,
+ encoding=charset)
+ return safe_html_string
security.declareProtected(Permissions.AccessContentsInformation, 'getContentInformation')
def getContentInformation(self):
Modified: erp5/trunk/products/ERP5/Document/EmailDocument.py
URL: http://svn.erp5.org/erp5/trunk/products/ERP5/Document/EmailDocument.py?rev=33438&r1=33437&r2=33438&view=diff
==============================================================================
--- erp5/trunk/products/ERP5/Document/EmailDocument.py [utf8] (original)
+++ erp5/trunk/products/ERP5/Document/EmailDocument.py [utf8] Fri Mar 5 12:04:35 2010
@@ -452,7 +452,7 @@
part_encoding = part.get_content_charset()
part_html = part.get_payload(decode=1)
# Invoke Document class HTML stripper
- html_result = self._stripHTML(part_html, charset=part_encoding)
+ html_result = self._safeHTML(part_html, charset=part_encoding)
if html_result:
# Give priority to HTML
text_result = html_result
Modified: erp5/trunk/products/ERP5/interfaces/html_convertable.py
URL: http://svn.erp5.org/erp5/trunk/products/ERP5/interfaces/html_convertable.py?rev=33438&r1=33437&r2=33438&view=diff
==============================================================================
--- erp5/trunk/products/ERP5/interfaces/html_convertable.py [utf8] (original)
+++ erp5/trunk/products/ERP5/interfaces/html_convertable.py [utf8] Fri Mar 5 12:04:35 2010
@@ -53,4 +53,14 @@
kw -- optional parameters which can be passed to the
conversion engine
- """
+ """
+
+ def asSafeHTML(**kw):
+ """
+ Convert the current document to HTML and remove embedded
+ javascript, forms and any external plugin imports.
+
+ kw -- optional parameters which can be passed to the
+ conversion engine
+ """
+
More information about the Erp5-report
mailing list