[Erp5-report] r33438 nicolas - in /erp5/trunk/products/ERP5: Document/ interfaces/

nobody at svn.erp5.org nobody at svn.erp5.org
Fri Mar 5 12:04:36 CET 2010


Author: nicolas
Date: Fri Mar  5 12:04:35 2010
New Revision: 33438

URL: http://svn.erp5.org?rev=33438&view=rev
Log:
Implement asSafeHTML output for documents:
  - It aims to strip HTML documents and remove
  unsafe content such as embedded javascript, forms,
  imports of external multimedia content, ...
  - useful to display HTML attachments of ingested events
  - Use portal_transforms as conversion engine (and its transform safe_html).
reviewed by Kazuhiko

Modified:
    erp5/trunk/products/ERP5/Document/Document.py
    erp5/trunk/products/ERP5/Document/EmailDocument.py
    erp5/trunk/products/ERP5/interfaces/html_convertable.py

Modified: erp5/trunk/products/ERP5/Document/Document.py
URL: http://svn.erp5.org/erp5/trunk/products/ERP5/Document/Document.py?rev=33438&r1=33437&r2=33438&view=diff
==============================================================================
--- erp5/trunk/products/ERP5/Document/Document.py [utf8] (original)
+++ erp5/trunk/products/ERP5/Document/Document.py [utf8] Fri Mar  5 12:04:35 2010
@@ -1204,6 +1204,25 @@
       mime, html = self.convert(**kw)
       return self._stripHTML(str(html))
 
+  security.declareProtected(Permissions.View, 'asSafeHTML')
+  def asSafeHTML(self, **kw):
+    """
+      Converts the current document to HTML, strips it and removes
+      embedded javascript, forms and any external plugins imports.
+    """
+    format = 'text/x-html-safe'
+    if not self.hasBaseData():
+      return ''
+    try:
+      mime, data = self.getConversion(format=format)
+      return data
+    except KeyError: # presumably: no stored conversion for this format yet
+      kw['format'] = 'html'
+      mime, html = self.convert(**kw)
+      safe_html = self._safeHTML(str(html), format=format)
+      self.setConversion(safe_html, mime=mime, format=format) # keep for reuse
+      return safe_html
+
   def _guessEncoding(self, string):
     """
       Try to guess the encoding for this string.
@@ -1239,6 +1258,34 @@
         return str(stripped_html)
     return stripped_html
 
+  def _safeHTML(self, html, format='text/x-html-safe', charset=None):
+    """
+      Private method stripping HTML content in safe mode (w/o embedded
+      javascript, forms and any external plugins imports); returns the
+      result of the portal_transforms conversion of html to format.
+      Use it when we do not trust the user (Anonymous) who pushes data.
+      - html: content to strip
+      - format: destination format
+      - charset: charset used to encode string. Takes precedence
+      over charset values found in the html string
+    """
+    portal = self.getPortalObject()
+    if charset is None:
+      # no explicit charset given: use the first one declared in html
+      charset_list = self.charset_parser.findall(html)
+      if charset_list:
+        charset = charset_list[0]
+    if charset and charset not in ('utf-8', 'UTF-8'):
+      try:
+        html = html.decode(charset).encode('utf-8') # feed utf-8 to transform
+      except (UnicodeDecodeError, LookupError):
+        pass
+      else:
+        charset = 'utf-8' # Override charset if conversion succeeds
+    transform_tool = getToolByName(portal, 'portal_transforms')
+    safe_html_string = transform_tool.convertToData(format, html,
+                                                    encoding=charset)
+    return safe_html_string
 
   security.declareProtected(Permissions.AccessContentsInformation, 'getContentInformation')
   def getContentInformation(self):

Modified: erp5/trunk/products/ERP5/Document/EmailDocument.py
URL: http://svn.erp5.org/erp5/trunk/products/ERP5/Document/EmailDocument.py?rev=33438&r1=33437&r2=33438&view=diff
==============================================================================
--- erp5/trunk/products/ERP5/Document/EmailDocument.py [utf8] (original)
+++ erp5/trunk/products/ERP5/Document/EmailDocument.py [utf8] Fri Mar  5 12:04:35 2010
@@ -452,7 +452,7 @@
         part_encoding = part.get_content_charset()
         part_html = part.get_payload(decode=1)
         # Invoke Document class HTML stripper
-        html_result = self._stripHTML(part_html, charset=part_encoding)
+        html_result = self._safeHTML(part_html, charset=part_encoding)
     if html_result:
       # Give priority to HTML
       text_result = html_result

Modified: erp5/trunk/products/ERP5/interfaces/html_convertable.py
URL: http://svn.erp5.org/erp5/trunk/products/ERP5/interfaces/html_convertable.py?rev=33438&r1=33437&r2=33438&view=diff
==============================================================================
--- erp5/trunk/products/ERP5/interfaces/html_convertable.py [utf8] (original)
+++ erp5/trunk/products/ERP5/interfaces/html_convertable.py [utf8] Fri Mar  5 12:04:35 2010
@@ -53,4 +53,14 @@
  
     kw -- optional parameters which can be passed to the
           conversion engine
-    """
+    """
+
+  def asSafeHTML(**kw):
+    """
+    Converts the current document to HTML, and removes
+    embedded javascript, forms and any external plugins imports.
+
+    kw -- optional parameters which can be passed to the
+          conversion engine
+    """
+




More information about the Erp5-report mailing list