[Erp5-report] r35854 mayoro - /erp5/trunk/products/ERP5Form/ScribusParser.py

Tue Jun 1 12:52:34 CEST 2010

Author: mayoro
Date: Tue Jun  1 12:52:24 2010
New Revision: 35854

URL: http://svn.erp5.org?rev=35854&view=rev
Log:
Initial version of ScribusParser.py for parsing Scribus files using lxml

Added:
    erp5/trunk/products/ERP5Form/ScribusParser.py

Added: erp5/trunk/products/ERP5Form/ScribusParser.py
URL: http://svn.erp5.org/erp5/trunk/products/ERP5Form/ScribusParser.py?rev=35854&view=auto
==============================================================================

--- erp5/trunk/products/ERP5Form/ScribusParser.py (added)
+++ erp5/trunk/products/ERP5Form/ScribusParser.py [utf8] Tue Jun  1 12:52:24 2010
@@ -1,0 +1,485 @@
+# -*- coding: utf-8 -*-
+##############################################################################
+#
+# Copyright (c) 2009 Nexedi SA and Contributors. All Rights Reserved.
+#                     Mayoro DIAGNE <mayoro at nexedi.com>
+#                     Guy Oswald OBAMA <guy at nexedi.com>
+#                     thomas <thomas at nexedi.com>
+#                     Mame C.Sall <mame at nexedi.com>                  
+# WARNING: This program as such is intended to be used by professional
+# programmers who take the whole responsability of assessing all potential
+# consequences resulting from its eventual inadequacies and bugs
+# End users who are looking for a ready-to-use solution with commercial
+# garantees and support are strongly adviced to contract a Free Software
+# Service Company
+#
+# This program is Free Software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+#
+##############################################################################
+from AccessControl import ClassSecurityInfo
+from lxml import etree
+
+class ScribusParser:
+  """
+    Scribus parser API: provides methods which allow parsing a Scribus file.
+  """
+  security = ClassSecurityInfo()
+
+  def __init__(self, scribus_file_descriptor):
+    """
+    Load the content of a Scribus file into self.data.
+
+    scribus_file_descriptor can be:
+     - an open file-like object (anything having a read() method): it is
+       read entirely and then closed,
+     - a string holding the XML content itself, recognised because its
+       first non blank character is '<',
+     - anything else is handed to open() as the path of the file to read.
+
+    Every '&#x5;' found in the content is replaced by a newline and every
+    '&#x4;' by a tab before the content is stored.
+
+    Raise ValueError if scribus_file_descriptor is None.
+    """
+    if scribus_file_descriptor is None:
+      raise ValueError("No Scribus file provided, please choose a Scibus Form")
+
+    if hasattr(scribus_file_descriptor, "read"):
+      try:
+        data = scribus_file_descriptor.read()
+      finally:
+        # close even if read() fails
+        scribus_file_descriptor.close()
+    elif isinstance(scribus_file_descriptor, basestring) and \
+        scribus_file_descriptor.lstrip().startswith('<'):
+      # The former test, type(...) == 'str', compared a type object with a
+      # string: it was always false, so XML content given as a string was
+      # passed to open(). Strings not starting with '<' are still treated
+      # as paths, like before.
+      data = scribus_file_descriptor
+    else:
+      source = open(scribus_file_descriptor, "rb")
+      try:
+        data = source.read()
+      finally:
+        source.close()
+    data = data.replace('&#x5;', '\n')
+    data = data.replace('&#x4;', '\t')
+    self.data = data
+
+  def getData(self):
+    """
+    Return the Scribus file content kept in self.data, i.e. the string
+    built by __init__ (not a parsed XML object)
+    """
+    return self.data
+
+  def getEtreeXMLObject(self):
+    """
+    Parse the file content with lxml and return the resulting root element.
+    The content is parsed again at each call.
+    """
+    content = self.getData()
+    return etree.XML(content)
+
+
+  def getXMLObjectByTagName(self, tag_name):
+    """
+    Return the list of all descendants of the root element whose tag is
+    tag_name. The root element itself is never part of the result.
+    """
+    root = self.getEtreeXMLObject()
+    return [node for node in root.iterdescendants() if node.tag == tag_name]
+
+  security.declarePublic('getScribusFileVersion')
+  def getScribusFileVersion(self):
+    """
+    Return the value of the 'Version' attribute of the root element,
+    or None when the root element has no such attribute
+    """
+    root = self.getEtreeXMLObject()
+    if 'Version' not in root.keys():
+      return None
+    return root.attrib['Version']
+
+  security.declarePublic('getPageCount')
+  def getPageCount(self):
+    """
+    Return, as an integer, the ANZPAGES attribute of the first DOCUMENT tag.
+    Return 0 if there is no DOCUMENT tag or if it has no ANZPAGES attribute
+    """
+    # only the first DOCUMENT tag is looked at
+    document_list = self.getXMLObjectByTagName('DOCUMENT')
+    if len(document_list) == 0:
+      return 0
+    attribute_dict = document_list[0].attrib
+    if 'ANZPAGES' not in attribute_dict.keys():
+      return 0
+    return int(attribute_dict['ANZPAGES'])
+
+  security.declarePublic('getPageGap')
+  def getPageGap(self):
+    """
+    Return the page gap as an integer: the BORDERTOP attribute of the first
+    PAGE tag or, when that tag has no such attribute, the BORDERTOP
+    attribute of the first DOCUMENT tag.
+
+    Return 0 if the root element has no 'Version' attribute, if the file
+    has no PAGE tag or if no BORDERTOP attribute is found.
+    Decimal values are truncated.
+    """
+    page_gap = 0
+    page_list = self.getXMLObjectByTagName('PAGE')
+    if self.getScribusFileVersion() is not None and len(page_list) != 0:
+      first_page = page_list[0]
+      if 'BORDERTOP' in first_page.attrib.keys():
+        page_gap = first_page.attrib['BORDERTOP']
+      else:
+        document_list = self.getXMLObjectByTagName('DOCUMENT')
+        if len(document_list) != 0:
+          document = document_list[0]
+          if 'BORDERTOP' in document.attrib.keys():
+            page_gap = document.attrib['BORDERTOP']
+    # go through float() first: int() alone raises ValueError on a decimal
+    # string such as '28.5'
+    return int(float(page_gap))
+
+  security.declarePublic('getPageWidth')
+  def getPageWidth(self):
+    """
+    Return, as a float, the PAGEWIDTH attribute of the first DOCUMENT tag.
+    Return 0.0 if there is no DOCUMENT tag or no PAGEWIDTH attribute.
+    NOTE(review): the value was formerly documented as pixels (px); the
+    unit is not checked here - to be confirmed
+    """
+    # only the first DOCUMENT tag is looked at
+    document_list = self.getXMLObjectByTagName('DOCUMENT')
+    if len(document_list) == 0:
+      return float(0)
+    attribute_dict = document_list[0].attrib
+    if 'PAGEWIDTH' not in attribute_dict.keys():
+      return float(0)
+    return float(attribute_dict['PAGEWIDTH'])
+
+  security.declarePublic('getPageHeight')
+  def getPageHeight(self):
+    """
+    Return, as a float, the PAGEHEIGHT attribute of the first DOCUMENT tag.
+    Return 0.0 if there is no DOCUMENT tag or no PAGEHEIGHT attribute.
+    NOTE(review): the value was formerly documented as pixels (px); the
+    unit is not checked here - to be confirmed
+    """
+    # only the first DOCUMENT tag is looked at
+    document_list = self.getXMLObjectByTagName('DOCUMENT')
+    if len(document_list) == 0:
+      return float(0)
+    attribute_dict = document_list[0].attrib
+    if 'PAGEHEIGHT' not in attribute_dict.keys():
+      return float(0)
+    return float(attribute_dict['PAGEHEIGHT'])
+
+
+  security.declarePublic('getDocumentAttributeByName')
+  def getDocumentAttributeByName(self, attribute_name):
+    """
+    Generic accessor for the attributes of the DOCUMENT tag.
+    Return the value (a string, not converted) of the attribute
+    attribute_name of the first DOCUMENT tag, or the integer 0 if there is
+    no DOCUMENT tag or if it has no such attribute
+    """
+    # the slice keeps at most the first DOCUMENT tag
+    for document in self.getXMLObjectByTagName('DOCUMENT')[:1]:
+      if attribute_name in document.attrib.keys():
+        return document.attrib[attribute_name]
+    return 0
+
+  def getAttributeValueXMLObject(self, xml_object, attribute):
+    """
+    Return the value of the attribute named attribute of xml_object, or
+    None if xml_object has no such attribute.
+    For example, if obj refers to <PAGEOBJECT PTYPE="4" ...
+    getAttributeValueXMLObject(obj, 'PTYPE') returns "4"
+    """
+    if attribute not in xml_object.keys():
+      return None
+    return xml_object.attrib[attribute]
+
+  security.declarePublic('getFieldIdList')
+  def getFieldIdList(self):
+    """
+    Return the list of the field ids of the scribus document.
+    A field is a PAGEOBJECT tag having PTYPE "4" and a non empty ANNAME;
+    its id is the ANNAME value with spaces replaced by underscores
+    """
+    field_id_list = []
+    for page_object in self.getXMLObjectByTagName('PAGEOBJECT'):
+      raw_name = self.getAttributeValueXMLObject(page_object, 'ANNAME')
+      if raw_name is None:
+        continue
+      field_id = raw_name.replace(' ', '_')
+      if field_id == '':
+        continue
+      if page_object.attrib['PTYPE'] == "4":
+        field_id_list.append(field_id)
+    return field_id_list
+
+  security.declarePublic('getFieldItemList')
+  def getFieldItemList(self):
+    """
+    Return the list of the fields of the scribus document, as
+    (field_id, attributes) tuples where attributes is the attrib mapping
+    of the PAGEOBJECT tag.
+    A field is a PAGEOBJECT tag having PTYPE "4" and a non empty ANNAME;
+    its id is the ANNAME value with spaces replaced by underscores
+    """
+    field_item_list = []
+    for page_object in self.getXMLObjectByTagName('PAGEOBJECT'):
+      raw_name = self.getAttributeValueXMLObject(page_object, 'ANNAME')
+      if raw_name is None:
+        continue
+      field_id = raw_name.replace(' ', '_')
+      if field_id == '':
+        continue
+      if page_object.attrib['PTYPE'] == "4":
+        field_item_list.append((field_id, page_object.attrib))
+    return field_item_list
+
+  security.declarePublic('getFieldIdListFor')
+  def getFieldIdListFor(self, page=0):
+    """
+    Return the list of the ids of the fields whose OwnPage attribute,
+    converted to an integer, equals page.
+    A field is a PAGEOBJECT tag having PTYPE "4" and a non empty ANNAME;
+    its id is the ANNAME value with spaces replaced by underscores.
+    PAGEOBJECT tags without ANNAME or without OwnPage are ignored
+    """
+    field_id_list = []
+    for page_object in self.getXMLObjectByTagName('PAGEOBJECT'):
+      raw_name = self.getAttributeValueXMLObject(page_object, 'ANNAME')
+      raw_page = self.getAttributeValueXMLObject(page_object, 'OwnPage')
+      # OwnPage is converted before any filtering on the name
+      own_page = None
+      if raw_page is not None:
+        own_page = int(raw_page)
+      if raw_name is None or own_page is None:
+        continue
+      field_id = raw_name.replace(' ', '_')
+      if field_id != '' and own_page == page \
+          and page_object.attrib['PTYPE'] == "4":
+        field_id_list.append(field_id)
+    return field_id_list
+
+  security.declarePublic('getFieldItemListFor')
+  def getFieldItemListFor(self, page=0):
+    """
+    Return the list of the fields whose OwnPage attribute, converted to an
+    integer, equals page, as (field_id, attributes) tuples where attributes
+    is the attrib mapping of the PAGEOBJECT tag.
+    A field is a PAGEOBJECT tag having PTYPE "4" and a non empty ANNAME;
+    its id is the ANNAME value with spaces replaced by underscores.
+    PAGEOBJECT tags without ANNAME or without OwnPage are ignored
+    """
+    field_item_list = []
+    for page_object in self.getXMLObjectByTagName('PAGEOBJECT'):
+      raw_name = self.getAttributeValueXMLObject(page_object, 'ANNAME')
+      raw_page = self.getAttributeValueXMLObject(page_object, 'OwnPage')
+      # OwnPage is converted before any filtering on the name
+      own_page = None
+      if raw_page is not None:
+        own_page = int(raw_page)
+      if raw_name is None or own_page is None:
+        continue
+      field_id = raw_name.replace(' ', '_')
+      if field_id != '' and own_page == page \
+          and page_object.attrib['PTYPE'] == "4":
+        field_item_list.append((field_id, page_object.attrib))
+    return field_item_list
+
+  security.declarePublic('getPropertyFieldDictFor')
+  def getPropertyFieldDictFor(self, field_name):
+    """
+    Return the attributes of the field whose id is field_name, as returned
+    by getFieldItemList. If several fields share this id the last one is
+    used; if none has it an empty dictionary is returned
+    """
+    matching_list = [attributes
+                     for field_id, attributes in self.getFieldItemList()
+                     if field_id == field_name]
+    if len(matching_list) == 0:
+      return {}
+    return matching_list[-1]
+
+  security.declarePublic('getERP5PropertyDict')
+  def getERP5PropertyDict(self):
+    """
+    Return a dict giving, for each page number from 0 to getPageCount() - 1,
+    the list of the fields of that page as (field_id, property_dict) tuples:
+      {0: [(field_id, {'position_x': ..., 'type': ..., ...}), ...], ...}
+
+    property_dict is built from the scribus attributes of the field (XPOS,
+    YPOS, WIDTH, HEIGHT, ANFLAG, ANMC, ANTYPE, ANFORMAT) and from the user
+    properties returned by getERP5AttributesFieldDict, the latter taking
+    precedence for 'type', 'maximum_input', 'required' and 'editable'.
+
+    Raise KeyError if a field lacks one of XPOS, YPOS, WIDTH, HEIGHT or
+    ANFLAG, or lacks ANMC while no 'maximum_input' user property is given.
+    """
+    # ANFLAG is a bit field; the values below are the ones listed in the
+    # File Format Specification for Scribus (http://docs.scribus.net).
+    # Bits are tested with a mask: the former decoding subtracted each value
+    # in turn, which gave wrong results as soon as a bit not listed here
+    # was set (e.g. 131072 was reported as a password field).
+    flag_property_list = (
+      (8388608, 'noScroll'),
+      (4194304, 'noSpellCheck'),
+      (8192, 'password'),
+      (4096, 'multiline'),
+      (4, 'noExport'),
+      (2, 'required'),
+    )
+    # 262144 is the 'editable' bit: fields are editable by default, so this
+    # bit changes nothing. Only the 'read only' bit clears 'editable'.
+    read_only_flag = 1
+    # scribus ANTYPE (selected in the scribus PDF-form properties)
+    antype_field_type_dict = {
+      '2': 'InputButtonField', # PDF-Button
+      '3': 'StringField',      # PDF-Text, refined below
+      '4': 'CheckBoxField',    # PDF-Checkbox
+      '5': 'ListField',        # PDF-Combobox
+      '6': 'MultiListField',   # PDF-ListBox
+    }
+    # scribus ANFORMAT, used for PDF-Text fields only
+    anformat_field_type_dict = {
+      '1': 'IntegerField',     # number
+      '2': 'FloatField',       # percentage
+      '3': 'DateTimeField',    # date
+      '4': 'DateTimeField',    # time
+    }
+    # field type -> (data_type, default_data), used in property_sheet to
+    # save field values. Other types are ('string', '')
+    data_type_dict = {
+      'MultiListField': ('tokens', ''),
+      'IntegerField': ('int', 0),
+      'FloatField': ('float', 0.0),
+      'CheckBoxField': ('boolean', 0),
+      'DateTimeField': ('date', '1900/01/01'),
+    }
+    # scratch_left: space at the left of the scratch space
+    document_scratch_left = self.getDocumentAttributeByName('ScratchLeft')
+    # scratch_top: space at the top of the scratch space, before the pages
+    document_scratch_top = self.getDocumentAttributeByName('ScratchTop')
+    erp5_property_dict = {}
+    for page in range(self.getPageCount()):
+      erp5_property_list = []
+      for property_id, scribus_property_dict in self.getFieldItemListFor(page):
+        usable_property = {}
+        usable_property['position_x'] = \
+                int(float(scribus_property_dict['XPOS']) - float(document_scratch_left))
+        usable_property['position_y'] = \
+                int(float(scribus_property_dict['YPOS']) - float(document_scratch_top))
+        usable_property['size_x'] = int(float(scribus_property_dict['WIDTH']))
+        usable_property['size_y'] = int(float(scribus_property_dict['HEIGHT']))
+        user_property = self.getERP5AttributesFieldDict(property_id)
+        usable_property['title'] = user_property.get('title', '')
+        # generating erp5 attribute order position for erp5 form
+        order = user_property.get('order', 'left')
+        if order not in ('left', 'right'):
+          # the user value is invalid: guess it from the position of the
+          # field in the original Scribus file. The computed position is in
+          # usable_property; looking for it in user_property raised KeyError
+          if usable_property['position_x'] > 280:
+            order = 'right'
+          else:
+            order = 'left'
+        usable_property['order'] = order
+        # decoding ANFLAG
+        annotation_flag = int(scribus_property_dict['ANFLAG'])
+        for flag, property_name in flag_property_list:
+          if annotation_flag & flag:
+            usable_property[property_name] = 1
+          else:
+            usable_property[property_name] = 0
+        if usable_property['noScroll']:
+          # this flag used to be stored under the misspelled key 'noscroll'
+          # only; keep it too for callers which may read it
+          usable_property['noscroll'] = 1
+        if annotation_flag & read_only_flag:
+          usable_property['editable'] = 0
+        else:
+          usable_property['editable'] = 1
+        if 'maximum_input' in user_property:
+          usable_property['maximum_input'] = user_property['maximum_input']
+        else:
+          usable_property['maximum_input'] = scribus_property_dict['ANMC']
+        # getting object type :
+        if 'type' in user_property:
+          # 'type' id in tooltip : using it and ignoring scribus 'type'
+          field_type = user_property['type']
+        else:
+          # from scribus type. StringField is used when ANTYPE is missing
+          # and also when its value is not a known one: 'type' was formerly
+          # left undefined in that case, which raised KeyError further down
+          object_type = scribus_property_dict.get('ANTYPE')
+          field_type = antype_field_type_dict.get(object_type, 'StringField')
+          if object_type == '3':
+            if usable_property['multiline'] == 1:
+              # Stringfield is multiline, converting to TextAreaField
+              field_type = 'TextAreaField'
+            else:
+              # checking kind of Stringfield
+              field_type = anformat_field_type_dict.get(
+                scribus_property_dict.get('ANFORMAT'), 'StringField')
+        usable_property['type'] = field_type
+        # getting data_type relative to object type
+        data_type, default_data = data_type_dict.get(field_type, ('string', ''))
+        usable_property['data_type'] = data_type
+        usable_property['default_data'] = default_data
+        # required and editable defined by the user in ANTOOLTIP replace
+        # the scribus ones.
+        # NOTE(review): user values are strings as getERP5AttributesFieldDict
+        # only converts 'nb', so a user 'editable' never equals the integer 1
+        # tested below for DateTimeField - to be confirmed
+        if 'required' in user_property:
+          usable_property['required'] = user_property['required']
+        if 'editable' in user_property:
+          usable_property['editable'] = user_property['editable']
+        # getting type properties for special types
+        usable_property['rendering'] = 'single'
+        if field_type == 'CheckBoxField':
+          # group of checkboxes THIS checkbox belongs to, '0' by default
+          usable_property['group'] = user_property.get('group', '0')
+        if field_type == 'DateTimeField':
+          # datetimefield needs several informations
+          usable_property['rendering'] = 'multiple'
+          usable_property['input_order'] = \
+                  user_property.get('input_order', 'ymd')
+          usable_property['date_only'] = user_property.get('date_only', '1')
+          # checking if special date separator is specified
+          # most of PDF forms already have '/' character to differenciate
+          # date fields, in this case no separator is needed and the script
+          # will automatically insert ' ' between element.
+          # > this value is not used in ScribusUtils.py , but in PDFForm.py
+          # when creating the fdf file to fill the PDF form.
+          if usable_property['editable'] == 1:
+            usable_property['date_separator'] = ''
+            usable_property['time_separator'] = ''
+          else:
+            usable_property['date_separator'] = \
+                    user_property.get('date_separator', '/')
+            usable_property['time_separator'] = \
+                    user_property.get('time_separator', ':')
+        # getting creation order from 'tooltip' properties
+        # used to create ERP5 objects in a special order
+        if 'nb' in user_property and str(user_property['nb']).isdigit():
+          # object has a nb properties containing its creation position
+          usable_property['nb'] = user_property['nb']
+        erp5_property_list.append((property_id, usable_property))
+      erp5_property_dict[page] = erp5_property_list
+    return erp5_property_dict
+
+  security.declarePublic('getERP5AttributesFieldDict')
+  def getERP5AttributesFieldDict(self, field_name):
+    """
+    Return a dictionary containing the ERP5 attributes of a given field,
+    like: nb(creation order), type, title
+
+    The attributes are read from the ANTOOLTIP attribute of every field
+    whose id is field_name. ANTOOLTIP is a list of key:value couples
+    separated by '#', for example "type:StringField#nb:3".
+    Values are strings, except 'nb' which is converted to an integer when
+    possible. Parts without ':' are ignored; a field without ANTOOLTIP
+    gives no attribute.
+    """
+    erp5_attribute_dict = {}
+    for field_id, scribus_property_dict in self.getFieldItemList():
+      if field_id != field_name:
+        continue
+      tooltip = scribus_property_dict.get("ANTOOLTIP", "")
+      for attribute in tooltip.split("#"):
+        if ":" not in attribute:
+          # empty part or free text: not a key:value couple
+          continue
+        # split on the first ':' only so that the value may contain ':'
+        # itself (e.g. "time_separator::")
+        key, value = attribute.split(":", 1)
+        if key == "nb":
+          try:
+            value = int(value)
+          except ValueError:
+            # keep the raw string: getERP5PropertyDict ignores a nb
+            # which is not made of digits
+            pass
+        erp5_attribute_dict[key] = value
+    return erp5_attribute_dict