[Erp5-report] r41241 hugo.maia - in /erp5/trunk/utils/cloudooo/cloudooo: ./ granulate/ inte...

nobody at svn.erp5.org nobody at svn.erp5.org
Wed Dec 8 19:08:03 CET 2010


Author: hugo.maia
Date: Wed Dec  8 19:08:02 2010
New Revision: 41241

URL: http://svn.erp5.org?rev=41241&view=rev
Log:
Implement getParagraphItemList and getParagraphItem

Modified:
    erp5/trunk/utils/cloudooo/cloudooo/CHANGES.txt
    erp5/trunk/utils/cloudooo/cloudooo/granulate/oogranulate.py
    erp5/trunk/utils/cloudooo/cloudooo/interfaces/granulate.py
    erp5/trunk/utils/cloudooo/cloudooo/tests/data/granulate_test.odt
    erp5/trunk/utils/cloudooo/cloudooo/tests/testOOGranulate.py

Modified: erp5/trunk/utils/cloudooo/cloudooo/CHANGES.txt
URL: http://svn.erp5.org/erp5/trunk/utils/cloudooo/cloudooo/CHANGES.txt?rev=41241&r1=41240&r2=41241&view=diff
==============================================================================
--- erp5/trunk/utils/cloudooo/cloudooo/CHANGES.txt [utf8] (original)
+++ erp5/trunk/utils/cloudooo/cloudooo/CHANGES.txt [utf8] Wed Dec  8 19:08:02 2010
@@ -1,7 +1,7 @@
 1.0.10 (unreleased)
 ===================
-  - Add getImage for OOGranulate
-  - Add getImageItemList for OOGranulate
+  - Add getParagraphItemList and getParagraphItem for OOGranulate
+  - Add getImageItemList and getImage for OOGranulate
   - Add OdfDocument
   - Add granulate interface.
 

Modified: erp5/trunk/utils/cloudooo/cloudooo/granulate/oogranulate.py
URL: http://svn.erp5.org/erp5/trunk/utils/cloudooo/cloudooo/granulate/oogranulate.py?rev=41241&r1=41240&r2=41241&view=diff
==============================================================================
--- erp5/trunk/utils/cloudooo/cloudooo/granulate/oogranulate.py [utf8] (original)
+++ erp5/trunk/utils/cloudooo/cloudooo/granulate/oogranulate.py [utf8] Wed Dec  8 19:08:02 2010
@@ -42,6 +42,17 @@ class OOGranulate(object):
   def __init__(self, file, source_format):
     self.document = OdfDocument(file, source_format)
 
+  def _relevantParagraphList(self):
+    """Returns a list of the relevant lxml.etree._Element 'p' tags of
+    self.document.parsed_content. It excludes the 'p' inside 'draw:frame'."""
+    # XXX: this could be improved to avoid traversing the document twice,
+    #      probably by getting all relevant paragraphs in a single xpath call
+    all_p_list = self.document.parsed_content.xpath('//text:p',
+                                namespaces=self.document.parsed_content.nsmap)
+    draw_p_list = self.document.parsed_content.xpath('//draw:frame//text:p',
+                                namespaces=self.document.parsed_content.nsmap)
+    return [x for x in all_p_list if x not in draw_p_list]
+
   def getTableItemList(self, file):
     """Returns the list of table IDs in the form of (id, title)."""
     raise NotImplementedError
@@ -73,14 +84,27 @@ class OOGranulate(object):
     path = 'Pictures/%s' % id
     return self.document.getFile(path)
 
-  def getParagraphItemList(self, file):
+  def getParagraphItemList(self):
     """Returns the list of paragraphs in the form of (id, class) where class
     may have special meaning to define TOC/TOI."""
-    raise NotImplementedError
+    key = '{urn:oasis:names:tc:opendocument:xmlns:text:1.0}style-name'
+    id = 0
+    paragraph_list = []
+    for p in self._relevantParagraphList():
+      paragraph_list.append((id, p.attrib[key]))
+      id += 1
+    return paragraph_list
 
-  def getParagraphItem(self, file, paragraph_id):
+  def getParagraphItem(self, paragraph_id):
     """Returns the paragraph in the form of (text, class)."""
-    raise NotImplementedError
+    try:
+      paragraph = self._relevantParagraphList()[paragraph_id]
+      text = ''.join(paragraph.xpath('.//text()', namespaces=paragraph.nsmap))
+      key = '{urn:oasis:names:tc:opendocument:xmlns:text:1.0}style-name'
+      p_class = paragraph.attrib[key]
+      return (text, p_class)
+    except IndexError:
+      return None
 
   def getChapterItemList(self, file):
     """Returns the list of chapters in the form of (id, level)."""

Modified: erp5/trunk/utils/cloudooo/cloudooo/interfaces/granulate.py
URL: http://svn.erp5.org/erp5/trunk/utils/cloudooo/cloudooo/interfaces/granulate.py?rev=41241&r1=41240&r2=41241&view=diff
==============================================================================
--- erp5/trunk/utils/cloudooo/cloudooo/interfaces/granulate.py [utf8] (original)
+++ erp5/trunk/utils/cloudooo/cloudooo/interfaces/granulate.py [utf8] Wed Dec  8 19:08:02 2010
@@ -55,11 +55,11 @@ class IImageGranulator(Interface):
 class ITextGranulator(Interface):
   """Provides methods to granulate a document into chapters and paragraphs."""
 
-  def getParagraphItemList(file):
+  def getParagraphItemList():
     """Returns the list of paragraphs in the form of (id, class) where class may
     have special meaning to define TOC/TOI."""
 
-  def getParagraphItem(file, paragraph_id):
+  def getParagraphItem(paragraph_id):
     """Returns the paragraph in the form of (text, class)."""
 
   def getChapterItemList(file):

Modified: erp5/trunk/utils/cloudooo/cloudooo/tests/data/granulate_test.odt
URL: http://svn.erp5.org/erp5/trunk/utils/cloudooo/cloudooo/tests/data/granulate_test.odt?rev=41241&r1=41240&r2=41241&view=diff
==============================================================================
Binary files - no diff available.

Modified: erp5/trunk/utils/cloudooo/cloudooo/tests/testOOGranulate.py
URL: http://svn.erp5.org/erp5/trunk/utils/cloudooo/cloudooo/tests/testOOGranulate.py?rev=41241&r1=41240&r2=41241&view=diff
==============================================================================
--- erp5/trunk/utils/cloudooo/cloudooo/tests/testOOGranulate.py [utf8] (original)
+++ erp5/trunk/utils/cloudooo/cloudooo/tests/testOOGranulate.py [utf8] Wed Dec  8 19:08:02 2010
@@ -1,3 +1,4 @@
+# -*- coding: utf-8 -*-
 ##############################################################################
 #
 # Copyright (c) 2010 Nexedi SA and Contributors. All Rights Reserved.
@@ -83,17 +84,40 @@ class TestOOGranulate(cloudoooTestCase):
     obtained_image = self.oogranulate.getImage('anything.png')
     self.assertEquals('', obtained_image)
 
+  def testRelevantParagraphList(self):
+    """Test if _relevantParagraphList returns a list with 'p' excluding the 'p'
+    inside 'draw:frame'"""
+    draw_p_list = self.oogranulate.document.parsed_content.xpath(
+                    '//draw:frame//text:p',
+                    namespaces=self.oogranulate.document.parsed_content.nsmap)
+    self.assertTrue(draw_p_list not in self.oogranulate._relevantParagraphList())
+
   def testGetParagraphItemList(self):
-    """Test if getParagraphItemList() returns the right paragraphs list"""
-    self.assertRaises(NotImplementedError,
-                      self.oogranulate.getParagraphItemList,
-                      'file')
+    """Test if getParagraphItemList() returns the right paragraphs list, with
+    the ids always in the same order"""
+    for i in range(5):
+      data = open('./data/granulate_test.odt').read()
+      oogranulate = OOGranulate(data, 'odt')
+      paragraph_list = oogranulate.getParagraphItemList()
+      self.assertEquals((0, 'P3'), paragraph_list[0])
+      self.assertEquals((1, 'P1'), paragraph_list[1])
+      self.assertEquals((2, 'P12'), paragraph_list[2])
+      self.assertEquals((8, 'P13'), paragraph_list[8])
+      self.assertEquals((19, 'Standard'), paragraph_list[19])
 
-  def testGetParagraphItem(self):
+  def testGetParagraphItemSuccessfully(self):
     """Test if getParagraphItem() returns the right paragraph"""
-    self.assertRaises(NotImplementedError, self.oogranulate.getParagraphItem,
-                                     'file',
-                                     'paragraph_id')
+    self.assertEquals(('Some images without title', 'P13'),
+                      self.oogranulate.getParagraphItem(8))
+
+    big_paragraph = self.oogranulate.getParagraphItem(5)
+    self.assertEquals('P8', big_paragraph[1])
+    self.assertTrue(big_paragraph[0].startswith(u'A prática cotidiana prova'))
+    self.assertTrue(big_paragraph[0].endswith(u'corresponde às necessidades.'))
+
+  def testGetParagraphItemWithoutSuccess(self):
+    """Test if getParagraphItem() returns None for a nonexistent id"""
+    self.assertEquals(None, self.oogranulate.getParagraphItem(200))
 
   def testGetChapterItemList(self):
     """Test if getChapterItemList() returns the right chapters list"""



More information about the Erp5-report mailing list