[Erp5-report] r41241 hugo.maia - in /erp5/trunk/utils/cloudooo/cloudooo: ./ granulate/ inte...
nobody at svn.erp5.org
nobody at svn.erp5.org
Wed Dec 8 19:08:03 CET 2010
Author: hugo.maia
Date: Wed Dec 8 19:08:02 2010
New Revision: 41241
URL: http://svn.erp5.org?rev=41241&view=rev
Log:
Implement getParagraphItemList and getParagraphItem
Modified:
erp5/trunk/utils/cloudooo/cloudooo/CHANGES.txt
erp5/trunk/utils/cloudooo/cloudooo/granulate/oogranulate.py
erp5/trunk/utils/cloudooo/cloudooo/interfaces/granulate.py
erp5/trunk/utils/cloudooo/cloudooo/tests/data/granulate_test.odt
erp5/trunk/utils/cloudooo/cloudooo/tests/testOOGranulate.py
Modified: erp5/trunk/utils/cloudooo/cloudooo/CHANGES.txt
URL: http://svn.erp5.org/erp5/trunk/utils/cloudooo/cloudooo/CHANGES.txt?rev=41241&r1=41240&r2=41241&view=diff
==============================================================================
--- erp5/trunk/utils/cloudooo/cloudooo/CHANGES.txt [utf8] (original)
+++ erp5/trunk/utils/cloudooo/cloudooo/CHANGES.txt [utf8] Wed Dec 8 19:08:02 2010
@@ -1,7 +1,7 @@
1.0.10 (unreleased)
===================
- - Add getImage for OOGranulate
- - Add getImageItemList for OOGranulate
+ - Add getParagraphItemList and getParagraphItem for OOGranulate
+ - Add getImageItemList and getImage for OOGranulate
- Add OdfDocument
- Add granulate interface.
Modified: erp5/trunk/utils/cloudooo/cloudooo/granulate/oogranulate.py
URL: http://svn.erp5.org/erp5/trunk/utils/cloudooo/cloudooo/granulate/oogranulate.py?rev=41241&r1=41240&r2=41241&view=diff
==============================================================================
--- erp5/trunk/utils/cloudooo/cloudooo/granulate/oogranulate.py [utf8] (original)
+++ erp5/trunk/utils/cloudooo/cloudooo/granulate/oogranulate.py [utf8] Wed Dec 8 19:08:02 2010
@@ -42,6 +42,17 @@ class OOGranulate(object):
def __init__(self, file, source_format):
self.document = OdfDocument(file, source_format)
+ def _relevantParagraphList(self):
+ """Returns a list with the relevants lxml.etree._Element 'p' tags of
+ self.document.parsed_content. It exclude the 'p' inside 'draw:frame'."""
+ # XXX: this algorithm could be improved to not iterate with the file twice
+ # and probably get all relevant paragraph list by a single xpath call
+ all_p_list = self.document.parsed_content.xpath('//text:p',
+ namespaces=self.document.parsed_content.nsmap)
+ draw_p_list = self.document.parsed_content.xpath('//draw:frame//text:p',
+ namespaces=self.document.parsed_content.nsmap)
+ return [x for x in all_p_list if x not in draw_p_list]
+
def getTableItemList(self, file):
"""Returns the list of table IDs in the form of (id, title)."""
raise NotImplementedError
@@ -73,14 +84,27 @@ class OOGranulate(object):
path = 'Pictures/%s' % id
return self.document.getFile(path)
- def getParagraphItemList(self, file):
+ def getParagraphItemList(self):
"""Returns the list of paragraphs in the form of (id, class) where class
may have special meaning to define TOC/TOI."""
- raise NotImplementedError
+ key = '{urn:oasis:names:tc:opendocument:xmlns:text:1.0}style-name'
+ id = 0
+ paragraph_list = []
+ for p in self._relevantParagraphList():
+ paragraph_list.append((id, p.attrib[key]))
+ id += 1
+ return paragraph_list
- def getParagraphItem(self, file, paragraph_id):
+ def getParagraphItem(self, paragraph_id):
"""Returns the paragraph in the form of (text, class)."""
- raise NotImplementedError
+ try:
+ paragraph = self._relevantParagraphList()[paragraph_id]
+ text = ''.join(paragraph.xpath('.//text()', namespaces=paragraph.nsmap))
+ key = '{urn:oasis:names:tc:opendocument:xmlns:text:1.0}style-name'
+ p_class = paragraph.attrib[key]
+ return (text, p_class)
+ except IndexError:
+ return None
def getChapterItemList(self, file):
"""Returns the list of chapters in the form of (id, level)."""
Modified: erp5/trunk/utils/cloudooo/cloudooo/interfaces/granulate.py
URL: http://svn.erp5.org/erp5/trunk/utils/cloudooo/cloudooo/interfaces/granulate.py?rev=41241&r1=41240&r2=41241&view=diff
==============================================================================
--- erp5/trunk/utils/cloudooo/cloudooo/interfaces/granulate.py [utf8] (original)
+++ erp5/trunk/utils/cloudooo/cloudooo/interfaces/granulate.py [utf8] Wed Dec 8 19:08:02 2010
@@ -55,11 +55,11 @@ class IImageGranulator(Interface):
class ITextGranulator(Interface):
"""Provides methods to granulate a document into chapters and paragraphs."""
- def getParagraphItemList(file):
+ def getParagraphItemList():
"""Returns the list of paragraphs in the form of (id, class) where class may
have special meaning to define TOC/TOI."""
- def getParagraphItem(file, paragraph_id):
+ def getParagraphItem(paragraph_id):
"""Returns the paragraph in the form of (text, class)."""
def getChapterItemList(file):
Modified: erp5/trunk/utils/cloudooo/cloudooo/tests/data/granulate_test.odt
URL: http://svn.erp5.org/erp5/trunk/utils/cloudooo/cloudooo/tests/data/granulate_test.odt?rev=41241&r1=41240&r2=41241&view=diff
==============================================================================
Binary files - no diff available.
Modified: erp5/trunk/utils/cloudooo/cloudooo/tests/testOOGranulate.py
URL: http://svn.erp5.org/erp5/trunk/utils/cloudooo/cloudooo/tests/testOOGranulate.py?rev=41241&r1=41240&r2=41241&view=diff
==============================================================================
--- erp5/trunk/utils/cloudooo/cloudooo/tests/testOOGranulate.py [utf8] (original)
+++ erp5/trunk/utils/cloudooo/cloudooo/tests/testOOGranulate.py [utf8] Wed Dec 8 19:08:02 2010
@@ -1,3 +1,4 @@
+# -*- coding: utf-8 -*-
##############################################################################
#
# Copyright (c) 2010 Nexedi SA and Contributors. All Rights Reserved.
@@ -83,17 +84,40 @@ class TestOOGranulate(cloudoooTestCase):
obtained_image = self.oogranulate.getImage('anything.png')
self.assertEquals('', obtained_image)
+ def testRelevantParagraphList(self):
+ """Test if _relevantParagraphList returns a list with 'p' excluding the 'p'
+ inside 'draw:frame'"""
+ draw_p_list = self.oogranulate.document.parsed_content.xpath(
+ '//draw:frame//text:p',
+ namespaces=self.oogranulate.document.parsed_content.nsmap)
+ self.assertTrue(draw_p_list not in self.oogranulate._relevantParagraphList())
+
def testGetParagraphItemList(self):
- """Test if getParagraphItemList() returns the right paragraphs list"""
- self.assertRaises(NotImplementedError,
- self.oogranulate.getParagraphItemList,
- 'file')
+ """Test if getParagraphItemList() returns the right paragraphs list, with
+ the ids always in the same order"""
+ for i in range(5):
+ data = open('./data/granulate_test.odt').read()
+ oogranulate = OOGranulate(data, 'odt')
+ paragraph_list = oogranulate.getParagraphItemList()
+ self.assertEquals((0, 'P3'), paragraph_list[0])
+ self.assertEquals((1, 'P1'), paragraph_list[1])
+ self.assertEquals((2, 'P12'), paragraph_list[2])
+ self.assertEquals((8, 'P13'), paragraph_list[8])
+ self.assertEquals((19, 'Standard'), paragraph_list[19])
- def testGetParagraphItem(self):
+ def testGetParagraphItemSuccessfully(self):
"""Test if getParagraphItem() returns the right paragraph"""
- self.assertRaises(NotImplementedError, self.oogranulate.getParagraphItem,
- 'file',
- 'paragraph_id')
+ self.assertEquals(('Some images without title', 'P13'),
+ self.oogranulate.getParagraphItem(8))
+
+ big_paragraph = self.oogranulate.getParagraphItem(5)
+ self.assertEquals('P8', big_paragraph[1])
+ self.assertTrue(big_paragraph[0].startswith(u'A prática cotidiana prova'))
+ self.assertTrue(big_paragraph[0].endswith(u'corresponde às necessidades.'))
+
+ def testGetParagraphItemWithoutSuccess(self):
+ """Test if getParagraphItem() returns None for not existent id"""
+ self.assertEquals(None, self.oogranulate.getParagraphItem(200))
def testGetChapterItemList(self):
"""Test if getChapterItemList() returns the right chapters list"""
More information about the Erp5-report mailing list