[Erp5-report] r29825 - /erp5/trunk/products/ERP5/interfaces/crawlable.py
nobody at svn.erp5.org
nobody at svn.erp5.org
Mon Oct 19 20:59:49 CEST 2009
Author: jp
Date: Mon Oct 19 20:59:49 2009
New Revision: 29825
URL: http://svn.erp5.org?rev=29825&view=rev
Log:
Initial upload
Added:
erp5/trunk/products/ERP5/interfaces/crawlable.py
Added: erp5/trunk/products/ERP5/interfaces/crawlable.py
URL: http://svn.erp5.org/erp5/trunk/products/ERP5/interfaces/crawlable.py?rev=29825&view=auto
==============================================================================
--- erp5/trunk/products/ERP5/interfaces/crawlable.py (added)
+++ erp5/trunk/products/ERP5/interfaces/crawlable.py [utf8] Mon Oct 19 20:59:49 2009
@@ -1,0 +1,93 @@
+# -*- coding: utf-8 -*-
+##############################################################################
+#
+# Copyright (c) 2009 Nexedi SA and Contributors. All Rights Reserved.
+# Jean-Paul Smets-Solanes <jp at nexedi.com>
+#
+# WARNING: This program as such is intended to be used by professional
+# programmers who take the whole responsability of assessing all potential
+# consequences resulting from its eventual inadequacies and bugs
+# End users who are looking for a ready-to-use solution with commercial
+# garantees and support are strongly adviced to contract a Free Software
+# Service Company
+#
+# This program is Free Software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+#
+##############################################################################
+
+from zope.interface import Interface
+
+class ICrawlable(Interface):
+ """
+ Crawlable interface specification
+
+ Documents which implement the ICrawlable can be crawled by
+ extracting the URLs which they refer to and can be processed
+ by an ERP5 crawler such as the ContributionTool.
+ """
+
+ def crawlContent():
+ """
+ Initialises the crawling process from the current document.
+ The crawling process is delegated to an ERP5 crawler such
+ as the ContributionTool.
+ """
+
+ def getContentURLList():
+ """
+ Returns a list of URLs which the current document refers to.
+ URLs are returned as is (ie. relative, absolute, with or
+ without server header).
+ """
+
+ def getContentBaseURL():
+ """
+ Returns the content base URL based on the actual content or
+ based on any other information (ex. URL property, system
+ preferences, etc.). This information can be used to generate
+ a normalised URL.
+ """
+
+ def getContentNormalisedURLList():
+ """
+ Returns a list of URLs which the current document refers to.
+ URLs are returned in a normalised way, including server, port
+ and absolute path.
+ """
+
+ def isIndexContent(container=None, content=None):
+ """
+ Returns True if the content document acts as an index
+ to other documents. Returns False if the content document
+ contains relevant content for the end-user.
+
+ This method is used by ERP5 crawlers to make a difference between
+ URLs which return an index (ex. the list of emails of a mailing
+ list archive) and true content (ex. email content of a mailing list
+ archive).
+
+ Either container or content must be set to None.
+
+ container -- a container document to which the calculation of
+ isIndexContent is delegated, by default the
+ parent document
+
+ content -- the content document to assess, by default self
+
+ NOTE: Crawlable Documents and External Sources currently
+ use the same isIndexContent method which is unified here,
+ but with a different signature. This is probably inconsistent
+ and the interface must be revised.
+ """
More information about the Erp5-report
mailing list