[Erp5-report] r29825 - /erp5/trunk/products/ERP5/interfaces/crawlable.py

nobody at svn.erp5.org nobody at svn.erp5.org
Mon Oct 19 20:59:49 CEST 2009


Author: jp
Date: Mon Oct 19 20:59:49 2009
New Revision: 29825

URL: http://svn.erp5.org?rev=29825&view=rev
Log:
Initial upload

Added:
    erp5/trunk/products/ERP5/interfaces/crawlable.py

Added: erp5/trunk/products/ERP5/interfaces/crawlable.py
URL: http://svn.erp5.org/erp5/trunk/products/ERP5/interfaces/crawlable.py?rev=29825&view=auto
==============================================================================
--- erp5/trunk/products/ERP5/interfaces/crawlable.py (added)
+++ erp5/trunk/products/ERP5/interfaces/crawlable.py [utf8] Mon Oct 19 20:59:49 2009
@@ -1,0 +1,93 @@
+# -*- coding: utf-8 -*-
+##############################################################################
+#
+# Copyright (c) 2009 Nexedi SA and Contributors. All Rights Reserved.
+#                    Jean-Paul Smets-Solanes <jp at nexedi.com>
+#
+# WARNING: This program as such is intended to be used by professional
+# programmers who take the whole responsability of assessing all potential
+# consequences resulting from its eventual inadequacies and bugs
+# End users who are looking for a ready-to-use solution with commercial
+# garantees and support are strongly adviced to contract a Free Software
+# Service Company
+#
+# This program is Free Software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+#
+##############################################################################
+
+from zope.interface import Interface
+
+class ICrawlable(Interface):
+  """
+  Crawlable interface specification
+
+  Documents which implement ICrawlable can be crawled by
+  extracting the URLs which they refer to and can be processed
+  by an ERP5 crawler such as the ContributionTool.
+  """
+
+  def crawlContent():
+    """
+    Initialises the crawling process from the current document.
+    The crawling process is delegated to an ERP5 crawler such
+    as the ContributionTool.
+    """
+
+  def getContentURLList():
+    """
+    Returns a list of URLs which the current document refers to.
+    URLs are returned as is (i.e. relative, absolute, with or
+    without server header).
+    """
+
+  def getContentBaseURL():
+    """
+    Returns the content base URL based on the actual content or
+    based on any other information (e.g. URL property, system
+    preferences, etc.). This information can be used to generate
+    a normalised URL.
+    """
+
+  def getContentNormalisedURLList():
+    """
+    Returns a list of URLs which the current document refers to.
+    URLs are returned in a normalised way, including server, port
+    and absolute path.
+    """
+
+  def isIndexContent(container=None, content=None):
+    """
+    Returns True if the content document acts as an index
+    to other documents. Returns False if the content document
+    contains relevant content for the end-user.
+
+    This method is used by ERP5 crawlers to distinguish between
+    URLs which return an index (e.g. the list of emails of a mailing
+    list archive) and true content (e.g. email content of a mailing list
+    archive).
+
+    Either container or content must be None.
+
+    container -- a container document to which the calculation of
+                 isIndexContent is delegated, by default the
+                 parent document
+
+    content -- the content document to assess, by default self
+
+    NOTE: Crawlable Documents and External Sources currently
+    use the same isIndexContent method which is unified here,
+    but with a different signature. This is probably inconsistent
+    and the interface must be revised.
+    """




More information about the Erp5-report mailing list