[Erp5-report] r40030 ivan - in /erp5/trunk/bt5/erp5_knowledge_pad: ExtensionTemplateItem/ S...
nobody at svn.erp5.org
nobody at svn.erp5.org
Mon Nov 8 14:08:27 CET 2010
Author: ivan
Date: Mon Nov 8 14:08:27 2010
New Revision: 40030
URL: http://svn.erp5.org?rev=40030&view=rev
Log:
Use the feedparser Python library rather than hand-parsing XML.
Translate UI messages.
Modified:
erp5/trunk/bt5/erp5_knowledge_pad/ExtensionTemplateItem/GetRssDataAsDict.py
erp5/trunk/bt5/erp5_knowledge_pad/SkinTemplateItem/portal_skins/erp5_gadget/Base_getRssDataAsDocumentList.xml
erp5/trunk/bt5/erp5_knowledge_pad/SkinTemplateItem/portal_skins/erp5_gadget/RssFeed_getSummaryAsHTML.xml
erp5/trunk/bt5/erp5_knowledge_pad/bt/revision
Modified: erp5/trunk/bt5/erp5_knowledge_pad/ExtensionTemplateItem/GetRssDataAsDict.py
URL: http://svn.erp5.org/erp5/trunk/bt5/erp5_knowledge_pad/ExtensionTemplateItem/GetRssDataAsDict.py?rev=40030&r1=40029&r2=40030&view=diff
==============================================================================
--- erp5/trunk/bt5/erp5_knowledge_pad/ExtensionTemplateItem/GetRssDataAsDict.py [utf8] (original)
+++ erp5/trunk/bt5/erp5_knowledge_pad/ExtensionTemplateItem/GetRssDataAsDict.py [utf8] Mon Nov 8 14:08:27 2010
@@ -1,179 +1,54 @@
-from urllib2 import HTTPPasswordMgrWithDefaultRealm, HTTPBasicAuthHandler, \
- build_opener, install_opener, urlopen, HTTPError
-from xml.dom.minidom import parseString
-import md5
-from HTMLParser import HTMLParser
-import socket
-
-def getRssDataAsDict(url, username, password):
- passman = HTTPPasswordMgrWithDefaultRealm()
- passman.add_password(None, url, username, password)
- auth_handler = HTTPBasicAuthHandler(passman)
- opener = build_opener(auth_handler)
- install_opener(opener)
- try:
- default_timeout = socket.getdefaulttimeout()
- socket.setdefaulttimeout(5.0)
- try:
- file = urlopen(url)
- finally:
- socket.setdefaulttimeout(default_timeout)
-
- except IOError , e:
- return {'title': 'Connection problem, please retry later.'}
- except ValueError , e:
- return {'title': 'Please enter a valid Rss or Atom url in the preference form.' }
- except HTTPError , e:
- if hasattr(e, 'code'):
- if e.code == 401:
- return {'title': 'Unauthorized, verify your authentication.' }
- if e.code == 404:
- return {'title': 'Page not found.' }
- except :
- return {'title': 'Fetching Rss failed.' }
- return parseRssDataAsDict(file.read())
-
-def parseRssDataAsDict(rss_string):
- try:
- xmlDoc = parseString(rss_string).documentElement
- except :
- return {'title': 'Parsing RSS failed.' }
- if(xmlDoc.tagName.startswith('rss') or xmlDoc.tagName.startswith('rdf') ):
- feed_data = {}
- RSSTitle = None
- if (xmlDoc.getElementsByTagName('title') and xmlDoc.getElementsByTagName('title')[0].parentNode.tagName != 'item'):
- feed_data['title'] = xmlDoc.getElementsByTagName('title')[0].firstChild.nodeValue
- if (xmlDoc.getElementsByTagName('image') and xmlDoc.getElementsByTagName('image')[0].parentNode.tagName != 'item'):
- logo = xmlDoc.getElementsByTagName('image')[0]
- if (logo.getElementsByTagName('url')):
- feed_data['logo'] = logo.getElementsByTagName('url')[0].firstChild.nodeValue
- elif(logo.getElementsByTagName('rdf:resource')):
- feed_data['logo'] = logo.getElementsByTagName('rdf:resource')[0].firstChild.nodeValue
- if (xmlDoc.getElementsByTagName('link') and xmlDoc.getElementsByTagName('link')[0].parentNode.tagName != 'item'):
- feed_data['link'] = xmlDoc.getElementsByTagName('link')[0].firstChild.nodeValue
- item_list = xmlDoc.getElementsByTagName('item')
- feed_data['items'] = []
- for item in item_list:
- message = {}
- message['other_links'] = []
- message['img'] = []
- if(item.getElementsByTagName('title') and item.getElementsByTagName('title')[0].firstChild):
- message['title'] = item.getElementsByTagName('title')[0].firstChild.nodeValue
- if(item.getElementsByTagName('link') and item.getElementsByTagName('link')[0].firstChild):
- message['link'] = item.getElementsByTagName('link')[0].firstChild.nodeValue
- if(item.getElementsByTagName('description') and item.getElementsByTagName('description')[0].firstChild):
- message['content'] = cleanHTML(item.getElementsByTagName('description')[0].firstChild.nodeValue)
- if (item.getElementsByTagName('pubDate') and item.getElementsByTagName('pubDate')[0].firstChild):
- message['date'] = item.getElementsByTagName('pubDate')[0].firstChild.nodeValue
- elif(item.getElementsByTagName('dc:date') and item.getElementsByTagName('dc:date')[0].firstChild):
- message['date'] = item.getElementsByTagName('dc:date')[0].firstChild.nodeValue
- if (item.getElementsByTagName('enclosure')):
- for enclosure in item.getElementsByTagName('enclosure'):
- if (str(enclosure.attributes['type'].nodeValue).find('image') != -1):
- message['img'].append(enclosure.attributes['url'].nodeValue)
- else:
- if (enclosure.attributes.has_key('title')):
- message['other_links'].append('<a href="'+enclosure.attributes['url'].nodeValue+'"target="_blank">'+enclosure.attributes['url'].nodeValue+'</a>')
- else:
- message['other_links'].append('<a href="'+enclosure.attributes['url'].nodeValue+'"target="_blank">'+enclosure.attributes['title'].nodeValue+'</a>')
- message['md5'] = md5.new(str(message)).hexdigest()
- feed_data['items'].append(message)
- elif(xmlDoc.tagName == 'feed'):
- feed_data = {}
- feedTitle = None
- if (xmlDoc.getElementsByTagName('title') and xmlDoc.getElementsByTagName('title')[0].parentNode.tagName != 'entry'):
- feed_data['title'] = xmlDoc.getElementsByTagName('title')[0].firstChild.nodeValue
- if (xmlDoc.getElementsByTagName('icon') and xmlDoc.getElementsByTagName('icon')[0].parentNode.tagName != 'entry'):
- feed_data['logo'] = xmlDoc.getElementsByTagName('icon')[0].firstChild.nodeValue
- item_list = xmlDoc.getElementsByTagName('entry')
- feed_data['items'] = []
- for item in item_list:
- message = {}
- if(item.getElementsByTagName('title') and item.getElementsByTagName('title')[0].firstChild):
- message['title'] = item.getElementsByTagName('title')[0].firstChild.nodeValue
- message['other_links'] = []
- message['img'] = []
- for link in item.getElementsByTagName('link'):
- if (link.attributes.has_key('rel') and link.attributes.get('rel').nodeValue == 'alternate'):
- message['link'] = link.attributes['href'].nodeValue
- elif (link.attributes.has_key('type') and link.attributes.get('type').nodeValue.find('image') != -1):
- message['img'].append(link.attributes['href'].nodeValue)
- else:
- if (link.attributes.has_key('title')):
- message['other_links'].append('<a href="'+link.attributes['href'].nodeValue+'" target="_blank">'+link.attributes['title'].nodeValue+'</a>')
- else:
- message['other_links'].append('<a href="'+link.attributes['href'].nodeValue+'"target="_blank">'+link.attributes['href'].nodeValue+'</a>')
- if (item.getElementsByTagName('content') and item.getElementsByTagName('content')[0].firstChild):
- message['content'] = stringConstructor(item.getElementsByTagName('content')[0])
- elif (item.getElementsByTagName('summary') and item.getElementsByTagName('summary')[0].firstChild):
- message['content'] = stringConstructor(item.getElementsByTagName('summary')[0])
- if (item.getElementsByTagName('updated') and item.getElementsByTagName('updated')[0].firstChild):
- message['date'] = item.getElementsByTagName('updated')[0].firstChild.nodeValue
- elif (item.getElementsByTagName('modified') and item.getElementsByTagName('modified')[0].firstChild):
- message['date'] = item.getElementsByTagName('modified')[0].firstChild.nodeValue
- message['md5'] = md5.new(str(message)).hexdigest()
- feed_data['items'].append(message)
- else:
- return {'title': 'This reader can\'t read this feed'}
- return feed_data
-
-
-class HTMLCleaner(HTMLParser):
- def __init__(self):
- HTMLParser.__init__(self)
- self.html = ''
- self.script = 0
- def handle_starttag(self, tag, attrs):
- if tag !='script' and tag !='input' and tag !='button' :
- self.html += '<'+tag+' '
- for attr in attrs:
- if not attr[0].startswith('on'):
- self.html += attr[0]+'=' +attr[1]+' '
- if tag=='a':
- self.html += 'target="_blank" '
- self.html += '>'
- else:
- self.script = 1
- def handle_data(self, data):
- if not self.script:
- self.html += data
- def handle_charref(self, name):
- self.html += '&#'+name+';'
- def handle_entityref(self, name):
- self.html += '&'+name+';'
- def handle_endtag(self, tag):
- if tag !='script' and tag !='input' and tag !='button' :
- self.html += '</'+tag+'>'
- else:
- self.script = 0
- def handle_startendtag(self, tag, attrs):
- if tag !='script' and tag !='input' and tag !='button' :
- self.html += '<'+tag+' '
- for attr in attrs:
- if not attr[0].startswith('on'):
- self.html += attr[0]+'=' +attr[1]+' '
- self.html += '/>'
-
-def cleanHTML(string):
- html = ''
- parser= HTMLCleaner()
- parser.feed(string)
- return parser.html
-
-def stringConstructor(domItem):
- string = ''
- for item in domItem.childNodes:
- if item.nodeType == 3:
- string = string + item.nodeValue
- elif item.nodeType == 1 and item.tagName != 'script' and item.tagName != 'input' and item.tagName != 'button':
- string = string + '<' + item.tagName + ' '
- if item.attributes:
- for att in item.attributes.items():
- if(not att[0].startswith('on')):
- string = string + att[0] + '=' + att[1] + ' '
- if item.tagName == 'a':
- string = string + 'target="_blank" '
- string = string + '>'
- string = string + stringConstructor(item)
- string = string + '</' + item.tagName + '>'
- return string
+import feedparser, md5, urllib2, socket
+
+def getRssDataAsDict(self, url, username=None, password=None):
+ result = {}
+ translate = self.Base_translateString
+ # no url, no feed to read
+ if url in ('', None, 'None',):
+ return {'title':translate('Please enter a valid Rss or Atom url in the preference form.')}
+
+ # use authentication or not?
+ handlers = []
+ if username is not None and password is not None:
+ passman = urllib2.HTTPPasswordMgrWithDefaultRealm()
+ passman.add_password(None, url, username, password)
+ auth_handler = urllib2.HTTPBasicAuthHandler(passman)
+ handlers.append(auth_handler)
+
+ # set shorter timeouts and revert default at enf of read
+ default_timeout = socket.getdefaulttimeout()
+ socket.setdefaulttimeout(10.0)
+ d = feedparser.parse(url, handlers=handlers)
+ socket.setdefaulttimeout(default_timeout)
+
+ if d.bozo and isinstance(d.bozo_exception, urllib2.URLError):
+ # we have an URL error
+ return {'title':translate('Wrong Rss or Atom url or service temporary down.')}
+
+ # http status code checks
+ if d.status == 401:
+ return {'title': translate('Unauthorized, verify your authentication.')}
+ elif d.status == 404:
+ return {'title': translate('Page not found.')}
+
+ result['items'] = []
+ # some feeds may not provide logo
+ if d.feed.get('image', None) is not None:
+ result['logo'] = d.feed.image['href']
+ result['title'] = d.feed.title
+ result['link'] = d.feed.link
+ for entry in d.entries:
+ entry_dict = {}
+ entry_dict['title'] = entry['title']
+ entry_dict['link'] = entry['link']
+ entry_dict['other_links'] = [x['href'] for x in entry['links']]
+ entry_dict['md5'] = md5.new(entry['link']).hexdigest()
+ entry_dict['content'] = entry['summary']
+ entry_dict['date'] = entry['updated']
+ entry_dict['img'] = [x['href'] for x in entry.get('enclosures', [])]
+ entry_dict['updated_parsed'] = entry['updated_parsed']
+ result['items'].append(entry_dict)
+ # sort by date
+ result['items'] = sorted(result['items'], key=lambda k: k['updated_parsed'])
+ result['items'].reverse()
+ return result
\ No newline at end of file
Modified: erp5/trunk/bt5/erp5_knowledge_pad/SkinTemplateItem/portal_skins/erp5_gadget/Base_getRssDataAsDocumentList.xml
URL: http://svn.erp5.org/erp5/trunk/bt5/erp5_knowledge_pad/SkinTemplateItem/portal_skins/erp5_gadget/Base_getRssDataAsDocumentList.xml?rev=40030&r1=40029&r2=40030&view=diff
==============================================================================
--- erp5/trunk/bt5/erp5_knowledge_pad/SkinTemplateItem/portal_skins/erp5_gadget/Base_getRssDataAsDocumentList.xml [utf8] (original)
+++ erp5/trunk/bt5/erp5_knowledge_pad/SkinTemplateItem/portal_skins/erp5_gadget/Base_getRssDataAsDocumentList.xml [utf8] Mon Nov 8 14:08:27 2010
@@ -69,7 +69,7 @@ else:\n
feed_url = str(preferences.get(\'preferred_rss_feed\',\'\'))\n
username = str(preferences.get(\'preferred_username\',\'\'))\n
password = str(preferences.get(\'preferred_password\',\'\'))\n
-results = context.Base_getRssDataAsDict(url = feed_url, username = username, password = password)\n
+results = context.Base_getRssDataAsDict(context, url = feed_url, username = username, password = password)\n
readItemList = {}\n
md5_list = []\n
message_list = []\n
Modified: erp5/trunk/bt5/erp5_knowledge_pad/SkinTemplateItem/portal_skins/erp5_gadget/RssFeed_getSummaryAsHTML.xml
URL: http://svn.erp5.org/erp5/trunk/bt5/erp5_knowledge_pad/SkinTemplateItem/portal_skins/erp5_gadget/RssFeed_getSummaryAsHTML.xml?rev=40030&r1=40029&r2=40030&view=diff
==============================================================================
--- erp5/trunk/bt5/erp5_knowledge_pad/SkinTemplateItem/portal_skins/erp5_gadget/RssFeed_getSummaryAsHTML.xml [utf8] (original)
+++ erp5/trunk/bt5/erp5_knowledge_pad/SkinTemplateItem/portal_skins/erp5_gadget/RssFeed_getSummaryAsHTML.xml [utf8] Mon Nov 8 14:08:27 2010
@@ -57,7 +57,7 @@
tal:attributes="class python: test(is_read, \'teaser read\', \'teaser unread\');\n
onclick string:if(this.className!=\'teaser read\'){this.className=\'teaser read\';;MochiKit.Async.doSimpleXMLHttpRequest(\'Base_setRssItemReadInSelection\', {\'selection_name\':\'${selection_name}\',\'item\':\'${md5}\'})};;toggle(\'${dom_id}\');">\n
<img class="thumbnail-image"\n
- tal:condition="python: image not in ((), None,)"\n
+ tal:condition="python: image not in ((), None, [])"\n
tal:attributes="src python: image[0]" alt="Thumbnail"/>\n
<span class="document-title"\n
tal:content="title"/> - \n
Modified: erp5/trunk/bt5/erp5_knowledge_pad/bt/revision
URL: http://svn.erp5.org/erp5/trunk/bt5/erp5_knowledge_pad/bt/revision?rev=40030&r1=40029&r2=40030&view=diff
==============================================================================
--- erp5/trunk/bt5/erp5_knowledge_pad/bt/revision [utf8] (original)
+++ erp5/trunk/bt5/erp5_knowledge_pad/bt/revision [utf8] Mon Nov 8 14:08:27 2010
@@ -1 +1 @@
-597
\ No newline at end of file
+601
\ No newline at end of file
More information about the Erp5-report
mailing list