This is the code for a simple
RSS Reader. It is based on
David Gravereaux's code (at
TclHttpd RSS Processing) and splitted in a library to do the real reading and a
RSSReader GUI.
package provide czrss 0.11
package require snit
package require tdom
package require http
if {![catch {package require autoproxy}]} {
autoproxy::init
}
# This is the class representing an RSS document
snit::type ::czrss::doc {
variable xpath
variable channel
variable items
variable url
# Konstruktor for a given URI
constructor { uri } {
set url $uri
$self load
}
method load { } {
# load xml to temporary file
set file "[ clock seconds].xml"
set out [ open $file w ]
http::geturl $url -channel $out
close $out
# load xml into dom from temporary file
set doc [ dom parse -channel [tDOM::xmlOpenFile $file] ]
set _root [ $doc documentElement ]
file delete $file
set root [$doc documentElement]
switch [getRSSVersion $doc] {
0.91 - 0.92 - 0.93 - 2.0 {
set xpath(titleXpath) {/rss/channel/title/text()}
set xpath(linkXpath) {/rss/channel/link/text()}
set xpath(imgNodeXpath) {/rss/channel/image/title}
set xpath(imgTitleXpath) {/rss/channel/image/title/text()}
set xpath(imgLinkXpath) {/rss/channel/image/url/text()}
set xpath(imgWidthXpath) {/rss/channel/image/width/text()}
set xpath(imgHeightXpath) {/rss/channel/image/height/text()}
set xpath(storiesXpath) {/rss/channel/item}
set xpath(itemTitleXpath) {title/text()}
set xpath(itemLinkXpath) {link/text()}
set xpath(itemPubDateXpath) {pubDate/text()}
set xpath(itemDescXpath) {description/text()}
}
1.0 {
set xpath(titleXpath) {/rdf:RDF/*[local-name()='channel']/*[local-name()='title']/text()}
set xpath(linkXpath) {/rdf:RDF/*[local-name()='channel']/*[local-name()='link']/text()}
set xpath(imgNodeXpath) {/rdf:RDF/*[local-name()='image']}
set xpath(imgTitleXpath) {/rdf:RDF/*[local-name()='image']/*[local-name()='title']/text()}
set xpath(imgLinkXpath) {/rdf:RDF/*[local-name()='image']/*[local-name()='url']/text()}
set xpath(imgWidthXpath) {/rdf:RDF/*[local-name()='image']/*[local-name()='width']/text()}
set xpath(imgHeightXpath) {/rdf:RDF/*[local-name()='image']/*[local-name()='height']/text()}
set xpath(storiesXpath) {/rdf:RDF/*[local-name()='item']}
set xpath(itemTitleXpath) {*[local-name()='title']/text()}
set xpath(itemLinkXpath) {*[local-name()='link']/text()}
set xpath(itemPubDateXpath) {*[local-name()='pubDate']/text()}
set xpath(itemDescXpath) {*[local-name()='description']/text()}
}
default {
error "Unssupported schema [getRSSVersion $doc]"
}
}
# Channel
set cN [ $_root child 1 channel ]
set channel [::czrss::channel create %AUTO% $self $cN]
puts $channel
# Items
set items {}
set stories [$_root selectNodes $xpath(storiesXpath) ]
foreach iN $stories {
lappend items [ ::czrss::item create %AUTO% $self $iN ]
}
}
# returns the XPath Query for a given type
method xpath { key } {
return $xpath($key)
}
# returns the channel object
method channel {} {
return $channel
}
# returns a list of items
method items {} {
return $items
}
# detects the RSS version of the document
proc getRSSVersion {doc} {
set root [$doc documentElement]
switch [$root nodeName] {
rss {
if {[$root hasAttribute version]} {
return [$root getAttribute version]
}
# Best guess as most stuff is optional...
return 0.92
}
rdf:RDF {
return 1.0
}
default {
return 0
}
}
}
}
# this class is used to contain rss items
snit::type ::czrss::item {
variable _node
variable _doc
constructor {doc node } {
variable history
set _doc $doc
set _node $node
}
# get the title
method title { } {
set xpath [$_doc xpath itemTitleXpath]
return [ ::czrss::nodeTxt $_node $xpath]
}
# get the link
method link {} {
set xpath [$_doc xpath itemLinkXpath]
return [ ::czrss::nodeUri $_node $xpath]
}
# get the description
method description {} {
set xpath [$_doc xpath itemDescXpath]
return [ ::czrss::nodeTxt $_node $xpath]
}
# return the publication date as string
method pubDate {} {
set xpath [$_doc xpath itemPubDateXpath]
return [ ::czrss::nodeTxt $_node $xpath]
}
}
# this class contains information on the channel
snit::type ::czrss::channel {
variable _doc
variable _root
constructor { doc root} {
set _doc $doc
set _root $root
}
# get the title
method title { } {
set xpath [$_doc xpath titleXpath]
return [ ::czrss::nodeTxt $_root $xpath]
}
# get the image link
method imgLink {} {
set xpath [$_doc xpath imgLinkXpath]
return [ ::czrss::nodeUri $_root $xpath]
}
# get the image title
method imgTitle {} {
set xpath [$_doc xpath imgTitleXpath]
return [ ::czrss::nodeUri $_root $xpath]
}
# get the image width
method imgWidth {} {
set xpath [$_doc xpath imgWidthXpath]
return [ ::czrss::nodeTxt $_root $xpath]
}
# get the image height
method imgHeight {} {
set xpath [$_doc xpath imgHeightXpath]
return [ ::czrss::nodeTxt $_root $xpath]
}
}
# this namespace contains some utility methods
namespace eval ::czrss {
proc encUri {uri} {
return [string map { & %26 } $uri]
}
proc encTxt {txt} {
return [string map { & & < < > > } $txt]
}
proc nodeUri {node xpath} {
if {[$node selectNode $xpath] != ""} {
# Only if there is a lonely &, quote it back to an entity.
return [encUri [[$node selectNode $xpath] nodeValue]]
} else {
return ""
}
}
proc nodeTxt {node xpath} {
if {[$node selectNode $xpath] != ""} {
return [[$node selectNode $xpath] nodeValue]
} else {
return ""
}
}
}
2014-04-28: The
encUri command is insufficient. At the very least it needs to convert the
% character too, and the space character should be converted to either
+ or
%20. The following implementation is a little bit better (even though it possibly converts more characters than it needs to):
proc encUri {uri} {
set res {}
foreach c [split $uri {}] {
append res [if {[string match {[A-Za-z.0-9!()'*_~-]} $c]} {
set c
} else {
format %%%02X [scan $c %c]
}]
}
set res
}