"""
$Id: RSS.py,v 1.3 2000/11/21 22:58:21 kmacleod Exp $

SAX handler that turns an RDF-based RSS document into Orchard.RSS nodes
(Channel, Item, Image, TextInput).
"""

import string
import urllib
import Orchard.RSS
import Orchard.Parsers.SAX
from StringIO import StringIO

# RDF namespace and the (namespace, local-name) keys used to look up the
# rdf:about / rdf:resource attributes on elements.
rdf = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
rdf_about = (rdf, 'about')
rdf_resource = (rdf, 'resource')

# Parser states.  They are compared by identity (``is``), so always use
# these module-level objects rather than equal string literals.
in_string = "in_string"    # inside a property element, collecting text
in_node = "in_node"        # inside rdf:RDF or a channel/item/image/textinput
in_xml = "in_xml"          # inside element content nested in a property
after_xml = "after_xml"    # nested element content has just been closed


class RSS(Orchard.Node):
    """SAX event handler that assembles an Orchard.RSS.Channel.

    Nodes carrying an rdf:about attribute are collected in ``self.nodes``
    while parsing; references made from inside the channel (rdf:resource on
    image/textinput, rdf:li entries) are resolved against that table in
    endDocument().
    """

    def __init__(self, *positional_args, **keyword_args):
        """Forward all arguments unchanged to Orchard.Node.__init__."""
        # Extended call syntax replaces the deprecated apply() built-in.
        Orchard.Node.__init__(self, *positional_args, **keyword_args)

    def parse(self, source):
        """Parse `source` with an Orchard SAX parser driving this handler.

        Returns whatever the parser's parse() returns (presumably the
        channel returned by endDocument() -- confirm against
        Orchard.Parsers.SAX).
        """
        parser = Orchard.Parsers.SAX.SAX(handler=self)
        return parser.parse(source)

    #
    # SAX event handlers
    #
    def setDocumentLocator(self, locator):
        """Ignore the locator; position information is not used."""
        pass

    def startDocument(self, document):
        """Reset all per-document parsing state."""
        self.channel = None
        self.items = []         # rdf:resource values of the rdf:li entries
        self.nodes = {}         # rdf:about value -> item/image/textinput node
        self.image = None       # rdf:resource of the channel's image, if any
        self.textinput = None   # rdf:resource of the channel's textinput
        self.state = None
        # Must exist before the first element event: startElement() and
        # endElement() both read it, and a reused handler must not inherit
        # the flag from a previous parse.
        self.in_channel = 0

    def endDocument(self, document):
        """Resolve the channel's references and return the channel.

        Raises KeyError if the channel refers to an rdf:about value that was
        never defined, and AttributeError if no channel element was seen
        (self.channel is still None).
        """
        if self.image is not None:
            self.channel.image = self.nodes[self.image]
        if self.textinput is not None:
            self.channel.textinput = self.nodes[self.textinput]
        self.channel.items = []
        for item in self.items:
            self.channel.items.append(self.nodes[item])
        return self.channel

    def startElement(self, element):
        """Dispatch an element start according to the current state.

        The branch order matters: the in_xml and in_string states take
        precedence over any element-name matching.
        """
        name = element.name
        if self.state is in_xml:
            # Already building a nested tree: just forward the event.
            self.node_stack.append(element)
            self.handler.startElement(element)
        elif self.state is in_string:
            # A child element appeared inside a property element, so start
            # building a tree for it with a fresh TreeBuilder.
            self.handler = Orchard.TreeBuilder()
            document = Orchard.XML.Document()
            self.node_stack = [document]
            self.handler.startDocument(document)
            self.node_stack.append(element)
            self.handler.startElement(element)
            self.state = in_xml
        elif (element.namespace_uri == rdf
              and element.local_name == 'RDF'):
            self.state = in_node
        elif name == 'channel':
            self.channel = Orchard.RSS.Channel()
            self.channel[rdf_about] = element.attributes[rdf_about].value
            self.current_node = self.channel
            self.state = in_node
            self.in_channel = 1
        elif (element.namespace_uri == rdf
              and element.local_name == 'li'):
            # Remember the reference; it is resolved in endDocument().
            self.items.append(element.attributes[rdf_resource].value)
        elif (name == 'items'
              or (element.namespace_uri == rdf
                  and element.local_name == 'Seq')):
            # Pure containers for the rdf:li entries.
            pass
        elif name == 'item':
            item = Orchard.RSS.Item()
            item[rdf_about] = element.attributes[rdf_about].value
            self.current_node = item
            self.nodes[item[rdf_about]] = item
            self.state = in_node
        elif name == 'image':
            if self.in_channel:
                # Inside the channel this is only a reference.
                self.image = element.attributes[rdf_resource].value
            else:
                image = Orchard.RSS.Image()
                image[rdf_about] = element.attributes[rdf_about].value
                self.current_node = image
                self.nodes[image[rdf_about]] = image
                self.state = in_node
        elif name == 'textinput':
            if self.in_channel:
                # Inside the channel this is only a reference.
                self.textinput = element.attributes[rdf_resource].value
            else:
                textinput = Orchard.RSS.TextInput()
                textinput[rdf_about] = element.attributes[rdf_about].value
                self.current_node = textinput
                self.nodes[textinput[rdf_about]] = textinput
                self.state = in_node
        else:
            # Any other element is treated as a property of the current
            # node; start collecting its text.
            self.string = ""
            self.state = in_string

    def endElement(self, element):
        """Finish an element: store collected text/XML or leave the channel."""
        state = self.state
        if state is in_xml:
            element = self.node_stack.pop()
            self.handler.endElement(element)
            if len(self.node_stack) == 1:
                # Only the document is left: the nested tree is complete.
                document = self.node_stack.pop()
                self.xml = self.handler.endDocument(document).root
                self.state = after_xml
        elif state is in_string:
            self.current_node[(element.namespace_uri,
                               element.local_name)] = self.string
            self.state = in_node
        elif self.in_channel and element.name == 'channel':
            self.in_channel = 0
        elif state is in_node:
            pass
        elif state is after_xml:
            self.current_node[(element.namespace_uri,
                               element.local_name)] = self.xml
            # Return to the enclosing node, as the in_string branch does.
            # Without this the closing tag of the enclosing node (e.g.
            # </item>) would land here again and store the stale tree
            # under its own name.
            self.state = in_node

    def characters(self, chars):
        """Forward text to the tree builder or append it to the property."""
        if self.state is in_xml:
            self.handler.characters(chars)
        elif self.state is in_string:
            self.string = self.string + chars.data

    def ignorableWhitespace(self, chars):
        """Forward ignorable whitespace only while building a nested tree."""
        if self.state is in_xml:
            self.handler.ignorableWhitespace(chars)

    def processingInstruction(self, pi):
        """Forward processing instructions only while building a nested tree."""
        if self.state is in_xml:
            self.handler.processingInstruction(pi)