""" $Id: SAX.py,v 1.10 2000/11/27 17:42:56 kmacleod Exp $ """

import string
import urllib
import types
import re
import Orchard
import Orchard.XML
import sys
from StringIO import StringIO

# Interpreters that expose sys.version_info use the SAX2-style xml.sax
# package; anything older falls back to the SAX1 saxexts/saxlib modules.
# The class below deliberately sticks to constructs that work on both.
if hasattr(sys, 'version_info'):
    isPy2 = 1
    import xml.sax
    from xml.sax.handler import feature_namespaces
    from xml.sax import SAXException
else:
    isPy2 = 0
    from xml.sax import saxexts
    from xml.sax.saxlib import SAXException

# Namespace names that are bound by definition and never declared in a
# document: the one for `xmlns:*` declarations and the one for `xml:*`.
xmlns_ns = "http://www.w3.org/2000/xmlns/"
xml_ns = "http://www.w3.org/XML/1998/namespace"


class SAX(Orchard.Node):
    """Adapter that turns SAX parser events into Orchard.XML node events.

    The instance registers itself as the parser's document/content handler
    and error handler.  Each SAX callback wraps its arguments in the
    matching Orchard.XML object and forwards it to the user handler, but
    only when that handler defines a method of the same name.
    """

    def __init__(self, *positional_args, **keyword_args):
        """Pass all arguments straight through to Orchard.Node.__init__."""
        # apply() rather than extended call syntax: the latter is not
        # available on the pre-2.0 interpreters the isPy2 == 0 branch targets.
        positional_args = (self,) + positional_args
        apply(Orchard.Node.__init__, positional_args, keyword_args)

    def parse(self, source, **keyword_args):
        """Parse `source` and return the handler's endDocument() result.

        source -- a string or an object the parser can read from.  A string
                  whose first non-whitespace character is '<' is parsed as
                  literal XML; any other string is opened with
                  urllib.urlopen().  Non-string objects are handed to the
                  parser unchanged.
        handler -- optional keyword; overrides `self.handler` for this call.

        Returns None when the handler has no endDocument method (or when
        there is no handler at all).  Exceptions reported through
        fatalError() propagate to the caller.
        """
        if isinstance(source, types.StringType):
            if re.match(r'(?m)^\s*<', source) is not None:
                stream = StringIO(source)
            else:
                stream = urllib.urlopen(source)
            owns_stream = 1
        else:
            stream = source
            owns_stream = 0

        try:
            # Always (re)bind tmp_handler so that a parse without any
            # handler neither fails on a missing attribute nor silently
            # reuses the handler left over from a previous parse.
            handler = None
            if hasattr(self, 'handler'):
                handler = self.handler
            if keyword_args.has_key('handler'):
                handler = keyword_args['handler']
            self.tmp_handler = handler

            if isPy2:
                self.parser = xml.sax.make_parser()
                # Namespace processing is done by this class, so the
                # parser must report raw qualified names.
                self.parser.setFeature(feature_namespaces, 0)
                self.parser.setContentHandler(self)
            else:
                self.parser = saxexts.make_parser()
                self.parser.setDocumentHandler(self)
            self.parser.setErrorHandler(self)

            # One prefix->URI mapping per open element; the bottom entry
            # holds the bindings that exist without any declaration.
            self.namespace_stack = [
                { '_Default' : None, 'xmlns' : xmlns_ns, 'xml' : xml_ns }
            ]
            self.node_stack = [ ]
            self.result = None

            if isPy2:
                self.parser.parse(stream)
            else:
                self.parser.parseFile(stream)
        finally:
            # Only close what was opened here; a caller-supplied stream
            # remains the caller's responsibility.
            if owns_stream:
                stream.close()

        return self.result

    #
    # SAX event handlers
    #

    def setDocumentLocator(self, locator):
        """Accept and ignore the parser's locator."""
        pass

    def startDocument(self):
        """Create the Orchard.XML.Document and forward it to the handler."""
        document = Orchard.XML.Document()
        self.node_stack.append(document)
        if hasattr(self.tmp_handler, 'startDocument'):
            self.tmp_handler.startDocument(document)

    def endDocument(self):
        """Forward the document node; store the handler's return value."""
        document = self.node_stack.pop()
        if hasattr(self.tmp_handler, 'endDocument'):
            self.result = self.tmp_handler.endDocument(document)

    def startElement(self, name, atts):
        """Build an Orchard.XML.Element with resolved namespace URIs.

        A copy of the current namespace scope is pushed and updated with
        this element's own xmlns declarations before any name is resolved,
        so declarations apply to the element that carries them.
        """
        self.namespace_stack.append(self.namespace_stack[-1].copy())
        self._scan_namespaces(atts)

        attributes = []
        for att_name in self._attribute_names(atts):
            attributes.append(Orchard.XML.Attribute(
                name=att_name,
                value=atts[att_name],
                namespace_uri=self._namespace_uri(att_name)
            ))

        element = Orchard.XML.Element(
            name=name,
            namespace_uri=self._namespace_uri(name),
            attributes=attributes
        )
        self.node_stack.append(element)
        if hasattr(self.tmp_handler, 'startElement'):
            self.tmp_handler.startElement(element)

    def _attribute_names(self, atts):
        """Return the attribute names of `atts` as a list, for either API."""
        if isPy2:
            return atts.keys()
        # SAX1 attribute lists are walked by position; this code relies on
        # integer indexing yielding the attribute name.
        names = []
        for ii in range(0, len(atts)):
            names.append(atts[ii])
        return names

    def _scan_namespaces(self, atts):
        """Record xmlns / xmlns:prefix declarations in the current scope."""
        scope = self.namespace_stack[-1]
        for att_name in self._attribute_names(atts):
            if att_name == 'xmlns':
                scope['_Default'] = atts[att_name]
            elif att_name[0:6] == 'xmlns:':
                scope[att_name[6:]] = atts[att_name]

    def _namespace_uri(self, name):
        """Return the namespace URI for qualified name `name`.

        A bare 'xmlns' yields None; any other unprefixed name yields the
        current default namespace; a prefixed name yields the URI bound to
        its prefix.  Raises KeyError when the prefix is not bound in the
        current scope.
        """
        # NOTE(review): unprefixed attribute names also receive the default
        # namespace here, which differs from Namespaces in XML.  Behaviour
        # kept as-is because callers may depend on it -- confirm.
        name_parts = string.split(name, ':')
        if len(name_parts) == 1:
            if name_parts[0] == 'xmlns':
                return None
            return self.namespace_stack[-1]['_Default']
        return self.namespace_stack[-1][name_parts[0]]

    def endElement(self, name):
        """Leave the element's namespace scope and forward the element."""
        self.namespace_stack.pop()
        element = self.node_stack.pop()
        if hasattr(self.tmp_handler, 'endElement'):
            self.tmp_handler.endElement(element)

    def characters(self, ch, start=0, length=-1):
        """Forward character data; accepts SAX1 and SAX2 call forms."""
        if length == -1:
            # SAX2: the whole string is the data.
            characters = Orchard.XML.Characters( data=ch )
        else:
            # SAX1: the data is the slice ch[start:start+length].
            characters = Orchard.XML.Characters( data=ch[start:start+length] )
        if hasattr(self.tmp_handler, 'characters'):
            self.tmp_handler.characters(characters)

    def ignorableWhitespace(self, ch, start=0, length=-1):
        """Forward ignorable whitespace; accepts SAX1 and SAX2 call forms.

        `start` and `length` default the same way as in characters() so a
        SAX2 driver's single-argument call no longer raises TypeError.
        """
        if length == -1:
            characters = Orchard.XML.IgnorableWhitespace( data=ch )
        else:
            characters = Orchard.XML.IgnorableWhitespace(
                data=ch[start:start+length]
            )
        if hasattr(self.tmp_handler, 'ignorableWhitespace'):
            self.tmp_handler.ignorableWhitespace(characters)

    def processingInstruction(self, target, data):
        """Wrap a processing instruction and forward it to the handler."""
        pi = Orchard.XML.ProcessingInstruction( target=target, data=data )
        if hasattr(self.tmp_handler, 'processingInstruction'):
            self.tmp_handler.processingInstruction(pi)

    def error(self, exception):
        """Ignore recoverable parser errors."""
        pass

    def fatalError(self, exception):
        """Abort the parse by re-raising the parser's exception."""
        raise exception

    def warning(self, exception):
        """Ignore parser warnings."""
        pass