You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

expatreader.py 8.3KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240
  """
  SAX driver for the Pyexpat C module. This driver works with
  pyexpat.__version__ == '2.22'.
  """

  # Version string of this driver module.
  version = "0.20"

  # The wildcard import is kept so that every SAX exception class stays
  # reachable through this module's namespace.
  from xml.sax._exceptions import *

  # Without the expat extension module this driver cannot work at all, so the
  # failure is reported with the SAX-level "reader not available" exception.
  try:
      from xml.parsers import expat
  except ImportError:
      raise SAXReaderNotAvailable("expat not supported", None)

  from xml.sax import xmlreader, saxutils, handler

  # Short module-level aliases for the attribute container classes.
  AttributesImpl = xmlreader.AttributesImpl
  AttributesNSImpl = xmlreader.AttributesNSImpl

  import string
  15. # --- ExpatParser
  class ExpatParser(xmlreader.IncrementalParser, xmlreader.Locator):
      """SAX driver for the Pyexpat C module.

      The reader acts as its own Locator: parse() passes `self` to the
      content handler through setDocumentLocator(), and the Locator methods
      below report the position held by the current expat parser object.
      """

      def __init__(self, namespaceHandling=0, bufsize=2**16-20):
          """Create an idle reader.

          namespaceHandling -- initial state of the SAX namespaces feature;
              when true, reset() creates a namespace-aware expat parser.
          bufsize -- handed unchanged to xmlreader.IncrementalParser.
          """
          xmlreader.IncrementalParser.__init__(self, bufsize)
          self._source = xmlreader.InputSource()
          self._parser = None
          self._namespaces = namespaceHandling
          self._lex_handler_prop = None
          self._parsing = 0
          # (parser, source) pairs saved while an external entity is parsed.
          self._entity_stack = []

      # XMLReader methods

      def parse(self, source):
          """Parse an XML document from a URL or an InputSource."""
          source = saxutils.prepare_input_source(source)
          self._source = source
          self.reset()
          self._cont_handler.setDocumentLocator(self)
          xmlreader.IncrementalParser.parse(self, source)

      def prepareParser(self, source):
          """Give expat the source's system id as its base, when there is one."""
          system_id = source.getSystemId()
          if system_id is not None:
              self._parser.SetBase(system_id)

      def getFeature(self, name):
          """Return the state of feature `name`.

          Only handler.feature_namespaces is known; any other name raises
          SAXNotRecognizedException.
          """
          if name == handler.feature_namespaces:
              return self._namespaces
          raise SAXNotRecognizedException("Feature '%s' not recognized" % name)

      def setFeature(self, name, state):
          """Set feature `name` to `state`.

          Raises SAXNotSupportedException while a parse is in progress and
          SAXNotRecognizedException for any feature other than
          handler.feature_namespaces.
          """
          if self._parsing:
              raise SAXNotSupportedException("Cannot set features while parsing")
          if name == handler.feature_namespaces:
              self._namespaces = state
          else:
              raise SAXNotRecognizedException("Feature '%s' not recognized" %
                                              name)

      def getProperty(self, name):
          """Return the value of property `name`.

          Only handler.property_lexical_handler is known; any other name
          raises SAXNotRecognizedException.
          """
          if name == handler.property_lexical_handler:
              return self._lex_handler_prop
          raise SAXNotRecognizedException("Property '%s' not recognized" % name)

      def setProperty(self, name, value):
          """Set property `name` to `value`.

          Only handler.property_lexical_handler is known; any other name
          raises SAXNotRecognizedException.  The new lexical handler is
          wired into expat by the next reset().
          """
          if name == handler.property_lexical_handler:
              self._lex_handler_prop = value
          else:
              raise SAXNotRecognizedException("Property '%s' not recognized" % name)

      # IncrementalParser methods

      def feed(self, data, isFinal=0):
          """Hand one chunk of the document to expat.

          The first chunk of a document resets the reader and fires
          startDocument().  An expat error is wrapped in a SAXParseException
          and passed to the error handler's fatalError().
          """
          if not self._parsing:
              self.reset()
              self._parsing = 1
              self._cont_handler.startDocument()

          try:
              # The isFinal parameter is internal to the expat reader.
              # If it is set to true, expat will check validity of the entire
              # document. When feeding chunks, they are not normally final -
              # except when invoked from close.
              self._parser.Parse(data, isFinal)
          except expat.error:
              error_code = self._parser.ErrorCode
              exc = SAXParseException(expat.ErrorString(error_code), None, self)
              # FIXME: when to invoke error()?
              self._err_handler.fatalError(exc)

      def close(self):
          """Finish the document: final feed, endDocument(), drop the parser."""
          if self._entity_stack:
              # If we are completing an external entity, do nothing here
              return

          self.feed("", isFinal=1)
          self._cont_handler.endDocument()
          self._parsing = 0
          # break cycle created by expat handlers pointing to our methods
          self._parser = None

      def reset(self):
          """Create a fresh expat parser and attach all callbacks to it."""
          if self._namespaces:
              # " " is the separator placed between namespace URI and local
              # name; the *_ns handlers split the reported names on it.
              parser = expat.ParserCreate(None, " ")
              parser.StartElementHandler = self.start_element_ns
              parser.EndElementHandler = self.end_element_ns
          else:
              parser = expat.ParserCreate()
              parser.StartElementHandler = self.start_element
              parser.EndElementHandler = self.end_element
          self._parser = parser

          # These two go straight to the content handler that is installed
          # at the time of the reset, bypassing this class.
          parser.ProcessingInstructionHandler = \
              self._cont_handler.processingInstruction
          parser.CharacterDataHandler = self._cont_handler.characters

          parser.UnparsedEntityDeclHandler = self.unparsed_entity_decl
          parser.NotationDeclHandler = self.notation_decl
          parser.StartNamespaceDeclHandler = self.start_namespace_decl
          parser.EndNamespaceDeclHandler = self.end_namespace_decl

          self._decl_handler_prop = None
          lexical = self._lex_handler_prop
          if lexical:
              parser.CommentHandler = lexical.comment
              parser.StartCdataSectionHandler = lexical.startCDATA
              parser.EndCdataSectionHandler = lexical.endCDATA
          # Not wired up: DefaultHandler, DefaultHandlerExpand,
          # NotStandaloneHandler.
          parser.ExternalEntityRefHandler = self.external_entity_ref

          self._parsing = 0
          self._entity_stack = []

      # Locator methods

      def getColumnNumber(self):
          """Return expat's ErrorColumnNumber, or None when no parser exists."""
          if self._parser is None:
              return None
          return self._parser.ErrorColumnNumber

      def getLineNumber(self):
          """Return expat's ErrorLineNumber, or 1 when no parser exists."""
          if self._parser is None:
              return 1
          return self._parser.ErrorLineNumber

      def getPublicId(self):
          """Return the public id of the current input source."""
          return self._source.getPublicId()

      def getSystemId(self):
          """Return the system id of the current input source."""
          return self._source.getSystemId()

      # event handlers

      def _split_name(self, name):
          """Turn an expat-reported name into a (uri, localname) tuple.

          A name with no separator is not in a namespace and becomes
          (None, name); otherwise the whitespace-separated parts are
          returned as a tuple.  Uses the str method rather than the
          string-module function, which newer Python versions lack.
          """
          parts = name.split()
          if len(parts) == 1:
              return (None, name)
          return tuple(parts)

      def start_element(self, name, attrs):
          """Forward a start tag (namespaces off) as startElement()."""
          self._cont_handler.startElement(name, AttributesImpl(attrs))

      def end_element(self, name):
          """Forward an end tag (namespaces off) as endElement()."""
          self._cont_handler.endElement(name)

      def start_element_ns(self, name, attrs):
          """Forward a start tag (namespaces on) as startElementNS().

          Element and attribute names are converted to (uri, localname)
          pairs; the qname argument is None and the qname map is empty.
          """
          newattrs = {}
          for (aname, value) in attrs.items():
              newattrs[self._split_name(aname)] = value

          self._cont_handler.startElementNS(self._split_name(name), None,
                                            AttributesNSImpl(newattrs, {}))

      def end_element_ns(self, name):
          """Forward an end tag (namespaces on) as endElementNS()."""
          self._cont_handler.endElementNS(self._split_name(name), None)

      # this is not used (call directly to ContentHandler)
      def processing_instruction(self, target, data):
          """Forward a processing instruction to the content handler."""
          self._cont_handler.processingInstruction(target, data)

      # this is not used (call directly to ContentHandler)
      def character_data(self, data):
          """Forward character data to the content handler."""
          self._cont_handler.characters(data)

      def start_namespace_decl(self, prefix, uri):
          """Forward a namespace declaration as startPrefixMapping()."""
          self._cont_handler.startPrefixMapping(prefix, uri)

      def end_namespace_decl(self, prefix):
          """Forward the end of a namespace scope as endPrefixMapping()."""
          self._cont_handler.endPrefixMapping(prefix)

      def unparsed_entity_decl(self, name, base, sysid, pubid, notation_name):
          """Forward an unparsed entity declaration to the DTD handler.

          `base` is dropped and the ids are passed in (pubid, sysid) order.
          """
          self._dtd_handler.unparsedEntityDecl(name, pubid, sysid, notation_name)

      def notation_decl(self, name, base, sysid, pubid):
          """Forward a notation declaration to the DTD handler.

          `base` is dropped and the ids are passed in (pubid, sysid) order.
          """
          self._dtd_handler.notationDecl(name, pubid, sysid)

      def external_entity_ref(self, context, base, sysid, pubid):
          """Resolve and parse an external entity with a sub-parser.

          Returns 1 when the entity was parsed and 0 when anything went
          wrong, which is the status expat expects from this callback.
          """
          source = self._ent_handler.resolveEntity(pubid, sysid)
          source = saxutils.prepare_input_source(source,
                                                 self._source.getSystemId() or
                                                 "")

          # Park the current parser/source; while the stack is non-empty
          # close() is a no-op, so finishing the entity does not end the
          # document.
          self._entity_stack.append((self._parser, self._source))
          self._parser = self._parser.ExternalEntityParserCreate(context)
          self._source = source

          try:
              xmlreader.IncrementalParser.parse(self, source)
          except:
              # Deliberately broad: any failure is reported to expat as 0.
              # NOTE(review): the sub-parser and its source stay installed
              # on this path, so feed() reads ErrorCode from the sub-parser
              # when it handles the resulting expat error -- confirm this
              # is intended before changing it.
              return 0  # FIXME: save error info here?

          (self._parser, self._source) = self._entity_stack[-1]
          del self._entity_stack[-1]
          return 1
  181. # ---
  def create_parser(*args, **kwargs):
      """Return a new ExpatParser built from the given arguments.

      All positional and keyword arguments are passed straight through to
      the ExpatParser constructor.  Argument unpacking is used in place of
      the apply() builtin, which newer Python versions no longer provide.
      """
      return ExpatParser(*args, **kwargs)
  184. # ---
  if __name__ == "__main__":
      # Demo driver: parse one document and echo it through XMLGenerator.
      import sys
      import xml.sax
      import xml.sax.saxutils

      # An optional first command-line argument replaces the sample path.
      if len(sys.argv) > 1:
          document = sys.argv[1]
      else:
          document = "../../../hamlet.xml"

      p = create_parser()
      # XMLGenerator is defined in xml.sax.saxutils, not in the xml.sax
      # package namespace itself.
      p.setContentHandler(xml.sax.saxutils.XMLGenerator())
      p.setErrorHandler(xml.sax.ErrorHandler())
      p.parse(document)