1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
|
"""
This module contains the core classes of version 2.0 of SAX for Python.
This file provides only default classes with absolutely minimum
functionality, from which drivers and applications can be subclassed.
Many of these classes are empty and are included only as documentation
of the interfaces.
$Id: saxlib.py,v 1.11 2001/01/27 09:03:52 loewis Exp $
"""
# Version string of the SAX for Python interface described by this module.
version = '2.0beta'
# A number of interfaces used to live in saxlib, but are now in
# various other modules for Python 2 compatibility. If nobody uses
# them here any longer, the references can be removed
from handler import ErrorHandler, ContentHandler, DTDHandler, EntityResolver
from xmlreader import XMLReader, InputSource, Locator, IncrementalParser
from _exceptions import *
from handler import \
feature_namespaces,\
feature_namespace_prefixes,\
feature_string_interning,\
feature_validation,\
feature_external_ges,\
feature_external_pes,\
all_features,\
property_lexical_handler,\
property_declaration_handler,\
property_dom_node,\
property_xml_string,\
all_properties
#============================================================================
#
# MAIN INTERFACES
#
#============================================================================
# ===== XMLFILTER =====
class XMLFilter(XMLReader):
    """A SAX2 parser filter.

    A filter is an XMLReader whose events come from another XMLReader
    (its parent, which may itself be a filter) instead of from a
    primary source such as a document or other non-SAX data source.
    This allows a filter to alter the stream of events before handing
    it on to its own handlers.
    """

    def __init__(self, parent = None):
        """Create the filter, optionally supplying the parent reader
        at construction time."""
        XMLReader.__init__(self)
        self._parent = parent

    def getParent(self):
        """Return the parent reader currently stored on this filter."""
        return self._parent

    def setParent(self, parent):
        """Store the parent XMLReader of this filter.

        The argument is expected not to be None; this method does not
        check that."""
        self._parent = parent
# ===== ATTRIBUTES =====
class Attributes:
    """Interface for a list of XML attributes.

    Contains a list of XML attributes, accessible by name.

    Every accessor raises NotImplementedError and must be overridden by
    an implementation, with two exceptions: __len__ and __getitem__ are
    documented aliases and therefore delegate to getLength and getValue.
    An implementation only has to provide those two once; on this base
    class the aliases still end in NotImplementedError."""

    def getLength(self):
        "Returns the number of attributes in the list."
        raise NotImplementedError("This method must be implemented!")

    def getType(self, name):
        "Returns the type of the attribute with the given name."
        raise NotImplementedError("This method must be implemented!")

    def getValue(self, name):
        "Returns the value of the attribute with the given name."
        raise NotImplementedError("This method must be implemented!")

    def getValueByQName(self, name):
        """Returns the value of the attribute with the given raw (or
        qualified) name."""
        raise NotImplementedError("This method must be implemented!")

    def getNameByQName(self, name):
        """Returns the namespace name of the attribute with the given
        raw (or qualified) name."""
        raise NotImplementedError("This method must be implemented!")

    def getNames(self):
        """Returns a list of the names of all attributes
        in the list."""
        raise NotImplementedError("This method must be implemented!")

    def getQNames(self):
        """Returns a list of the raw qualified names of all attributes
        in the list."""
        raise NotImplementedError("This method must be implemented!")

    def __len__(self):
        "Alias for getLength."
        # Delegate so that overriding getLength is enough for len().
        return self.getLength()

    def __getitem__(self, name):
        "Alias for getValue."
        # Delegate so that overriding getValue is enough for attrs[name].
        return self.getValue(name)

    def keys(self):
        "Returns a list of the attribute names in the list."
        raise NotImplementedError("This method must be implemented!")

    def has_key(self, name):
        "True if the attribute is in the list, false otherwise."
        raise NotImplementedError("This method must be implemented!")

    def get(self, name, alternative=None):
        """Return the value associated with attribute name; if it is not
        available, then return the alternative."""
        raise NotImplementedError("This method must be implemented!")

    def copy(self):
        "Return a copy of the Attributes object."
        raise NotImplementedError("This method must be implemented!")

    def items(self):
        "Return a list of (attribute_name, value) pairs."
        raise NotImplementedError("This method must be implemented!")

    def values(self):
        "Return a list of all attribute values."
        raise NotImplementedError("This method must be implemented!")
#============================================================================
#
# HANDLER INTERFACES
#
#============================================================================
# ===== DECLHANDLER =====
class DeclHandler:
    """Optional SAX2 handler for DTD declaration events.

    Some DTD declarations are already reported through the DTDHandler
    interface. Every event delivered to this handler occurs between
    the startDTD and endDTD events of the LexicalHandler.

    To set the DeclHandler for an XMLReader, use the setProperty method
    with the identifier http://xml.org/sax/handlers/DeclHandler.

    All methods here are no-ops meant to be overridden."""

    def attributeDecl(self, elem_name, attr_name, type, value_def, value):
        """Report an attribute type declaration.

        Only the first declaration will be reported.

        elem_name -- the element type name.
        attr_name -- the attribute type name.
        type      -- one of the strings "CDATA", "ID", "IDREF",
                     "IDREFS", "NMTOKEN", "NMTOKENS", "ENTITY",
                     "ENTITIES" or "NOTATION", or a list of names (for
                     enumerated definitions).
        value_def -- the default declaration: '#IMPLIED', '#REQUIRED',
                     '#FIXED' or None.
        value     -- the attribute's default value as a string, or
                     None if there is none."""

    def elementDecl(self, elem_name, content_model):
        """Report an element type declaration.

        Only the first declaration will be reported.

        content_model is the string 'EMPTY', the string 'ANY', or a
        tuple (separator, tokens, modifier) describing the content
        model: separator is the separator used in the token list ('|'
        or ','), tokens is the list of tokens (element type names, or
        tuples representing parentheses) and modifier is the quantity
        modifier ('*', '?' or '+')."""

    def internalEntityDecl(self, name, value):
        """Report an internal entity declaration.

        Only the first declaration of an entity will be reported.

        name is the entity name; for a parameter entity it begins with
        '%'. value is the replacement text of the entity."""

    def externalEntityDecl(self, name, public_id, system_id):
        """Report a parsed external entity declaration. (Unparsed
        entities are reported to the DTDHandler.)

        Only the first declaration for each entity will be reported.

        name is the entity name; for a parameter entity it begins with
        '%'. public_id and system_id are the public and system
        identifiers of the entity; public_id is None if none was
        declared."""
# ===== LEXICALHANDLER =====
class LexicalHandler:
    """Optional SAX2 handler for lexical events.

    Use this handler to obtain lexical information about an XML
    document: how the document was encoded (as opposed to what it
    contains, which is reported to the ContentHandler), for example
    comments and CDATA marked section boundaries.

    To set the LexicalHandler of an XMLReader, use the setProperty
    method with the property identifier
    'http://xml.org/sax/handlers/LexicalHandler'. There is no
    guarantee that the XMLReader will support or recognize this
    property.

    All methods here are no-ops meant to be overridden."""

    def comment(self, content):
        """Report a comment anywhere in the document (including the
        DTD and outside the document element).

        content is a string holding the contents of the comment."""

    def startDTD(self, name, public_id, system_id):
        """Report the start of the DTD declarations, if the document
        has an associated DTD.

        A startEntity event is reported before declaration events from
        the external DTD subset, which makes it possible to infer from
        which subset a DTD declaration derives.

        name is the name of the document element type, public_id the
        public identifier of the DTD (or None if none was supplied)
        and system_id the system identifier of the external subset (or
        None if none was supplied)."""

    def endDTD(self):
        """Signal the end of DTD declarations."""

    def startEntity(self, name):
        """Report the beginning of an entity.

        The start and end of the document entity are not reported.
        The start and end of the external DTD subset are reported with
        the pseudo-name '[dtd]'.

        Skipped entities are reported through the skippedEntity event
        of the ContentHandler rather than through this event.

        name is the entity name; for a parameter entity it begins with
        '%'."""

    def endEntity(self, name):
        """Report the end of an entity. name is the entity name and
        follows the same conventions as for startEntity."""

    def startCDATA(self):
        """Report the beginning of a CDATA marked section.

        The contents of the CDATA marked section are reported through
        the characters event."""

    def endCDATA(self):
        """Report the end of a CDATA marked section."""
#============================================================================
#
# SAX 1.0 COMPATIBILITY CLASSES
# Note that these are all deprecated.
#
#============================================================================
# ===== ATTRIBUTELIST =====
class AttributeList:
    """Deprecated SAX 1.0 interface for an attribute list.

    Provides information about the list of attributes of an element
    (only specified or defaulted attributes are reported). The
    information returned by this object is valid only during the scope
    of the DocumentHandler.startElement callback, and the attributes
    are not necessarily provided in the order declared or specified.

    The methods below are empty and only document the interface."""

    def getLength(self):
        """Return the number of attributes in the list."""

    def getName(self, i):
        """Return the name of an attribute in the list."""

    def getType(self, i):
        """Return the type of an attribute in the list. (The parameter
        can be either an integer index or an attribute name.)"""

    def getValue(self, i):
        """Return the value of an attribute in the list. (The parameter
        can be either an integer index or an attribute name.)"""

    def __len__(self):
        """Alias for getLength."""

    def __getitem__(self, key):
        """Alias for getName (if key is an integer) and getValue (if
        key is a string)."""

    def keys(self):
        """Return a list of the attribute names."""

    def has_key(self, key):
        """True if the attribute is in the list, false otherwise."""

    def get(self, key, alternative=None):
        """Return the value associated with the attribute name; if it
        is not available, return the alternative."""

    def copy(self):
        """Return a copy of the AttributeList."""

    def items(self):
        """Return a list of (attribute_name, value) pairs."""

    def values(self):
        """Return a list of all attribute values."""
# ===== DOCUMENTHANDLER =====
class DocumentHandler:
    """Deprecated SAX 1.0 handler for general document events.

    This is the main client interface for SAX: it holds the callbacks
    for the most important document events, such as the start and end
    of elements. Create an object implementing this interface and
    register it with the Parser. If you do not want to implement the
    whole interface, derive a class from HandlerBase, which implements
    the default functionality. The location of any document event can
    be found through the Locator interface supplied by
    setDocumentLocator().

    The callbacks below are empty and meant to be overridden."""

    def characters(self, ch, start, length):
        """Handle a character data event."""

    def endDocument(self):
        """Handle an event for the end of a document."""

    def endElement(self, name):
        """Handle an event for the end of an element."""

    def ignorableWhitespace(self, ch, start, length):
        """Handle an event for ignorable whitespace in element
        content."""

    def processingInstruction(self, target, data):
        """Handle a processing instruction event."""

    def setDocumentLocator(self, locator):
        """Receive an object for locating the origin of SAX document
        events."""

    def startDocument(self):
        """Handle an event for the beginning of a document."""

    def startElement(self, name, atts):
        """Handle an event for the beginning of an element."""
# ===== HANDLERBASE =====
class HandlerBase(
    EntityResolver,
    DTDHandler,
    DocumentHandler,
    ErrorHandler,
):
    """Deprecated SAX 1.0 default base class for handlers.

    Combines the default behaviour of four SAX interfaces --
    EntityResolver, DTDHandler, DocumentHandler and ErrorHandler -- by
    inheriting from all of them. Rather than implementing those full
    interfaces you may simply extend this class and override the
    methods you need. Using this class is optional; you are free to
    implement the interfaces directly if you wish."""
# ===== PARSER =====
class Parser:
    """Deprecated SAX 1.0 basic interface for SAX (Simple API for XML)
    parsers.

    All SAX parsers must implement this basic interface: it lets users
    register handlers for different types of events and start a parse
    from a URI, a character stream, or a byte stream. SAX parsers
    should also implement a zero-argument constructor."""

    def __init__(self):
        # Begin with a fresh default instance of every handler class so
        # the attributes exist before any set*Handler call.
        self.doc_handler = DocumentHandler()
        self.dtd_handler = DTDHandler()
        self.ent_handler = EntityResolver()
        self.err_handler = ErrorHandler()

    def parse(self, systemId):
        """Parse an XML document from a system identifier.

        Empty in this base class."""

    def parseFile(self, fileobj):
        """Parse an XML document from a file-like object.

        Empty in this base class."""

    def setDocumentHandler(self, handler):
        """Register an object to receive basic document-related
        events."""
        self.doc_handler = handler

    def setDTDHandler(self, handler):
        """Register an object to receive basic DTD-related events."""
        self.dtd_handler = handler

    def setEntityResolver(self, resolver):
        """Register an object to resolve external entities."""
        self.ent_handler = resolver

    def setErrorHandler(self, handler):
        """Register an object to receive error-message events."""
        self.err_handler = handler

    def setLocale(self, locale):
        """Allow an application to set the locale for errors and
        warnings.

        SAX parsers are not required to provide localisation for
        errors and warnings; if they cannot support the requested
        locale, however, they must throw a SAX exception. Applications
        may request a locale change in the middle of a parse.

        This base implementation always raises
        SAXNotSupportedException."""
        raise SAXNotSupportedException("Locale support not implemented")
|