path: root/lib/jython/Lib/xml/dom/ext/c14n.py
diff options
Diffstat (limited to 'lib/jython/Lib/xml/dom/ext/c14n.py')
1 files changed, 251 insertions, 0 deletions
diff --git a/lib/jython/Lib/xml/dom/ext/c14n.py b/lib/jython/Lib/xml/dom/ext/c14n.py
new file mode 100644
index 000000000..251ff278d
--- /dev/null
+++ b/lib/jython/Lib/xml/dom/ext/c14n.py
@@ -0,0 +1,251 @@
#! /usr/bin/env python
'''XML Canonicalization

This module generates canonical XML, as defined in
    http://www.w3.org/TR/xml-c14n

It is limited in that it can only canonicalize an element and all its
children; general document subsets are not supported.
'''

_copyright = '''Copyright 2001, Zolera Systems Inc. All Rights Reserved.
Distributed under the terms of the Python 2.0 Copyright or later.'''

from xml.dom import Node
from xml.ns import XMLNS
import re

# Prefer the C implementation of StringIO when it is available; either
# way the module is reachable afterwards under the name "StringIO".
try:
    import cStringIO
    StringIO = cStringIO
except ImportError:
    import StringIO
+_attrs = lambda E: E.attributes or []
+_children = lambda E: E.childNodes or []
+def _sorter(n1, n2):
+ '''Sorting predicate for non-NS attributes.'''
+ i = cmp(n1.namespaceURI, n2.namespaceURI)
+ if i: return i
+ return cmp(n1.localName, n2.localName)
+def _sorter_ns(n1, n2):
+ '''Sorting predicate for NS attributes; "xmlns" always comes first.'''
+ if n1.localName == 'xmlns': return -1
+ if n2.localName == 'xmlns': return 1
+ return cmp(n1.localName, n2.localName)
+class _implementation:
+ '''Implementation class for C14N.'''
+ # Handlers for each node, by node type.
+ handlers = {}
+ # pattern/replacement list for whitespace stripping.
+ repats = (
+ ( re.compile(r'[ \t]+'), ' ' ),
+ ( re.compile(r'[\r\n]+'), '\n' ),
+ )
+ def __init__(self, node, write, nsdict={}, stripspace=0, nocomments=1):
+ '''Create and run the implementation.'''
+ if node.nodeType != Node.ELEMENT_NODE:
+ raise TypeError, 'Non-element node'
+ self.write, self.stripspace, self.nocomments = \
+ write, stripspace, nocomments
+ if nsdict == None or nsdict == {}:
+ nsdict = { 'xml': XMLNS.XML, 'xmlns': XMLNS.BASE }
+ self.ns_stack = [ nsdict ]
+ # Collect the initial list of xml:XXX attributes.
+ xmlattrs = []
+ for a in _attrs(node):
+ if a.namespaceURI == XMLNS.XML:
+ n = a.localName
+ xmlattrs.append(n)
+ # Walk up and get all xml:XXX attributes we inherit.
+ parent, inherited = node.parentNode, []
+ while parent:
+ if parent.nodeType != Node.ELEMENT_NODE: break
+ for a in _attrs(parent):
+ if a.namespaceURI != XMLNS.XML: continue
+ n = a.localName
+ if n not in xmlattrs:
+ xmlattrs.append(n)
+ inherited.append(a)
+ parent = parent.parentNode
+ self._do_element(node, inherited)
+ self.ns_stack.pop()
+ def _do_text(self, node):
+ 'Process a text node.'
+ s = node.data \
+ .replace("&", "&") \
+ .replace("<", "&lt;") \
+ .replace(">", "&gt;") \
+ .replace("\015", "&#xD;")
+ if self.stripspace:
+ for pat,repl in _implementation.repats: s = re.sub(pat, repl, s)
+ if s: self.write(s)
+ handlers[Node.TEXT_NODE] =_do_text
+ handlers[Node.CDATA_SECTION_NODE] =_do_text
+ def _do_pi(self, node):
+ '''Process a PI node. Since we start with an element, we're
+ never a child of the root, so we never write leading or trailing
+ #xA.
+ '''
+ W = self.write
+ W('<?')
+ W(node.nodeName)
+ s = node.data
+ if s:
+ W(' ')
+ W(s)
+ W('?>')
+ handlers[Node.PROCESSING_INSTRUCTION_NODE] =_do_pi
+ def _do_comment(self, node):
+ '''Process a comment node. Since we start with an element, we're
+ never a child of the root, so we never write leading or trailing
+ #xA.
+ '''
+ if self.nocomments: return
+ W = self.write
+ W('<!--')
+ W(node.data)
+ W('-->')
+ handlers[Node.COMMENT_NODE] =_do_comment
+ def _do_attr(self, n, value):
+ 'Process an attribute.'
+ W = self.write
+ W(' ')
+ W(n)
+ W('="')
+ s = value \
+ .replace("&", "&amp;") \
+ .replace("<", "&lt;") \
+ .replace('"', '&quot;') \
+ .replace('\011', '&#x9') \
+ .replace('\012', '&#xA') \
+ .replace('\015', '&#xD')
+ W(s)
+ W('"')
+ def _do_element(self, node, initialattrlist = []):
+ 'Process an element (and its children).'
+ name = node.nodeName
+ W = self.write
+ W('<')
+ W(name)
+ # Get parent namespace, make a copy for us to inherit.
+ parent_ns = self.ns_stack[-1]
+ my_ns = parent_ns.copy()
+ # Divide attributes into NS definitions and others.
+ nsnodes, others = [], initialattrlist[:]
+ for a in _attrs(node):
+ if a.namespaceURI == XMLNS.BASE:
+ nsnodes.append(a)
+ else:
+ others.append(a)
+ # Namespace attributes: update dictionary; if not already
+ # in parent, output it.
+ nsnodes.sort(_sorter_ns)
+ for a in nsnodes:
+ # Some DOMs seem to rename "xmlns='xxx'" strangely
+ n = a.nodeName
+ if n == "xmlns:":
+ key, n = "", "xmlns"
+ else:
+ key = a.localName
+ v = my_ns[key] = a.nodeValue
+ pval = parent_ns.get(key, None)
+ if n == "xmlns" and v in [ '', XMLNS.BASE ] \
+ and pval in [ '', XMLNS.BASE ]:
+ # Default namespace set to default value.
+ pass
+ elif v != pval:
+ self._do_attr(n, v)
+ # Other attributes: sort and output.
+ others.sort(_sorter)
+ for a in others: self._do_attr(a.nodeName, a.value)
+ W('>')
+ # Push our namespace dictionary, recurse, pop the dicionary.
+ self.ns_stack.append(my_ns)
+ for c in _children(node):
+ _implementation.handlers[c.nodeType](self, c)
+ # XXX Ignore unknown node types?
+ #handler = _implementation.handlers.get(c.nodeType, None)
+ #if handler: handler(self, c)
+ self.ns_stack.pop()
+ W('</%s>' % (name,))
+ handlers[Node.ELEMENT_NODE] =_do_element
def Canonicalize(node, output=None, **kw):
    '''Canonicalize a DOM element node and everything underneath it.

    When output is given, the text is passed to output.write and None
    is returned; otherwise the text is collected and returned.

    Keyword parameters:
        stripspace -- remove extra (almost all) whitespace from text nodes
        nsdict -- a dictionary of prefix:uri namespace entries assumed
            to exist in the surrounding context
        comments -- keep comments if non-zero (default is zero)
    '''
    if output:
        buf = None
        sink = output.write
    else:
        # No destination supplied: collect the text in memory.
        buf = StringIO.StringIO()
        sink = buf.write

    ns = kw.get('nsdict', {})
    strip = kw.get('stripspace', 0)
    drop_comments = kw.get('comments', 0) == 0
    _implementation(node, sink,
        nsdict=ns, stripspace=strip, nocomments=drop_comments)

    if buf is not None:
        return buf.getvalue()
+if __name__ == '__main__':
+ text = '''<SOAP-ENV:Envelope xml:lang='en'
+ xmlns:SOAP-ENV="http://schemas.xmlsoap.org/soap/envelope/"
+ xmlns:SOAP-ENC="http://schemas.xmlsoap.org/soap/encoding/"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchemaInstance"
+ xmlns:xsd="http://www.w3.org/2001/XMLSchemaZ" xmlns:spare='foo'
+ SOAP-ENV:encodingStyle="http://schemas.xmlsoap.org/soap/encoding/">
+ <SOAP-ENV:Body xmlns='test-uri'><?MYPI spenser?>
+ <zzz xsd:foo='xsdfoo' xsi:a='xsi:a'/>
+ <SOAP-ENC:byte>44</SOAP-ENC:byte> <!-- 1 -->
+ <Name xml:lang='en-GB'>This is the name</Name>Some
+content here on two lines.
+ <n2><![CDATA[<greeting>Hello</greeting>]]></n2> <!-- 3 -->
+ <n3 href='z&amp;zz' xsi:type='SOAP-ENC:string'>
+ more content. indented </n3>
+ <a2 xmlns:f='z' xmlns:aa='zz'><i xmlns:f='z'>12</i><t>rich salz</t></a2> <!-- 8 -->
+ </SOAP-ENV:Body>
+ <z xmlns='myns' id='zzz'>The value of n3</z>
+ <zz xmlns:spare='foo' xmlns='myns2' id='tri2'><inner>content</inner></zz>
+ print _copyright
+ from xml.dom.ext.reader import PyExpat
+ reader = PyExpat.Reader()
+ dom = reader.fromString(text)
+ for e in _children(dom):
+ if e.nodeType != Node.ELEMENT_NODE: continue
+ for ee in _children(e):
+ if ee.nodeType != Node.ELEMENT_NODE: continue
+ print '\n', '=' * 60
+ print Canonicalize(ee, nsdict={'spare':'foo'}, stripspace=1)
+ print '-' * 60
+ print Canonicalize(ee, stripspace=0)
+ print '-' * 60
+ print Canonicalize(ee, comments=1)
+ print '=' * 60