summary refs log tree commit diff stats
path: root/server/src/org/jsoup/parser/XmlTreeBuilder.java
diff options
context:
space:
mode:
Diffstat (limited to 'server/src/org/jsoup/parser/XmlTreeBuilder.java')
-rw-r--r-- server/src/org/jsoup/parser/XmlTreeBuilder.java 111
1 files changed, 111 insertions, 0 deletions
diff --git a/server/src/org/jsoup/parser/XmlTreeBuilder.java b/server/src/org/jsoup/parser/XmlTreeBuilder.java
new file mode 100644
index 0000000000..3f03ad26ac
--- /dev/null
+++ b/server/src/org/jsoup/parser/XmlTreeBuilder.java
@@ -0,0 +1,111 @@
+package org.jsoup.parser;
+
+import org.jsoup.helper.Validate;
+import org.jsoup.nodes.*;
+
+import java.util.Iterator;
+
+/**
+ * Tree builder that assembles a document directly from the token stream: every start tag becomes an element
+ * appended to the current element, and every end tag closes the nearest open element with the same name.
+ * <p>
+ * Unlike HtmlTreeBuilder, the document itself is kept on the stack, so there is always a current element to
+ * append to.
+ *
+ * @author Jonathan Hedley
+ */
+public class XmlTreeBuilder extends TreeBuilder {
+    /**
+     * Runs the superclass initialisation and then puts the document on the stack as the initial current element.
+     *
+     * @param input   passed through unchanged to the superclass
+     * @param baseUri passed through unchanged to the superclass
+     * @param errors  passed through unchanged to the superclass
+     */
+    @Override
+    protected void initialiseParse(String input, String baseUri, ParseErrorList errors) {
+        super.initialiseParse(input, baseUri, errors);
+        stack.add(doc); // place the document onto the stack. differs from HtmlTreeBuilder (not on stack)
+    }
+
+    /**
+     * Dispatches a single token to the matching insert / close handler.
+     *
+     * @param token the token to handle; start tag, end tag, doctype, comment, character or EOF
+     * @return always {@code true}
+     */
+    @Override
+    protected boolean process(Token token) {
+        switch (token.type) {
+            case StartTag:
+                insert(token.asStartTag());
+                break;
+            case EndTag:
+                popStackToClose(token.asEndTag());
+                break;
+            case Comment:
+                insert(token.asComment());
+                break;
+            case Character:
+                insert(token.asCharacter());
+                break;
+            case Doctype:
+                insert(token.asDoctype());
+                break;
+            case EOF: // could put some normalisation here if desired
+                break;
+            default:
+                // any other token type is reported through Validate.fail
+                Validate.fail("Unexpected token type: " + token.type);
+        }
+        return true;
+    }
+
+    /**
+     * Appends the node as the last child of the current element.
+     *
+     * @param node the node to append
+     */
+    private void insertNode(Node node) {
+        currentElement().appendChild(node);
+    }
+
+    /**
+     * Creates an element for the start tag and appends it to the current element. A self-closing tag is
+     * acknowledged with the tokeniser and left off the stack; any other tag is pushed onto the stack so that
+     * following nodes become its children.
+     *
+     * @param startTag the start tag token supplying the element name and attributes
+     * @return the newly created element
+     */
+    Element insert(Token.StartTag startTag) {
+        Tag tag = Tag.valueOf(startTag.name());
+        // todo: wonder if for xml parsing, should treat all tags as unknown? because it's not html.
+        Element el = new Element(tag, baseUri, startTag.attributes);
+        insertNode(el);
+        if (startTag.isSelfClosing()) {
+            tokeniser.acknowledgeSelfClosingFlag();
+            if (!tag.isKnownTag()) { // unknown tag, remember this is self closing for output. see above.
+                tag.setSelfClosing();
+            }
+        } else {
+            stack.add(el);
+        }
+        return el;
+    }
+
+    /**
+     * Appends a comment node built from the token's data to the current element.
+     *
+     * @param commentToken the comment token
+     */
+    void insert(Token.Comment commentToken) {
+        Comment comment = new Comment(commentToken.getData(), baseUri);
+        insertNode(comment);
+    }
+
+    /**
+     * Appends a text node built from the token's data to the current element.
+     *
+     * @param characterToken the character token
+     */
+    void insert(Token.Character characterToken) {
+        Node node = new TextNode(characterToken.getData(), baseUri);
+        insertNode(node);
+    }
+
+    /**
+     * Appends a document type node built from the token's name, public identifier and system identifier to the
+     * current element.
+     *
+     * @param d the doctype token
+     */
+    void insert(Token.Doctype d) {
+        DocumentType doctypeNode = new DocumentType(d.getName(), d.getPublicIdentifier(), d.getSystemIdentifier(), baseUri);
+        insertNode(doctypeNode);
+    }
+
+    /**
+     * If the stack contains an element with this tag's name, pop up the stack to remove the first occurrence,
+     * together with everything the descending iterator visits before it. If no element matches, the stack is
+     * left untouched.
+     *
+     * @param endTag the end tag token whose name selects the element to close
+     */
+    private void popStackToClose(Token.EndTag endTag) {
+        String elName = endTag.name();
+        Element firstFound = null;
+
+        // first pass: look for a match without modifying the stack, so an unmatched end tag is a no-op
+        Iterator<Element> it = stack.descendingIterator();
+        while (it.hasNext()) {
+            Element next = it.next();
+            if (next.nodeName().equals(elName)) {
+                firstFound = next;
+                break;
+            }
+        }
+        if (firstFound == null) {
+            return; // not found, skip
+        }
+
+        // second pass: remove every entry up to and including the match
+        it = stack.descendingIterator();
+        while (it.hasNext()) {
+            Element next = it.next();
+            it.remove();
+            if (next == firstFound) {
+                break;
+            }
+        }
+    }
+}