diff options
Diffstat (limited to 'server/src/org/jsoup/parser/XmlTreeBuilder.java')
-rw-r--r-- | server/src/org/jsoup/parser/XmlTreeBuilder.java | 111 |
1 files changed, 111 insertions, 0 deletions
package org.jsoup.parser;

import org.jsoup.helper.Validate;
import org.jsoup.nodes.*;

import java.util.Iterator;

/**
 * Tree builder used for XML parsing. Each token handed to {@link #process(Token)} is turned
 * into a node and appended beneath the current element; elements that are still open are
 * tracked on the stack.
 *
 * @author Jonathan Hedley
 */
public class XmlTreeBuilder extends TreeBuilder {

    /**
     * Runs the superclass initialisation and then pushes the document onto the stack.
     *
     * @param input   passed through unchanged to the superclass
     * @param baseUri passed through unchanged to the superclass
     * @param errors  passed through unchanged to the superclass
     */
    @Override
    protected void initialiseParse(String input, String baseUri, ParseErrorList errors) {
        super.initialiseParse(input, baseUri, errors);
        // Unlike HtmlTreeBuilder (which keeps the document off the stack), the document
        // itself is the bottom stack entry here.
        stack.add(doc);
    }

    /**
     * Dispatches a single token according to its type: start tag, end tag, doctype, comment,
     * character, or EOF.
     *
     * @param token the token to handle
     * @return always {@code true}
     */
    @Override
    protected boolean process(Token token) {
        switch (token.type) {
            case Doctype: {
                insert(token.asDoctype());
                break;
            }
            case Comment: {
                insert(token.asComment());
                break;
            }
            case Character: {
                insert(token.asCharacter());
                break;
            }
            case StartTag: {
                insert(token.asStartTag());
                break;
            }
            case EndTag: {
                popStackToClose(token.asEndTag());
                break;
            }
            case EOF: {
                // nothing to do; some normalisation could be added here if desired
                break;
            }
            default: {
                // any other token type is reported through Validate.fail
                Validate.fail("Unexpected token type: " + token.type);
            }
        }
        return true;
    }

    /**
     * Appends the node as the last child of the current element.
     *
     * @param node the node to attach
     */
    private void insertNode(Node node) {
        Element parent = currentElement();
        parent.appendChild(node);
    }

    /**
     * Creates an element for the start tag and appends it to the current element. A tag that is
     * not self closing is additionally pushed onto the stack so that later nodes nest inside it.
     *
     * @param startTag the start tag token
     * @return the newly created element
     */
    Element insert(Token.StartTag startTag) {
        // todo: wonder if for xml parsing, should treat all tags as unknown? because it's not html.
        Tag tag = Tag.valueOf(startTag.name());
        Element created = new Element(tag, baseUri, startTag.attributes);
        insertNode(created);

        if (!startTag.isSelfClosing()) {
            // element stays open: subsequent nodes become its children
            stack.add(created);
            return created;
        }

        tokeniser.acknowledgeSelfClosingFlag();
        if (!tag.isKnownTag()) {
            // unknown tag: remember that it is self closing for output (see the todo above)
            tag.setSelfClosing();
        }
        return created;
    }

    /**
     * Appends a comment node built from the token's data.
     *
     * @param commentToken the comment token
     */
    void insert(Token.Comment commentToken) {
        insertNode(new Comment(commentToken.getData(), baseUri));
    }

    /**
     * Appends a text node built from the token's data.
     *
     * @param characterToken the character token
     */
    void insert(Token.Character characterToken) {
        insertNode(new TextNode(characterToken.getData(), baseUri));
    }

    /**
     * Appends a document type node built from the token's name, public identifier and system
     * identifier.
     *
     * @param d the doctype token
     */
    void insert(Token.Doctype d) {
        insertNode(new DocumentType(d.getName(), d.getPublicIdentifier(), d.getSystemIdentifier(), baseUri));
    }

    /**
     * Closes the most recently opened element whose node name equals the end tag's name: that
     * element and every entry above it are removed from the stack. If no stack entry matches,
     * the stack is left untouched.
     *
     * @param endTag the end tag token naming the element to close
     */
    private void popStackToClose(Token.EndTag endTag) {
        final String closingName = endTag.name();

        // First pass, from the top of the stack down: find the element being closed.
        Element target = null;
        for (Iterator<Element> scan = stack.descendingIterator(); scan.hasNext(); ) {
            Element candidate = scan.next();
            if (candidate.nodeName().equals(closingName)) {
                target = candidate;
                break;
            }
        }
        if (target == null) {
            return; // no matching open element, ignore the end tag
        }

        // Second pass: remove entries from the top until the target itself has been removed.
        for (Iterator<Element> popper = stack.descendingIterator(); popper.hasNext(); ) {
            Element top = popper.next();
            popper.remove();
            if (top == target) {
                break;
            }
        }
    }
}