diff options
author | Leif Åstrand <leif@vaadin.com> | 2012-09-05 19:50:56 +0300 |
---|---|---|
committer | Leif Åstrand <leif@vaadin.com> | 2012-09-05 19:51:15 +0300 |
commit | 7d25670284b11c7c62ba25183f265227cb3dba83 (patch) | |
tree | c8e76eb70dd3cdd5cf59a99419635f2188b25c24 /server/src/org/jsoup/parser/HtmlTreeBuilder.java | |
parent | 1d0c96de9595c243d88471476d21e5f248be63f7 (diff) | |
download | vaadin-framework-7d25670284b11c7c62ba25183f265227cb3dba83.tar.gz vaadin-framework-7d25670284b11c7c62ba25183f265227cb3dba83.zip |
Reformat project
Diffstat (limited to 'server/src/org/jsoup/parser/HtmlTreeBuilder.java')
-rw-r--r-- | server/src/org/jsoup/parser/HtmlTreeBuilder.java | 286 |
1 files changed, 184 insertions, 102 deletions
diff --git a/server/src/org/jsoup/parser/HtmlTreeBuilder.java b/server/src/org/jsoup/parser/HtmlTreeBuilder.java index 457a4c3249..f09ab8794c 100644 --- a/server/src/org/jsoup/parser/HtmlTreeBuilder.java +++ b/server/src/org/jsoup/parser/HtmlTreeBuilder.java @@ -1,15 +1,20 @@ package org.jsoup.parser; -import org.jsoup.helper.DescendableLinkedList; -import org.jsoup.helper.StringUtil; -import org.jsoup.helper.Validate; -import org.jsoup.nodes.*; - import java.util.ArrayList; import java.util.Iterator; import java.util.LinkedList; import java.util.List; +import org.jsoup.helper.DescendableLinkedList; +import org.jsoup.helper.StringUtil; +import org.jsoup.helper.Validate; +import org.jsoup.nodes.Comment; +import org.jsoup.nodes.DataNode; +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; +import org.jsoup.nodes.Node; +import org.jsoup.nodes.TextNode; + /** * HTML Tree Builder; creates a DOM from Tokens. */ @@ -21,15 +26,26 @@ class HtmlTreeBuilder extends TreeBuilder { private boolean baseUriSetFromDoc = false; private Element headElement; // the current head element private Element formElement; // the current form element - private Element contextElement; // fragment parse context -- could be null even if fragment parsing - private DescendableLinkedList<Element> formattingElements = new DescendableLinkedList<Element>(); // active (open) formatting elements - private List<Token.Character> pendingTableCharacters = new ArrayList<Token.Character>(); // chars in table to be shifted out + private Element contextElement; // fragment parse context -- could be null + // even if fragment parsing + private DescendableLinkedList<Element> formattingElements = new DescendableLinkedList<Element>(); // active + // (open) + // formatting + // elements + private List<Token.Character> pendingTableCharacters = new ArrayList<Token.Character>(); // chars + // in + // table + // to + // be + // shifted + // out private boolean framesetOk = true; // if ok to go into frameset private boolean fosterInserts = false; // if next inserts should be fostered private boolean fragmentParsing = false; // if parsing a fragment of html - HtmlTreeBuilder() {} + HtmlTreeBuilder() { + } @Override Document parse(String input, String baseUri, ParseErrorList errors) { @@ -37,7 +53,8 @@ class HtmlTreeBuilder extends TreeBuilder { return super.parse(input, baseUri, errors); } - List<Node> parseFragment(String inputFragment, Element context, String baseUri, ParseErrorList errors) { + List<Node> parseFragment(String inputFragment, Element context, + String baseUri, ParseErrorList errors) { // context may be null state = HtmlTreeBuilderState.Initial; initialiseParse(inputFragment, baseUri, errors); @@ -46,42 +63,48 @@ class HtmlTreeBuilder extends TreeBuilder { Element root = null; if (context != null) { - if (context.ownerDocument() != null) // quirks setup: + if (context.ownerDocument() != null) { doc.quirksMode(context.ownerDocument().quirksMode()); + } // initialise the tokeniser state: String contextTag = context.tagName(); - if (StringUtil.in(contextTag, "title", "textarea")) + if (StringUtil.in(contextTag, "title", "textarea")) { tokeniser.transition(TokeniserState.Rcdata); - else if (StringUtil.in(contextTag, "iframe", "noembed", "noframes", "style", "xmp")) + } else if (StringUtil.in(contextTag, "iframe", "noembed", + "noframes", "style", "xmp")) { tokeniser.transition(TokeniserState.Rawtext); - else if (contextTag.equals("script")) + } else if (contextTag.equals("script")) { tokeniser.transition(TokeniserState.ScriptData); - else if (contextTag.equals(("noscript"))) - tokeniser.transition(TokeniserState.Data); // if scripting enabled, rawtext - else if (contextTag.equals("plaintext")) + } else if (contextTag.equals(("noscript"))) { + tokeniser.transition(TokeniserState.Data); // if scripting + // enabled, rawtext + } else if (contextTag.equals("plaintext")) { tokeniser.transition(TokeniserState.Data); - else + } else { tokeniser.transition(TokeniserState.Data); // default + } root = new Element(Tag.valueOf("html"), baseUri); doc.appendChild(root); stack.push(root); resetInsertionMode(); - // todo: setup form element to nearest form on context (up ancestor chain) + // todo: setup form element to nearest form on context (up ancestor + // chain) } runParser(); - if (context != null) + if (context != null) { return root.childNodes(); - else + } else { return doc.childNodes(); + } } @Override protected boolean process(Token token) { currentToken = token; - return this.state.process(token, this); + return state.process(token, this); } boolean process(Token token, HtmlTreeBuilderState state) { @@ -122,14 +145,17 @@ class HtmlTreeBuilder extends TreeBuilder { } void maybeSetBaseUri(Element base) { - if (baseUriSetFromDoc) // only listen to the first <base href> in parse + if (baseUriSetFromDoc) { return; + } String href = base.absUrl("href"); if (href.length() != 0) { // ignore <base target> etc baseUri = href; baseUriSetFromDoc = true; - doc.setBaseUri(href); // set on the doc so doc.createElement(Tag) will get updated base, and to update all descendants + doc.setBaseUri(href); // set on the doc so doc.createElement(Tag) + // will get updated base, and to update all + // descendants } } @@ -138,20 +164,26 @@ class HtmlTreeBuilder extends TreeBuilder { } void error(HtmlTreeBuilderState state) { - if (errors.canAddError()) - errors.add(new ParseError(reader.pos(), "Unexpected token [%s] when in state [%s]", currentToken.tokenType(), state)); + if (errors.canAddError()) { + errors.add(new ParseError(reader.pos(), + "Unexpected token [%s] when in state [%s]", currentToken + .tokenType(), state)); + } } Element insert(Token.StartTag startTag) { // handle empty unknown tags - // when the spec expects an empty tag, will directly hit insertEmpty, so won't generate fake end tag. + // when the spec expects an empty tag, will directly hit insertEmpty, so + // won't generate fake end tag. if (startTag.isSelfClosing() && !Tag.isKnownTag(startTag.name())) { Element el = insertEmpty(startTag); - process(new Token.EndTag(el.tagName())); // ensure we get out of whatever state we are in + process(new Token.EndTag(el.tagName())); // ensure we get out of + // whatever state we are in return el; } - - Element el = new Element(Tag.valueOf(startTag.name()), baseUri, startTag.attributes); + + Element el = new Element(Tag.valueOf(startTag.name()), baseUri, + startTag.attributes); insert(el); return el; } @@ -173,8 +205,9 @@ class HtmlTreeBuilder extends TreeBuilder { insertNode(el); if (startTag.isSelfClosing()) { tokeniser.acknowledgeSelfClosingFlag(); - if (!tag.isKnownTag()) // unknown tag, remember this is self closing for output + if (!tag.isKnownTag()) { tag.setSelfClosing(); + } } return el; } @@ -187,29 +220,37 @@ class HtmlTreeBuilder extends TreeBuilder { void insert(Token.Character characterToken) { Node node; // characters in script and style go in as datanodes, not text nodes - if (StringUtil.in(currentElement().tagName(), "script", "style")) + if (StringUtil.in(currentElement().tagName(), "script", "style")) { node = new DataNode(characterToken.getData(), baseUri); - else + } else { node = new TextNode(characterToken.getData(), baseUri); - currentElement().appendChild(node); // doesn't use insertNode, because we don't foster these; and will always have a stack. + } + currentElement().appendChild(node); // doesn't use insertNode, because + // we don't foster these; and will + // always have a stack. } private void insertNode(Node node) { - // if the stack hasn't been set up yet, elements (doctype, comments) go into the doc - if (stack.size() == 0) + // if the stack hasn't been set up yet, elements (doctype, comments) go + // into the doc + if (stack.size() == 0) { doc.appendChild(node); - else if (isFosterInserts()) + } else if (isFosterInserts()) { insertInFosterParent(node); - else + } else { currentElement().appendChild(node); + } } Element pop() { // todo - dev, remove validation check - if (stack.peekLast().nodeName().equals("td") && !state.name().equals("InCell")) + if (stack.peekLast().nodeName().equals("td") + && !state.name().equals("InCell")) { Validate.isFalse(true, "pop td not in cell"); - if (stack.peekLast().nodeName().equals("html")) + } + if (stack.peekLast().nodeName().equals("html")) { Validate.isFalse(true, "popping html!"); + } return stack.pollLast(); } @@ -225,7 +266,8 @@ class HtmlTreeBuilder extends TreeBuilder { return isElementInQueue(stack, el); } - private boolean isElementInQueue(DescendableLinkedList<Element> queue, Element element) { + private boolean isElementInQueue(DescendableLinkedList<Element> queue, + Element element) { Iterator<Element> it = queue.descendingIterator(); while (it.hasNext()) { Element next = it.next(); @@ -313,10 +355,12 @@ class HtmlTreeBuilder extends TreeBuilder { Iterator<Element> it = stack.descendingIterator(); while (it.hasNext()) { Element next = it.next(); - if (StringUtil.in(next.nodeName(), nodeNames) || next.nodeName().equals("html")) + if (StringUtil.in(next.nodeName(), nodeNames) + || next.nodeName().equals("html")) { break; - else + } else { it.remove(); + } } } @@ -335,14 +379,15 @@ class HtmlTreeBuilder extends TreeBuilder { void insertOnStackAfter(Element after, Element in) { int i = stack.lastIndexOf(after); Validate.isTrue(i != -1); - stack.add(i+1, in); + stack.add(i + 1, in); } void replaceOnStack(Element out, Element in) { replaceInQueue(stack, out, in); } - private void replaceInQueue(LinkedList<Element> queue, Element out, Element in) { + private void replaceInQueue(LinkedList<Element> queue, Element out, + Element in) { int i = queue.lastIndexOf(out); Validate.isTrue(i != -1); queue.remove(i); @@ -368,7 +413,8 @@ class HtmlTreeBuilder extends TreeBuilder { } else if ("tr".equals(name)) { transition(HtmlTreeBuilderState.InRow); break; - } else if ("tbody".equals(name) || "thead".equals(name) || "tfoot".equals(name)) { + } else if ("tbody".equals(name) || "thead".equals(name) + || "tfoot".equals(name)) { transition(HtmlTreeBuilderState.InTableBody); break; } else if ("caption".equals(name)) { @@ -400,28 +446,35 @@ class HtmlTreeBuilder extends TreeBuilder { } // todo: tidy up in specific scope methods - private boolean inSpecificScope(String targetName, String[] baseTypes, String[] extraTypes) { - return inSpecificScope(new String[]{targetName}, baseTypes, extraTypes); + private boolean inSpecificScope(String targetName, String[] baseTypes, + String[] extraTypes) { + return inSpecificScope(new String[] { targetName }, baseTypes, + extraTypes); } - private boolean inSpecificScope(String[] targetNames, String[] baseTypes, String[] extraTypes) { + private boolean inSpecificScope(String[] targetNames, String[] baseTypes, + String[] extraTypes) { Iterator<Element> it = stack.descendingIterator(); while (it.hasNext()) { Element el = it.next(); String elName = el.nodeName(); - if (StringUtil.in(elName, targetNames)) + if (StringUtil.in(elName, targetNames)) { return true; - if (StringUtil.in(elName, baseTypes)) + } + if (StringUtil.in(elName, baseTypes)) { return false; - if (extraTypes != null && StringUtil.in(elName, extraTypes)) + } + if (extraTypes != null && StringUtil.in(elName, extraTypes)) { return false; + } } Validate.fail("Should not be reachable"); return false; } boolean inScope(String[] targetNames) { - return inSpecificScope(targetNames, new String[]{"applet", "caption", "html", "table", "td", "th", "marquee", "object"}, null); + return inSpecificScope(targetNames, new String[] { "applet", "caption", + "html", "table", "td", "th", "marquee", "object" }, null); } boolean inScope(String targetName) { @@ -429,21 +482,23 @@ class HtmlTreeBuilder extends TreeBuilder { } boolean inScope(String targetName, String[] extras) { - return inSpecificScope(targetName, new String[]{"applet", "caption", "html", "table", "td", "th", "marquee", "object"}, extras); + return inSpecificScope(targetName, new String[] { "applet", "caption", + "html", "table", "td", "th", "marquee", "object" }, extras); // todo: in mathml namespace: mi, mo, mn, ms, mtext annotation-xml // todo: in svg namespace: forignOjbect, desc, title } boolean inListItemScope(String targetName) { - return inScope(targetName, new String[]{"ol", "ul"}); + return inScope(targetName, new String[] { "ol", "ul" }); } boolean inButtonScope(String targetName) { - return inScope(targetName, new String[]{"button"}); + return inScope(targetName, new String[] { "button" }); } boolean inTableScope(String targetName) { - return inSpecificScope(targetName, new String[]{"html", "table"}, null); + return inSpecificScope(targetName, new String[] { "html", "table" }, + null); } boolean inSelectScope(String targetName) { @@ -451,10 +506,12 @@ class HtmlTreeBuilder extends TreeBuilder { while (it.hasNext()) { Element el = it.next(); String elName = el.nodeName(); - if (elName.equals(targetName)) + if (elName.equals(targetName)) { return true; - if (!StringUtil.in(elName, "optgroup", "option")) // all elements except + } + if (!StringUtil.in(elName, "optgroup", "option")) { return false; + } } Validate.fail("Should not be reachable"); return false; @@ -497,18 +554,26 @@ class HtmlTreeBuilder extends TreeBuilder { } /** - 11.2.5.2 Closing elements that have implied end tags<p/> - When the steps below require the UA to generate implied end tags, then, while the current node is a dd element, a - dt element, an li element, an option element, an optgroup element, a p element, an rp element, or an rt element, - the UA must pop the current node off the stack of open elements. - - @param excludeTag If a step requires the UA to generate implied end tags but lists an element to exclude from the - process, then the UA must perform the above steps as if that element was not in the above list. + * 11.2.5.2 Closing elements that have implied end tags + * <p/> + * When the steps below require the UA to generate implied end tags, then, + * while the current node is a dd element, a dt element, an li element, an + * option element, an optgroup element, a p element, an rp element, or an rt + * element, the UA must pop the current node off the stack of open elements. + * + * @param excludeTag + * If a step requires the UA to generate implied end tags but + * lists an element to exclude from the process, then the UA must + * perform the above steps as if that element was not in the + * above list. */ void generateImpliedEndTags(String excludeTag) { - while ((excludeTag != null && !currentElement().nodeName().equals(excludeTag)) && - StringUtil.in(currentElement().nodeName(), "dd", "dt", "li", "option", "optgroup", "p", "rp", "rt")) + while ((excludeTag != null && !currentElement().nodeName().equals( + excludeTag)) + && StringUtil.in(currentElement().nodeName(), "dd", "dt", "li", + "option", "optgroup", "p", "rp", "rt")) { pop(); + } } void generateImpliedEndTags() { @@ -519,14 +584,18 @@ class HtmlTreeBuilder extends TreeBuilder { // todo: mathml's mi, mo, mn // todo: svg's foreigObject, desc, title String name = el.nodeName(); - return StringUtil.in(name, "address", "applet", "area", "article", "aside", "base", "basefont", "bgsound", - "blockquote", "body", "br", "button", "caption", "center", "col", "colgroup", "command", "dd", - "details", "dir", "div", "dl", "dt", "embed", "fieldset", "figcaption", "figure", "footer", "form", - "frame", "frameset", "h1", "h2", "h3", "h4", "h5", "h6", "head", "header", "hgroup", "hr", "html", - "iframe", "img", "input", "isindex", "li", "link", "listing", "marquee", "menu", "meta", "nav", - "noembed", "noframes", "noscript", "object", "ol", "p", "param", "plaintext", "pre", "script", - "section", "select", "style", "summary", "table", "tbody", "td", "textarea", "tfoot", "th", "thead", - "title", "tr", "ul", "wbr", "xmp"); + return StringUtil.in(name, "address", "applet", "area", "article", + "aside", "base", "basefont", "bgsound", "blockquote", "body", + "br", "button", "caption", "center", "col", "colgroup", + "command", "dd", "details", "dir", "div", "dl", "dt", "embed", + "fieldset", "figcaption", "figure", "footer", "form", "frame", + "frameset", "h1", "h2", "h3", "h4", "h5", "h6", "head", + "header", "hgroup", "hr", "html", "iframe", "img", "input", + "isindex", "li", "link", "listing", "marquee", "menu", "meta", + "nav", "noembed", "noframes", "noscript", "object", "ol", "p", + "param", "plaintext", "pre", "script", "section", "select", + "style", "summary", "table", "tbody", "td", "textarea", + "tfoot", "th", "thead", "title", "tr", "ul", "wbr", "xmp"); } // active formatting elements @@ -534,12 +603,14 @@ class HtmlTreeBuilder extends TreeBuilder { int numSeen = 0; Iterator<Element> iter = formattingElements.descendingIterator(); while (iter.hasNext()) { - Element el = iter.next(); - if (el == null) // marker + Element el = iter.next(); + if (el == null) { break; + } - if (isSameFormattingElement(in, el)) + if (isSameFormattingElement(in, el)) { numSeen++; + } if (numSeen == 3) { iter.remove(); @@ -550,17 +621,20 @@ class HtmlTreeBuilder extends TreeBuilder { } private boolean isSameFormattingElement(Element a, Element b) { - // same if: same namespace, tag, and attributes. Element.equals only checks tag, might in future check children + // same if: same namespace, tag, and attributes. Element.equals only + // checks tag, might in future check children return a.nodeName().equals(b.nodeName()) && - // a.namespace().equals(b.namespace()) && + // a.namespace().equals(b.namespace()) && a.attributes().equals(b.attributes()); // todo: namespaces } void reconstructFormattingElements() { int size = formattingElements.size(); - if (size == 0 || formattingElements.getLast() == null || onStack(formattingElements.getLast())) + if (size == 0 || formattingElements.getLast() == null + || onStack(formattingElements.getLast())) { return; + } Element entry = formattingElements.getLast(); int pos = size - 1; @@ -570,18 +644,24 @@ class HtmlTreeBuilder extends TreeBuilder { skip = true; break; } - entry = formattingElements.get(--pos); // step 5. one earlier than entry - if (entry == null || onStack(entry)) // step 6 - neither marker nor on stack + entry = formattingElements.get(--pos); // step 5. one earlier than + // entry + if (entry == null || onStack(entry)) { break; // jump to 8, else continue back to 4 + } } - while(true) { - if (!skip) // step 7: on later than entry + while (true) { + if (!skip) { entry = formattingElements.get(++pos); - Validate.notNull(entry); // should not occur, as we break at last element + } + Validate.notNull(entry); // should not occur, as we break at last + // element - // 8. create new element from element, 9 insert into current node, onto stack + // 8. create new element from element, 9 insert into current node, + // onto stack skip = false; // can only skip increment from 4. - Element newEl = insert(entry.nodeName()); // todo: avoid fostering here? + Element newEl = insert(entry.nodeName()); // todo: avoid fostering + // here? // newEl.namespace(entry.namespace()); // todo: namespaces newEl.attributes().addAll(entry.attributes()); @@ -590,8 +670,9 @@ class HtmlTreeBuilder extends TreeBuilder { formattingElements.remove(pos + 1); // 11 - if (pos == size-1) // if not last entry in list, jump to 7 + if (pos == size - 1) { break; + } } } @@ -599,8 +680,9 @@ class HtmlTreeBuilder extends TreeBuilder { while (!formattingElements.isEmpty()) { Element el = formattingElements.peekLast(); formattingElements.removeLast(); - if (el == null) + if (el == null) { break; + } } } @@ -623,10 +705,11 @@ class HtmlTreeBuilder extends TreeBuilder { Iterator<Element> it = formattingElements.descendingIterator(); while (it.hasNext()) { Element next = it.next(); - if (next == null) // scope marker + if (next == null) { break; - else if (next.nodeName().equals(nodeName)) + } else if (next.nodeName().equals(nodeName)) { return next; + } } return null; } @@ -647,26 +730,25 @@ class HtmlTreeBuilder extends TreeBuilder { if (lastTable.parent() != null) { fosterParent = lastTable.parent(); isLastTableParent = true; - } else + } else { fosterParent = aboveOnStack(lastTable); + } } else { // no table == frag fosterParent = stack.get(0); } if (isLastTableParent) { - Validate.notNull(lastTable); // last table cannot be null by this point. + Validate.notNull(lastTable); // last table cannot be null by this + // point. lastTable.before(in); - } - else + } else { fosterParent.appendChild(in); + } } @Override public String toString() { - return "TreeBuilder{" + - "currentToken=" + currentToken + - ", state=" + state + - ", currentElement=" + currentElement() + - '}'; + return "TreeBuilder{" + "currentToken=" + currentToken + ", state=" + + state + ", currentElement=" + currentElement() + '}'; } } |