aboutsummaryrefslogtreecommitdiffstats
path: root/server/src/org/jsoup/parser/HtmlTreeBuilder.java
diff options
context:
space:
mode:
authorLeif Åstrand <leif@vaadin.com>2012-09-05 19:50:56 +0300
committerLeif Åstrand <leif@vaadin.com>2012-09-05 19:51:15 +0300
commit7d25670284b11c7c62ba25183f265227cb3dba83 (patch)
treec8e76eb70dd3cdd5cf59a99419635f2188b25c24 /server/src/org/jsoup/parser/HtmlTreeBuilder.java
parent1d0c96de9595c243d88471476d21e5f248be63f7 (diff)
downloadvaadin-framework-7d25670284b11c7c62ba25183f265227cb3dba83.tar.gz
vaadin-framework-7d25670284b11c7c62ba25183f265227cb3dba83.zip
Reformat project
Diffstat (limited to 'server/src/org/jsoup/parser/HtmlTreeBuilder.java')
-rw-r--r--server/src/org/jsoup/parser/HtmlTreeBuilder.java286
1 files changed, 184 insertions, 102 deletions
diff --git a/server/src/org/jsoup/parser/HtmlTreeBuilder.java b/server/src/org/jsoup/parser/HtmlTreeBuilder.java
index 457a4c3249..f09ab8794c 100644
--- a/server/src/org/jsoup/parser/HtmlTreeBuilder.java
+++ b/server/src/org/jsoup/parser/HtmlTreeBuilder.java
@@ -1,15 +1,20 @@
package org.jsoup.parser;
-import org.jsoup.helper.DescendableLinkedList;
-import org.jsoup.helper.StringUtil;
-import org.jsoup.helper.Validate;
-import org.jsoup.nodes.*;
-
import java.util.ArrayList;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
+import org.jsoup.helper.DescendableLinkedList;
+import org.jsoup.helper.StringUtil;
+import org.jsoup.helper.Validate;
+import org.jsoup.nodes.Comment;
+import org.jsoup.nodes.DataNode;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.nodes.Node;
+import org.jsoup.nodes.TextNode;
+
/**
* HTML Tree Builder; creates a DOM from Tokens.
*/
@@ -21,15 +26,26 @@ class HtmlTreeBuilder extends TreeBuilder {
private boolean baseUriSetFromDoc = false;
private Element headElement; // the current head element
private Element formElement; // the current form element
- private Element contextElement; // fragment parse context -- could be null even if fragment parsing
- private DescendableLinkedList<Element> formattingElements = new DescendableLinkedList<Element>(); // active (open) formatting elements
- private List<Token.Character> pendingTableCharacters = new ArrayList<Token.Character>(); // chars in table to be shifted out
+ private Element contextElement; // fragment parse context -- could be null
+ // even if fragment parsing
+ private DescendableLinkedList<Element> formattingElements = new DescendableLinkedList<Element>(); // active
+ // (open)
+ // formatting
+ // elements
+ private List<Token.Character> pendingTableCharacters = new ArrayList<Token.Character>(); // chars
+ // in
+ // table
+ // to
+ // be
+ // shifted
+ // out
private boolean framesetOk = true; // if ok to go into frameset
private boolean fosterInserts = false; // if next inserts should be fostered
private boolean fragmentParsing = false; // if parsing a fragment of html
- HtmlTreeBuilder() {}
+ HtmlTreeBuilder() {
+ }
@Override
Document parse(String input, String baseUri, ParseErrorList errors) {
@@ -37,7 +53,8 @@ class HtmlTreeBuilder extends TreeBuilder {
return super.parse(input, baseUri, errors);
}
- List<Node> parseFragment(String inputFragment, Element context, String baseUri, ParseErrorList errors) {
+ List<Node> parseFragment(String inputFragment, Element context,
+ String baseUri, ParseErrorList errors) {
// context may be null
state = HtmlTreeBuilderState.Initial;
initialiseParse(inputFragment, baseUri, errors);
@@ -46,42 +63,48 @@ class HtmlTreeBuilder extends TreeBuilder {
Element root = null;
if (context != null) {
- if (context.ownerDocument() != null) // quirks setup:
+ if (context.ownerDocument() != null) {
doc.quirksMode(context.ownerDocument().quirksMode());
+ }
// initialise the tokeniser state:
String contextTag = context.tagName();
- if (StringUtil.in(contextTag, "title", "textarea"))
+ if (StringUtil.in(contextTag, "title", "textarea")) {
tokeniser.transition(TokeniserState.Rcdata);
- else if (StringUtil.in(contextTag, "iframe", "noembed", "noframes", "style", "xmp"))
+ } else if (StringUtil.in(contextTag, "iframe", "noembed",
+ "noframes", "style", "xmp")) {
tokeniser.transition(TokeniserState.Rawtext);
- else if (contextTag.equals("script"))
+ } else if (contextTag.equals("script")) {
tokeniser.transition(TokeniserState.ScriptData);
- else if (contextTag.equals(("noscript")))
- tokeniser.transition(TokeniserState.Data); // if scripting enabled, rawtext
- else if (contextTag.equals("plaintext"))
+ } else if (contextTag.equals(("noscript"))) {
+ tokeniser.transition(TokeniserState.Data); // if scripting
+ // enabled, rawtext
+ } else if (contextTag.equals("plaintext")) {
tokeniser.transition(TokeniserState.Data);
- else
+ } else {
tokeniser.transition(TokeniserState.Data); // default
+ }
root = new Element(Tag.valueOf("html"), baseUri);
doc.appendChild(root);
stack.push(root);
resetInsertionMode();
- // todo: setup form element to nearest form on context (up ancestor chain)
+ // todo: setup form element to nearest form on context (up ancestor
+ // chain)
}
runParser();
- if (context != null)
+ if (context != null) {
return root.childNodes();
- else
+ } else {
return doc.childNodes();
+ }
}
@Override
protected boolean process(Token token) {
currentToken = token;
- return this.state.process(token, this);
+ return state.process(token, this);
}
boolean process(Token token, HtmlTreeBuilderState state) {
@@ -122,14 +145,17 @@ class HtmlTreeBuilder extends TreeBuilder {
}
void maybeSetBaseUri(Element base) {
- if (baseUriSetFromDoc) // only listen to the first <base href> in parse
+ if (baseUriSetFromDoc) {
return;
+ }
String href = base.absUrl("href");
if (href.length() != 0) { // ignore <base target> etc
baseUri = href;
baseUriSetFromDoc = true;
- doc.setBaseUri(href); // set on the doc so doc.createElement(Tag) will get updated base, and to update all descendants
+ doc.setBaseUri(href); // set on the doc so doc.createElement(Tag)
+ // will get updated base, and to update all
+ // descendants
}
}
@@ -138,20 +164,26 @@ class HtmlTreeBuilder extends TreeBuilder {
}
void error(HtmlTreeBuilderState state) {
- if (errors.canAddError())
- errors.add(new ParseError(reader.pos(), "Unexpected token [%s] when in state [%s]", currentToken.tokenType(), state));
+ if (errors.canAddError()) {
+ errors.add(new ParseError(reader.pos(),
+ "Unexpected token [%s] when in state [%s]", currentToken
+ .tokenType(), state));
+ }
}
Element insert(Token.StartTag startTag) {
// handle empty unknown tags
- // when the spec expects an empty tag, will directly hit insertEmpty, so won't generate fake end tag.
+ // when the spec expects an empty tag, will directly hit insertEmpty, so
+ // won't generate fake end tag.
if (startTag.isSelfClosing() && !Tag.isKnownTag(startTag.name())) {
Element el = insertEmpty(startTag);
- process(new Token.EndTag(el.tagName())); // ensure we get out of whatever state we are in
+ process(new Token.EndTag(el.tagName())); // ensure we get out of
+ // whatever state we are in
return el;
}
-
- Element el = new Element(Tag.valueOf(startTag.name()), baseUri, startTag.attributes);
+
+ Element el = new Element(Tag.valueOf(startTag.name()), baseUri,
+ startTag.attributes);
insert(el);
return el;
}
@@ -173,8 +205,9 @@ class HtmlTreeBuilder extends TreeBuilder {
insertNode(el);
if (startTag.isSelfClosing()) {
tokeniser.acknowledgeSelfClosingFlag();
- if (!tag.isKnownTag()) // unknown tag, remember this is self closing for output
+ if (!tag.isKnownTag()) {
tag.setSelfClosing();
+ }
}
return el;
}
@@ -187,29 +220,37 @@ class HtmlTreeBuilder extends TreeBuilder {
void insert(Token.Character characterToken) {
Node node;
// characters in script and style go in as datanodes, not text nodes
- if (StringUtil.in(currentElement().tagName(), "script", "style"))
+ if (StringUtil.in(currentElement().tagName(), "script", "style")) {
node = new DataNode(characterToken.getData(), baseUri);
- else
+ } else {
node = new TextNode(characterToken.getData(), baseUri);
- currentElement().appendChild(node); // doesn't use insertNode, because we don't foster these; and will always have a stack.
+ }
+ currentElement().appendChild(node); // doesn't use insertNode, because
+ // we don't foster these; and will
+ // always have a stack.
}
private void insertNode(Node node) {
- // if the stack hasn't been set up yet, elements (doctype, comments) go into the doc
- if (stack.size() == 0)
+ // if the stack hasn't been set up yet, elements (doctype, comments) go
+ // into the doc
+ if (stack.size() == 0) {
doc.appendChild(node);
- else if (isFosterInserts())
+ } else if (isFosterInserts()) {
insertInFosterParent(node);
- else
+ } else {
currentElement().appendChild(node);
+ }
}
Element pop() {
// todo - dev, remove validation check
- if (stack.peekLast().nodeName().equals("td") && !state.name().equals("InCell"))
+ if (stack.peekLast().nodeName().equals("td")
+ && !state.name().equals("InCell")) {
Validate.isFalse(true, "pop td not in cell");
- if (stack.peekLast().nodeName().equals("html"))
+ }
+ if (stack.peekLast().nodeName().equals("html")) {
Validate.isFalse(true, "popping html!");
+ }
return stack.pollLast();
}
@@ -225,7 +266,8 @@ class HtmlTreeBuilder extends TreeBuilder {
return isElementInQueue(stack, el);
}
- private boolean isElementInQueue(DescendableLinkedList<Element> queue, Element element) {
+ private boolean isElementInQueue(DescendableLinkedList<Element> queue,
+ Element element) {
Iterator<Element> it = queue.descendingIterator();
while (it.hasNext()) {
Element next = it.next();
@@ -313,10 +355,12 @@ class HtmlTreeBuilder extends TreeBuilder {
Iterator<Element> it = stack.descendingIterator();
while (it.hasNext()) {
Element next = it.next();
- if (StringUtil.in(next.nodeName(), nodeNames) || next.nodeName().equals("html"))
+ if (StringUtil.in(next.nodeName(), nodeNames)
+ || next.nodeName().equals("html")) {
break;
- else
+ } else {
it.remove();
+ }
}
}
@@ -335,14 +379,15 @@ class HtmlTreeBuilder extends TreeBuilder {
void insertOnStackAfter(Element after, Element in) {
int i = stack.lastIndexOf(after);
Validate.isTrue(i != -1);
- stack.add(i+1, in);
+ stack.add(i + 1, in);
}
void replaceOnStack(Element out, Element in) {
replaceInQueue(stack, out, in);
}
- private void replaceInQueue(LinkedList<Element> queue, Element out, Element in) {
+ private void replaceInQueue(LinkedList<Element> queue, Element out,
+ Element in) {
int i = queue.lastIndexOf(out);
Validate.isTrue(i != -1);
queue.remove(i);
@@ -368,7 +413,8 @@ class HtmlTreeBuilder extends TreeBuilder {
} else if ("tr".equals(name)) {
transition(HtmlTreeBuilderState.InRow);
break;
- } else if ("tbody".equals(name) || "thead".equals(name) || "tfoot".equals(name)) {
+ } else if ("tbody".equals(name) || "thead".equals(name)
+ || "tfoot".equals(name)) {
transition(HtmlTreeBuilderState.InTableBody);
break;
} else if ("caption".equals(name)) {
@@ -400,28 +446,35 @@ class HtmlTreeBuilder extends TreeBuilder {
}
// todo: tidy up in specific scope methods
- private boolean inSpecificScope(String targetName, String[] baseTypes, String[] extraTypes) {
- return inSpecificScope(new String[]{targetName}, baseTypes, extraTypes);
+ private boolean inSpecificScope(String targetName, String[] baseTypes,
+ String[] extraTypes) {
+ return inSpecificScope(new String[] { targetName }, baseTypes,
+ extraTypes);
}
- private boolean inSpecificScope(String[] targetNames, String[] baseTypes, String[] extraTypes) {
+ private boolean inSpecificScope(String[] targetNames, String[] baseTypes,
+ String[] extraTypes) {
Iterator<Element> it = stack.descendingIterator();
while (it.hasNext()) {
Element el = it.next();
String elName = el.nodeName();
- if (StringUtil.in(elName, targetNames))
+ if (StringUtil.in(elName, targetNames)) {
return true;
- if (StringUtil.in(elName, baseTypes))
+ }
+ if (StringUtil.in(elName, baseTypes)) {
return false;
- if (extraTypes != null && StringUtil.in(elName, extraTypes))
+ }
+ if (extraTypes != null && StringUtil.in(elName, extraTypes)) {
return false;
+ }
}
Validate.fail("Should not be reachable");
return false;
}
boolean inScope(String[] targetNames) {
- return inSpecificScope(targetNames, new String[]{"applet", "caption", "html", "table", "td", "th", "marquee", "object"}, null);
+ return inSpecificScope(targetNames, new String[] { "applet", "caption",
+ "html", "table", "td", "th", "marquee", "object" }, null);
}
boolean inScope(String targetName) {
@@ -429,21 +482,23 @@ class HtmlTreeBuilder extends TreeBuilder {
}
boolean inScope(String targetName, String[] extras) {
- return inSpecificScope(targetName, new String[]{"applet", "caption", "html", "table", "td", "th", "marquee", "object"}, extras);
+ return inSpecificScope(targetName, new String[] { "applet", "caption",
+ "html", "table", "td", "th", "marquee", "object" }, extras);
// todo: in mathml namespace: mi, mo, mn, ms, mtext annotation-xml
// todo: in svg namespace: forignOjbect, desc, title
}
boolean inListItemScope(String targetName) {
- return inScope(targetName, new String[]{"ol", "ul"});
+ return inScope(targetName, new String[] { "ol", "ul" });
}
boolean inButtonScope(String targetName) {
- return inScope(targetName, new String[]{"button"});
+ return inScope(targetName, new String[] { "button" });
}
boolean inTableScope(String targetName) {
- return inSpecificScope(targetName, new String[]{"html", "table"}, null);
+ return inSpecificScope(targetName, new String[] { "html", "table" },
+ null);
}
boolean inSelectScope(String targetName) {
@@ -451,10 +506,12 @@ class HtmlTreeBuilder extends TreeBuilder {
while (it.hasNext()) {
Element el = it.next();
String elName = el.nodeName();
- if (elName.equals(targetName))
+ if (elName.equals(targetName)) {
return true;
- if (!StringUtil.in(elName, "optgroup", "option")) // all elements except
+ }
+ if (!StringUtil.in(elName, "optgroup", "option")) {
return false;
+ }
}
Validate.fail("Should not be reachable");
return false;
@@ -497,18 +554,26 @@ class HtmlTreeBuilder extends TreeBuilder {
}
/**
- 11.2.5.2 Closing elements that have implied end tags<p/>
- When the steps below require the UA to generate implied end tags, then, while the current node is a dd element, a
- dt element, an li element, an option element, an optgroup element, a p element, an rp element, or an rt element,
- the UA must pop the current node off the stack of open elements.
-
- @param excludeTag If a step requires the UA to generate implied end tags but lists an element to exclude from the
- process, then the UA must perform the above steps as if that element was not in the above list.
+ * 11.2.5.2 Closing elements that have implied end tags
+ * <p/>
+ * When the steps below require the UA to generate implied end tags, then,
+ * while the current node is a dd element, a dt element, an li element, an
+ * option element, an optgroup element, a p element, an rp element, or an rt
+ * element, the UA must pop the current node off the stack of open elements.
+ *
+ * @param excludeTag
+ * If a step requires the UA to generate implied end tags but
+ * lists an element to exclude from the process, then the UA must
+ * perform the above steps as if that element was not in the
+ * above list.
*/
void generateImpliedEndTags(String excludeTag) {
- while ((excludeTag != null && !currentElement().nodeName().equals(excludeTag)) &&
- StringUtil.in(currentElement().nodeName(), "dd", "dt", "li", "option", "optgroup", "p", "rp", "rt"))
+ while ((excludeTag != null && !currentElement().nodeName().equals(
+ excludeTag))
+ && StringUtil.in(currentElement().nodeName(), "dd", "dt", "li",
+ "option", "optgroup", "p", "rp", "rt")) {
pop();
+ }
}
void generateImpliedEndTags() {
@@ -519,14 +584,18 @@ class HtmlTreeBuilder extends TreeBuilder {
// todo: mathml's mi, mo, mn
// todo: svg's foreigObject, desc, title
String name = el.nodeName();
- return StringUtil.in(name, "address", "applet", "area", "article", "aside", "base", "basefont", "bgsound",
- "blockquote", "body", "br", "button", "caption", "center", "col", "colgroup", "command", "dd",
- "details", "dir", "div", "dl", "dt", "embed", "fieldset", "figcaption", "figure", "footer", "form",
- "frame", "frameset", "h1", "h2", "h3", "h4", "h5", "h6", "head", "header", "hgroup", "hr", "html",
- "iframe", "img", "input", "isindex", "li", "link", "listing", "marquee", "menu", "meta", "nav",
- "noembed", "noframes", "noscript", "object", "ol", "p", "param", "plaintext", "pre", "script",
- "section", "select", "style", "summary", "table", "tbody", "td", "textarea", "tfoot", "th", "thead",
- "title", "tr", "ul", "wbr", "xmp");
+ return StringUtil.in(name, "address", "applet", "area", "article",
+ "aside", "base", "basefont", "bgsound", "blockquote", "body",
+ "br", "button", "caption", "center", "col", "colgroup",
+ "command", "dd", "details", "dir", "div", "dl", "dt", "embed",
+ "fieldset", "figcaption", "figure", "footer", "form", "frame",
+ "frameset", "h1", "h2", "h3", "h4", "h5", "h6", "head",
+ "header", "hgroup", "hr", "html", "iframe", "img", "input",
+ "isindex", "li", "link", "listing", "marquee", "menu", "meta",
+ "nav", "noembed", "noframes", "noscript", "object", "ol", "p",
+ "param", "plaintext", "pre", "script", "section", "select",
+ "style", "summary", "table", "tbody", "td", "textarea",
+ "tfoot", "th", "thead", "title", "tr", "ul", "wbr", "xmp");
}
// active formatting elements
@@ -534,12 +603,14 @@ class HtmlTreeBuilder extends TreeBuilder {
int numSeen = 0;
Iterator<Element> iter = formattingElements.descendingIterator();
while (iter.hasNext()) {
- Element el = iter.next();
- if (el == null) // marker
+ Element el = iter.next();
+ if (el == null) {
break;
+ }
- if (isSameFormattingElement(in, el))
+ if (isSameFormattingElement(in, el)) {
numSeen++;
+ }
if (numSeen == 3) {
iter.remove();
@@ -550,17 +621,20 @@ class HtmlTreeBuilder extends TreeBuilder {
}
private boolean isSameFormattingElement(Element a, Element b) {
- // same if: same namespace, tag, and attributes. Element.equals only checks tag, might in future check children
+ // same if: same namespace, tag, and attributes. Element.equals only
+ // checks tag, might in future check children
return a.nodeName().equals(b.nodeName()) &&
- // a.namespace().equals(b.namespace()) &&
+ // a.namespace().equals(b.namespace()) &&
a.attributes().equals(b.attributes());
// todo: namespaces
}
void reconstructFormattingElements() {
int size = formattingElements.size();
- if (size == 0 || formattingElements.getLast() == null || onStack(formattingElements.getLast()))
+ if (size == 0 || formattingElements.getLast() == null
+ || onStack(formattingElements.getLast())) {
return;
+ }
Element entry = formattingElements.getLast();
int pos = size - 1;
@@ -570,18 +644,24 @@ class HtmlTreeBuilder extends TreeBuilder {
skip = true;
break;
}
- entry = formattingElements.get(--pos); // step 5. one earlier than entry
- if (entry == null || onStack(entry)) // step 6 - neither marker nor on stack
+ entry = formattingElements.get(--pos); // step 5. one earlier than
+ // entry
+ if (entry == null || onStack(entry)) {
break; // jump to 8, else continue back to 4
+ }
}
- while(true) {
- if (!skip) // step 7: on later than entry
+ while (true) {
+ if (!skip) {
entry = formattingElements.get(++pos);
- Validate.notNull(entry); // should not occur, as we break at last element
+ }
+ Validate.notNull(entry); // should not occur, as we break at last
+ // element
- // 8. create new element from element, 9 insert into current node, onto stack
+ // 8. create new element from element, 9 insert into current node,
+ // onto stack
skip = false; // can only skip increment from 4.
- Element newEl = insert(entry.nodeName()); // todo: avoid fostering here?
+ Element newEl = insert(entry.nodeName()); // todo: avoid fostering
+ // here?
// newEl.namespace(entry.namespace()); // todo: namespaces
newEl.attributes().addAll(entry.attributes());
@@ -590,8 +670,9 @@ class HtmlTreeBuilder extends TreeBuilder {
formattingElements.remove(pos + 1);
// 11
- if (pos == size-1) // if not last entry in list, jump to 7
+ if (pos == size - 1) {
break;
+ }
}
}
@@ -599,8 +680,9 @@ class HtmlTreeBuilder extends TreeBuilder {
while (!formattingElements.isEmpty()) {
Element el = formattingElements.peekLast();
formattingElements.removeLast();
- if (el == null)
+ if (el == null) {
break;
+ }
}
}
@@ -623,10 +705,11 @@ class HtmlTreeBuilder extends TreeBuilder {
Iterator<Element> it = formattingElements.descendingIterator();
while (it.hasNext()) {
Element next = it.next();
- if (next == null) // scope marker
+ if (next == null) {
break;
- else if (next.nodeName().equals(nodeName))
+ } else if (next.nodeName().equals(nodeName)) {
return next;
+ }
}
return null;
}
@@ -647,26 +730,25 @@ class HtmlTreeBuilder extends TreeBuilder {
if (lastTable.parent() != null) {
fosterParent = lastTable.parent();
isLastTableParent = true;
- } else
+ } else {
fosterParent = aboveOnStack(lastTable);
+ }
} else { // no table == frag
fosterParent = stack.get(0);
}
if (isLastTableParent) {
- Validate.notNull(lastTable); // last table cannot be null by this point.
+ Validate.notNull(lastTable); // last table cannot be null by this
+ // point.
lastTable.before(in);
- }
- else
+ } else {
fosterParent.appendChild(in);
+ }
}
@Override
public String toString() {
- return "TreeBuilder{" +
- "currentToken=" + currentToken +
- ", state=" + state +
- ", currentElement=" + currentElement() +
- '}';
+ return "TreeBuilder{" + "currentToken=" + currentToken + ", state="
+ + state + ", currentElement=" + currentElement() + '}';
}
}