14 files changed, 0 insertions, 5705 deletions
diff --git a/src/org/jsoup/parser/CharacterReader.java b/src/org/jsoup/parser/CharacterReader.java
deleted file mode 100644
index b549a571a0..0000000000
--- a/src/org/jsoup/parser/CharacterReader.java
+++ /dev/null
@@ -1,230 +0,0 @@
-package org.jsoup.parser;
-
-import org.jsoup.helper.Validate;
-
-/**
- CharacterReader consumes tokens off a string. To replace the old TokenQueue.
- <p>
- Carriage returns in the input are normalised to newlines on construction. The read position may legally sit past the
- end of the input (see {@link #consume()} and {@link #advance()}); the consuming and matching methods treat that as
- end of input rather than failing.
- */
-class CharacterReader {
-    /** Sentinel character ('\uFFFF') returned by {@link #current()} and {@link #consume()} at end of input. */
-    static final char EOF = (char) -1;
-
-    private final String input;
-    private final int length;
-    private int pos = 0;
-    private int mark = 0;
-
-    /**
-     Creates a reader over the supplied input.
-     @param input text to read; must not be null (checked via {@code Validate.notNull})
-     */
-    CharacterReader(String input) {
-        Validate.notNull(input);
-        input = input.replaceAll("\r\n?", "\n"); // normalise carriage returns to newlines
-
-        this.input = input;
-        this.length = input.length();
-    }
-
-    /** @return the current read position, as an index into the normalised input */
-    int pos() {
-        return pos;
-    }
-
-    /** @return true if the read position is at or past the end of the input */
-    boolean isEmpty() {
-        return pos >= length;
-    }
-
-    /** @return the character at the read position without consuming it, or {@link #EOF} at end of input */
-    char current() {
-        return isEmpty() ? EOF : input.charAt(pos);
-    }
-
-    /**
-     Reads the character at the read position and moves past it.
-     @return the character read, or {@link #EOF} at end of input
-     */
-    char consume() {
-        char val = current();
-        pos++; // moves even at end of input; unconsume() undoes exactly one step
-        return val;
-    }
-
-    /** Moves the read position back by one. */
-    void unconsume() {
-        pos--;
-    }
-
-    /** Moves the read position forward by one, without reading. */
-    void advance() {
-        pos++;
-    }
-
-    /** Remembers the current read position, for a later {@link #rewindToMark()}. */
-    void mark() {
-        mark = pos;
-    }
-
-    /** Restores the read position saved by the last {@link #mark()} (position 0 if never marked). */
-    void rewindToMark() {
-        pos = mark;
-    }
-
-    /**
-     Consumes a single character and returns it as a String.
-     @return a one character string, or the empty string at end of input (the position still moves by one, as it does
-     for {@link #consume()})
-     */
-    String consumeAsString() {
-        if (isEmpty()) {
-            pos++;
-            return "";
-        }
-        // the begin index is read before the increment, so this is the single character at the old position
-        return input.substring(pos, ++pos);
-    }
-
-    /**
-     Consumes up to, but not including, the next occurrence of a character.
-     @param c character to stop before
-     @return the consumed text; the whole remainder if {@code c} does not occur
-     */
-    String consumeTo(char c) {
-        int offset = input.indexOf(c, pos);
-        if (offset == -1)
-            return consumeToEnd();
-
-        String consumed = input.substring(pos, offset);
-        pos = offset;
-        return consumed;
-    }
-
-    /**
-     Consumes up to, but not including, the next occurrence of a sequence (case sensitive).
-     @param seq sequence to stop before
-     @return the consumed text; the whole remainder if {@code seq} does not occur
-     */
-    String consumeTo(String seq) {
-        int offset = input.indexOf(seq, pos);
-        if (offset == -1)
-            return consumeToEnd();
-
-        String consumed = input.substring(pos, offset);
-        pos = offset;
-        return consumed;
-    }
-
-    /**
-     Consumes up to, but not including, the first of any of the supplied characters.
-     @param seq characters to stop before
-     @return the consumed text, possibly empty
-     */
-    String consumeToAny(char... seq) {
-        int start = pos;
-
-        OUTER: while (!isEmpty()) {
-            char c = input.charAt(pos);
-            for (char seek : seq) {
-                if (seek == c)
-                    break OUTER;
-            }
-            pos++;
-        }
-
-        return consumedSince(start);
-    }
-
-    /**
-     Consumes everything that remains.
-     @return the remaining text; empty (and the position is left untouched) if already at or past the end
-     */
-    String consumeToEnd() {
-        if (isEmpty())
-            return ""; // also covers a position that has run past the end, where substring would throw
-
-        String data = input.substring(pos, length);
-        pos = length;
-        return data;
-    }
-
-    /** @return the run of ASCII letters (A-Z, a-z) at the read position, consumed; possibly empty */
-    String consumeLetterSequence() {
-        int start = pos;
-        skipLetters();
-        return consumedSince(start);
-    }
-
-    /** @return a run of ASCII letters followed by a run of ASCII digits, consumed; either run may be empty */
-    String consumeLetterThenDigitSequence() {
-        int start = pos;
-        skipLetters();
-        skipDigits();
-        return consumedSince(start);
-    }
-
-    /** @return the run of hex digits (0-9, A-F, a-f) at the read position, consumed; possibly empty */
-    String consumeHexSequence() {
-        int start = pos;
-        while (!isEmpty() && isHexDigit(input.charAt(pos)))
-            pos++;
-        return consumedSince(start);
-    }
-
-    /** @return the run of ASCII digits (0-9) at the read position, consumed; possibly empty */
-    String consumeDigitSequence() {
-        int start = pos;
-        skipDigits();
-        return consumedSince(start);
-    }
-
-    /** @return true if the next character is {@code c}; nothing is consumed */
-    boolean matches(char c) {
-        return !isEmpty() && input.charAt(pos) == c;
-    }
-
-    /** @return true if the input at the read position starts with {@code seq} (case sensitive); nothing is consumed */
-    boolean matches(String seq) {
-        return input.startsWith(seq, pos);
-    }
-
-    /** @return true if the input at the read position starts with {@code seq}, ignoring case; nothing is consumed */
-    boolean matchesIgnoreCase(String seq) {
-        return input.regionMatches(true, pos, seq, 0, seq.length());
-    }
-
-    /** @return true if the next character is any of {@code seq}; nothing is consumed */
-    boolean matchesAny(char... seq) {
-        if (isEmpty())
-            return false;
-
-        char c = input.charAt(pos);
-        for (char seek : seq) {
-            if (seek == c)
-                return true;
-        }
-        return false;
-    }
-
-    /** @return true if the next character is an ASCII letter; nothing is consumed */
-    boolean matchesLetter() {
-        return !isEmpty() && isLetter(input.charAt(pos));
-    }
-
-    /** @return true if the next character is an ASCII digit; nothing is consumed */
-    boolean matchesDigit() {
-        return !isEmpty() && isDigit(input.charAt(pos));
-    }
-
-    /**
-     Consumes {@code seq} if the input at the read position starts with it (case sensitive).
-     @return true if matched and consumed; false (position unchanged) otherwise
-     */
-    boolean matchConsume(String seq) {
-        if (!matches(seq))
-            return false;
-        pos += seq.length();
-        return true;
-    }
-
-    /**
-     Consumes {@code seq} if the input at the read position starts with it, ignoring case.
-     @return true if matched and consumed; false (position unchanged) otherwise
-     */
-    boolean matchConsumeIgnoreCase(String seq) {
-        if (!matchesIgnoreCase(seq))
-            return false;
-        pos += seq.length();
-        return true;
-    }
-
-    /**
-     Tests if the remaining input contains {@code seq} in all lower case or all upper case form.
-     @param seq sequence to look for
-     @return true if either form is found at or after the read position
-     */
-    boolean containsIgnoreCase(String seq) {
-        // used to check presence of </title>, </style>. only finds consistent case.
-        // fixed locale: under e.g. a Turkish default locale "title" would upper-case to a dotted capital I
-        String loScan = seq.toLowerCase(java.util.Locale.ENGLISH);
-        String hiScan = seq.toUpperCase(java.util.Locale.ENGLISH);
-        return (input.indexOf(loScan, pos) > -1) || (input.indexOf(hiScan, pos) > -1);
-    }
-
-    /** @return the unconsumed remainder of the input; empty at or past the end */
-    @Override
-    public String toString() {
-        return isEmpty() ? "" : input.substring(pos);
-    }
-
-    // text between start and the read position; guarded so a position already past the end yields "" not an exception
-    private String consumedSince(int start) {
-        return pos > start ? input.substring(start, pos) : "";
-    }
-
-    // moves the read position past any ASCII letters
-    private void skipLetters() {
-        while (!isEmpty() && isLetter(input.charAt(pos)))
-            pos++;
-    }
-
-    // moves the read position past any ASCII digits
-    private void skipDigits() {
-        while (!isEmpty() && isDigit(input.charAt(pos)))
-            pos++;
-    }
-
-    private static boolean isLetter(char c) {
-        return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
-    }
-
-    private static boolean isDigit(char c) {
-        return c >= '0' && c <= '9';
-    }
-
-    private static boolean isHexDigit(char c) {
-        return isDigit(c) || (c >= 'A' && c <= 'F') || (c >= 'a' && c <= 'f');
-    }
-}
diff --git a/src/org/jsoup/parser/HtmlTreeBuilder.java b/src/org/jsoup/parser/HtmlTreeBuilder.java
deleted file mode 100644
index 457a4c3249..0000000000
--- a/src/org/jsoup/parser/HtmlTreeBuilder.java
+++ /dev/null
@@ -1,672 +0,0 @@
-package org.jsoup.parser;
-
-import org.jsoup.helper.DescendableLinkedList;
-import org.jsoup.helper.StringUtil;
-import org.jsoup.helper.Validate;
-import org.jsoup.nodes.*;
-
-import java.util.ArrayList;
-import java.util.Iterator;
-import java.util.LinkedList;
-import java.util.List;
-
-/**
- * HTML Tree Builder; creates a DOM from Tokens.
- */
-class HtmlTreeBuilder extends TreeBuilder {
-
-    private HtmlTreeBuilderState state; // the current state
-    private HtmlTreeBuilderState originalState; // original / marked state
-
-    private boolean baseUriSetFromDoc = false;
-    private Element headElement; // the current head element
-    private Element formElement; // the current form element
-    private Element contextElement; // fragment parse context -- could be null even if fragment parsing
-    private DescendableLinkedList<Element> formattingElements = new DescendableLinkedList<Element>(); // active (open) formatting elements
-    private List<Token.Character> pendingTableCharacters = new ArrayList<Token.Character>(); // chars in table to be shifted out
-
-    private boolean framesetOk = true; // if ok to go into frameset
-    private boolean fosterInserts = false; // if next inserts should be fostered
-    private boolean fragmentParsing = false; // if parsing a fragment of html
-
-    HtmlTreeBuilder() {}
-
-    @Override
-    Document parse(String input, String baseUri, ParseErrorList errors) {
-        state = HtmlTreeBuilderState.Initial;
-        return super.parse(input, baseUri, errors);
-    }
-
-    List<Node> parseFragment(String inputFragment, Element context, String baseUri, ParseErrorList errors) {
-        // context may be null
-        state = HtmlTreeBuilderState.Initial;
-        initialiseParse(inputFragment, baseUri, errors);
-        contextElement = context;
-        fragmentParsing = true;
-        Element root = null;
-
-        if (context != null) {
-            if (context.ownerDocument() != null) // quirks setup:
-                doc.quirksMode(context.ownerDocument().quirksMode());
-
-            // initialise the tokeniser state:
-            String contextTag = context.tagName();
-            if (StringUtil.in(contextTag, "title", "textarea"))
-                tokeniser.transition(TokeniserState.Rcdata);
-            else if (StringUtil.in(contextTag, "iframe", "noembed", "noframes", "style", "xmp"))
-                tokeniser.transition(TokeniserState.Rawtext);
-            else if (contextTag.equals("script"))
-                tokeniser.transition(TokeniserState.ScriptData);
-            else if (contextTag.equals(("noscript")))
-                tokeniser.transition(TokeniserState.Data); // if scripting enabled, rawtext
-            else if (contextTag.equals("plaintext"))
-                tokeniser.transition(TokeniserState.Data);
-            else
-                tokeniser.transition(TokeniserState.Data); // default
-
-            root = new Element(Tag.valueOf("html"), baseUri);
-            doc.appendChild(root);
-            stack.push(root);
-            resetInsertionMode();
-            // todo: setup form element to nearest form on context (up ancestor chain)
-        }
-
-        runParser();
-        if (context != null)
-            return root.childNodes();
-        else
-            return doc.childNodes();
-    }
-
-    @Override
-    protected boolean process(Token token) {
-        currentToken = token;
-        return this.state.process(token, this);
-    }
-
-    boolean process(Token token, HtmlTreeBuilderState state) {
-        currentToken = token;
-        return state.process(token, this);
-    }
-
-    void transition(HtmlTreeBuilderState state) {
-        this.state = state;
-    }
-
-    HtmlTreeBuilderState state() {
-        return state;
-    }
-
-    void markInsertionMode() {
-        originalState = state;
-    }
-
-    HtmlTreeBuilderState originalState() {
-        return originalState;
-    }
-
-    void framesetOk(boolean framesetOk) {
-        this.framesetOk = framesetOk;
-    }
-
-    boolean framesetOk() {
-        return framesetOk;
-    }
-
-    Document getDocument() {
-        return doc;
-    }
-
-    String getBaseUri() {
-        return baseUri;
-    }
-
-    void maybeSetBaseUri(Element base) {
-        if (baseUriSetFromDoc) // only listen to the first <base href> in parse
-            return;
-
-        String href = base.absUrl("href");
-        if (href.length() != 0) { // ignore <base target> etc
-            baseUri = href;
-            baseUriSetFromDoc = true;
-            doc.setBaseUri(href); // set on the doc so doc.createElement(Tag) will get updated base, and to update all descendants
-        }
-    }
-
-    boolean isFragmentParsing() {
-        return fragmentParsing;
-    }
-
-    void error(HtmlTreeBuilderState state) {
-        if (errors.canAddError())
-            errors.add(new ParseError(reader.pos(), "Unexpected token [%s] when in state [%s]", currentToken.tokenType(), state));
-    }
-
-    Element insert(Token.StartTag startTag) {
-        // handle empty unknown tags
-        // when the spec expects an empty tag, will directly hit insertEmpty, so won't generate fake end tag.
-        if (startTag.isSelfClosing() && !Tag.isKnownTag(startTag.name())) {
-            Element el = insertEmpty(startTag);
-            process(new Token.EndTag(el.tagName())); // ensure we get out of whatever state we are in
-            return el;
-        }
-        
-        Element el = new Element(Tag.valueOf(startTag.name()), baseUri, startTag.attributes);
-        insert(el);
-        return el;
-    }
-
-    Element insert(String startTagName) {
-        Element el = new Element(Tag.valueOf(startTagName), baseUri);
-        insert(el);
-        return el;
-    }
-
-    void insert(Element el) {
-        insertNode(el);
-        stack.add(el);
-    }
-
-    Element insertEmpty(Token.StartTag startTag) {
-        Tag tag = Tag.valueOf(startTag.name());
-        Element el = new Element(tag, baseUri, startTag.attributes);
-        insertNode(el);
-        if (startTag.isSelfClosing()) {
-            tokeniser.acknowledgeSelfClosingFlag();
-            if (!tag.isKnownTag()) // unknown tag, remember this is self closing for output
-                tag.setSelfClosing();
-        }
-        return el;
-    }
-
-    void insert(Token.Comment commentToken) {
-        Comment comment = new Comment(commentToken.getData(), baseUri);
-        insertNode(comment);
-    }
-
-    void insert(Token.Character characterToken) {
-        Node node;
-        // characters in script and style go in as datanodes, not text nodes
-        if (StringUtil.in(currentElement().tagName(), "script", "style"))
-            node = new DataNode(characterToken.getData(), baseUri);
-        else
-            node = new TextNode(characterToken.getData(), baseUri);
-        currentElement().appendChild(node); // doesn't use insertNode, because we don't foster these; and will always have a stack.
-    }
-
-    private void insertNode(Node node) {
-        // if the stack hasn't been set up yet, elements (doctype, comments) go into the doc
-        if (stack.size() == 0)
-            doc.appendChild(node);
-        else if (isFosterInserts())
-            insertInFosterParent(node);
-        else
-            currentElement().appendChild(node);
-    }
-
-    Element pop() {
-        // todo - dev, remove validation check
-        if (stack.peekLast().nodeName().equals("td") && !state.name().equals("InCell"))
-            Validate.isFalse(true, "pop td not in cell");
-        if (stack.peekLast().nodeName().equals("html"))
-            Validate.isFalse(true, "popping html!");
-        return stack.pollLast();
-    }
-
-    void push(Element element) {
-        stack.add(element);
-    }
-
-    DescendableLinkedList<Element> getStack() {
-        return stack;
-    }
-
-    boolean onStack(Element el) {
-        return isElementInQueue(stack, el);
-    }
-
-    private boolean isElementInQueue(DescendableLinkedList<Element> queue, Element element) {
-        Iterator<Element> it = queue.descendingIterator();
-        while (it.hasNext()) {
-            Element next = it.next();
-            if (next == element) {
-                return true;
-            }
-        }
-        return false;
-    }
-
-    Element getFromStack(String elName) {
-        Iterator<Element> it = stack.descendingIterator();
-        while (it.hasNext()) {
-            Element next = it.next();
-            if (next.nodeName().equals(elName)) {
-                return next;
-            }
-        }
-        return null;
-    }
-
-    boolean removeFromStack(Element el) {
-        Iterator<Element> it = stack.descendingIterator();
-        while (it.hasNext()) {
-            Element next = it.next();
-            if (next == el) {
-                it.remove();
-                return true;
-            }
-        }
-        return false;
-    }
-
-    void popStackToClose(String elName) {
-        Iterator<Element> it = stack.descendingIterator();
-        while (it.hasNext()) {
-            Element next = it.next();
-            if (next.nodeName().equals(elName)) {
-                it.remove();
-                break;
-            } else {
-                it.remove();
-            }
-        }
-    }
-
-    void popStackToClose(String... elNames) {
-        Iterator<Element> it = stack.descendingIterator();
-        while (it.hasNext()) {
-            Element next = it.next();
-            if (StringUtil.in(next.nodeName(), elNames)) {
-                it.remove();
-                break;
-            } else {
-                it.remove();
-            }
-        }
-    }
-
-    void popStackToBefore(String elName) {
-        Iterator<Element> it = stack.descendingIterator();
-        while (it.hasNext()) {
-            Element next = it.next();
-            if (next.nodeName().equals(elName)) {
-                break;
-            } else {
-                it.remove();
-            }
-        }
-    }
-
-    void clearStackToTableContext() {
-        clearStackToContext("table");
-    }
-
-    void clearStackToTableBodyContext() {
-        clearStackToContext("tbody", "tfoot", "thead");
-    }
-
-    void clearStackToTableRowContext() {
-        clearStackToContext("tr");
-    }
-
-    private void clearStackToContext(String... nodeNames) {
-        Iterator<Element> it = stack.descendingIterator();
-        while (it.hasNext()) {
-            Element next = it.next();
-            if (StringUtil.in(next.nodeName(), nodeNames) || next.nodeName().equals("html"))
-                break;
-            else
-                it.remove();
-        }
-    }
-
-    Element aboveOnStack(Element el) {
-        assert onStack(el);
-        Iterator<Element> it = stack.descendingIterator();
-        while (it.hasNext()) {
-            Element next = it.next();
-            if (next == el) {
-                return it.next();
-            }
-        }
-        return null;
-    }
-
-    void insertOnStackAfter(Element after, Element in) {
-        int i = stack.lastIndexOf(after);
-        Validate.isTrue(i != -1);
-        stack.add(i+1, in);
-    }
-
-    void replaceOnStack(Element out, Element in) {
-        replaceInQueue(stack, out, in);
-    }
-
-    private void replaceInQueue(LinkedList<Element> queue, Element out, Element in) {
-        int i = queue.lastIndexOf(out);
-        Validate.isTrue(i != -1);
-        queue.remove(i);
-        queue.add(i, in);
-    }
-
-    void resetInsertionMode() {
-        boolean last = false;
-        Iterator<Element> it = stack.descendingIterator();
-        while (it.hasNext()) {
-            Element node = it.next();
-            if (!it.hasNext()) {
-                last = true;
-                node = contextElement;
-            }
-            String name = node.nodeName();
-            if ("select".equals(name)) {
-                transition(HtmlTreeBuilderState.InSelect);
-                break; // frag
-            } else if (("td".equals(name) || "td".equals(name) && !last)) {
-                transition(HtmlTreeBuilderState.InCell);
-                break;
-            } else if ("tr".equals(name)) {
-                transition(HtmlTreeBuilderState.InRow);
-                break;
-            } else if ("tbody".equals(name) || "thead".equals(name) || "tfoot".equals(name)) {
-                transition(HtmlTreeBuilderState.InTableBody);
-                break;
-            } else if ("caption".equals(name)) {
-                transition(HtmlTreeBuilderState.InCaption);
-                break;
-            } else if ("colgroup".equals(name)) {
-                transition(HtmlTreeBuilderState.InColumnGroup);
-                break; // frag
-            } else if ("table".equals(name)) {
-                transition(HtmlTreeBuilderState.InTable);
-                break;
-            } else if ("head".equals(name)) {
-                transition(HtmlTreeBuilderState.InBody);
-                break; // frag
-            } else if ("body".equals(name)) {
-                transition(HtmlTreeBuilderState.InBody);
-                break;
-            } else if ("frameset".equals(name)) {
-                transition(HtmlTreeBuilderState.InFrameset);
-                break; // frag
-            } else if ("html".equals(name)) {
-                transition(HtmlTreeBuilderState.BeforeHead);
-                break; // frag
-            } else if (last) {
-                transition(HtmlTreeBuilderState.InBody);
-                break; // frag
-            }
-        }
-    }
-
-    // todo: tidy up in specific scope methods
-    private boolean inSpecificScope(String targetName, String[] baseTypes, String[] extraTypes) {
-        return inSpecificScope(new String[]{targetName}, baseTypes, extraTypes);
-    }
-
-    private boolean inSpecificScope(String[] targetNames, String[] baseTypes, String[] extraTypes) {
-        Iterator<Element> it = stack.descendingIterator();
-        while (it.hasNext()) {
-            Element el = it.next();
-            String elName = el.nodeName();
-            if (StringUtil.in(elName, targetNames))
-                return true;
-            if (StringUtil.in(elName, baseTypes))
-                return false;
-            if (extraTypes != null && StringUtil.in(elName, extraTypes))
-                return false;
-        }
-        Validate.fail("Should not be reachable");
-        return false;
-    }
-
-    boolean inScope(String[] targetNames) {
-        return inSpecificScope(targetNames, new String[]{"applet", "caption", "html", "table", "td", "th", "marquee", "object"}, null);
-    }
-
-    boolean inScope(String targetName) {
-        return inScope(targetName, null);
-    }
-
-    boolean inScope(String targetName, String[] extras) {
-        return inSpecificScope(targetName, new String[]{"applet", "caption", "html", "table", "td", "th", "marquee", "object"}, extras);
-        // todo: in mathml namespace: mi, mo, mn, ms, mtext annotation-xml
-        // todo: in svg namespace: forignOjbect, desc, title
-    }
-
-    boolean inListItemScope(String targetName) {
-        return inScope(targetName, new String[]{"ol", "ul"});
-    }
-
-    boolean inButtonScope(String targetName) {
-        return inScope(targetName, new String[]{"button"});
-    }
-
-    boolean inTableScope(String targetName) {
-        return inSpecificScope(targetName, new String[]{"html", "table"}, null);
-    }
-
-    boolean inSelectScope(String targetName) {
-        Iterator<Element> it = stack.descendingIterator();
-        while (it.hasNext()) {
-            Element el = it.next();
-            String elName = el.nodeName();
-            if (elName.equals(targetName))
-                return true;
-            if (!StringUtil.in(elName, "optgroup", "option")) // all elements except
-                return false;
-        }
-        Validate.fail("Should not be reachable");
-        return false;
-    }
-
-    void setHeadElement(Element headElement) {
-        this.headElement = headElement;
-    }
-
-    Element getHeadElement() {
-        return headElement;
-    }
-
-    boolean isFosterInserts() {
-        return fosterInserts;
-    }
-
-    void setFosterInserts(boolean fosterInserts) {
-        this.fosterInserts = fosterInserts;
-    }
-
-    Element getFormElement() {
-        return formElement;
-    }
-
-    void setFormElement(Element formElement) {
-        this.formElement = formElement;
-    }
-
-    void newPendingTableCharacters() {
-        pendingTableCharacters = new ArrayList<Token.Character>();
-    }
-
-    List<Token.Character> getPendingTableCharacters() {
-        return pendingTableCharacters;
-    }
-
-    void setPendingTableCharacters(List<Token.Character> pendingTableCharacters) {
-        this.pendingTableCharacters = pendingTableCharacters;
-    }
-
-    /**
-     11.2.5.2 Closing elements that have implied end tags<p/>
-     When the steps below require the UA to generate implied end tags, then, while the current node is a dd element, a
-     dt element, an li element, an option element, an optgroup element, a p element, an rp element, or an rt element,
-     the UA must pop the current node off the stack of open elements.
-
-     @param excludeTag If a step requires the UA to generate implied end tags but lists an element to exclude from the
-     process, then the UA must perform the above steps as if that element was not in the above list.
-     */
-    void generateImpliedEndTags(String excludeTag) {
-        while ((excludeTag != null && !currentElement().nodeName().equals(excludeTag)) &&
-                StringUtil.in(currentElement().nodeName(), "dd", "dt", "li", "option", "optgroup", "p", "rp", "rt"))
-            pop();
-    }
-
-    void generateImpliedEndTags() {
-        generateImpliedEndTags(null);
-    }
-
-    boolean isSpecial(Element el) {
-        // todo: mathml's mi, mo, mn
-        // todo: svg's foreigObject, desc, title
-        String name = el.nodeName();
-        return StringUtil.in(name, "address", "applet", "area", "article", "aside", "base", "basefont", "bgsound",
-                "blockquote", "body", "br", "button", "caption", "center", "col", "colgroup", "command", "dd",
-                "details", "dir", "div", "dl", "dt", "embed", "fieldset", "figcaption", "figure", "footer", "form",
-                "frame", "frameset", "h1", "h2", "h3", "h4", "h5", "h6", "head", "header", "hgroup", "hr", "html",
-                "iframe", "img", "input", "isindex", "li", "link", "listing", "marquee", "menu", "meta", "nav",
-                "noembed", "noframes", "noscript", "object", "ol", "p", "param", "plaintext", "pre", "script",
-                "section", "select", "style", "summary", "table", "tbody", "td", "textarea", "tfoot", "th", "thead",
-                "title", "tr", "ul", "wbr", "xmp");
-    }
-
-    // active formatting elements
-    void pushActiveFormattingElements(Element in) {
-        int numSeen = 0;
-        Iterator<Element> iter = formattingElements.descendingIterator();
-        while (iter.hasNext()) {
-            Element el =  iter.next();
-            if (el == null) // marker
-                break;
-
-            if (isSameFormattingElement(in, el))
-                numSeen++;
-
-            if (numSeen == 3) {
-                iter.remove();
-                break;
-            }
-        }
-        formattingElements.add(in);
-    }
-
-    private boolean isSameFormattingElement(Element a, Element b) {
-        // same if: same namespace, tag, and attributes. Element.equals only checks tag, might in future check children
-        return a.nodeName().equals(b.nodeName()) &&
-                // a.namespace().equals(b.namespace()) &&
-                a.attributes().equals(b.attributes());
-        // todo: namespaces
-    }
-
-    void reconstructFormattingElements() {
-        int size = formattingElements.size();
-        if (size == 0 || formattingElements.getLast() == null || onStack(formattingElements.getLast()))
-            return;
-
-        Element entry = formattingElements.getLast();
-        int pos = size - 1;
-        boolean skip = false;
-        while (true) {
-            if (pos == 0) { // step 4. if none before, skip to 8
-                skip = true;
-                break;
-            }
-            entry = formattingElements.get(--pos); // step 5. one earlier than entry
-            if (entry == null || onStack(entry)) // step 6 - neither marker nor on stack
-                break; // jump to 8, else continue back to 4
-        }
-        while(true) {
-            if (!skip) // step 7: on later than entry
-                entry = formattingElements.get(++pos);
-            Validate.notNull(entry); // should not occur, as we break at last element
-
-            // 8. create new element from element, 9 insert into current node, onto stack
-            skip = false; // can only skip increment from 4.
-            Element newEl = insert(entry.nodeName()); // todo: avoid fostering here?
-            // newEl.namespace(entry.namespace()); // todo: namespaces
-            newEl.attributes().addAll(entry.attributes());
-
-            // 10. replace entry with new entry
-            formattingElements.add(pos, newEl);
-            formattingElements.remove(pos + 1);
-
-            // 11
-            if (pos == size-1) // if not last entry in list, jump to 7
-                break;
-        }
-    }
-
-    void clearFormattingElementsToLastMarker() {
-        while (!formattingElements.isEmpty()) {
-            Element el = formattingElements.peekLast();
-            formattingElements.removeLast();
-            if (el == null)
-                break;
-        }
-    }
-
-    void removeFromActiveFormattingElements(Element el) {
-        Iterator<Element> it = formattingElements.descendingIterator();
-        while (it.hasNext()) {
-            Element next = it.next();
-            if (next == el) {
-                it.remove();
-                break;
-            }
-        }
-    }
-
-    boolean isInActiveFormattingElements(Element el) {
-        return isElementInQueue(formattingElements, el);
-    }
-
-    Element getActiveFormattingElement(String nodeName) {
-        Iterator<Element> it = formattingElements.descendingIterator();
-        while (it.hasNext()) {
-            Element next = it.next();
-            if (next == null) // scope marker
-                break;
-            else if (next.nodeName().equals(nodeName))
-                return next;
-        }
-        return null;
-    }
-
-    void replaceActiveFormattingElement(Element out, Element in) {
-        replaceInQueue(formattingElements, out, in);
-    }
-
-    void insertMarkerToFormattingElements() {
-        formattingElements.add(null);
-    }
-
-    void insertInFosterParent(Node in) {
-        Element fosterParent = null;
-        Element lastTable = getFromStack("table");
-        boolean isLastTableParent = false;
-        if (lastTable != null) {
-            if (lastTable.parent() != null) {
-                fosterParent = lastTable.parent();
-                isLastTableParent = true;
-            } else
-                fosterParent = aboveOnStack(lastTable);
-        } else { // no table == frag
-            fosterParent = stack.get(0);
-        }
-
-        if (isLastTableParent) {
-            Validate.notNull(lastTable); // last table cannot be null by this point.
-            lastTable.before(in);
-        }
-        else
-            fosterParent.appendChild(in);
-    }
-
-    @Override
-    public String toString() {
-        return "TreeBuilder{" +
-                "currentToken=" + currentToken +
-                ", state=" + state +
-                ", currentElement=" + currentElement() +
-                '}';
-    }
-}
diff --git a/src/org/jsoup/parser/HtmlTreeBuilderState.java b/src/org/jsoup/parser/HtmlTreeBuilderState.java
deleted file mode 100644
index ceab9faa5a..0000000000
--- a/src/org/jsoup/parser/HtmlTreeBuilderState.java
+++ /dev/null
@@ -1,1482 +0,0 @@
-package org.jsoup.parser;
-
-import org.jsoup.helper.DescendableLinkedList;
-import org.jsoup.helper.StringUtil;
-import org.jsoup.nodes.*;
-
-import java.util.Iterator;
-import java.util.LinkedList;
-
-/**
- * The Tree Builder's current state. Each state embodies the processing for the state, and transitions to other states.
- */
-enum HtmlTreeBuilderState {
-    Initial {
-        /**
-         * Whitespace is ignored and comments are inserted. A doctype is appended to the document (switching
-         * the document to quirks mode when the token forces it) before moving to BeforeHtml. Any other token
-         * moves to BeforeHtml and is processed again there.
-         */
-        boolean process(Token t, HtmlTreeBuilder tb) {
-            if (isWhitespace(t)) {
-                return true; // ignore whitespace
-            }
-            if (t.isComment()) {
-                tb.insert(t.asComment());
-                return true;
-            }
-            if (!t.isDoctype()) {
-                // todo: check not iframe srcdoc
-                tb.transition(BeforeHtml);
-                return tb.process(t); // re-process token
-            }
-
-            // todo: parse error check on expected doctypes
-            // todo: quirk state check on doctype ids
-            Token.Doctype token = t.asDoctype();
-            DocumentType docType = new DocumentType(
-                    token.getName(), token.getPublicIdentifier(), token.getSystemIdentifier(), tb.getBaseUri());
-            tb.getDocument().appendChild(docType);
-            if (token.isForceQuirks()) {
-                tb.getDocument().quirksMode(Document.QuirksMode.quirks);
-            }
-            tb.transition(BeforeHtml);
-            return true;
-        }
-    },
-    BeforeHtml {
-        /**
-         * Doctypes are errors, comments are inserted and whitespace is ignored. An html start tag is inserted
-         * and moves the builder to BeforeHead. End tags other than head, body, html and br are errors.
-         * Everything else goes through {@code anythingElse}.
-         */
-        boolean process(Token t, HtmlTreeBuilder tb) {
-            if (t.isDoctype()) {
-                tb.error(this);
-                return false;
-            }
-            if (t.isComment()) {
-                tb.insert(t.asComment());
-                return true;
-            }
-            if (isWhitespace(t)) {
-                return true; // ignore whitespace
-            }
-            if (t.isStartTag() && t.asStartTag().name().equals("html")) {
-                tb.insert(t.asStartTag());
-                tb.transition(BeforeHead);
-                return true;
-            }
-            if (t.isEndTag() && !StringUtil.in(t.asEndTag().name(), "head", "body", "html", "br")) {
-                // only those four end tags are allowed to fall through
-                tb.error(this);
-                return false;
-            }
-            return anythingElse(t, tb);
-        }
-
-        /** Inserts an html element, moves to BeforeHead and processes the token again. */
-        private boolean anythingElse(Token t, HtmlTreeBuilder tb) {
-            tb.insert("html");
-            tb.transition(BeforeHead);
-            return tb.process(t);
-        }
-    },
-    BeforeHead {
-        /**
-         * Whitespace is ignored, comments are inserted and doctypes are errors. An html start tag is handed
-         * to InBody without changing state. A head start tag is inserted, recorded as the head element, and
-         * moves the builder to InHead. End tags other than head, body, html and br are errors. For anything
-         * else a head start tag is processed first and the token is then processed again.
-         */
-        boolean process(Token t, HtmlTreeBuilder tb) {
-            if (isWhitespace(t)) {
-                return true;
-            }
-            if (t.isComment()) {
-                tb.insert(t.asComment());
-                return true;
-            }
-            if (t.isDoctype()) {
-                tb.error(this);
-                return false;
-            }
-            if (t.isStartTag()) {
-                String tagName = t.asStartTag().name();
-                if (tagName.equals("html")) {
-                    return InBody.process(t, tb); // does not transition
-                }
-                if (tagName.equals("head")) {
-                    Element headEl = tb.insert(t.asStartTag());
-                    tb.setHeadElement(headEl);
-                    tb.transition(InHead);
-                    return true;
-                }
-            }
-            if (t.isEndTag() && !StringUtil.in(t.asEndTag().name(), "head", "body", "html", "br")) {
-                tb.error(this);
-                return false;
-            }
-
-            // all remaining tokens: process a head start tag, then process the token again
-            tb.process(new Token.StartTag("head"));
-            return tb.process(t);
-        }
-    },
-    InHead {
-        /**
-         * Processes a token while inside the head element.
-         * Whitespace and comments are inserted; doctypes are errors. Start tags are dispatched by name: html
-         * goes to InBody, void head elements are inserted empty, title/noframes/style switch to RCDATA or raw
-         * text handling, noscript moves to InHeadNoscript, script moves to the Text state, and a nested head
-         * is an error. A head end tag pops the stack and moves to AfterHead; other end tags apart from body,
-         * html and br are errors. Everything else goes through {@code anythingElse}.
-         * @return false when the token was rejected as an error, otherwise the outcome of handling it
-         */
-        boolean process(Token t, HtmlTreeBuilder tb) {
-            if (isWhitespace(t)) {
-                tb.insert(t.asCharacter());
-                return true;
-            }
-            switch (t.type) {
-                case Comment:
-                    tb.insert(t.asComment());
-                    break;
-                case Doctype:
-                    tb.error(this);
-                    return false;
-                case StartTag:
-                    Token.StartTag start = t.asStartTag();
-                    String name = start.name();
-                    if (name.equals("html")) {
-                        return InBody.process(t, tb);
-                    } else if (StringUtil.in(name, "base", "basefont", "bgsound", "command", "link")) {
-                        Element el = tb.insertEmpty(start);
-                        // jsoup special: update base the first time it is seen
-                        if (name.equals("base") && el.hasAttr("href"))
-                            tb.maybeSetBaseUri(el);
-                    } else if (name.equals("meta")) {
-                        tb.insertEmpty(start);
-                        // todo: charset switches
-                    } else if (name.equals("title")) {
-                        handleRcData(start, tb);
-                    } else if (StringUtil.in(name, "noframes", "style")) {
-                        handleRawtext(start, tb);
-                    } else if (name.equals("noscript")) {
-                        // else if noscript && scripting flag = true: rawtext (jsoup doesn't run script, to handle as noscript)
-                        tb.insert(start);
-                        tb.transition(InHeadNoscript);
-                    } else if (name.equals("script")) {
-                        // skips some script rules as won't execute them
-                        tb.insert(start);
-                        tb.tokeniser.transition(TokeniserState.ScriptData);
-                        tb.markInsertionMode();
-                        tb.transition(Text);
-                    } else if (name.equals("head")) {
-                        tb.error(this);
-                        return false;
-                    } else {
-                        return anythingElse(t, tb);
-                    }
-                    break;
-                case EndTag:
-                    Token.EndTag end = t.asEndTag();
-                    name = end.name(); // reuses the local declared in the StartTag case (same switch scope)
-                    if (name.equals("head")) {
-                        tb.pop();
-                        tb.transition(AfterHead);
-                    } else if (StringUtil.in(name, "body", "html", "br")) {
-                        return anythingElse(t, tb);
-                    } else {
-                        tb.error(this);
-                        return false;
-                    }
-                    break;
-                default:
-                    return anythingElse(t, tb);
-            }
-            return true;
-        }
-
-        /**
-         * Processes a head end tag, then processes the token again.
-         * Takes HtmlTreeBuilder, matching the helpers of the sibling states.
-         */
-        private boolean anythingElse(Token t, HtmlTreeBuilder tb) {
-            tb.process(new Token.EndTag("head"));
-            return tb.process(t);
-        }
-    },
-    InHeadNoscript {
-        /**
-         * Processes a token inside a noscript element within head. A doctype is reported as an error but the
-         * method still returns true. An html start tag is processed with InBody. A noscript end tag pops the
-         * stack and returns to InHead. Whitespace, comments and a fixed set of head-content start tags are
-         * processed with InHead. A br end tag goes through {@code anythingElse}. Head or noscript start tags,
-         * and every other end tag, are errors. All remaining tokens go through {@code anythingElse}.
-         */
-        boolean process(Token t, HtmlTreeBuilder tb) {
-            if (t.isDoctype()) {
-                tb.error(this);
-                return true;
-            }
-            if (t.isStartTag() && t.asStartTag().name().equals("html")) {
-                return tb.process(t, InBody);
-            }
-            if (t.isEndTag() && t.asEndTag().name().equals("noscript")) {
-                tb.pop();
-                tb.transition(InHead);
-                return true;
-            }
-
-            boolean handledByHead = isWhitespace(t) || t.isComment()
-                    || (t.isStartTag() && StringUtil.in(t.asStartTag().name(),
-                    "basefont", "bgsound", "link", "meta", "noframes", "style"));
-            if (handledByHead) {
-                return tb.process(t, InHead);
-            }
-
-            if (t.isEndTag() && t.asEndTag().name().equals("br")) {
-                return anythingElse(t, tb);
-            }
-
-            boolean rejected = (t.isStartTag() && StringUtil.in(t.asStartTag().name(), "head", "noscript"))
-                    || t.isEndTag();
-            if (rejected) {
-                tb.error(this);
-                return false;
-            }
-            return anythingElse(t, tb);
-        }
-
-        /** Reports an error, processes a noscript end tag, then processes the token again. */
-        private boolean anythingElse(Token t, HtmlTreeBuilder tb) {
-            tb.error(this);
-            tb.process(new Token.EndTag("noscript"));
-            return tb.process(t);
-        }
-    },
-    AfterHead {
-        /**
-         * Processes a token seen after the head element has been closed. Whitespace and comments are inserted;
-         * a doctype is reported as an error but the method still returns true. Start tags are dispatched by
-         * name (html, body, frameset, head-content tags, head, other). End tags other than body and html are
-         * errors. Remaining tokens go through {@code anythingElse}, whose result is not propagated: this
-         * method returns true in those cases.
-         */
-        boolean process(Token t, HtmlTreeBuilder tb) {
-            if (isWhitespace(t)) {
-                tb.insert(t.asCharacter());
-                return true;
-            }
-            if (t.isComment()) {
-                tb.insert(t.asComment());
-                return true;
-            }
-            if (t.isDoctype()) {
-                tb.error(this);
-                return true;
-            }
-            if (t.isStartTag()) {
-                return processStartTag(t, tb);
-            }
-            if (t.isEndTag() && !StringUtil.in(t.asEndTag().name(), "body", "html")) {
-                tb.error(this);
-                return false;
-            }
-            anythingElse(t, tb);
-            return true;
-        }
-
-        /** Handles the start-tag cases of this state; the token must be a start tag. */
-        private boolean processStartTag(Token t, HtmlTreeBuilder tb) {
-            Token.StartTag tag = t.asStartTag();
-            String tagName = tag.name();
-
-            if (tagName.equals("html")) {
-                return tb.process(t, InBody);
-            }
-            if (tagName.equals("body")) {
-                tb.insert(tag);
-                tb.framesetOk(false);
-                tb.transition(InBody);
-                return true;
-            }
-            if (tagName.equals("frameset")) {
-                tb.insert(tag);
-                tb.transition(InFrameset);
-                return true;
-            }
-            if (StringUtil.in(tagName, "base", "basefont", "bgsound", "link", "meta", "noframes", "script", "style", "title")) {
-                tb.error(this);
-                // push the head element for the duration of the InHead processing, then take it off again
-                Element headEl = tb.getHeadElement();
-                tb.push(headEl);
-                tb.process(t, InHead);
-                tb.removeFromStack(headEl);
-                return true;
-            }
-            if (tagName.equals("head")) {
-                tb.error(this);
-                return false;
-            }
-            anythingElse(t, tb);
-            return true;
-        }
-
-        /** Processes a body start tag, sets frameset-ok to true, then processes the token again. */
-        private boolean anythingElse(Token t, HtmlTreeBuilder tb) {
-            tb.process(new Token.StartTag("body"));
-            tb.framesetOk(true);
-            return tb.process(t);
-        }
-    },
-    InBody {
-        boolean process(Token t, HtmlTreeBuilder tb) {
-            switch (t.type) {
-                case Character: {
-                    Token.Character c = t.asCharacter();
-                    if (c.getData().equals(nullString)) {
-                        // todo confirm that check
-                        tb.error(this);
-                        return false;
-                    } else if (isWhitespace(c)) {
-                        tb.reconstructFormattingElements();
-                        tb.insert(c);
-                    } else {
-                        tb.reconstructFormattingElements();
-                        tb.insert(c);
-                        tb.framesetOk(false);
-                    }
-                    break;
-                }
-                case Comment: {
-                    tb.insert(t.asComment());
-                    break;
-                }
-                case Doctype: {
-                    tb.error(this);
-                    return false;
-                }
-                case StartTag:
-                    Token.StartTag startTag = t.asStartTag();
-                    String name = startTag.name();
-                    if (name.equals("html")) {
-                        tb.error(this);
-                        // merge attributes onto real html
-                        Element html = tb.getStack().getFirst();
-                        for (Attribute attribute : startTag.getAttributes()) {
-                            if (!html.hasAttr(attribute.getKey()))
-                                html.attributes().put(attribute);
-                        }
-                    } else if (StringUtil.in(name, "base", "basefont", "bgsound", "command", "link", "meta", "noframes", "script", "style", "title")) {
-                        return tb.process(t, InHead);
-                    } else if (name.equals("body")) {
-                        tb.error(this);
-                        LinkedList<Element> stack = tb.getStack();
-                        if (stack.size() == 1 || (stack.size() > 2 && !stack.get(1).nodeName().equals("body"))) {
-                            // only in fragment case
-                            return false; // ignore
-                        } else {
-                            tb.framesetOk(false);
-                            Element body = stack.get(1);
-                            for (Attribute attribute : startTag.getAttributes()) {
-                                if (!body.hasAttr(attribute.getKey()))
-                                    body.attributes().put(attribute);
-                            }
-                        }
-                    } else if (name.equals("frameset")) {
-                        tb.error(this);
-                        LinkedList<Element> stack = tb.getStack();
-                        if (stack.size() == 1 || (stack.size() > 2 && !stack.get(1).nodeName().equals("body"))) {
-                            // only in fragment case
-                            return false; // ignore
-                        } else if (!tb.framesetOk()) {
-                            return false; // ignore frameset
-                        } else {
-                            Element second = stack.get(1);
-                            if (second.parent() != null)
-                                second.remove();
-                            // pop up to html element
-                            while (stack.size() > 1)
-                                stack.removeLast();
-                            tb.insert(startTag);
-                            tb.transition(InFrameset);
-                        }
-                    } else if (StringUtil.in(name,
-                            "address", "article", "aside", "blockquote", "center", "details", "dir", "div", "dl",
-                            "fieldset", "figcaption", "figure", "footer", "header", "hgroup", "menu", "nav", "ol",
-                            "p", "section", "summary", "ul")) {
-                        if (tb.inButtonScope("p")) {
-                            tb.process(new Token.EndTag("p"));
-                        }
-                        tb.insert(startTag);
-                    } else if (StringUtil.in(name, "h1", "h2", "h3", "h4", "h5", "h6")) {
-                        if (tb.inButtonScope("p")) {
-                            tb.process(new Token.EndTag("p"));
-                        }
-                        if (StringUtil.in(tb.currentElement().nodeName(), "h1", "h2", "h3", "h4", "h5", "h6")) {
-                            tb.error(this);
-                            tb.pop();
-                        }
-                        tb.insert(startTag);
-                    } else if (StringUtil.in(name, "pre", "listing")) {
-                        if (tb.inButtonScope("p")) {
-                            tb.process(new Token.EndTag("p"));
-                        }
-                        tb.insert(startTag);
-                        // todo: ignore LF if next token
-                        tb.framesetOk(false);
-                    } else if (name.equals("form")) {
-                        if (tb.getFormElement() != null) {
-                            tb.error(this);
-                            return false;
-                        }
-                        if (tb.inButtonScope("p")) {
-                            tb.process(new Token.EndTag("p"));
-                        }
-                        Element form = tb.insert(startTag);
-                        tb.setFormElement(form);
-                    } else if (name.equals("li")) {
-                        tb.framesetOk(false);
-                        LinkedList<Element> stack = tb.getStack();
-                        for (int i = stack.size() - 1; i > 0; i--) {
-                            Element el = stack.get(i);
-                            if (el.nodeName().equals("li")) {
-                                tb.process(new Token.EndTag("li"));
-                                break;
-                            }
-                            if (tb.isSpecial(el) && !StringUtil.in(el.nodeName(), "address", "div", "p"))
-                                break;
-                        }
-                        if (tb.inButtonScope("p")) {
-                            tb.process(new Token.EndTag("p"));
-                        }
-                        tb.insert(startTag);
-                    } else if (StringUtil.in(name, "dd", "dt")) {
-                        tb.framesetOk(false);
-                        LinkedList<Element> stack = tb.getStack();
-                        for (int i = stack.size() - 1; i > 0; i--) {
-                            Element el = stack.get(i);
-                            if (StringUtil.in(el.nodeName(), "dd", "dt")) {
-                                tb.process(new Token.EndTag(el.nodeName()));
-                                break;
-                            }
-                            if (tb.isSpecial(el) && !StringUtil.in(el.nodeName(), "address", "div", "p"))
-                                break;
-                        }
-                        if (tb.inButtonScope("p")) {
-                            tb.process(new Token.EndTag("p"));
-                        }
-                        tb.insert(startTag);
-                    } else if (name.equals("plaintext")) {
-                        if (tb.inButtonScope("p")) {
-                            tb.process(new Token.EndTag("p"));
-                        }
-                        tb.insert(startTag);
-                        tb.tokeniser.transition(TokeniserState.PLAINTEXT); // once in, never gets out
-                    } else if (name.equals("button")) {
-                        if (tb.inButtonScope("button")) {
-                            // close and reprocess
-                            tb.error(this);
-                            tb.process(new Token.EndTag("button"));
-                            tb.process(startTag);
-                        } else {
-                            tb.reconstructFormattingElements();
-                            tb.insert(startTag);
-                            tb.framesetOk(false);
-                        }
-                    } else if (name.equals("a")) {
-                        if (tb.getActiveFormattingElement("a") != null) {
-                            tb.error(this);
-                            tb.process(new Token.EndTag("a"));
-
-                            // still on stack?
-                            Element remainingA = tb.getFromStack("a");
-                            if (remainingA != null) {
-                                tb.removeFromActiveFormattingElements(remainingA);
-                                tb.removeFromStack(remainingA);
-                            }
-                        }
-                        tb.reconstructFormattingElements();
-                        Element a = tb.insert(startTag);
-                        tb.pushActiveFormattingElements(a);
-                    } else if (StringUtil.in(name,
-                            "b", "big", "code", "em", "font", "i", "s", "small", "strike", "strong", "tt", "u")) {
-                        tb.reconstructFormattingElements();
-                        Element el = tb.insert(startTag);
-                        tb.pushActiveFormattingElements(el);
-                    } else if (name.equals("nobr")) {
-                        tb.reconstructFormattingElements();
-                        if (tb.inScope("nobr")) {
-                            tb.error(this);
-                            tb.process(new Token.EndTag("nobr"));
-                            tb.reconstructFormattingElements();
-                        }
-                        Element el = tb.insert(startTag);
-                        tb.pushActiveFormattingElements(el);
-                    } else if (StringUtil.in(name, "applet", "marquee", "object")) {
-                        tb.reconstructFormattingElements();
-                        tb.insert(startTag);
-                        tb.insertMarkerToFormattingElements();
-                        tb.framesetOk(false);
-                    } else if (name.equals("table")) {
-                        if (tb.getDocument().quirksMode() != Document.QuirksMode.quirks && tb.inButtonScope("p")) {
-                            tb.process(new Token.EndTag("p"));
-                        }
-                        tb.insert(startTag);
-                        tb.framesetOk(false);
-                        tb.transition(InTable);
-                    } else if (StringUtil.in(name, "area", "br", "embed", "img", "keygen", "wbr")) {
-                        tb.reconstructFormattingElements();
-                        tb.insertEmpty(startTag);
-                        tb.framesetOk(false);
-                    } else if (name.equals("input")) {
-                        tb.reconstructFormattingElements();
-                        Element el = tb.insertEmpty(startTag);
-                        if (!el.attr("type").equalsIgnoreCase("hidden"))
-                            tb.framesetOk(false);
-                    } else if (StringUtil.in(name, "param", "source", "track")) {
-                        tb.insertEmpty(startTag);
-                    } else if (name.equals("hr")) {
-                        if (tb.inButtonScope("p")) {
-                            tb.process(new Token.EndTag("p"));
-                        }
-                        tb.insertEmpty(startTag);
-                        tb.framesetOk(false);
-                    } else if (name.equals("image")) {
-                        // we're not supposed to ask.
-                        startTag.name("img");
-                        return tb.process(startTag);
-                    } else if (name.equals("isindex")) {
-                        // how much do we care about the early 90s?
-                        tb.error(this);
-                        if (tb.getFormElement() != null)
-                            return false;
-
-                        tb.tokeniser.acknowledgeSelfClosingFlag();
-                        tb.process(new Token.StartTag("form"));
-                        if (startTag.attributes.hasKey("action")) {
-                            Element form = tb.getFormElement();
-                            form.attr("action", startTag.attributes.get("action"));
-                        }
-                        tb.process(new Token.StartTag("hr"));
-                        tb.process(new Token.StartTag("label"));
-                        // hope you like english.
-                        String prompt = startTag.attributes.hasKey("prompt") ?
-                                startTag.attributes.get("prompt") :
-                                "This is a searchable index. Enter search keywords: ";
-
-                        tb.process(new Token.Character(prompt));
-
-                        // input
-                        Attributes inputAttribs = new Attributes();
-                        for (Attribute attr : startTag.attributes) {
-                            if (!StringUtil.in(attr.getKey(), "name", "action", "prompt"))
-                                inputAttribs.put(attr);
-                        }
-                        inputAttribs.put("name", "isindex");
-                        tb.process(new Token.StartTag("input", inputAttribs));
-                        tb.process(new Token.EndTag("label"));
-                        tb.process(new Token.StartTag("hr"));
-                        tb.process(new Token.EndTag("form"));
-                    } else if (name.equals("textarea")) {
-                        tb.insert(startTag);
-                        // todo: If the next token is a U+000A LINE FEED (LF) character token, then ignore that token and move on to the next one. (Newlines at the start of textarea elements are ignored as an authoring convenience.)
-                        tb.tokeniser.transition(TokeniserState.Rcdata);
-                        tb.markInsertionMode();
-                        tb.framesetOk(false);
-                        tb.transition(Text);
-                    } else if (name.equals("xmp")) {
-                        if (tb.inButtonScope("p")) {
-                            tb.process(new Token.EndTag("p"));
-                        }
-                        tb.reconstructFormattingElements();
-                        tb.framesetOk(false);
-                        handleRawtext(startTag, tb);
-                    } else if (name.equals("iframe")) {
-                        tb.framesetOk(false);
-                        handleRawtext(startTag, tb);
-                    } else if (name.equals("noembed")) {
-                        // also handle noscript if script enabled
-                        handleRawtext(startTag, tb);
-                    } else if (name.equals("select")) {
-                        tb.reconstructFormattingElements();
-                        tb.insert(startTag);
-                        tb.framesetOk(false);
-
-                        HtmlTreeBuilderState state = tb.state();
-                        if (state.equals(InTable) || state.equals(InCaption) || state.equals(InTableBody) || state.equals(InRow) || state.equals(InCell))
-                            tb.transition(InSelectInTable);
-                        else
-                            tb.transition(InSelect);
-                    } else if (StringUtil.in("optgroup", "option")) {
-                        if (tb.currentElement().nodeName().equals("option"))
-                            tb.process(new Token.EndTag("option"));
-                        tb.reconstructFormattingElements();
-                        tb.insert(startTag);
-                    } else if (StringUtil.in("rp", "rt")) {
-                        if (tb.inScope("ruby")) {
-                            tb.generateImpliedEndTags();
-                            if (!tb.currentElement().nodeName().equals("ruby")) {
-                                tb.error(this);
-                                tb.popStackToBefore("ruby"); // i.e. close up to but not include name
-                            }
-                            tb.insert(startTag);
-                        }
-                    } else if (name.equals("math")) {
-                        tb.reconstructFormattingElements();
-                        // todo: handle A start tag whose tag name is "math" (i.e. foreign, mathml)
-                        tb.insert(startTag);
-                        tb.tokeniser.acknowledgeSelfClosingFlag();
-                    } else if (name.equals("svg")) {
-                        tb.reconstructFormattingElements();
-                        // todo: handle A start tag whose tag name is "svg" (xlink, svg)
-                        tb.insert(startTag);
-                        tb.tokeniser.acknowledgeSelfClosingFlag();
-                    } else if (StringUtil.in(name,
-                            "caption", "col", "colgroup", "frame", "head", "tbody", "td", "tfoot", "th", "thead", "tr")) {
-                        tb.error(this);
-                        return false;
-                    } else {
-                        tb.reconstructFormattingElements();
-                        tb.insert(startTag);
-                    }
-                    break;
-
-                case EndTag:
-                    Token.EndTag endTag = t.asEndTag();
-                    name = endTag.name();
-                    if (name.equals("body")) {
-                        if (!tb.inScope("body")) {
-                            tb.error(this);
-                            return false;
-                        } else {
-                            // todo: error if stack contains something not dd, dt, li, optgroup, option, p, rp, rt, tbody, td, tfoot, th, thead, tr, body, html
-                            tb.transition(AfterBody);
-                        }
-                    } else if (name.equals("html")) {
-                        boolean notIgnored = tb.process(new Token.EndTag("body"));
-                        if (notIgnored)
-                            return tb.process(endTag);
-                    } else if (StringUtil.in(name,
-                            "address", "article", "aside", "blockquote", "button", "center", "details", "dir", "div",
-                            "dl", "fieldset", "figcaption", "figure", "footer", "header", "hgroup", "listing", "menu",
-                            "nav", "ol", "pre", "section", "summary", "ul")) {
-                        // todo: refactor these lookups
-                        if (!tb.inScope(name)) {
-                            // nothing to close
-                            tb.error(this);
-                            return false;
-                        } else {
-                            tb.generateImpliedEndTags();
-                            if (!tb.currentElement().nodeName().equals(name))
-                                tb.error(this);
-                            tb.popStackToClose(name);
-                        }
-                    } else if (name.equals("form")) {
-                        Element currentForm = tb.getFormElement();
-                        tb.setFormElement(null);
-                        if (currentForm == null || !tb.inScope(name)) {
-                            tb.error(this);
-                            return false;
-                        } else {
-                            tb.generateImpliedEndTags();
-                            if (!tb.currentElement().nodeName().equals(name))
-                                tb.error(this);
-                            // remove currentForm from stack. will shift anything under up.
-                            tb.removeFromStack(currentForm);
-                        }
-                    } else if (name.equals("p")) {
-                        if (!tb.inButtonScope(name)) {
-                            tb.error(this);
-                            tb.process(new Token.StartTag(name)); // if no p to close, creates an empty <p></p>
-                            return tb.process(endTag);
-                        } else {
-                            tb.generateImpliedEndTags(name);
-                            if (!tb.currentElement().nodeName().equals(name))
-                                tb.error(this);
-                            tb.popStackToClose(name);
-                        }
-                    } else if (name.equals("li")) {
-                        if (!tb.inListItemScope(name)) {
-                            tb.error(this);
-                            return false;
-                        } else {
-                            tb.generateImpliedEndTags(name);
-                            if (!tb.currentElement().nodeName().equals(name))
-                                tb.error(this);
-                            tb.popStackToClose(name);
-                        }
-                    } else if (StringUtil.in(name, "dd", "dt")) {
-                        if (!tb.inScope(name)) {
-                            tb.error(this);
-                            return false;
-                        } else {
-                            tb.generateImpliedEndTags(name);
-                            if (!tb.currentElement().nodeName().equals(name))
-                                tb.error(this);
-                            tb.popStackToClose(name);
-                        }
-                    } else if (StringUtil.in(name, "h1", "h2", "h3", "h4", "h5", "h6")) {
-                        if (!tb.inScope(new String[]{"h1", "h2", "h3", "h4", "h5", "h6"})) {
-                            tb.error(this);
-                            return false;
-                        } else {
-                            tb.generateImpliedEndTags(name);
-                            if (!tb.currentElement().nodeName().equals(name))
-                                tb.error(this);
-                            tb.popStackToClose("h1", "h2", "h3", "h4", "h5", "h6");
-                        }
-                    } else if (name.equals("sarcasm")) {
-                        // *sigh*
-                        return anyOtherEndTag(t, tb);
-                    } else if (StringUtil.in(name,
-                            "a", "b", "big", "code", "em", "font", "i", "nobr", "s", "small", "strike", "strong", "tt", "u")) {
-                        // Adoption Agency Algorithm.
-                        OUTER:
-                        for (int i = 0; i < 8; i++) {
-                            Element formatEl = tb.getActiveFormattingElement(name);
-                            if (formatEl == null)
-                                return anyOtherEndTag(t, tb);
-                            else if (!tb.onStack(formatEl)) {
-                                tb.error(this);
-                                tb.removeFromActiveFormattingElements(formatEl);
-                                return true;
-                            } else if (!tb.inScope(formatEl.nodeName())) {
-                                tb.error(this);
-                                return false;
-                            } else if (tb.currentElement() != formatEl)
-                                tb.error(this);
-
-                            Element furthestBlock = null;
-                            Element commonAncestor = null;
-                            boolean seenFormattingElement = false;
-                            LinkedList<Element> stack = tb.getStack();
-                            for (int si = 0; si < stack.size(); si++) {
-                                Element el = stack.get(si);
-                                if (el == formatEl) {
-                                    commonAncestor = stack.get(si - 1);
-                                    seenFormattingElement = true;
-                                } else if (seenFormattingElement && tb.isSpecial(el)) {
-                                    furthestBlock = el;
-                                    break;
-                                }
-                            }
-                            if (furthestBlock == null) {
-                                tb.popStackToClose(formatEl.nodeName());
-                                tb.removeFromActiveFormattingElements(formatEl);
-                                return true;
-                            }
-
-                            // todo: Let a bookmark note the position of the formatting element in the list of active formatting elements relative to the elements on either side of it in the list.
-                            // does that mean: int pos of format el in list?
-                            Element node = furthestBlock;
-                            Element lastNode = furthestBlock;
-                            INNER:
-                            for (int j = 0; j < 3; j++) {
-                                if (tb.onStack(node))
-                                    node = tb.aboveOnStack(node);
-                                if (!tb.isInActiveFormattingElements(node)) { // note no bookmark check
-                                    tb.removeFromStack(node);
-                                    continue INNER;
-                                } else if (node == formatEl)
-                                    break INNER;
-
-                                Element replacement = new Element(Tag.valueOf(node.nodeName()), tb.getBaseUri());
-                                tb.replaceActiveFormattingElement(node, replacement);
-                                tb.replaceOnStack(node, replacement);
-                                node = replacement;
-
-                                if (lastNode == furthestBlock) {
-                                    // todo: move the aforementioned bookmark to be immediately after the new node in the list of active formatting elements.
-                                    // not getting how this bookmark both straddles the element above, but is inbetween here...
-                                }
-                                if (lastNode.parent() != null)
-                                    lastNode.remove();
-                                node.appendChild(lastNode);
-
-                                lastNode = node;
-                            }
-
-                            if (StringUtil.in(commonAncestor.nodeName(), "table", "tbody", "tfoot", "thead", "tr")) {
-                                if (lastNode.parent() != null)
-                                    lastNode.remove();
-                                tb.insertInFosterParent(lastNode);
-                            } else {
-                                if (lastNode.parent() != null)
-                                    lastNode.remove();
-                                commonAncestor.appendChild(lastNode);
-                            }
-
-                            Element adopter = new Element(Tag.valueOf(name), tb.getBaseUri());
-                            Node[] childNodes = furthestBlock.childNodes().toArray(new Node[furthestBlock.childNodes().size()]);
-                            for (Node childNode : childNodes) {
-                                adopter.appendChild(childNode); // append will reparent. thus the clone to avoid concurrent mod.
-                            }
-                            furthestBlock.appendChild(adopter);
-                            tb.removeFromActiveFormattingElements(formatEl);
-                            // todo: insert the new element into the list of active formatting elements at the position of the aforementioned bookmark.
-                            tb.removeFromStack(formatEl);
-                            tb.insertOnStackAfter(furthestBlock, adopter);
-                        }
-                    } else if (StringUtil.in(name, "applet", "marquee", "object")) {
-                        if (!tb.inScope("name")) {
-                            if (!tb.inScope(name)) {
-                                tb.error(this);
-                                return false;
-                            }
-                            tb.generateImpliedEndTags();
-                            if (!tb.currentElement().nodeName().equals(name))
-                                tb.error(this);
-                            tb.popStackToClose(name);
-                            tb.clearFormattingElementsToLastMarker();
-                        }
-                    } else if (name.equals("br")) {
-                        tb.error(this);
-                        tb.process(new Token.StartTag("br"));
-                        return false;
-                    } else {
-                        return anyOtherEndTag(t, tb);
-                    }
-
-                    break;
-                case EOF:
-                    // todo: error if stack contains something not dd, dt, li, p, tbody, td, tfoot, th, thead, tr, body, html
-                    // stop parsing
-                    break;
-            }
-            return true;
-        }
-
-        boolean anyOtherEndTag(Token t, HtmlTreeBuilder tb) {
-            String name = t.asEndTag().name();
-            DescendableLinkedList<Element> stack = tb.getStack();
-            Iterator<Element> it = stack.descendingIterator();
-            while (it.hasNext()) {
-                Element node = it.next();
-                if (node.nodeName().equals(name)) {
-                    tb.generateImpliedEndTags(name);
-                    if (!name.equals(tb.currentElement().nodeName()))
-                        tb.error(this);
-                    tb.popStackToClose(name);
-                    break;
-                } else {
-                    if (tb.isSpecial(node)) {
-                        tb.error(this);
-                        return false;
-                    }
-                }
-            }
-            return true;
-        }
-    },
-    Text {
-        // in script, style etc. normally treated as data tags
-        boolean process(Token t, HtmlTreeBuilder tb) {
-            if (t.isCharacter()) {
-                tb.insert(t.asCharacter());
-            } else if (t.isEOF()) {
-                tb.error(this);
-                // if current node is script: already started
-                tb.pop();
-                tb.transition(tb.originalState());
-                return tb.process(t);
-            } else if (t.isEndTag()) {
-                // if: An end tag whose tag name is "script" -- scripting nesting level, if evaluating scripts
-                tb.pop();
-                tb.transition(tb.originalState());
-            }
-            return true;
-        }
-    },
-    InTable {
-        boolean process(Token t, HtmlTreeBuilder tb) {
-            if (t.isCharacter()) {
-                tb.newPendingTableCharacters();
-                tb.markInsertionMode();
-                tb.transition(InTableText);
-                return tb.process(t);
-            } else if (t.isComment()) {
-                tb.insert(t.asComment());
-                return true;
-            } else if (t.isDoctype()) {
-                tb.error(this);
-                return false;
-            } else if (t.isStartTag()) {
-                Token.StartTag startTag = t.asStartTag();
-                String name = startTag.name();
-                if (name.equals("caption")) {
-                    tb.clearStackToTableContext();
-                    tb.insertMarkerToFormattingElements();
-                    tb.insert(startTag);
-                    tb.transition(InCaption);
-                } else if (name.equals("colgroup")) {
-                    tb.clearStackToTableContext();
-                    tb.insert(startTag);
-                    tb.transition(InColumnGroup);
-                } else if (name.equals("col")) {
-                    tb.process(new Token.StartTag("colgroup"));
-                    return tb.process(t);
-                } else if (StringUtil.in(name, "tbody", "tfoot", "thead")) {
-                    tb.clearStackToTableContext();
-                    tb.insert(startTag);
-                    tb.transition(InTableBody);
-                } else if (StringUtil.in(name, "td", "th", "tr")) {
-                    tb.process(new Token.StartTag("tbody"));
-                    return tb.process(t);
-                } else if (name.equals("table")) {
-                    tb.error(this);
-                    boolean processed = tb.process(new Token.EndTag("table"));
-                    if (processed) // only ignored if in fragment
-                        return tb.process(t);
-                } else if (StringUtil.in(name, "style", "script")) {
-                    return tb.process(t, InHead);
-                } else if (name.equals("input")) {
-                    if (!startTag.attributes.get("type").equalsIgnoreCase("hidden")) {
-                        return anythingElse(t, tb);
-                    } else {
-                        tb.insertEmpty(startTag);
-                    }
-                } else if (name.equals("form")) {
-                    tb.error(this);
-                    if (tb.getFormElement() != null)
-                        return false;
-                    else {
-                        Element form = tb.insertEmpty(startTag);
-                        tb.setFormElement(form);
-                    }
-                } else {
-                    return anythingElse(t, tb);
-                }
-            } else if (t.isEndTag()) {
-                Token.EndTag endTag = t.asEndTag();
-                String name = endTag.name();
-
-                if (name.equals("table")) {
-                    if (!tb.inTableScope(name)) {
-                        tb.error(this);
-                        return false;
-                    } else {
-                        tb.popStackToClose("table");
-                    }
-                    tb.resetInsertionMode();
-                } else if (StringUtil.in(name,
-                        "body", "caption", "col", "colgroup", "html", "tbody", "td", "tfoot", "th", "thead", "tr")) {
-                    tb.error(this);
-                    return false;
-                } else {
-                    return anythingElse(t, tb);
-                }
-            } else if (t.isEOF()) {
-                if (tb.currentElement().nodeName().equals("html"))
-                    tb.error(this);
-                return true; // stops parsing
-            }
-            return anythingElse(t, tb);
-        }
-
-        boolean anythingElse(Token t, HtmlTreeBuilder tb) {
-            tb.error(this);
-            boolean processed = true;
-            if (StringUtil.in(tb.currentElement().nodeName(), "table", "tbody", "tfoot", "thead", "tr")) {
-                tb.setFosterInserts(true);
-                processed = tb.process(t, InBody);
-                tb.setFosterInserts(false);
-            } else {
-                processed = tb.process(t, InBody);
-            }
-            return processed;
-        }
-    },
-    InTableText {
-        boolean process(Token t, HtmlTreeBuilder tb) {
-            switch (t.type) {
-                case Character:
-                    Token.Character c = t.asCharacter();
-                    if (c.getData().equals(nullString)) {
-                        tb.error(this);
-                        return false;
-                    } else {
-                        tb.getPendingTableCharacters().add(c);
-                    }
-                    break;
-                default:
-                    if (tb.getPendingTableCharacters().size() > 0) {
-                        for (Token.Character character : tb.getPendingTableCharacters()) {
-                            if (!isWhitespace(character)) {
-                                // InTable anything else section:
-                                tb.error(this);
-                                if (StringUtil.in(tb.currentElement().nodeName(), "table", "tbody", "tfoot", "thead", "tr")) {
-                                    tb.setFosterInserts(true);
-                                    tb.process(character, InBody);
-                                    tb.setFosterInserts(false);
-                                } else {
-                                    tb.process(character, InBody);
-                                }
-                            } else
-                                tb.insert(character);
-                        }
-                        tb.newPendingTableCharacters();
-                    }
-                    tb.transition(tb.originalState());
-                    return tb.process(t);
-            }
-            return true;
-        }
-    },
-    InCaption {
-        boolean process(Token t, HtmlTreeBuilder tb) {
-            if (t.isEndTag() && t.asEndTag().name().equals("caption")) {
-                Token.EndTag endTag = t.asEndTag();
-                String name = endTag.name();
-                if (!tb.inTableScope(name)) {
-                    tb.error(this);
-                    return false;
-                } else {
-                    tb.generateImpliedEndTags();
-                    if (!tb.currentElement().nodeName().equals("caption"))
-                        tb.error(this);
-                    tb.popStackToClose("caption");
-                    tb.clearFormattingElementsToLastMarker();
-                    tb.transition(InTable);
-                }
-            } else if ((
-                    t.isStartTag() && StringUtil.in(t.asStartTag().name(),
-                            "caption", "col", "colgroup", "tbody", "td", "tfoot", "th", "thead", "tr") ||
-                            t.isEndTag() && t.asEndTag().name().equals("table"))
-                    ) {
-                tb.error(this);
-                boolean processed = tb.process(new Token.EndTag("caption"));
-                if (processed)
-                    return tb.process(t);
-            } else if (t.isEndTag() && StringUtil.in(t.asEndTag().name(),
-                    "body", "col", "colgroup", "html", "tbody", "td", "tfoot", "th", "thead", "tr")) {
-                tb.error(this);
-                return false;
-            } else {
-                return tb.process(t, InBody);
-            }
-            return true;
-        }
-    },
-    InColumnGroup {
-        boolean process(Token t, HtmlTreeBuilder tb) {
-            if (isWhitespace(t)) {
-                tb.insert(t.asCharacter());
-                return true;
-            }
-            switch (t.type) {
-                case Comment:
-                    tb.insert(t.asComment());
-                    break;
-                case Doctype:
-                    tb.error(this);
-                    break;
-                case StartTag:
-                    Token.StartTag startTag = t.asStartTag();
-                    String name = startTag.name();
-                    if (name.equals("html"))
-                        return tb.process(t, InBody);
-                    else if (name.equals("col"))
-                        tb.insertEmpty(startTag);
-                    else
-                        return anythingElse(t, tb);
-                    break;
-                case EndTag:
-                    Token.EndTag endTag = t.asEndTag();
-                    name = endTag.name();
-                    if (name.equals("colgroup")) {
-                        if (tb.currentElement().nodeName().equals("html")) { // frag case
-                            tb.error(this);
-                            return false;
-                        } else {
-                            tb.pop();
-                            tb.transition(InTable);
-                        }
-                    } else
-                        return anythingElse(t, tb);
-                    break;
-                case EOF:
-                    if (tb.currentElement().nodeName().equals("html"))
-                        return true; // stop parsing; frag case
-                    else
-                        return anythingElse(t, tb);
-                default:
-                    return anythingElse(t, tb);
-            }
-            return true;
-        }
-
-        private boolean anythingElse(Token t, TreeBuilder tb) {
-            boolean processed = tb.process(new Token.EndTag("colgroup"));
-            if (processed) // only ignored in frag case
-                return tb.process(t);
-            return true;
-        }
-    },
-    InTableBody {
-        boolean process(Token t, HtmlTreeBuilder tb) {
-            switch (t.type) {
-                case StartTag:
-                    Token.StartTag startTag = t.asStartTag();
-                    String name = startTag.name();
-                    if (name.equals("tr")) {
-                        tb.clearStackToTableBodyContext();
-                        tb.insert(startTag);
-                        tb.transition(InRow);
-                    } else if (StringUtil.in(name, "th", "td")) {
-                        tb.error(this);
-                        tb.process(new Token.StartTag("tr"));
-                        return tb.process(startTag);
-                    } else if (StringUtil.in(name, "caption", "col", "colgroup", "tbody", "tfoot", "thead")) {
-                        return exitTableBody(t, tb);
-                    } else
-                        return anythingElse(t, tb);
-                    break;
-                case EndTag:
-                    Token.EndTag endTag = t.asEndTag();
-                    name = endTag.name();
-                    if (StringUtil.in(name, "tbody", "tfoot", "thead")) {
-                        if (!tb.inTableScope(name)) {
-                            tb.error(this);
-                            return false;
-                        } else {
-                            tb.clearStackToTableBodyContext();
-                            tb.pop();
-                            tb.transition(InTable);
-                        }
-                    } else if (name.equals("table")) {
-                        return exitTableBody(t, tb);
-                    } else if (StringUtil.in(name, "body", "caption", "col", "colgroup", "html", "td", "th", "tr")) {
-                        tb.error(this);
-                        return false;
-                    } else
-                        return anythingElse(t, tb);
-                    break;
-                default:
-                    return anythingElse(t, tb);
-            }
-            return true;
-        }
-
-        private boolean exitTableBody(Token t, HtmlTreeBuilder tb) {
-            if (!(tb.inTableScope("tbody") || tb.inTableScope("thead") || tb.inScope("tfoot"))) {
-                // frag case
-                tb.error(this);
-                return false;
-            }
-            tb.clearStackToTableBodyContext();
-            tb.process(new Token.EndTag(tb.currentElement().nodeName())); // tbody, tfoot, thead
-            return tb.process(t);
-        }
-
-        private boolean anythingElse(Token t, HtmlTreeBuilder tb) {
-            return tb.process(t, InTable);
-        }
-    },
-    InRow {
-        boolean process(Token t, HtmlTreeBuilder tb) {
-            if (t.isStartTag()) {
-                Token.StartTag startTag = t.asStartTag();
-                String name = startTag.name();
-
-                if (StringUtil.in(name, "th", "td")) {
-                    tb.clearStackToTableRowContext();
-                    tb.insert(startTag);
-                    tb.transition(InCell);
-                    tb.insertMarkerToFormattingElements();
-                } else if (StringUtil.in(name, "caption", "col", "colgroup", "tbody", "tfoot", "thead", "tr")) {
-                    return handleMissingTr(t, tb);
-                } else {
-                    return anythingElse(t, tb);
-                }
-            } else if (t.isEndTag()) {
-                Token.EndTag endTag = t.asEndTag();
-                String name = endTag.name();
-
-                if (name.equals("tr")) {
-                    if (!tb.inTableScope(name)) {
-                        tb.error(this); // frag
-                        return false;
-                    }
-                    tb.clearStackToTableRowContext();
-                    tb.pop(); // tr
-                    tb.transition(InTableBody);
-                } else if (name.equals("table")) {
-                    return handleMissingTr(t, tb);
-                } else if (StringUtil.in(name, "tbody", "tfoot", "thead")) {
-                    if (!tb.inTableScope(name)) {
-                        tb.error(this);
-                        return false;
-                    }
-                    tb.process(new Token.EndTag("tr"));
-                    return tb.process(t);
-                } else if (StringUtil.in(name, "body", "caption", "col", "colgroup", "html", "td", "th")) {
-                    tb.error(this);
-                    return false;
-                } else {
-                    return anythingElse(t, tb);
-                }
-            } else {
-                return anythingElse(t, tb);
-            }
-            return true;
-        }
-
-        private boolean anythingElse(Token t, HtmlTreeBuilder tb) {
-            return tb.process(t, InTable);
-        }
-
-        private boolean handleMissingTr(Token t, TreeBuilder tb) {
-            boolean processed = tb.process(new Token.EndTag("tr"));
-            if (processed)
-                return tb.process(t);
-            else
-                return false;
-        }
-    },
-    InCell {
-        boolean process(Token t, HtmlTreeBuilder tb) {
-            if (t.isEndTag()) {
-                Token.EndTag endTag = t.asEndTag();
-                String name = endTag.name();
-
-                if (StringUtil.in(name, "td", "th")) {
-                    if (!tb.inTableScope(name)) {
-                        tb.error(this);
-                        tb.transition(InRow); // might not be in scope if empty: <td /> and processing fake end tag
-                        return false;
-                    }
-                    tb.generateImpliedEndTags();
-                    if (!tb.currentElement().nodeName().equals(name))
-                        tb.error(this);
-                    tb.popStackToClose(name);
-                    tb.clearFormattingElementsToLastMarker();
-                    tb.transition(InRow);
-                } else if (StringUtil.in(name, "body", "caption", "col", "colgroup", "html")) {
-                    tb.error(this);
-                    return false;
-                } else if (StringUtil.in(name, "table", "tbody", "tfoot", "thead", "tr")) {
-                    if (!tb.inTableScope(name)) {
-                        tb.error(this);
-                        return false;
-                    }
-                    closeCell(tb);
-                    return tb.process(t);
-                } else {
-                    return anythingElse(t, tb);
-                }
-            } else if (t.isStartTag() &&
-                    StringUtil.in(t.asStartTag().name(),
-                            "caption", "col", "colgroup", "tbody", "td", "tfoot", "th", "thead", "tr")) {
-                if (!(tb.inTableScope("td") || tb.inTableScope("th"))) {
-                    tb.error(this);
-                    return false;
-                }
-                closeCell(tb);
-                return tb.process(t);
-            } else {
-                return anythingElse(t, tb);
-            }
-            return true;
-        }
-
-        private boolean anythingElse(Token t, HtmlTreeBuilder tb) {
-            return tb.process(t, InBody);
-        }
-
-        private void closeCell(HtmlTreeBuilder tb) {
-            if (tb.inTableScope("td"))
-                tb.process(new Token.EndTag("td"));
-            else
-                tb.process(new Token.EndTag("th")); // only here if th or td in scope
-        }
-    },
-    InSelect {
-        boolean process(Token t, HtmlTreeBuilder tb) {
-            switch (t.type) {
-                case Character:
-                    Token.Character c = t.asCharacter();
-                    if (c.getData().equals(nullString)) {
-                        tb.error(this);
-                        return false;
-                    } else {
-                        tb.insert(c);
-                    }
-                    break;
-                case Comment:
-                    tb.insert(t.asComment());
-                    break;
-                case Doctype:
-                    tb.error(this);
-                    return false;
-                case StartTag:
-                    Token.StartTag start = t.asStartTag();
-                    String name = start.name();
-                    if (name.equals("html"))
-                        return tb.process(start, InBody);
-                    else if (name.equals("option")) {
-                        tb.process(new Token.EndTag("option"));
-                        tb.insert(start);
-                    } else if (name.equals("optgroup")) {
-                        if (tb.currentElement().nodeName().equals("option"))
-                            tb.process(new Token.EndTag("option"));
-                        else if (tb.currentElement().nodeName().equals("optgroup"))
-                            tb.process(new Token.EndTag("optgroup"));
-                        tb.insert(start);
-                    } else if (name.equals("select")) {
-                        tb.error(this);
-                        return tb.process(new Token.EndTag("select"));
-                    } else if (StringUtil.in(name, "input", "keygen", "textarea")) {
-                        tb.error(this);
-                        if (!tb.inSelectScope("select"))
-                            return false; // frag
-                        tb.process(new Token.EndTag("select"));
-                        return tb.process(start);
-                    } else if (name.equals("script")) {
-                        return tb.process(t, InHead);
-                    } else {
-                        return anythingElse(t, tb);
-                    }
-                    break;
-                case EndTag:
-                    Token.EndTag end = t.asEndTag();
-                    name = end.name();
-                    if (name.equals("optgroup")) {
-                        if (tb.currentElement().nodeName().equals("option") && tb.aboveOnStack(tb.currentElement()) != null && tb.aboveOnStack(tb.currentElement()).nodeName().equals("optgroup"))
-                            tb.process(new Token.EndTag("option"));
-                        if (tb.currentElement().nodeName().equals("optgroup"))
-                            tb.pop();
-                        else
-                            tb.error(this);
-                    } else if (name.equals("option")) {
-                        if (tb.currentElement().nodeName().equals("option"))
-                            tb.pop();
-                        else
-                            tb.error(this);
-                    } else if (name.equals("select")) {
-                        if (!tb.inSelectScope(name)) {
-                            tb.error(this);
-                            return false;
-                        } else {
-                            tb.popStackToClose(name);
-                            tb.resetInsertionMode();
-                        }
-                    } else
-                        return anythingElse(t, tb);
-                    break;
-                case EOF:
-                    if (!tb.currentElement().nodeName().equals("html"))
-                        tb.error(this);
-                    break;
-                default:
-                    return anythingElse(t, tb);
-            }
-            return true;
-        }
-
-        private boolean anythingElse(Token t, HtmlTreeBuilder tb) {
-            tb.error(this);
-            return false;
-        }
-    },
-    InSelectInTable {
-        boolean process(Token t, HtmlTreeBuilder tb) {
-            if (t.isStartTag() && StringUtil.in(t.asStartTag().name(), "caption", "table", "tbody", "tfoot", "thead", "tr", "td", "th")) {
-                tb.error(this);
-                tb.process(new Token.EndTag("select"));
-                return tb.process(t);
-            } else if (t.isEndTag() && StringUtil.in(t.asEndTag().name(), "caption", "table", "tbody", "tfoot", "thead", "tr", "td", "th")) {
-                tb.error(this);
-                if (tb.inTableScope(t.asEndTag().name())) {
-                    tb.process(new Token.EndTag("select"));
-                    return (tb.process(t));
-                } else
-                    return false;
-            } else {
-                return tb.process(t, InSelect);
-            }
-        }
-    },
-    AfterBody {
-        boolean process(Token t, HtmlTreeBuilder tb) {
-            if (isWhitespace(t)) {
-                return tb.process(t, InBody);
-            } else if (t.isComment()) {
-                tb.insert(t.asComment()); // into html node
-            } else if (t.isDoctype()) {
-                tb.error(this);
-                return false;
-            } else if (t.isStartTag() && t.asStartTag().name().equals("html")) {
-                return tb.process(t, InBody);
-            } else if (t.isEndTag() && t.asEndTag().name().equals("html")) {
-                if (tb.isFragmentParsing()) {
-                    tb.error(this);
-                    return false;
-                } else {
-                    tb.transition(AfterAfterBody);
-                }
-            } else if (t.isEOF()) {
-                // chillax! we're done
-            } else {
-                tb.error(this);
-                tb.transition(InBody);
-                return tb.process(t);
-            }
-            return true;
-        }
-    },
-    InFrameset {
-        boolean process(Token t, HtmlTreeBuilder tb) {
-            if (isWhitespace(t)) {
-                tb.insert(t.asCharacter());
-            } else if (t.isComment()) {
-                tb.insert(t.asComment());
-            } else if (t.isDoctype()) {
-                tb.error(this);
-                return false;
-            } else if (t.isStartTag()) {
-                Token.StartTag start = t.asStartTag();
-                String name = start.name();
-                if (name.equals("html")) {
-                    return tb.process(start, InBody);
-                } else if (name.equals("frameset")) {
-                    tb.insert(start);
-                } else if (name.equals("frame")) {
-                    tb.insertEmpty(start);
-                } else if (name.equals("noframes")) {
-                    return tb.process(start, InHead);
-                } else {
-                    tb.error(this);
-                    return false;
-                }
-            } else if (t.isEndTag() && t.asEndTag().name().equals("frameset")) {
-                if (tb.currentElement().nodeName().equals("html")) { // frag
-                    tb.error(this);
-                    return false;
-                } else {
-                    tb.pop();
-                    if (!tb.isFragmentParsing() && !tb.currentElement().nodeName().equals("frameset")) {
-                        tb.transition(AfterFrameset);
-                    }
-                }
-            } else if (t.isEOF()) {
-                if (!tb.currentElement().nodeName().equals("html")) {
-                    tb.error(this);
-                    return true;
-                }
-            } else {
-                tb.error(this);
-                return false;
-            }
-            return true;
-        }
-    },
-    AfterFrameset {
-        boolean process(Token t, HtmlTreeBuilder tb) {
-            if (isWhitespace(t)) {
-                tb.insert(t.asCharacter());
-            } else if (t.isComment()) {
-                tb.insert(t.asComment());
-            } else if (t.isDoctype()) {
-                tb.error(this);
-                return false;
-            } else if (t.isStartTag() && t.asStartTag().name().equals("html")) {
-                return tb.process(t, InBody);
-            } else if (t.isEndTag() && t.asEndTag().name().equals("html")) {
-                tb.transition(AfterAfterFrameset);
-            } else if (t.isStartTag() && t.asStartTag().name().equals("noframes")) {
-                return tb.process(t, InHead);
-            } else if (t.isEOF()) {
-                // cool your heels, we're complete
-            } else {
-                tb.error(this);
-                return false;
-            }
-            return true;
-        }
-    },
-    AfterAfterBody {
-        boolean process(Token t, HtmlTreeBuilder tb) {
-            if (t.isComment()) {
-                tb.insert(t.asComment());
-            } else if (t.isDoctype() || isWhitespace(t) || (t.isStartTag() && t.asStartTag().name().equals("html"))) {
-                return tb.process(t, InBody);
-            } else if (t.isEOF()) {
-                // nice work chuck
-            } else {
-                tb.error(this);
-                tb.transition(InBody);
-                return tb.process(t);
-            }
-            return true;
-        }
-    },
-    AfterAfterFrameset {
-        boolean process(Token t, HtmlTreeBuilder tb) {
-            if (t.isComment()) {
-                tb.insert(t.asComment());
-            } else if (t.isDoctype() || isWhitespace(t) || (t.isStartTag() && t.asStartTag().name().equals("html"))) {
-                return tb.process(t, InBody);
-            } else if (t.isEOF()) {
-                // nice work chuck
-            } else if (t.isStartTag() && t.asStartTag().name().equals("noframes")) {
-                return tb.process(t, InHead);
-            } else {
-                tb.error(this);
-                return false;
-            }
-            return true;
-        }
-    },
-    ForeignContent {
-        boolean process(Token t, HtmlTreeBuilder tb) {
-            return true;
-            // todo: implement. Also; how do we get here?
-        }
-    };
-
-    private static String nullString = String.valueOf('\u0000');
-
-    abstract boolean process(Token t, HtmlTreeBuilder tb);
-
-    private static boolean isWhitespace(Token t) {
-        if (t.isCharacter()) {
-            String data = t.asCharacter().getData();
-            // todo: this checks more than spec - "\t", "\n", "\f", "\r", " "
-            for (int i = 0; i < data.length(); i++) {
-                char c = data.charAt(i);
-                if (!StringUtil.isWhitespace(c))
-                    return false;
-            }
-            return true;
-        }
-        return false;
-    }
-
-    private static void handleRcData(Token.StartTag startTag, HtmlTreeBuilder tb) {
-        tb.insert(startTag);
-        tb.tokeniser.transition(TokeniserState.Rcdata);
-        tb.markInsertionMode();
-        tb.transition(Text);
-    }
-
-    private static void handleRawtext(Token.StartTag startTag, HtmlTreeBuilder tb) {
-        tb.insert(startTag);
-        tb.tokeniser.transition(TokeniserState.Rawtext);
-        tb.markInsertionMode();
-        tb.transition(Text);
-    }
-}
diff --git a/src/org/jsoup/parser/ParseError.java b/src/org/jsoup/parser/ParseError.java
deleted file mode 100644
index dfa090051b..0000000000
--- a/src/org/jsoup/parser/ParseError.java
+++ /dev/null
@@ -1,40 +0,0 @@
-package org.jsoup.parser;
-
-/**
- * A Parse Error records an error in the input HTML that occurs in either the tokenisation or the tree building phase.
- */
-public class ParseError {
-    private int pos;
-    private String errorMsg;
-
-    ParseError(int pos, String errorMsg) {
-        this.pos = pos;
-        this.errorMsg = errorMsg;
-    }
-
-    ParseError(int pos, String errorFormat, Object... args) {
-        this.errorMsg = String.format(errorFormat, args);
-        this.pos = pos;
-    }
-
-    /**
-     * Retrieve the error message.
-     * @return the error message.
-     */
-    public String getErrorMessage() {
-        return errorMsg;
-    }
-
-    /**
-     * Retrieves the offset of the error.
-     * @return error offset within input
-     */
-    public int getPosition() {
-        return pos;
-    }
-
-    @Override
-    public String toString() {
-        return pos + ": " + errorMsg;
-    }
-}
diff --git a/src/org/jsoup/parser/ParseErrorList.java b/src/org/jsoup/parser/ParseErrorList.java
deleted file mode 100644
index 3824ffbc4e..0000000000
--- a/src/org/jsoup/parser/ParseErrorList.java
+++ /dev/null
@@ -1,34 +0,0 @@
-package org.jsoup.parser;
-
-import java.util.ArrayList;
-
-/**
- * A container for ParseErrors.
- * 
- * @author Jonathan Hedley
- */
-class ParseErrorList extends ArrayList<ParseError>{
-    private static final int INITIAL_CAPACITY = 16;
-    private final int maxSize;
-    
-    ParseErrorList(int initialCapacity, int maxSize) {
-        super(initialCapacity);
-        this.maxSize = maxSize;
-    }
-    
-    boolean canAddError() {
-        return size() < maxSize;
-    }
-
-    int getMaxSize() {
-        return maxSize;
-    }
-
-    static ParseErrorList noTracking() {
-        return new ParseErrorList(0, 0);
-    }
-    
-    static ParseErrorList tracking(int maxSize) {
-        return new ParseErrorList(INITIAL_CAPACITY, maxSize);
-    }
-}
diff --git a/src/org/jsoup/parser/Parser.java b/src/org/jsoup/parser/Parser.java
deleted file mode 100644
index 2236219c06..0000000000
--- a/src/org/jsoup/parser/Parser.java
+++ /dev/null
@@ -1,157 +0,0 @@
-package org.jsoup.parser;
-
-import org.jsoup.nodes.Document;
-import org.jsoup.nodes.Element;
-import org.jsoup.nodes.Node;
-
-import java.util.List;
-
-/**
- * Parses HTML into a {@link org.jsoup.nodes.Document}. Generally best to use one of the  more convenient parse methods
- * in {@link org.jsoup.Jsoup}.
- */
-public class Parser {
-    private static final int DEFAULT_MAX_ERRORS = 0; // by default, error tracking is disabled.
-    
-    private TreeBuilder treeBuilder;
-    private int maxErrors = DEFAULT_MAX_ERRORS;
-    private ParseErrorList errors;
-
-    /**
-     * Create a new Parser, using the specified TreeBuilder
-     * @param treeBuilder TreeBuilder to use to parse input into Documents.
-     */
-    public Parser(TreeBuilder treeBuilder) {
-        this.treeBuilder = treeBuilder;
-    }
-    
-    public Document parseInput(String html, String baseUri) {
-        errors = isTrackErrors() ? ParseErrorList.tracking(maxErrors) : ParseErrorList.noTracking();
-        Document doc = treeBuilder.parse(html, baseUri, errors);
-        return doc;
-    }
-
-    // gets & sets
-    /**
-     * Get the TreeBuilder currently in use.
-     * @return current TreeBuilder.
-     */
-    public TreeBuilder getTreeBuilder() {
-        return treeBuilder;
-    }
-
-    /**
-     * Update the TreeBuilder used when parsing content.
-     * @param treeBuilder current TreeBuilder
-     * @return this, for chaining
-     */
-    public Parser setTreeBuilder(TreeBuilder treeBuilder) {
-        this.treeBuilder = treeBuilder;
-        return this;
-    }
-
-    /**
-     * Check if parse error tracking is enabled.
-     * @return current track error state.
-     */
-    public boolean isTrackErrors() {
-        return maxErrors > 0;
-    }
-
-    /**
-     * Enable or disable parse error tracking for the next parse.
-     * @param maxErrors the maximum number of errors to track. Set to 0 to disable.
-     * @return this, for chaining
-     */
-    public Parser setTrackErrors(int maxErrors) {
-        this.maxErrors = maxErrors;
-        return this;
-    }
-
-    /**
-     * Retrieve the parse errors, if any, from the last parse.
-     * @return list of parse errors, up to the size of the maximum errors tracked.
-     */
-    public List<ParseError> getErrors() {
-        return errors;
-    }
-
-    // static parse functions below
-    /**
-     * Parse HTML into a Document.
-     *
-     * @param html HTML to parse
-     * @param baseUri base URI of document (i.e. original fetch location), for resolving relative URLs.
-     *
-     * @return parsed Document
-     */
-    public static Document parse(String html, String baseUri) {
-        TreeBuilder treeBuilder = new HtmlTreeBuilder();
-        return treeBuilder.parse(html, baseUri, ParseErrorList.noTracking());
-    }
-
-    /**
-     * Parse a fragment of HTML into a list of nodes. The context element, if supplied, supplies parsing context.
-     *
-     * @param fragmentHtml the fragment of HTML to parse
-     * @param context (optional) the element that this HTML fragment is being parsed for (i.e. for inner HTML). This
-     * provides stack context (for implicit element creation).
-     * @param baseUri base URI of document (i.e. original fetch location), for resolving relative URLs.
-     *
-     * @return list of nodes parsed from the input HTML. Note that the context element, if supplied, is not modified.
-     */
-    public static List<Node> parseFragment(String fragmentHtml, Element context, String baseUri) {
-        HtmlTreeBuilder treeBuilder = new HtmlTreeBuilder();
-        return treeBuilder.parseFragment(fragmentHtml, context, baseUri, ParseErrorList.noTracking());
-    }
-
-    /**
-     * Parse a fragment of HTML into the {@code body} of a Document.
-     *
-     * @param bodyHtml fragment of HTML
-     * @param baseUri base URI of document (i.e. original fetch location), for resolving relative URLs.
-     *
-     * @return Document, with empty head, and HTML parsed into body
-     */
-    public static Document parseBodyFragment(String bodyHtml, String baseUri) {
-        Document doc = Document.createShell(baseUri);
-        Element body = doc.body();
-        List<Node> nodeList = parseFragment(bodyHtml, body, baseUri);
-        Node[] nodes = nodeList.toArray(new Node[nodeList.size()]); // the node list gets modified when re-parented
-        for (Node node : nodes) {
-            body.appendChild(node);
-        }
-        return doc;
-    }
-
-    /**
-     * @param bodyHtml HTML to parse
-     * @param baseUri baseUri base URI of document (i.e. original fetch location), for resolving relative URLs.
-     *
-     * @return parsed Document
-     * @deprecated Use {@link #parseBodyFragment} or {@link #parseFragment} instead.
-     */
-    public static Document parseBodyFragmentRelaxed(String bodyHtml, String baseUri) {
-        return parse(bodyHtml, baseUri);
-    }
-    
-    // builders
-
-    /**
-     * Create a new HTML parser. This parser treats input as HTML5, and enforces the creation of a normalised document,
-     * based on a knowledge of the semantics of the incoming tags.
-     * @return a new HTML parser.
-     */
-    public static Parser htmlParser() {
-        return new Parser(new HtmlTreeBuilder());
-    }
-
-    /**
-     * Create a new XML parser. This parser assumes no knowledge of the incoming tags and does not treat it as HTML,
-     * rather creates a simple tree directly from the input.
-     * @return a new simple XML parser.
-     */
-    public static Parser xmlParser() {
-        return new Parser(new XmlTreeBuilder());
-    }
-}
diff --git a/src/org/jsoup/parser/Tag.java b/src/org/jsoup/parser/Tag.java
deleted file mode 100644
index 40b7557b39..0000000000
--- a/src/org/jsoup/parser/Tag.java
+++ /dev/null
@@ -1,262 +0,0 @@
-package org.jsoup.parser;
-
-import org.jsoup.helper.Validate;
-
-import java.util.HashMap;
-import java.util.Map;
-
-/**
- * HTML Tag capabilities.
- *
- * @author Jonathan Hedley, jonathan@hedley.net
- */
-public class Tag {
-    private static final Map<String, Tag> tags = new HashMap<String, Tag>(); // map of known tags
-
-    private String tagName;
-    private boolean isBlock = true; // block or inline
-    private boolean formatAsBlock = true; // should be formatted as a block
-    private boolean canContainBlock = true; // Can this tag hold block level tags?
-    private boolean canContainInline = true; // only pcdata if not
-    private boolean empty = false; // can hold nothing; e.g. img
-    private boolean selfClosing = false; // can self close (<foo />). used for unknown tags that self close, without forcing them as empty.
-    private boolean preserveWhitespace = false; // for pre, textarea, script etc
-
-    private Tag(String tagName) {
-        this.tagName = tagName.toLowerCase();
-    }
-
-    /**
-     * Get this tag's name.
-     *
-     * @return the tag's name
-     */
-    public String getName() {
-        return tagName;
-    }
-
-    /**
-     * Get a Tag by name. If not previously defined (unknown), returns a new generic tag, that can do anything.
-     * <p/>
-     * Pre-defined tags (P, DIV etc) will be ==, but unknown tags are not registered and will only .equals().
-     *
-     * @param tagName Name of tag, e.g. "p". Case insensitive.
-     * @return The tag, either defined or new generic.
-     */
-    public static Tag valueOf(String tagName) {
-        Validate.notNull(tagName);
-        tagName = tagName.trim().toLowerCase();
-        Validate.notEmpty(tagName);
-
-        synchronized (tags) {
-            Tag tag = tags.get(tagName);
-            if (tag == null) {
-                // not defined: create default; go anywhere, do anything! (incl be inside a <p>)
-                tag = new Tag(tagName);
-                tag.isBlock = false;
-                tag.canContainBlock = true;
-            }
-            return tag;
-        }
-    }
-
-    /**
-     * Gets if this is a block tag.
-     *
-     * @return if block tag
-     */
-    public boolean isBlock() {
-        return isBlock;
-    }
-
-    /**
-     * Gets if this tag should be formatted as a block (or as inline)
-     *
-     * @return if should be formatted as block or inline
-     */
-    public boolean formatAsBlock() {
-        return formatAsBlock;
-    }
-
-    /**
-     * Gets if this tag can contain block tags.
-     *
-     * @return if tag can contain block tags
-     */
-    public boolean canContainBlock() {
-        return canContainBlock;
-    }
-
-    /**
-     * Gets if this tag is an inline tag.
-     *
-     * @return if this tag is an inline tag.
-     */
-    public boolean isInline() {
-        return !isBlock;
-    }
-
-    /**
-     * Gets if this tag is a data only tag.
-     *
-     * @return if this tag is a data only tag
-     */
-    public boolean isData() {
-        return !canContainInline && !isEmpty();
-    }
-
-    /**
-     * Get if this is an empty tag
-     *
-     * @return if this is an empty tag
-     */
-    public boolean isEmpty() {
-        return empty;
-    }
-
-    /**
-     * Get if this tag is self closing.
-     *
-     * @return if this tag should be output as self closing.
-     */
-    public boolean isSelfClosing() {
-        return empty || selfClosing;
-    }
-
-    /**
-     * Get if this is a pre-defined tag, or was auto created on parsing.
-     *
-     * @return if a known tag
-     */
-    public boolean isKnownTag() {
-        return tags.containsKey(tagName);
-    }
-
-    /**
-     * Check if this tagname is a known tag.
-     *
-     * @param tagName name of tag
-     * @return if known HTML tag
-     */
-    public static boolean isKnownTag(String tagName) {
-        return tags.containsKey(tagName);
-    }
-
-    /**
-     * Get if this tag should preserve whitespace within child text nodes.
-     *
-     * @return if preserve whitepace
-     */
-    public boolean preserveWhitespace() {
-        return preserveWhitespace;
-    }
-
-    Tag setSelfClosing() {
-        selfClosing = true;
-        return this;
-    }
-
-    @Override
-    public boolean equals(Object o) {
-        if (this == o) return true;
-        if (!(o instanceof Tag)) return false;
-
-        Tag tag = (Tag) o;
-
-        if (canContainBlock != tag.canContainBlock) return false;
-        if (canContainInline != tag.canContainInline) return false;
-        if (empty != tag.empty) return false;
-        if (formatAsBlock != tag.formatAsBlock) return false;
-        if (isBlock != tag.isBlock) return false;
-        if (preserveWhitespace != tag.preserveWhitespace) return false;
-        if (selfClosing != tag.selfClosing) return false;
-        if (!tagName.equals(tag.tagName)) return false;
-
-        return true;
-    }
-
-    @Override
-    public int hashCode() {
-        int result = tagName.hashCode();
-        result = 31 * result + (isBlock ? 1 : 0);
-        result = 31 * result + (formatAsBlock ? 1 : 0);
-        result = 31 * result + (canContainBlock ? 1 : 0);
-        result = 31 * result + (canContainInline ? 1 : 0);
-        result = 31 * result + (empty ? 1 : 0);
-        result = 31 * result + (selfClosing ? 1 : 0);
-        result = 31 * result + (preserveWhitespace ? 1 : 0);
-        return result;
-    }
-
-    public String toString() {
-        return tagName;
-    }
-
-    // internal static initialisers:
-    // prepped from http://www.w3.org/TR/REC-html40/sgml/dtd.html and other sources
-    private static final String[] blockTags = {
-            "html", "head", "body", "frameset", "script", "noscript", "style", "meta", "link", "title", "frame",
-            "noframes", "section", "nav", "aside", "hgroup", "header", "footer", "p", "h1", "h2", "h3", "h4", "h5", "h6",
-            "ul", "ol", "pre", "div", "blockquote", "hr", "address", "figure", "figcaption", "form", "fieldset", "ins",
-            "del", "dl", "dt", "dd", "li", "table", "caption", "thead", "tfoot", "tbody", "colgroup", "col", "tr", "th",
-            "td", "video", "audio", "canvas", "details", "menu", "plaintext"
-    };
-    private static final String[] inlineTags = {
-            "object", "base", "font", "tt", "i", "b", "u", "big", "small", "em", "strong", "dfn", "code", "samp", "kbd",
-            "var", "cite", "abbr", "time", "acronym", "mark", "ruby", "rt", "rp", "a", "img", "br", "wbr", "map", "q",
-            "sub", "sup", "bdo", "iframe", "embed", "span", "input", "select", "textarea", "label", "button", "optgroup",
-            "option", "legend", "datalist", "keygen", "output", "progress", "meter", "area", "param", "source", "track",
-            "summary", "command", "device"
-    };
-    private static final String[] emptyTags = {
-            "meta", "link", "base", "frame", "img", "br", "wbr", "embed", "hr", "input", "keygen", "col", "command",
-            "device"
-    };
-    private static final String[] formatAsInlineTags = {
-            "title", "a", "p", "h1", "h2", "h3", "h4", "h5", "h6", "pre", "address", "li", "th", "td", "script", "style"
-    };
-    private static final String[] preserveWhitespaceTags = {"pre", "plaintext", "title"};
-
-    static {
-        // creates
-        for (String tagName : blockTags) {
-            Tag tag = new Tag(tagName);
-            register(tag);
-        }
-        for (String tagName : inlineTags) {
-            Tag tag = new Tag(tagName);
-            tag.isBlock = false;
-            tag.canContainBlock = false;
-            tag.formatAsBlock = false;
-            register(tag);
-        }
-
-        // mods:
-        for (String tagName : emptyTags) {
-            Tag tag = tags.get(tagName);
-            Validate.notNull(tag);
-            tag.canContainBlock = false;
-            tag.canContainInline = false;
-            tag.empty = true;
-        }
-
-        for (String tagName : formatAsInlineTags) {
-            Tag tag = tags.get(tagName);
-            Validate.notNull(tag);
-            tag.formatAsBlock = false;
-        }
-
-        for (String tagName : preserveWhitespaceTags) {
-            Tag tag = tags.get(tagName);
-            Validate.notNull(tag);
-            tag.preserveWhitespace = true;
-        }
-    }
-
-    private static Tag register(Tag tag) {
-        synchronized (tags) {
-            tags.put(tag.tagName, tag);
-        }
-        return tag;
-    }
-}
diff --git a/src/org/jsoup/parser/Token.java b/src/org/jsoup/parser/Token.java
deleted file mode 100644
index 9f4f9e250d..0000000000
--- a/src/org/jsoup/parser/Token.java
+++ /dev/null
@@ -1,252 +0,0 @@
-package org.jsoup.parser;
-
-import org.jsoup.helper.Validate;
-import org.jsoup.nodes.Attribute;
-import org.jsoup.nodes.Attributes;
-
-/**
- * Parse tokens for the Tokeniser.
- */
-abstract class Token {
-    TokenType type;
-
-    private Token() {
-    }
-    
-    String tokenType() {
-        return this.getClass().getSimpleName();
-    }
-
-    static class Doctype extends Token {
-        final StringBuilder name = new StringBuilder();
-        final StringBuilder publicIdentifier = new StringBuilder();
-        final StringBuilder systemIdentifier = new StringBuilder();
-        boolean forceQuirks = false;
-
-        Doctype() {
-            type = TokenType.Doctype;
-        }
-
-        String getName() {
-            return name.toString();
-        }
-
-        String getPublicIdentifier() {
-            return publicIdentifier.toString();
-        }
-
-        public String getSystemIdentifier() {
-            return systemIdentifier.toString();
-        }
-
-        public boolean isForceQuirks() {
-            return forceQuirks;
-        }
-    }
-
-    static abstract class Tag extends Token {
-        protected String tagName;
-        private String pendingAttributeName;
-        private String pendingAttributeValue;
-
-        boolean selfClosing = false;
-        Attributes attributes = new Attributes(); // todo: allow nodes to not have attributes
-
-        void newAttribute() {
-            if (pendingAttributeName != null) {
-                if (pendingAttributeValue == null)
-                    pendingAttributeValue = "";
-                Attribute attribute = new Attribute(pendingAttributeName, pendingAttributeValue);
-                attributes.put(attribute);
-            }
-            pendingAttributeName = null;
-            pendingAttributeValue = null;
-        }
-
-        void finaliseTag() {
-            // finalises for emit
-            if (pendingAttributeName != null) {
-                // todo: check if attribute name exists; if so, drop and error
-                newAttribute();
-            }
-        }
-
-        String name() {
-            Validate.isFalse(tagName.length() == 0);
-            return tagName;
-        }
-
-        Tag name(String name) {
-            tagName = name;
-            return this;
-        }
-
-        boolean isSelfClosing() {
-            return selfClosing;
-        }
-
-        @SuppressWarnings({"TypeMayBeWeakened"})
-        Attributes getAttributes() {
-            return attributes;
-        }
-
-        // these appenders are rarely hit in not null state-- caused by null chars.
-        void appendTagName(String append) {
-            tagName = tagName == null ? append : tagName.concat(append);
-        }
-
-        void appendTagName(char append) {
-            appendTagName(String.valueOf(append));
-        }
-
-        void appendAttributeName(String append) {
-            pendingAttributeName = pendingAttributeName == null ? append : pendingAttributeName.concat(append);
-        }
-
-        void appendAttributeName(char append) {
-            appendAttributeName(String.valueOf(append));
-        }
-
-        void appendAttributeValue(String append) {
-            pendingAttributeValue = pendingAttributeValue == null ? append : pendingAttributeValue.concat(append);
-        }
-
-        void appendAttributeValue(char append) {
-            appendAttributeValue(String.valueOf(append));
-        }
-    }
-
-    static class StartTag extends Tag {
-        StartTag() {
-            super();
-            type = TokenType.StartTag;
-        }
-
-        StartTag(String name) {
-            this();
-            this.tagName = name;
-        }
-
-        StartTag(String name, Attributes attributes) {
-            this();
-            this.tagName = name;
-            this.attributes = attributes;
-        }
-
-        @Override
-        public String toString() {
-            return "<" + name() + " " + attributes.toString() + ">";
-        }
-    }
-
-    static class EndTag extends Tag{
-        EndTag() {
-            super();
-            type = TokenType.EndTag;
-        }
-
-        EndTag(String name) {
-            this();
-            this.tagName = name;
-        }
-
-        @Override
-        public String toString() {
-            return "</" + name() + " " + attributes.toString() + ">";
-        }
-    }
-
-    static class Comment extends Token {
-        final StringBuilder data = new StringBuilder();
-
-        Comment() {
-            type = TokenType.Comment;
-        }
-
-        String getData() {
-            return data.toString();
-        }
-
-        @Override
-        public String toString() {
-            return "<!--" + getData() + "-->";
-        }
-    }
-
-    static class Character extends Token {
-        private final String data;
-
-        Character(String data) {
-            type = TokenType.Character;
-            this.data = data;
-        }
-
-        String getData() {
-            return data;
-        }
-
-        @Override
-        public String toString() {
-            return getData();
-        }
-    }
-
-    static class EOF extends Token {
-        EOF() {
-            type = Token.TokenType.EOF;
-        }
-    }
-
-    boolean isDoctype() {
-        return type == TokenType.Doctype;
-    }
-
-    Doctype asDoctype() {
-        return (Doctype) this;
-    }
-
-    boolean isStartTag() {
-        return type == TokenType.StartTag;
-    }
-
-    StartTag asStartTag() {
-        return (StartTag) this;
-    }
-
-    boolean isEndTag() {
-        return type == TokenType.EndTag;
-    }
-
-    EndTag asEndTag() {
-        return (EndTag) this;
-    }
-
-    boolean isComment() {
-        return type == TokenType.Comment;
-    }
-
-    Comment asComment() {
-        return (Comment) this;
-    }
-
-    boolean isCharacter() {
-        return type == TokenType.Character;
-    }
-
-    Character asCharacter() {
-        return (Character) this;
-    }
-
-    boolean isEOF() {
-        return type == TokenType.EOF;
-    }
-
-    enum TokenType {
-        Doctype,
-        StartTag,
-        EndTag,
-        Comment,
-        Character,
-        EOF
-    }
-}
diff --git a/src/org/jsoup/parser/TokenQueue.java b/src/org/jsoup/parser/TokenQueue.java
deleted file mode 100644
index a2fdfe621a..0000000000
--- a/src/org/jsoup/parser/TokenQueue.java
+++ /dev/null
@@ -1,393 +0,0 @@
-package org.jsoup.parser;
-
-import org.jsoup.helper.StringUtil;
-import org.jsoup.helper.Validate;
-
-/**
- * A character queue with parsing helpers.
- *
- * @author Jonathan Hedley
- */
-public class TokenQueue {
-    private String queue;
-    private int pos = 0;
-    
-    private static final char ESC = '\\'; // escape char for chomp balanced.
-
-    /**
-     Create a new TokenQueue.
-     @param data string of data to back queue.
-     */
-    public TokenQueue(String data) {
-        Validate.notNull(data);
-        queue = data;
-    }
-
-    /**
-     * Is the queue empty?
-     * @return true if no data left in queue.
-     */
-    public boolean isEmpty() {
-        return remainingLength() == 0;
-    }
-    
-    private int remainingLength() {
-        return queue.length() - pos;
-    }
-
-    /**
-     * Retrieves but does not remove the first character from the queue.
-     * @return First character, or 0 if empty.
-     */
-    public char peek() {
-        return isEmpty() ? 0 : queue.charAt(pos);
-    }
-
-    /**
-     Add a character to the start of the queue (will be the next character retrieved).
-     @param c character to add
-     */
-    public void addFirst(Character c) {
-        addFirst(c.toString());
-    }
-
-    /**
-     Add a string to the start of the queue.
-     @param seq string to add.
-     */
-    public void addFirst(String seq) {
-        // not very performant, but an edge case
-        queue = seq + queue.substring(pos);
-        pos = 0;
-    }
-
-    /**
-     * Tests if the next characters on the queue match the sequence. Case insensitive.
-     * @param seq String to check queue for.
-     * @return true if the next characters match.
-     */
-    public boolean matches(String seq) {
-        return queue.regionMatches(true, pos, seq, 0, seq.length());
-    }
-
-    /**
-     * Case sensitive match test.
-     * @param seq string to case sensitively check for
-     * @return true if matched, false if not
-     */
-    public boolean matchesCS(String seq) {
-        return queue.startsWith(seq, pos);
-    }
-    
-
-    /**
-     Tests if the next characters match any of the sequences. Case insensitive.
-     @param seq list of strings to case insensitively check for
-     @return true of any matched, false if none did
-     */
-    public boolean matchesAny(String... seq) {
-        for (String s : seq) {
-            if (matches(s))
-                return true;
-        }
-        return false;
-    }
-
-    public boolean matchesAny(char... seq) {
-        if (isEmpty())
-            return false;
-
-        for (char c: seq) {
-            if (queue.charAt(pos) == c)
-                return true;
-        }
-        return false;
-    }
-
-    public boolean matchesStartTag() {
-        // micro opt for matching "<x"
-        return (remainingLength() >= 2 && queue.charAt(pos) == '<' && Character.isLetter(queue.charAt(pos+1)));
-    }
-
-    /**
-     * Tests if the queue matches the sequence (as with match), and if they do, removes the matched string from the
-     * queue.
-     * @param seq String to search for, and if found, remove from queue.
-     * @return true if found and removed, false if not found.
-     */
-    public boolean matchChomp(String seq) {
-        if (matches(seq)) {
-            pos += seq.length();
-            return true;
-        } else {
-            return false;
-        }
-    }
-
-    /**
-     Tests if queue starts with a whitespace character.
-     @return if starts with whitespace
-     */
-    public boolean matchesWhitespace() {
-        return !isEmpty() && StringUtil.isWhitespace(queue.charAt(pos));
-    }
-
-    /**
-     Test if the queue matches a word character (letter or digit).
-     @return if matches a word character
-     */
-    public boolean matchesWord() {
-        return !isEmpty() && Character.isLetterOrDigit(queue.charAt(pos));
-    }
-
-    /**
-     * Drops the next character off the queue.
-     */
-    public void advance() {
-        if (!isEmpty()) pos++;
-    }
-
-    /**
-     * Consume one character off queue.
-     * @return first character on queue.
-     */
-    public char consume() {
-        return queue.charAt(pos++);
-    }
-
-    /**
-     * Consumes the supplied sequence of the queue. If the queue does not start with the supplied sequence, will
-     * throw an illegal state exception -- but you should be running match() against that condition.
-     <p>
-     Case insensitive.
-     * @param seq sequence to remove from head of queue.
-     */
-    public void consume(String seq) {
-        if (!matches(seq))
-            throw new IllegalStateException("Queue did not match expected sequence");
-        int len = seq.length();
-        if (len > remainingLength())
-            throw new IllegalStateException("Queue not long enough to consume sequence");
-        
-        pos += len;
-    }
-
-    /**
-     * Pulls a string off the queue, up to but exclusive of the match sequence, or to the queue running out.
-     * @param seq String to end on (and not include in return, but leave on queue). <b>Case sensitive.</b>
-     * @return The matched data consumed from queue.
-     */
-    public String consumeTo(String seq) {
-        int offset = queue.indexOf(seq, pos);
-        if (offset != -1) {
-            String consumed = queue.substring(pos, offset);
-            pos += consumed.length();
-            return consumed;
-        } else {
-            return remainder();
-        }
-    }
-    
-    public String consumeToIgnoreCase(String seq) {
-        int start = pos;
-        String first = seq.substring(0, 1);
-        boolean canScan = first.toLowerCase().equals(first.toUpperCase()); // if first is not cased, use index of
-        while (!isEmpty()) {
-            if (matches(seq))
-                break;
-            
-            if (canScan) {
-                int skip = queue.indexOf(first, pos) - pos;
-                if (skip == 0) // this char is the skip char, but not match, so force advance of pos
-                    pos++;
-                else if (skip < 0) // no chance of finding, grab to end
-                    pos = queue.length();
-                else
-                    pos += skip;
-            }
-            else
-                pos++;
-        }
-
-        String data = queue.substring(start, pos); 
-        return data; 
-    }
-
-    /**
-     Consumes to the first sequence provided, or to the end of the queue. Leaves the terminator on the queue.
-     @param seq any number of terminators to consume to. <b>Case insensitive.</b>
-     @return consumed string   
-     */
-    // todo: method name. not good that consumeTo cares for case, and consume to any doesn't. And the only use for this
-    // is is a case sensitive time...
-    public String consumeToAny(String... seq) {
-        int start = pos;
-        while (!isEmpty() && !matchesAny(seq)) {
-            pos++;
-        }
-
-        String data = queue.substring(start, pos); 
-        return data; 
-    }
-
-    /**
-     * Pulls a string off the queue (like consumeTo), and then pulls off the matched string (but does not return it).
-     * <p>
-     * If the queue runs out of characters before finding the seq, will return as much as it can (and queue will go
-     * isEmpty() == true).
-     * @param seq String to match up to, and not include in return, and to pull off queue. <b>Case sensitive.</b>
-     * @return Data matched from queue.
-     */
-    public String chompTo(String seq) {
-        String data = consumeTo(seq);
-        matchChomp(seq);
-        return data;
-    }
-    
-    public String chompToIgnoreCase(String seq) {
-        String data = consumeToIgnoreCase(seq); // case insensitive scan
-        matchChomp(seq);
-        return data;
-    }
-
-    /**
-     * Pulls a balanced string off the queue. E.g. if queue is "(one (two) three) four", (,) will return "one (two) three",
-     * and leave " four" on the queue. Unbalanced openers and closers can be escaped (with \). Those escapes will be left
-     * in the returned string, which is suitable for regexes (where we need to preserve the escape), but unsuitable for
-     * contains text strings; use unescape for that.
-     * @param open opener
-     * @param close closer
-     * @return data matched from the queue
-     */
-    public String chompBalanced(char open, char close) {
-        StringBuilder accum = new StringBuilder();
-        int depth = 0;
-        char last = 0;
-
-        do {
-            if (isEmpty()) break;
-            Character c = consume();
-            if (last == 0 || last != ESC) {
-                if (c.equals(open))
-                    depth++;
-                else if (c.equals(close))
-                    depth--;
-            }
-
-            if (depth > 0 && last != 0)
-                accum.append(c); // don't include the outer match pair in the return
-            last = c;
-        } while (depth > 0);
-        return accum.toString();
-    }
-    
-    /**
-     * Unescaped a \ escaped string.
-     * @param in backslash escaped string
-     * @return unescaped string
-     */
-    public static String unescape(String in) {
-        StringBuilder out = new StringBuilder();
-        char last = 0;
-        for (char c : in.toCharArray()) {
-            if (c == ESC) {
-                if (last != 0 && last == ESC)
-                    out.append(c);
-            }
-            else 
-                out.append(c);
-            last = c;
-        }
-        return out.toString();
-    }
-
-    /**
-     * Pulls the next run of whitespace characters of the queue.
-     */
-    public boolean consumeWhitespace() {
-        boolean seen = false;
-        while (matchesWhitespace()) {
-            pos++;
-            seen = true;
-        }
-        return seen;
-    }
-
-    /**
-     * Retrieves the next run of word type (letter or digit) off the queue.
-     * @return String of word characters from queue, or empty string if none.
-     */
-    public String consumeWord() {
-        int start = pos;
-        while (matchesWord())
-            pos++;
-        return queue.substring(start, pos);
-    }
-    
-    /**
-     * Consume an tag name off the queue (word or :, _, -)
-     * 
-     * @return tag name
-     */
-    public String consumeTagName() {
-        int start = pos;
-        while (!isEmpty() && (matchesWord() || matchesAny(':', '_', '-')))
-            pos++;
-        
-        return queue.substring(start, pos);
-    }
-    
-    /**
-     * Consume a CSS element selector (tag name, but | instead of : for namespaces, to not conflict with :pseudo selects).
-     * 
-     * @return tag name
-     */
-    public String consumeElementSelector() {
-        int start = pos;
-        while (!isEmpty() && (matchesWord() || matchesAny('|', '_', '-')))
-            pos++;
-        
-        return queue.substring(start, pos);
-    }
-
-    /**
-     Consume a CSS identifier (ID or class) off the queue (letter, digit, -, _)
-     http://www.w3.org/TR/CSS2/syndata.html#value-def-identifier
-     @return identifier
-     */
-    public String consumeCssIdentifier() {
-        int start = pos;
-        while (!isEmpty() && (matchesWord() || matchesAny('-', '_')))
-            pos++;
-
-        return queue.substring(start, pos);
-    }
-
-    /**
-     Consume an attribute key off the queue (letter, digit, -, _, :")
-     @return attribute key
-     */
-    public String consumeAttributeKey() {
-        int start = pos;
-        while (!isEmpty() && (matchesWord() || matchesAny('-', '_', ':')))
-            pos++;
-        
-        return queue.substring(start, pos);
-    }
-
-    /**
-     Consume and return whatever is left on the queue.
-     @return remained of queue.
-     */
-    public String remainder() {
-        StringBuilder accum = new StringBuilder();
-        while (!isEmpty()) {
-            accum.append(consume());
-        }
-        return accum.toString();
-    }
-    
-    public String toString() {
-        return queue.substring(pos);
-    }
-}
diff --git a/src/org/jsoup/parser/Tokeniser.java b/src/org/jsoup/parser/Tokeniser.java
deleted file mode 100644
index ce6ee690d6..0000000000
--- a/src/org/jsoup/parser/Tokeniser.java
+++ /dev/null
@@ -1,230 +0,0 @@
-package org.jsoup.parser;
-
-import org.jsoup.helper.Validate;
-import org.jsoup.nodes.Entities;
-
-import java.util.ArrayList;
-import java.util.List;
-
-/**
- * Readers the input stream into tokens.
- */
-class Tokeniser {
-    static final char replacementChar = '\uFFFD'; // replaces null character
-
-    private CharacterReader reader; // html input
-    private ParseErrorList errors; // errors found while tokenising
-
-    private TokeniserState state = TokeniserState.Data; // current tokenisation state
-    private Token emitPending; // the token we are about to emit on next read
-    private boolean isEmitPending = false;
-    private StringBuilder charBuffer = new StringBuilder(); // buffers characters to output as one token
-    StringBuilder dataBuffer; // buffers data looking for </script>
-
-    Token.Tag tagPending; // tag we are building up
-    Token.Doctype doctypePending; // doctype building up
-    Token.Comment commentPending; // comment building up
-    private Token.StartTag lastStartTag; // the last start tag emitted, to test appropriate end tag
-    private boolean selfClosingFlagAcknowledged = true;
-
-    Tokeniser(CharacterReader reader, ParseErrorList errors) {
-        this.reader = reader;
-        this.errors = errors;
-    }
-
-    Token read() {
-        if (!selfClosingFlagAcknowledged) {
-            error("Self closing flag not acknowledged");
-            selfClosingFlagAcknowledged = true;
-        }
-
-        while (!isEmitPending)
-            state.read(this, reader);
-
-        // if emit is pending, a non-character token was found: return any chars in buffer, and leave token for next read:
-        if (charBuffer.length() > 0) {
-            String str = charBuffer.toString();
-            charBuffer.delete(0, charBuffer.length());
-            return new Token.Character(str);
-        } else {
-            isEmitPending = false;
-            return emitPending;
-        }
-    }
-
-    void emit(Token token) {
-        Validate.isFalse(isEmitPending, "There is an unread token pending!");
-
-        emitPending = token;
-        isEmitPending = true;
-
-        if (token.type == Token.TokenType.StartTag) {
-            Token.StartTag startTag = (Token.StartTag) token;
-            lastStartTag = startTag;
-            if (startTag.selfClosing)
-                selfClosingFlagAcknowledged = false;
-        } else if (token.type == Token.TokenType.EndTag) {
-            Token.EndTag endTag = (Token.EndTag) token;
-            if (endTag.attributes.size() > 0)
-                error("Attributes incorrectly present on end tag");
-        }
-    }
-
-    void emit(String str) {
-        // buffer strings up until last string token found, to emit only one token for a run of character refs etc.
-        // does not set isEmitPending; read checks that
-        charBuffer.append(str);
-    }
-
-    void emit(char c) {
-        charBuffer.append(c);
-    }
-
-    TokeniserState getState() {
-        return state;
-    }
-
-    void transition(TokeniserState state) {
-        this.state = state;
-    }
-
-    void advanceTransition(TokeniserState state) {
-        reader.advance();
-        this.state = state;
-    }
-
-    void acknowledgeSelfClosingFlag() {
-        selfClosingFlagAcknowledged = true;
-    }
-
-    Character consumeCharacterReference(Character additionalAllowedCharacter, boolean inAttribute) {
-        if (reader.isEmpty())
-            return null;
-        if (additionalAllowedCharacter != null && additionalAllowedCharacter == reader.current())
-            return null;
-        if (reader.matchesAny('\t', '\n', '\f', ' ', '<', '&'))
-            return null;
-
-        reader.mark();
-        if (reader.matchConsume("#")) { // numbered
-            boolean isHexMode = reader.matchConsumeIgnoreCase("X");
-            String numRef = isHexMode ? reader.consumeHexSequence() : reader.consumeDigitSequence();
-            if (numRef.length() == 0) { // didn't match anything
-                characterReferenceError("numeric reference with no numerals");
-                reader.rewindToMark();
-                return null;
-            }
-            if (!reader.matchConsume(";"))
-                characterReferenceError("missing semicolon"); // missing semi
-            int charval = -1;
-            try {
-                int base = isHexMode ? 16 : 10;
-                charval = Integer.valueOf(numRef, base);
-            } catch (NumberFormatException e) {
-            } // skip
-            if (charval == -1 || (charval >= 0xD800 && charval <= 0xDFFF) || charval > 0x10FFFF) {
-                characterReferenceError("character outside of valid range");
-                return replacementChar;
-            } else {
-                // todo: implement number replacement table
-                // todo: check for extra illegal unicode points as parse errors
-                return (char) charval;
-            }
-        } else { // named
-            // get as many letters as possible, and look for matching entities. unconsume backwards till a match is found
-            String nameRef = reader.consumeLetterThenDigitSequence();
-            String origNameRef = new String(nameRef); // for error reporting. nameRef gets chomped looking for matches
-            boolean looksLegit = reader.matches(';');
-            boolean found = false;
-            while (nameRef.length() > 0 && !found) {
-                if (Entities.isNamedEntity(nameRef))
-                    found = true;
-                else {
-                    nameRef = nameRef.substring(0, nameRef.length()-1);
-                    reader.unconsume();
-                }
-            }
-            if (!found) {
-                if (looksLegit) // named with semicolon
-                    characterReferenceError(String.format("invalid named referenece '%s'", origNameRef));
-                reader.rewindToMark();
-                return null;
-            }
-            if (inAttribute && (reader.matchesLetter() || reader.matchesDigit() || reader.matchesAny('=', '-', '_'))) {
-                // don't want that to match
-                reader.rewindToMark();
-                return null;
-            }
-            if (!reader.matchConsume(";"))
-                characterReferenceError("missing semicolon"); // missing semi
-            return Entities.getCharacterByName(nameRef);
-        }
-    }
-
-    Token.Tag createTagPending(boolean start) {
-        tagPending = start ? new Token.StartTag() : new Token.EndTag();
-        return tagPending;
-    }
-
-    void emitTagPending() {
-        tagPending.finaliseTag();
-        emit(tagPending);
-    }
-
-    void createCommentPending() {
-        commentPending = new Token.Comment();
-    }
-
-    void emitCommentPending() {
-        emit(commentPending);
-    }
-
-    void createDoctypePending() {
-        doctypePending = new Token.Doctype();
-    }
-
-    void emitDoctypePending() {
-        emit(doctypePending);
-    }
-
-    void createTempBuffer() {
-        dataBuffer = new StringBuilder();
-    }
-
-    boolean isAppropriateEndTagToken() {
-        if (lastStartTag == null)
-            return false;
-        return tagPending.tagName.equals(lastStartTag.tagName);
-    }
-
-    String appropriateEndTagName() {
-        return lastStartTag.tagName;
-    }
-
-    void error(TokeniserState state) {
-        if (errors.canAddError())
-            errors.add(new ParseError(reader.pos(), "Unexpected character '%s' in input state [%s]", reader.current(), state));
-    }
-
-    void eofError(TokeniserState state) {
-        if (errors.canAddError())
-            errors.add(new ParseError(reader.pos(), "Unexpectedly reached end of file (EOF) in input state [%s]", state));
-    }
-
-    private void characterReferenceError(String message) {
-        if (errors.canAddError())
-            errors.add(new ParseError(reader.pos(), "Invalid character reference: %s", message));
-    }
-
-    private void error(String errorMsg) {
-        if (errors.canAddError())
-            errors.add(new ParseError(reader.pos(), errorMsg));
-    }
-
-    boolean currentNodeInHtmlNS() {
-        // todo: implement namespaces correctly
-        return true;
-        // Element currentNode = currentNode();
-        // return currentNode != null && currentNode.namespace().equals("HTML");
-    }
-}
diff --git a/src/org/jsoup/parser/TokeniserState.java b/src/org/jsoup/parser/TokeniserState.java
deleted file mode 100644
index e3013c73e9..0000000000
--- a/src/org/jsoup/parser/TokeniserState.java
+++ /dev/null
@@ -1,1778 +0,0 @@
-package org.jsoup.parser;
-
-/**
- * States and transition activations for the Tokeniser.
- */
-enum TokeniserState {
-    Data {
-        // in data state, gather characters until a character reference or tag is found
-        void read(Tokeniser t, CharacterReader r) {
-            switch (r.current()) {
-                case '&':
-                    t.advanceTransition(CharacterReferenceInData);
-                    break;
-                case '<':
-                    t.advanceTransition(TagOpen);
-                    break;
-                case nullChar:
-                    t.error(this); // NOT replacement character (oddly?)
-                    t.emit(r.consume());
-                    break;
-                case eof:
-                    t.emit(new Token.EOF());
-                    break;
-                default:
-                    String data = r.consumeToAny('&', '<', nullChar);
-                    t.emit(data);
-                    break;
-            }
-        }
-    },
-    CharacterReferenceInData {
-        // from & in data
-        void read(Tokeniser t, CharacterReader r) {
-            Character c = t.consumeCharacterReference(null, false);
-            if (c == null)
-                t.emit('&');
-            else
-                t.emit(c);
-            t.transition(Data);
-        }
-    },
-    Rcdata {
-        /// handles data in title, textarea etc
-        void read(Tokeniser t, CharacterReader r) {
-            switch (r.current()) {
-                case '&':
-                    t.advanceTransition(CharacterReferenceInRcdata);
-                    break;
-                case '<':
-                    t.advanceTransition(RcdataLessthanSign);
-                    break;
-                case nullChar:
-                    t.error(this);
-                    r.advance();
-                    t.emit(replacementChar);
-                    break;
-                case eof:
-                    t.emit(new Token.EOF());
-                    break;
-                default:
-                    String data = r.consumeToAny('&', '<', nullChar);
-                    t.emit(data);
-                    break;
-            }
-        }
-    },
-    CharacterReferenceInRcdata {
-        void read(Tokeniser t, CharacterReader r) {
-            Character c = t.consumeCharacterReference(null, false);
-            if (c == null)
-                t.emit('&');
-            else
-                t.emit(c);
-            t.transition(Rcdata);
-        }
-    },
-    Rawtext {
-        void read(Tokeniser t, CharacterReader r) {
-            switch (r.current()) {
-                case '<':
-                    t.advanceTransition(RawtextLessthanSign);
-                    break;
-                case nullChar:
-                    t.error(this);
-                    r.advance();
-                    t.emit(replacementChar);
-                    break;
-                case eof:
-                    t.emit(new Token.EOF());
-                    break;
-                default:
-                    String data = r.consumeToAny('<', nullChar);
-                    t.emit(data);
-                    break;
-            }
-        }
-    },
-    ScriptData {
-        void read(Tokeniser t, CharacterReader r) {
-            switch (r.current()) {
-                case '<':
-                    t.advanceTransition(ScriptDataLessthanSign);
-                    break;
-                case nullChar:
-                    t.error(this);
-                    r.advance();
-                    t.emit(replacementChar);
-                    break;
-                case eof:
-                    t.emit(new Token.EOF());
-                    break;
-                default:
-                    String data = r.consumeToAny('<', nullChar);
-                    t.emit(data);
-                    break;
-            }
-        }
-    },
-    PLAINTEXT {
-        void read(Tokeniser t, CharacterReader r) {
-            switch (r.current()) {
-                case nullChar:
-                    t.error(this);
-                    r.advance();
-                    t.emit(replacementChar);
-                    break;
-                case eof:
-                    t.emit(new Token.EOF());
-                    break;
-                default:
-                    String data = r.consumeTo(nullChar);
-                    t.emit(data);
-                    break;
-            }
-        }
-    },
-    TagOpen {
-        // from < in data
-        void read(Tokeniser t, CharacterReader r) {
-            switch (r.current()) {
-                case '!':
-                    t.advanceTransition(MarkupDeclarationOpen);
-                    break;
-                case '/':
-                    t.advanceTransition(EndTagOpen);
-                    break;
-                case '?':
-                    t.advanceTransition(BogusComment);
-                    break;
-                default:
-                    if (r.matchesLetter()) {
-                        t.createTagPending(true);
-                        t.transition(TagName);
-                    } else {
-                        t.error(this);
-                        t.emit('<'); // char that got us here
-                        t.transition(Data);
-                    }
-                    break;
-            }
-        }
-    },
-    EndTagOpen {
-        void read(Tokeniser t, CharacterReader r) {
-            if (r.isEmpty()) {
-                t.eofError(this);
-                t.emit("</");
-                t.transition(Data);
-            } else if (r.matchesLetter()) {
-                t.createTagPending(false);
-                t.transition(TagName);
-            } else if (r.matches('>')) {
-                t.error(this);
-                t.advanceTransition(Data);
-            } else {
-                t.error(this);
-                t.advanceTransition(BogusComment);
-            }
-        }
-    },
-    TagName {
-        // from < or </ in data, will have start or end tag pending
-        void read(Tokeniser t, CharacterReader r) {
-            // previous TagOpen state did NOT consume, will have a letter char in current
-            String tagName = r.consumeToAny('\t', '\n', '\f', ' ', '/', '>', nullChar).toLowerCase();
-            t.tagPending.appendTagName(tagName);
-
-            switch (r.consume()) {
-                case '\t':
-                case '\n':
-                case '\f':
-                case ' ':
-                    t.transition(BeforeAttributeName);
-                    break;
-                case '/':
-                    t.transition(SelfClosingStartTag);
-                    break;
-                case '>':
-                    t.emitTagPending();
-                    t.transition(Data);
-                    break;
-                case nullChar: // replacement
-                    t.tagPending.appendTagName(replacementStr);
-                    break;
-                case eof: // should emit pending tag?
-                    t.eofError(this);
-                    t.transition(Data);
-                // no default, as covered with above consumeToAny
-            }
-        }
-    },
-    RcdataLessthanSign {
-        // from < in rcdata
-        void read(Tokeniser t, CharacterReader r) {
-            if (r.matches('/')) {
-                t.createTempBuffer();
-                t.advanceTransition(RCDATAEndTagOpen);
-            } else if (r.matchesLetter() && !r.containsIgnoreCase("</" + t.appropriateEndTagName())) {
-                // diverge from spec: got a start tag, but there's no appropriate end tag (</title>), so rather than
-                // consuming to EOF; break out here
-                t.tagPending = new Token.EndTag(t.appropriateEndTagName());
-                t.emitTagPending();
-                r.unconsume(); // undo "<"
-                t.transition(Data);
-            } else {
-                t.emit("<");
-                t.transition(Rcdata);
-            }
-        }
-    },
-    RCDATAEndTagOpen {
-        void read(Tokeniser t, CharacterReader r) {
-            if (r.matchesLetter()) {
-                t.createTagPending(false);
-                t.tagPending.appendTagName(Character.toLowerCase(r.current()));
-                t.dataBuffer.append(Character.toLowerCase(r.current()));
-                t.advanceTransition(RCDATAEndTagName);
-            } else {
-                t.emit("</");
-                t.transition(Rcdata);
-            }
-        }
-    },
-    RCDATAEndTagName {
-        void read(Tokeniser t, CharacterReader r) {
-            if (r.matchesLetter()) {
-                String name = r.consumeLetterSequence();
-                t.tagPending.appendTagName(name.toLowerCase());
-                t.dataBuffer.append(name);
-                return;
-            }
-
-            char c = r.consume();
-            switch (c) {
-                case '\t':
-                case '\n':
-                case '\f':
-                case ' ':
-                    if (t.isAppropriateEndTagToken())
-                        t.transition(BeforeAttributeName);
-                    else
-                        anythingElse(t, r);
-                    break;
-                case '/':
-                    if (t.isAppropriateEndTagToken())
-                        t.transition(SelfClosingStartTag);
-                    else
-                        anythingElse(t, r);
-                    break;
-                case '>':
-                    if (t.isAppropriateEndTagToken()) {
-                        t.emitTagPending();
-                        t.transition(Data);
-                    }
-                    else
-                        anythingElse(t, r);
-                    break;
-                default:
-                    anythingElse(t, r);
-            }
-        }
-
-        private void anythingElse(Tokeniser t, CharacterReader r) {
-            t.emit("</" + t.dataBuffer.toString());
-            t.transition(Rcdata);
-        }
-    },
-    RawtextLessthanSign {
-        void read(Tokeniser t, CharacterReader r) {
-            if (r.matches('/')) {
-                t.createTempBuffer();
-                t.advanceTransition(RawtextEndTagOpen);
-            } else {
-                t.emit('<');
-                t.transition(Rawtext);
-            }
-        }
-    },
-    RawtextEndTagOpen {
-        void read(Tokeniser t, CharacterReader r) {
-            if (r.matchesLetter()) {
-                t.createTagPending(false);
-                t.transition(RawtextEndTagName);
-            } else {
-                t.emit("</");
-                t.transition(Rawtext);
-            }
-        }
-    },
-    RawtextEndTagName {
-        void read(Tokeniser t, CharacterReader r) {
-            if (r.matchesLetter()) {
-                String name = r.consumeLetterSequence();
-                t.tagPending.appendTagName(name.toLowerCase());
-                t.dataBuffer.append(name);
-                return;
-            }
-
-            if (t.isAppropriateEndTagToken() && !r.isEmpty()) {
-                char c = r.consume();
-                switch (c) {
-                    case '\t':
-                    case '\n':
-                    case '\f':
-                    case ' ':
-                        t.transition(BeforeAttributeName);
-                        break;
-                    case '/':
-                        t.transition(SelfClosingStartTag);
-                        break;
-                    case '>':
-                        t.emitTagPending();
-                        t.transition(Data);
-                        break;
-                    default:
-                        t.dataBuffer.append(c);
-                        anythingElse(t, r);
-                }
-            } else
-                anythingElse(t, r);
-        }
-
-        private void anythingElse(Tokeniser t, CharacterReader r) {
-            t.emit("</" + t.dataBuffer.toString());
-            t.transition(Rawtext);
-        }
-    },
-    ScriptDataLessthanSign {
-        void read(Tokeniser t, CharacterReader r) {
-            switch (r.consume()) {
-                case '/':
-                    t.createTempBuffer();
-                    t.transition(ScriptDataEndTagOpen);
-                    break;
-                case '!':
-                    t.emit("<!");
-                    t.transition(ScriptDataEscapeStart);
-                    break;
-                default:
-                    t.emit("<");
-                    r.unconsume();
-                    t.transition(ScriptData);
-            }
-        }
-    },
-    ScriptDataEndTagOpen {
-        void read(Tokeniser t, CharacterReader r) {
-            if (r.matchesLetter()) {
-                t.createTagPending(false);
-                t.transition(ScriptDataEndTagName);
-            } else {
-                t.emit("</");
-                t.transition(ScriptData);
-            }
-
-        }
-    },
-    ScriptDataEndTagName {
-        void read(Tokeniser t, CharacterReader r) {
-            if (r.matchesLetter()) {
-                String name = r.consumeLetterSequence();
-                t.tagPending.appendTagName(name.toLowerCase());
-                t.dataBuffer.append(name);
-                return;
-            }
-
-            if (t.isAppropriateEndTagToken() && !r.isEmpty()) {
-                char c = r.consume();
-                switch (c) {
-                    case '\t':
-                    case '\n':
-                    case '\f':
-                    case ' ':
-                        t.transition(BeforeAttributeName);
-                        break;
-                    case '/':
-                        t.transition(SelfClosingStartTag);
-                        break;
-                    case '>':
-                        t.emitTagPending();
-                        t.transition(Data);
-                        break;
-                    default:
-                        t.dataBuffer.append(c);
-                        anythingElse(t, r);
-                }
-            } else {
-                anythingElse(t, r);
-            }
-        }
-
-        private void anythingElse(Tokeniser t, CharacterReader r) {
-            t.emit("</" + t.dataBuffer.toString());
-            t.transition(ScriptData);
-        }
-    },
-    ScriptDataEscapeStart {
-        void read(Tokeniser t, CharacterReader r) {
-            if (r.matches('-')) {
-                t.emit('-');
-                t.advanceTransition(ScriptDataEscapeStartDash);
-            } else {
-                t.transition(ScriptData);
-            }
-        }
-    },
-    ScriptDataEscapeStartDash {
-        void read(Tokeniser t, CharacterReader r) {
-            if (r.matches('-')) {
-                t.emit('-');
-                t.advanceTransition(ScriptDataEscapedDashDash);
-            } else {
-                t.transition(ScriptData);
-            }
-        }
-    },
-    ScriptDataEscaped {
-        void read(Tokeniser t, CharacterReader r) {
-            if (r.isEmpty()) {
-                t.eofError(this);
-                t.transition(Data);
-                return;
-            }
-
-            switch (r.current()) {
-                case '-':
-                    t.emit('-');
-                    t.advanceTransition(ScriptDataEscapedDash);
-                    break;
-                case '<':
-                    t.advanceTransition(ScriptDataEscapedLessthanSign);
-                    break;
-                case nullChar:
-                    t.error(this);
-                    r.advance();
-                    t.emit(replacementChar);
-                    break;
-                default:
-                    String data = r.consumeToAny('-', '<', nullChar);
-                    t.emit(data);
-            }
-        }
-    },
-    ScriptDataEscapedDash {
-        void read(Tokeniser t, CharacterReader r) {
-            if (r.isEmpty()) {
-                t.eofError(this);
-                t.transition(Data);
-                return;
-            }
-
-            char c = r.consume();
-            switch (c) {
-                case '-':
-                    t.emit(c);
-                    t.transition(ScriptDataEscapedDashDash);
-                    break;
-                case '<':
-                    t.transition(ScriptDataEscapedLessthanSign);
-                    break;
-                case nullChar:
-                    t.error(this);
-                    t.emit(replacementChar);
-                    t.transition(ScriptDataEscaped);
-                    break;
-                default:
-                    t.emit(c);
-                    t.transition(ScriptDataEscaped);
-            }
-        }
-    },
-    ScriptDataEscapedDashDash {
-        void read(Tokeniser t, CharacterReader r) {
-            if (r.isEmpty()) {
-                t.eofError(this);
-                t.transition(Data);
-                return;
-            }
-
-            char c = r.consume();
-            switch (c) {
-                case '-':
-                    t.emit(c);
-                    break;
-                case '<':
-                    t.transition(ScriptDataEscapedLessthanSign);
-                    break;
-                case '>':
-                    t.emit(c);
-                    t.transition(ScriptData);
-                    break;
-                case nullChar:
-                    t.error(this);
-                    t.emit(replacementChar);
-                    t.transition(ScriptDataEscaped);
-                    break;
-                default:
-                    t.emit(c);
-                    t.transition(ScriptDataEscaped);
-            }
-        }
-    },
-    ScriptDataEscapedLessthanSign {
-        void read(Tokeniser t, CharacterReader r) {
-            if (r.matchesLetter()) {
-                t.createTempBuffer();
-                t.dataBuffer.append(Character.toLowerCase(r.current()));
-                t.emit("<" + r.current());
-                t.advanceTransition(ScriptDataDoubleEscapeStart);
-            } else if (r.matches('/')) {
-                t.createTempBuffer();
-                t.advanceTransition(ScriptDataEscapedEndTagOpen);
-            } else {
-                t.emit('<');
-                t.transition(ScriptDataEscaped);
-            }
-        }
-    },
-    ScriptDataEscapedEndTagOpen {
-        void read(Tokeniser t, CharacterReader r) {
-            if (r.matchesLetter()) {
-                t.createTagPending(false);
-                t.tagPending.appendTagName(Character.toLowerCase(r.current()));
-                t.dataBuffer.append(r.current());
-                t.advanceTransition(ScriptDataEscapedEndTagName);
-            } else {
-                t.emit("</");
-                t.transition(ScriptDataEscaped);
-            }
-        }
-    },
-    ScriptDataEscapedEndTagName {
-        void read(Tokeniser t, CharacterReader r) {
-            if (r.matchesLetter()) {
-                String name = r.consumeLetterSequence();
-                t.tagPending.appendTagName(name.toLowerCase());
-                t.dataBuffer.append(name);
-                return;
-            }
-
-            if (t.isAppropriateEndTagToken() && !r.isEmpty()) {
-                char c = r.consume();
-                switch (c) {
-                    case '\t':
-                    case '\n':
-                    case '\f':
-                    case ' ':
-                        t.transition(BeforeAttributeName);
-                        break;
-                    case '/':
-                        t.transition(SelfClosingStartTag);
-                        break;
-                    case '>':
-                        t.emitTagPending();
-                        t.transition(Data);
-                        break;
-                    default:
-                        t.dataBuffer.append(c);
-                        anythingElse(t, r);
-                        break;
-                }
-            } else {
-                anythingElse(t, r);
-            }
-        }
-        
-        private void anythingElse(Tokeniser t, CharacterReader r) {
-            t.emit("</" + t.dataBuffer.toString());
-            t.transition(ScriptDataEscaped);
-        }
-    },
-    ScriptDataDoubleEscapeStart {
-        void read(Tokeniser t, CharacterReader r) {
-            if (r.matchesLetter()) {
-                String name = r.consumeLetterSequence();
-                t.dataBuffer.append(name.toLowerCase());
-                t.emit(name);
-                return;
-            }
-
-            char c = r.consume();
-            switch (c) {
-                case '\t':
-                case '\n':
-                case '\f':
-                case ' ':
-                case '/':
-                case '>':
-                    if (t.dataBuffer.toString().equals("script"))
-                        t.transition(ScriptDataDoubleEscaped);
-                    else
-                        t.transition(ScriptDataEscaped);
-                    t.emit(c);
-                    break;
-                default:
-                    r.unconsume();
-                    t.transition(ScriptDataEscaped);
-            }
-        }
-    },
-    ScriptDataDoubleEscaped {
-        void read(Tokeniser t, CharacterReader r) {
-            char c = r.current();
-            switch (c) {
-                case '-':
-                    t.emit(c);
-                    t.advanceTransition(ScriptDataDoubleEscapedDash);
-                    break;
-                case '<':
-                    t.emit(c);
-                    t.advanceTransition(ScriptDataDoubleEscapedLessthanSign);
-                    break;
-                case nullChar:
-                    t.error(this);
-                    r.advance();
-                    t.emit(replacementChar);
-                    break;
-                case eof:
-                    t.eofError(this);
-                    t.transition(Data);
-                    break;
-                default:
-                    String data = r.consumeToAny('-', '<', nullChar);
-                    t.emit(data);
-            }
-        }
-    },
-    ScriptDataDoubleEscapedDash {
-        void read(Tokeniser t, CharacterReader r) {
-            char c = r.consume();
-            switch (c) {
-                case '-':
-                    t.emit(c);
-                    t.transition(ScriptDataDoubleEscapedDashDash);
-                    break;
-                case '<':
-                    t.emit(c);
-                    t.transition(ScriptDataDoubleEscapedLessthanSign);
-                    break;
-                case nullChar:
-                    t.error(this);
-                    t.emit(replacementChar);
-                    t.transition(ScriptDataDoubleEscaped);
-                    break;
-                case eof:
-                    t.eofError(this);
-                    t.transition(Data);
-                    break;
-                default:
-                    t.emit(c);
-                    t.transition(ScriptDataDoubleEscaped);
-            }
-        }
-    },
-    ScriptDataDoubleEscapedDashDash {
-        void read(Tokeniser t, CharacterReader r) {
-            char c = r.consume();
-            switch (c) {
-                case '-':
-                    t.emit(c);
-                    break;
-                case '<':
-                    t.emit(c);
-                    t.transition(ScriptDataDoubleEscapedLessthanSign);
-                    break;
-                case '>':
-                    t.emit(c);
-                    t.transition(ScriptData);
-                    break;
-                case nullChar:
-                    t.error(this);
-                    t.emit(replacementChar);
-                    t.transition(ScriptDataDoubleEscaped);
-                    break;
-                case eof:
-                    t.eofError(this);
-                    t.transition(Data);
-                    break;
-                default:
-                    t.emit(c);
-                    t.transition(ScriptDataDoubleEscaped);
-            }
-        }
-    },
-    ScriptDataDoubleEscapedLessthanSign {
-        void read(Tokeniser t, CharacterReader r) {
-            if (r.matches('/')) {
-                t.emit('/');
-                t.createTempBuffer();
-                t.advanceTransition(ScriptDataDoubleEscapeEnd);
-            } else {
-                t.transition(ScriptDataDoubleEscaped);
-            }
-        }
-    },
-    ScriptDataDoubleEscapeEnd {
-        void read(Tokeniser t, CharacterReader r) {
-            if (r.matchesLetter()) {
-                String name = r.consumeLetterSequence();
-                t.dataBuffer.append(name.toLowerCase());
-                t.emit(name);
-                return;
-            }
-
-            char c = r.consume();
-            switch (c) {
-                case '\t':
-                case '\n':
-                case '\f':
-                case ' ':
-                case '/':
-                case '>':
-                    if (t.dataBuffer.toString().equals("script"))
-                        t.transition(ScriptDataEscaped);
-                    else
-                        t.transition(ScriptDataDoubleEscaped);
-                    t.emit(c);
-                    break;
-                default:
-                    r.unconsume();
-                    t.transition(ScriptDataDoubleEscaped);
-            }
-        }
-    },
-    BeforeAttributeName {
-        // from tagname <xxx
-        void read(Tokeniser t, CharacterReader r) {
-            char c = r.consume();
-            switch (c) {
-                case '\t':
-                case '\n':
-                case '\f':
-                case ' ':
-                    break; // ignore whitespace
-                case '/':
-                    t.transition(SelfClosingStartTag);
-                    break;
-                case '>':
-                    t.emitTagPending();
-                    t.transition(Data);
-                    break;
-                case nullChar:
-                    t.error(this);
-                    t.tagPending.newAttribute();
-                    r.unconsume();
-                    t.transition(AttributeName);
-                    break;
-                case eof:
-                    t.eofError(this);
-                    t.transition(Data);
-                    break;
-                case '"':
-                case '\'':
-                case '<':
-                case '=':
-                    t.error(this);
-                    t.tagPending.newAttribute();
-                    t.tagPending.appendAttributeName(c);
-                    t.transition(AttributeName);
-                    break;
-                default: // A-Z, anything else
-                    t.tagPending.newAttribute();
-                    r.unconsume();
-                    t.transition(AttributeName);
-            }
-        }
-    },
-    AttributeName {
-        // from before attribute name
-        void read(Tokeniser t, CharacterReader r) {
-            String name = r.consumeToAny('\t', '\n', '\f', ' ', '/', '=', '>', nullChar, '"', '\'', '<');
-            t.tagPending.appendAttributeName(name.toLowerCase());
-
-            char c = r.consume();
-            switch (c) {
-                case '\t':
-                case '\n':
-                case '\f':
-                case ' ':
-                    t.transition(AfterAttributeName);
-                    break;
-                case '/':
-                    t.transition(SelfClosingStartTag);
-                    break;
-                case '=':
-                    t.transition(BeforeAttributeValue);
-                    break;
-                case '>':
-                    t.emitTagPending();
-                    t.transition(Data);
-                    break;
-                case nullChar:
-                    t.error(this);
-                    t.tagPending.appendAttributeName(replacementChar);
-                    break;
-                case eof:
-                    t.eofError(this);
-                    t.transition(Data);
-                    break;
-                case '"':
-                case '\'':
-                case '<':
-                    t.error(this);
-                    t.tagPending.appendAttributeName(c);
-                // no default, as covered in consumeToAny
-            }
-        }
-    },
-    AfterAttributeName {
-        void read(Tokeniser t, CharacterReader r) {
-            char c = r.consume();
-            switch (c) {
-                case '\t':
-                case '\n':
-                case '\f':
-                case ' ':
-                    // ignore
-                    break;
-                case '/':
-                    t.transition(SelfClosingStartTag);
-                    break;
-                case '=':
-                    t.transition(BeforeAttributeValue);
-                    break;
-                case '>':
-                    t.emitTagPending();
-                    t.transition(Data);
-                    break;
-                case nullChar:
-                    t.error(this);
-                    t.tagPending.appendAttributeName(replacementChar);
-                    t.transition(AttributeName);
-                    break;
-                case eof:
-                    t.eofError(this);
-                    t.transition(Data);
-                    break;
-                case '"':
-                case '\'':
-                case '<':
-                    t.error(this);
-                    t.tagPending.newAttribute();
-                    t.tagPending.appendAttributeName(c);
-                    t.transition(AttributeName);
-                    break;
-                default: // A-Z, anything else
-                    t.tagPending.newAttribute();
-                    r.unconsume();
-                    t.transition(AttributeName);
-            }
-        }
-    },
-    BeforeAttributeValue {
-        void read(Tokeniser t, CharacterReader r) {
-            char c = r.consume();
-            switch (c) {
-                case '\t':
-                case '\n':
-                case '\f':
-                case ' ':
-                    // ignore
-                    break;
-                case '"':
-                    t.transition(AttributeValue_doubleQuoted);
-                    break;
-                case '&':
-                    r.unconsume();
-                    t.transition(AttributeValue_unquoted);
-                    break;
-                case '\'':
-                    t.transition(AttributeValue_singleQuoted);
-                    break;
-                case nullChar:
-                    t.error(this);
-                    t.tagPending.appendAttributeValue(replacementChar);
-                    t.transition(AttributeValue_unquoted);
-                    break;
-                case eof:
-                    t.eofError(this);
-                    t.transition(Data);
-                    break;
-                case '>':
-                    t.error(this);
-                    t.emitTagPending();
-                    t.transition(Data);
-                    break;
-                case '<':
-                case '=':
-                case '`':
-                    t.error(this);
-                    t.tagPending.appendAttributeValue(c);
-                    t.transition(AttributeValue_unquoted);
-                    break;
-                default:
-                    r.unconsume();
-                    t.transition(AttributeValue_unquoted);
-            }
-        }
-    },
-    AttributeValue_doubleQuoted {
-        void read(Tokeniser t, CharacterReader r) {
-            String value = r.consumeToAny('"', '&', nullChar);
-            if (value.length() > 0)
-                t.tagPending.appendAttributeValue(value);
-
-            char c = r.consume();
-            switch (c) {
-                case '"':
-                    t.transition(AfterAttributeValue_quoted);
-                    break;
-                case '&':
-                    Character ref = t.consumeCharacterReference('"', true);
-                    if (ref != null)
-                        t.tagPending.appendAttributeValue(ref);
-                    else
-                        t.tagPending.appendAttributeValue('&');
-                    break;
-                case nullChar:
-                    t.error(this);
-                    t.tagPending.appendAttributeValue(replacementChar);
-                    break;
-                case eof:
-                    t.eofError(this);
-                    t.transition(Data);
-                    break;
-                // no default, handled in consume to any above
-            }
-        }
-    },
-    AttributeValue_singleQuoted {
-        void read(Tokeniser t, CharacterReader r) {
-            String value = r.consumeToAny('\'', '&', nullChar);
-            if (value.length() > 0)
-                t.tagPending.appendAttributeValue(value);
-
-            char c = r.consume();
-            switch (c) {
-                case '\'':
-                    t.transition(AfterAttributeValue_quoted);
-                    break;
-                case '&':
-                    Character ref = t.consumeCharacterReference('\'', true);
-                    if (ref != null)
-                        t.tagPending.appendAttributeValue(ref);
-                    else
-                        t.tagPending.appendAttributeValue('&');
-                    break;
-                case nullChar:
-                    t.error(this);
-                    t.tagPending.appendAttributeValue(replacementChar);
-                    break;
-                case eof:
-                    t.eofError(this);
-                    t.transition(Data);
-                    break;
-                // no default, handled in consume to any above
-            }
-        }
-    },
-    AttributeValue_unquoted {
-        void read(Tokeniser t, CharacterReader r) {
-            String value = r.consumeToAny('\t', '\n', '\f', ' ', '&', '>', nullChar, '"', '\'', '<', '=', '`');
-            if (value.length() > 0)
-                t.tagPending.appendAttributeValue(value);
-
-            char c = r.consume();
-            switch (c) {
-                case '\t':
-                case '\n':
-                case '\f':
-                case ' ':
-                    t.transition(BeforeAttributeName);
-                    break;
-                case '&':
-                    Character ref = t.consumeCharacterReference('>', true);
-                    if (ref != null)
-                        t.tagPending.appendAttributeValue(ref);
-                    else
-                        t.tagPending.appendAttributeValue('&');
-                    break;
-                case '>':
-                    t.emitTagPending();
-                    t.transition(Data);
-                    break;
-                case nullChar:
-                    t.error(this);
-                    t.tagPending.appendAttributeValue(replacementChar);
-                    break;
-                case eof:
-                    t.eofError(this);
-                    t.transition(Data);
-                    break;
-                case '"':
-                case '\'':
-                case '<':
-                case '=':
-                case '`':
-                    t.error(this);
-                    t.tagPending.appendAttributeValue(c);
-                    break;
-                // no default, handled in consume to any above
-            }
-
-        }
-    },
-    // CharacterReferenceInAttributeValue state handled inline
-    AfterAttributeValue_quoted {
-        void read(Tokeniser t, CharacterReader r) {
-            char c = r.consume();
-            switch (c) {
-                case '\t':
-                case '\n':
-                case '\f':
-                case ' ':
-                    t.transition(BeforeAttributeName);
-                    break;
-                case '/':
-                    t.transition(SelfClosingStartTag);
-                    break;
-                case '>':
-                    t.emitTagPending();
-                    t.transition(Data);
-                    break;
-                case eof:
-                    t.eofError(this);
-                    t.transition(Data);
-                    break;
-                default:
-                    t.error(this);
-                    r.unconsume();
-                    t.transition(BeforeAttributeName);
-            }
-
-        }
-    },
-    SelfClosingStartTag {
-        void read(Tokeniser t, CharacterReader r) {
-            char c = r.consume();
-            switch (c) {
-                case '>':
-                    t.tagPending.selfClosing = true;
-                    t.emitTagPending();
-                    t.transition(Data);
-                    break;
-                case eof:
-                    t.eofError(this);
-                    t.transition(Data);
-                    break;
-                default:
-                    t.error(this);
-                    t.transition(BeforeAttributeName);
-            }
-        }
-    },
-    BogusComment {
-        void read(Tokeniser t, CharacterReader r) {
-            // todo: handle bogus comment starting from eof. when does that trigger?
-            // rewind to capture character that lead us here
-            r.unconsume();
-            Token.Comment comment = new Token.Comment();
-            comment.data.append(r.consumeTo('>'));
-            // todo: replace nullChar with replaceChar
-            t.emit(comment);
-            t.advanceTransition(Data);
-        }
-    },
-    MarkupDeclarationOpen {
-        void read(Tokeniser t, CharacterReader r) {
-            if (r.matchConsume("--")) {
-                t.createCommentPending();
-                t.transition(CommentStart);
-            } else if (r.matchConsumeIgnoreCase("DOCTYPE")) {
-                t.transition(Doctype);
-            } else if (r.matchConsume("[CDATA[")) {
-                // todo: should actually check current namepspace, and only non-html allows cdata. until namespace
-                // is implemented properly, keep handling as cdata
-                //} else if (!t.currentNodeInHtmlNS() && r.matchConsume("[CDATA[")) {
-                t.transition(CdataSection);
-            } else {
-                t.error(this);
-                t.advanceTransition(BogusComment); // advance so this character gets in bogus comment data's rewind
-            }
-        }
-    },
-    CommentStart {
-        void read(Tokeniser t, CharacterReader r) {
-            char c = r.consume();
-            switch (c) {
-                case '-':
-                    t.transition(CommentStartDash);
-                    break;
-                case nullChar:
-                    t.error(this);
-                    t.commentPending.data.append(replacementChar);
-                    t.transition(Comment);
-                    break;
-                case '>':
-                    t.error(this);
-                    t.emitCommentPending();
-                    t.transition(Data);
-                    break;
-                case eof:
-                    t.eofError(this);
-                    t.emitCommentPending();
-                    t.transition(Data);
-                    break;
-                default:
-                    t.commentPending.data.append(c);
-                    t.transition(Comment);
-            }
-        }
-    },
-    CommentStartDash {
-        void read(Tokeniser t, CharacterReader r) {
-            char c = r.consume();
-            switch (c) {
-                case '-':
-                    t.transition(CommentStartDash);
-                    break;
-                case nullChar:
-                    t.error(this);
-                    t.commentPending.data.append(replacementChar);
-                    t.transition(Comment);
-                    break;
-                case '>':
-                    t.error(this);
-                    t.emitCommentPending();
-                    t.transition(Data);
-                    break;
-                case eof:
-                    t.eofError(this);
-                    t.emitCommentPending();
-                    t.transition(Data);
-                    break;
-                default:
-                    t.commentPending.data.append(c);
-                    t.transition(Comment);
-            }
-        }
-    },
-    Comment {
-        void read(Tokeniser t, CharacterReader r) {
-            char c = r.current();
-            switch (c) {
-                case '-':
-                    t.advanceTransition(CommentEndDash);
-                    break;
-                case nullChar:
-                    t.error(this);
-                    r.advance();
-                    t.commentPending.data.append(replacementChar);
-                    break;
-                case eof:
-                    t.eofError(this);
-                    t.emitCommentPending();
-                    t.transition(Data);
-                    break;
-                default:
-                    t.commentPending.data.append(r.consumeToAny('-', nullChar));
-            }
-        }
-    },
-    CommentEndDash {
-        void read(Tokeniser t, CharacterReader r) {
-            char c = r.consume();
-            switch (c) {
-                case '-':
-                    t.transition(CommentEnd);
-                    break;
-                case nullChar:
-                    t.error(this);
-                    t.commentPending.data.append('-').append(replacementChar);
-                    t.transition(Comment);
-                    break;
-                case eof:
-                    t.eofError(this);
-                    t.emitCommentPending();
-                    t.transition(Data);
-                    break;
-                default:
-                    t.commentPending.data.append('-').append(c);
-                    t.transition(Comment);
-            }
-        }
-    },
-    CommentEnd {
-        void read(Tokeniser t, CharacterReader r) {
-            char c = r.consume();
-            switch (c) {
-                case '>':
-                    t.emitCommentPending();
-                    t.transition(Data);
-                    break;
-                case nullChar:
-                    t.error(this);
-                    t.commentPending.data.append("--").append(replacementChar);
-                    t.transition(Comment);
-                    break;
-                case '!':
-                    t.error(this);
-                    t.transition(CommentEndBang);
-                    break;
-                case '-':
-                    t.error(this);
-                    t.commentPending.data.append('-');
-                    break;
-                case eof:
-                    t.eofError(this);
-                    t.emitCommentPending();
-                    t.transition(Data);
-                    break;
-                default:
-                    t.error(this);
-                    t.commentPending.data.append("--").append(c);
-                    t.transition(Comment);
-            }
-        }
-    },
-    CommentEndBang {
-        void read(Tokeniser t, CharacterReader r) {
-            char c = r.consume();
-            switch (c) {
-                case '-':
-                    t.commentPending.data.append("--!");
-                    t.transition(CommentEndDash);
-                    break;
-                case '>':
-                    t.emitCommentPending();
-                    t.transition(Data);
-                    break;
-                case nullChar:
-                    t.error(this);
-                    t.commentPending.data.append("--!").append(replacementChar);
-                    t.transition(Comment);
-                    break;
-                case eof:
-                    t.eofError(this);
-                    t.emitCommentPending();
-                    t.transition(Data);
-                    break;
-                default:
-                    t.commentPending.data.append("--!").append(c);
-                    t.transition(Comment);
-            }
-        }
-    },
-    Doctype {
-        void read(Tokeniser t, CharacterReader r) {
-            char c = r.consume();
-            switch (c) {
-                case '\t':
-                case '\n':
-                case '\f':
-                case ' ':
-                    t.transition(BeforeDoctypeName);
-                    break;
-                case eof:
-                    t.eofError(this);
-                    t.createDoctypePending();
-                    t.doctypePending.forceQuirks = true;
-                    t.emitDoctypePending();
-                    t.transition(Data);
-                    break;
-                default:
-                    t.error(this);
-                    t.transition(BeforeDoctypeName);
-            }
-        }
-    },
-    BeforeDoctypeName {
-        void read(Tokeniser t, CharacterReader r) {
-            if (r.matchesLetter()) {
-                t.createDoctypePending();
-                t.transition(DoctypeName);
-                return;
-            }
-            char c = r.consume();
-            switch (c) {
-                case '\t':
-                case '\n':
-                case '\f':
-                case ' ':
-                    break; // ignore whitespace
-                case nullChar:
-                    t.error(this);
-                    t.doctypePending.name.append(replacementChar);
-                    t.transition(DoctypeName);
-                    break;
-                case eof:
-                    t.eofError(this);
-                    t.createDoctypePending();
-                    t.doctypePending.forceQuirks = true;
-                    t.emitDoctypePending();
-                    t.transition(Data);
-                    break;
-                default:
-                    t.createDoctypePending();
-                    t.doctypePending.name.append(c);
-                    t.transition(DoctypeName);
-            }
-        }
-    },
-    DoctypeName {
-        void read(Tokeniser t, CharacterReader r) {
-            if (r.matchesLetter()) {
-                String name = r.consumeLetterSequence();
-                t.doctypePending.name.append(name.toLowerCase());
-                return;
-            }
-            char c = r.consume();
-            switch (c) {
-                case '>':
-                    t.emitDoctypePending();
-                    t.transition(Data);
-                    break;
-                case '\t':
-                case '\n':
-                case '\f':
-                case ' ':
-                    t.transition(AfterDoctypeName);
-                    break;
-                case nullChar:
-                    t.error(this);
-                    t.doctypePending.name.append(replacementChar);
-                    break;
-                case eof:
-                    t.eofError(this);
-                    t.doctypePending.forceQuirks = true;
-                    t.emitDoctypePending();
-                    t.transition(Data);
-                    break;
-                default:
-                    t.doctypePending.name.append(c);
-            }
-        }
-    },
-    AfterDoctypeName {
-        void read(Tokeniser t, CharacterReader r) {
-            if (r.isEmpty()) {
-                t.eofError(this);
-                t.doctypePending.forceQuirks = true;
-                t.emitDoctypePending();
-                t.transition(Data);
-                return;
-            }
-            if (r.matchesAny('\t', '\n', '\f', ' '))
-                r.advance(); // ignore whitespace
-            else if (r.matches('>')) {
-                t.emitDoctypePending();
-                t.advanceTransition(Data);
-            } else if (r.matchConsumeIgnoreCase("PUBLIC")) {
-                t.transition(AfterDoctypePublicKeyword);
-            } else if (r.matchConsumeIgnoreCase("SYSTEM")) {
-                t.transition(AfterDoctypeSystemKeyword);
-            } else {
-                t.error(this);
-                t.doctypePending.forceQuirks = true;
-                t.advanceTransition(BogusDoctype);
-            }
-
-        }
-    },
-    AfterDoctypePublicKeyword {
-        void read(Tokeniser t, CharacterReader r) {
-            char c = r.consume();
-            switch (c) {
-                case '\t':
-                case '\n':
-                case '\f':
-                case ' ':
-                    t.transition(BeforeDoctypePublicIdentifier);
-                    break;
-                case '"':
-                    t.error(this);
-                    // set public id to empty string
-                    t.transition(DoctypePublicIdentifier_doubleQuoted);
-                    break;
-                case '\'':
-                    t.error(this);
-                    // set public id to empty string
-                    t.transition(DoctypePublicIdentifier_singleQuoted);
-                    break;
-                case '>':
-                    t.error(this);
-                    t.doctypePending.forceQuirks = true;
-                    t.emitDoctypePending();
-                    t.transition(Data);
-                    break;
-                case eof:
-                    t.eofError(this);
-                    t.doctypePending.forceQuirks = true;
-                    t.emitDoctypePending();
-                    t.transition(Data);
-                    break;
-                default:
-                    t.error(this);
-                    t.doctypePending.forceQuirks = true;
-                    t.transition(BogusDoctype);
-            }
-        }
-    },
-    BeforeDoctypePublicIdentifier {
-        void read(Tokeniser t, CharacterReader r) {
-            char c = r.consume();
-            switch (c) {
-                case '\t':
-                case '\n':
-                case '\f':
-                case ' ':
-                    break;
-                case '"':
-                    // set public id to empty string
-                    t.transition(DoctypePublicIdentifier_doubleQuoted);
-                    break;
-                case '\'':
-                    // set public id to empty string
-                    t.transition(DoctypePublicIdentifier_singleQuoted);
-                    break;
-                case '>':
-                    t.error(this);
-                    t.doctypePending.forceQuirks = true;
-                    t.emitDoctypePending();
-                    t.transition(Data);
-                    break;
-                case eof:
-                    t.eofError(this);
-                    t.doctypePending.forceQuirks = true;
-                    t.emitDoctypePending();
-                    t.transition(Data);
-                    break;
-                default:
-                    t.error(this);
-                    t.doctypePending.forceQuirks = true;
-                    t.transition(BogusDoctype);
-            }
-        }
-    },
-    DoctypePublicIdentifier_doubleQuoted {
-        void read(Tokeniser t, CharacterReader r) {
-            char c = r.consume();
-            switch (c) {
-                case '"':
-                    t.transition(AfterDoctypePublicIdentifier);
-                    break;
-                case nullChar:
-                    t.error(this);
-                    t.doctypePending.publicIdentifier.append(replacementChar);
-                    break;
-                case '>':
-                    t.error(this);
-                    t.doctypePending.forceQuirks = true;
-                    t.emitDoctypePending();
-                    t.transition(Data);
-                    break;
-                case eof:
-                    t.eofError(this);
-                    t.doctypePending.forceQuirks = true;
-                    t.emitDoctypePending();
-                    t.transition(Data);
-                    break;
-                default:
-                    t.doctypePending.publicIdentifier.append(c);
-            }
-        }
-    },
-    DoctypePublicIdentifier_singleQuoted {
-        void read(Tokeniser t, CharacterReader r) {
-            char c = r.consume();
-            switch (c) {
-                case '\'':
-                    t.transition(AfterDoctypePublicIdentifier);
-                    break;
-                case nullChar:
-                    t.error(this);
-                    t.doctypePending.publicIdentifier.append(replacementChar);
-                    break;
-                case '>':
-                    t.error(this);
-                    t.doctypePending.forceQuirks = true;
-                    t.emitDoctypePending();
-                    t.transition(Data);
-                    break;
-                case eof:
-                    t.eofError(this);
-                    t.doctypePending.forceQuirks = true;
-                    t.emitDoctypePending();
-                    t.transition(Data);
-                    break;
-                default:
-                    t.doctypePending.publicIdentifier.append(c);
-            }
-        }
-    },
-    AfterDoctypePublicIdentifier {
-        void read(Tokeniser t, CharacterReader r) {
-            char c = r.consume();
-            switch (c) {
-                case '\t':
-                case '\n':
-                case '\f':
-                case ' ':
-                    t.transition(BetweenDoctypePublicAndSystemIdentifiers);
-                    break;
-                case '>':
-                    t.emitDoctypePending();
-                    t.transition(Data);
-                    break;
-                case '"':
-                    t.error(this);
-                    // system id empty
-                    t.transition(DoctypeSystemIdentifier_doubleQuoted);
-                    break;
-                case '\'':
-                    t.error(this);
-                    // system id empty
-                    t.transition(DoctypeSystemIdentifier_singleQuoted);
-                    break;
-                case eof:
-                    t.eofError(this);
-                    t.doctypePending.forceQuirks = true;
-                    t.emitDoctypePending();
-                    t.transition(Data);
-                    break;
-                default:
-                    t.error(this);
-                    t.doctypePending.forceQuirks = true;
-                    t.transition(BogusDoctype);
-            }
-        }
-    },
-    BetweenDoctypePublicAndSystemIdentifiers {
-        void read(Tokeniser t, CharacterReader r) {
-            char c = r.consume();
-            switch (c) {
-                case '\t':
-                case '\n':
-                case '\f':
-                case ' ':
-                    break;
-                case '>':
-                    t.emitDoctypePending();
-                    t.transition(Data);
-                    break;
-                case '"':
-                    t.error(this);
-                    // system id empty
-                    t.transition(DoctypeSystemIdentifier_doubleQuoted);
-                    break;
-                case '\'':
-                    t.error(this);
-                    // system id empty
-                    t.transition(DoctypeSystemIdentifier_singleQuoted);
-                    break;
-                case eof:
-                    t.eofError(this);
-                    t.doctypePending.forceQuirks = true;
-                    t.emitDoctypePending();
-                    t.transition(Data);
-                    break;
-                default:
-                    t.error(this);
-                    t.doctypePending.forceQuirks = true;
-                    t.transition(BogusDoctype);
-            }
-        }
-    },
-    AfterDoctypeSystemKeyword {
-        void read(Tokeniser t, CharacterReader r) {
-            char c = r.consume();
-            switch (c) {
-                case '\t':
-                case '\n':
-                case '\f':
-                case ' ':
-                    t.transition(BeforeDoctypeSystemIdentifier);
-                    break;
-                case '>':
-                    t.error(this);
-                    t.doctypePending.forceQuirks = true;
-                    t.emitDoctypePending();
-                    t.transition(Data);
-                    break;
-                case '"':
-                    t.error(this);
-                    // system id empty
-                    t.transition(DoctypeSystemIdentifier_doubleQuoted);
-                    break;
-                case '\'':
-                    t.error(this);
-                    // system id empty
-                    t.transition(DoctypeSystemIdentifier_singleQuoted);
-                    break;
-                case eof:
-                    t.eofError(this);
-                    t.doctypePending.forceQuirks = true;
-                    t.emitDoctypePending();
-                    t.transition(Data);
-                    break;
-                default:
-                    t.error(this);
-                    t.doctypePending.forceQuirks = true;
-                    t.emitDoctypePending();
-            }
-        }
-    },
-    BeforeDoctypeSystemIdentifier {
-        void read(Tokeniser t, CharacterReader r) {
-            char c = r.consume();
-            switch (c) {
-                case '\t':
-                case '\n':
-                case '\f':
-                case ' ':
-                    break;
-                case '"':
-                    // set system id to empty string
-                    t.transition(DoctypeSystemIdentifier_doubleQuoted);
-                    break;
-                case '\'':
-                    // set public id to empty string
-                    t.transition(DoctypeSystemIdentifier_singleQuoted);
-                    break;
-                case '>':
-                    t.error(this);
-                    t.doctypePending.forceQuirks = true;
-                    t.emitDoctypePending();
-                    t.transition(Data);
-                    break;
-                case eof:
-                    t.eofError(this);
-                    t.doctypePending.forceQuirks = true;
-                    t.emitDoctypePending();
-                    t.transition(Data);
-                    break;
-                default:
-                    t.error(this);
-                    t.doctypePending.forceQuirks = true;
-                    t.transition(BogusDoctype);
-            }
-        }
-    },
-    DoctypeSystemIdentifier_doubleQuoted {
-        void read(Tokeniser t, CharacterReader r) {
-            char c = r.consume();
-            switch (c) {
-                case '"':
-                    t.transition(AfterDoctypeSystemIdentifier);
-                    break;
-                case nullChar:
-                    t.error(this);
-                    t.doctypePending.systemIdentifier.append(replacementChar);
-                    break;
-                case '>':
-                    t.error(this);
-                    t.doctypePending.forceQuirks = true;
-                    t.emitDoctypePending();
-                    t.transition(Data);
-                    break;
-                case eof:
-                    t.eofError(this);
-                    t.doctypePending.forceQuirks = true;
-                    t.emitDoctypePending();
-                    t.transition(Data);
-                    break;
-                default:
-                    t.doctypePending.systemIdentifier.append(c);
-            }
-        }
-    },
-    DoctypeSystemIdentifier_singleQuoted {
-        void read(Tokeniser t, CharacterReader r) {
-            char c = r.consume();
-            switch (c) {
-                case '\'':
-                    t.transition(AfterDoctypeSystemIdentifier);
-                    break;
-                case nullChar:
-                    t.error(this);
-                    t.doctypePending.systemIdentifier.append(replacementChar);
-                    break;
-                case '>':
-                    t.error(this);
-                    t.doctypePending.forceQuirks = true;
-                    t.emitDoctypePending();
-                    t.transition(Data);
-                    break;
-                case eof:
-                    t.eofError(this);
-                    t.doctypePending.forceQuirks = true;
-                    t.emitDoctypePending();
-                    t.transition(Data);
-                    break;
-                default:
-                    t.doctypePending.systemIdentifier.append(c);
-            }
-        }
-    },
-    AfterDoctypeSystemIdentifier {
-        void read(Tokeniser t, CharacterReader r) {
-            char c = r.consume();
-            switch (c) {
-                case '\t':
-                case '\n':
-                case '\f':
-                case ' ':
-                    break;
-                case '>':
-                    t.emitDoctypePending();
-                    t.transition(Data);
-                    break;
-                case eof:
-                    t.eofError(this);
-                    t.doctypePending.forceQuirks = true;
-                    t.emitDoctypePending();
-                    t.transition(Data);
-                    break;
-                default:
-                    t.error(this);
-                    t.transition(BogusDoctype);
-                    // NOT force quirks
-            }
-        }
-    },
-    BogusDoctype {
-        void read(Tokeniser t, CharacterReader r) {
-            char c = r.consume();
-            switch (c) {
-                case '>':
-                    t.emitDoctypePending();
-                    t.transition(Data);
-                    break;
-                case eof:
-                    t.emitDoctypePending();
-                    t.transition(Data);
-                    break;
-                default:
-                    // ignore char
-                    break;
-            }
-        }
-    },
-    CdataSection {
-        void read(Tokeniser t, CharacterReader r) {
-            String data = r.consumeTo("]]>");
-            t.emit(data);
-            r.matchConsume("]]>");
-            t.transition(Data);
-        }
-    };
-
-
-    abstract void read(Tokeniser t, CharacterReader r);
-
-    private static final char nullChar = '\u0000';
-    private static final char replacementChar = Tokeniser.replacementChar;
-    private static final String replacementStr = String.valueOf(Tokeniser.replacementChar);
-    private static final char eof = CharacterReader.EOF;
-}
diff --git a/src/org/jsoup/parser/TreeBuilder.java b/src/org/jsoup/parser/TreeBuilder.java
deleted file mode 100644
index e06caad501..0000000000
--- a/src/org/jsoup/parser/TreeBuilder.java
+++ /dev/null
@@ -1,60 +0,0 @@
-package org.jsoup.parser;
-
-import org.jsoup.helper.DescendableLinkedList;
-import org.jsoup.helper.Validate;
-import org.jsoup.nodes.Document;
-import org.jsoup.nodes.Element;
-
-import java.util.ArrayList;
-import java.util.List;
-
-/**
- * @author Jonathan Hedley
- */
-abstract class TreeBuilder {
-    CharacterReader reader;
-    Tokeniser tokeniser;
-    protected Document doc; // current doc we are building into
-    protected DescendableLinkedList<Element> stack; // the stack of open elements
-    protected String baseUri; // current base uri, for creating new elements
-    protected Token currentToken; // currentToken is used only for error tracking.
-    protected ParseErrorList errors; // null when not tracking errors
-
-    protected void initialiseParse(String input, String baseUri, ParseErrorList errors) {
-        Validate.notNull(input, "String input must not be null");
-        Validate.notNull(baseUri, "BaseURI must not be null");
-
-        doc = new Document(baseUri);
-        reader = new CharacterReader(input);
-        this.errors = errors;
-        tokeniser = new Tokeniser(reader, errors);
-        stack = new DescendableLinkedList<Element>();
-        this.baseUri = baseUri;
-    }
-
-    Document parse(String input, String baseUri) {
-        return parse(input, baseUri, ParseErrorList.noTracking());
-    }
-
-    Document parse(String input, String baseUri, ParseErrorList errors) {
-        initialiseParse(input, baseUri, errors);
-        runParser();
-        return doc;
-    }
-
-    protected void runParser() {
-        while (true) {
-            Token token = tokeniser.read();
-            process(token);
-
-            if (token.type == Token.TokenType.EOF)
-                break;
-        }
-    }
-
-    protected abstract boolean process(Token token);
-
-    protected Element currentElement() {
-        return stack.getLast();
-    }
-}
diff --git a/src/org/jsoup/parser/XmlTreeBuilder.java b/src/org/jsoup/parser/XmlTreeBuilder.java
deleted file mode 100644
index 3f03ad26ac..0000000000
--- a/src/org/jsoup/parser/XmlTreeBuilder.java
+++ /dev/null
@@ -1,111 +0,0 @@
-package org.jsoup.parser;
-
-import org.jsoup.helper.Validate;
-import org.jsoup.nodes.*;
-
-import java.util.Iterator;
-
-/**
- * @author Jonathan Hedley
- */
-public class XmlTreeBuilder extends TreeBuilder {
-    @Override
-    protected void initialiseParse(String input, String baseUri, ParseErrorList errors) {
-        super.initialiseParse(input, baseUri, errors);
-        stack.add(doc); // place the document onto the stack. differs from HtmlTreeBuilder (not on stack)
-    }
-
-    @Override
-    protected boolean process(Token token) {
-        // start tag, end tag, doctype, comment, character, eof
-        switch (token.type) {
-            case StartTag:
-                insert(token.asStartTag());
-                break;
-            case EndTag:
-                popStackToClose(token.asEndTag());
-                break;
-            case Comment:
-                insert(token.asComment());
-                break;
-            case Character:
-                insert(token.asCharacter());
-                break;
-            case Doctype:
-                insert(token.asDoctype());
-                break;
-            case EOF: // could put some normalisation here if desired
-                break;
-            default:
-                Validate.fail("Unexpected token type: " + token.type);
-        }
-        return true;
-    }
-
-    private void insertNode(Node node) {
-        currentElement().appendChild(node);
-    }
-
-    Element insert(Token.StartTag startTag) {
-        Tag tag = Tag.valueOf(startTag.name());
-        // todo: wonder if for xml parsing, should treat all tags as unknown? because it's not html.
-        Element el = new Element(tag, baseUri, startTag.attributes);
-        insertNode(el);
-        if (startTag.isSelfClosing()) {
-            tokeniser.acknowledgeSelfClosingFlag();
-            if (!tag.isKnownTag()) // unknown tag, remember this is self closing for output. see above.
-                tag.setSelfClosing();
-        } else {
-            stack.add(el);
-        }
-        return el;
-    }
-
-    void insert(Token.Comment commentToken) {
-        Comment comment = new Comment(commentToken.getData(), baseUri);
-        insertNode(comment);
-    }
-
-    void insert(Token.Character characterToken) {
-        Node node = new TextNode(characterToken.getData(), baseUri);
-        insertNode(node);
-    }
-
-    void insert(Token.Doctype d) {
-        DocumentType doctypeNode = new DocumentType(d.getName(), d.getPublicIdentifier(), d.getSystemIdentifier(), baseUri);
-        insertNode(doctypeNode);
-    }
-
-    /**
-     * If the stack contains an element with this tag's name, pop up the stack to remove the first occurrence. If not
-     * found, skips.
-     *
-     * @param endTag
-     */
-    private void popStackToClose(Token.EndTag endTag) {
-        String elName = endTag.name();
-        Element firstFound = null;
-
-        Iterator<Element> it = stack.descendingIterator();
-        while (it.hasNext()) {
-            Element next = it.next();
-            if (next.nodeName().equals(elName)) {
-                firstFound = next;
-                break;
-            }
-        }
-        if (firstFound == null)
-            return; // not found, skip
-
-        it = stack.descendingIterator();
-        while (it.hasNext()) {
-            Element next = it.next();
-            if (next == firstFound) {
-                it.remove();
-                break;
-            } else {
-                it.remove();
-            }
-        }
-    }
-}
diff --git a/src/org/jsoup/parser/package-info.java b/src/org/jsoup/parser/package-info.java
deleted file mode 100644
index 168fdf4086..0000000000
--- a/src/org/jsoup/parser/package-info.java
+++ /dev/null
@@ -1,4 +0,0 @@
-/**
- Contains the HTML parser, tag specifications, and HTML tokeniser.
- */
-package org.jsoup.parser;