1 files changed, 0 insertions, 264 deletions
diff --git a/server/src/org/jsoup/parser/Tokeniser.java b/server/src/org/jsoup/parser/Tokeniser.java
deleted file mode 100644
index f46c962281..0000000000
--- a/server/src/org/jsoup/parser/Tokeniser.java
+++ /dev/null
@@ -1,264 +0,0 @@
-package org.jsoup.parser;
-
-import org.jsoup.helper.Validate;
-import org.jsoup.nodes.Entities;
-
-/**
- * Readers the input stream into tokens.
- */
-class Tokeniser {
-    static final char replacementChar = '\uFFFD'; // replaces null character
-
-    private CharacterReader reader; // html input
-    private ParseErrorList errors; // errors found while tokenising
-
-    private TokeniserState state = TokeniserState.Data; // current tokenisation
-                                                        // state
-    private Token emitPending; // the token we are about to emit on next read
-    private boolean isEmitPending = false;
-    private StringBuilder charBuffer = new StringBuilder(); // buffers
-                                                            // characters to
-                                                            // output as one
-                                                            // token
-    StringBuilder dataBuffer; // buffers data looking for </script>
-
-    Token.Tag tagPending; // tag we are building up
-    Token.Doctype doctypePending; // doctype building up
-    Token.Comment commentPending; // comment building up
-    private Token.StartTag lastStartTag; // the last start tag emitted, to test
-                                         // appropriate end tag
-    private boolean selfClosingFlagAcknowledged = true;
-
-    Tokeniser(CharacterReader reader, ParseErrorList errors) {
-        this.reader = reader;
-        this.errors = errors;
-    }
-
-    Token read() {
-        if (!selfClosingFlagAcknowledged) {
-            error("Self closing flag not acknowledged");
-            selfClosingFlagAcknowledged = true;
-        }
-
-        while (!isEmitPending) {
-            state.read(this, reader);
-        }
-
-        // if emit is pending, a non-character token was found: return any chars
-        // in buffer, and leave token for next read:
-        if (charBuffer.length() > 0) {
-            String str = charBuffer.toString();
-            charBuffer.delete(0, charBuffer.length());
-            return new Token.Character(str);
-        } else {
-            isEmitPending = false;
-            return emitPending;
-        }
-    }
-
-    void emit(Token token) {
-        Validate.isFalse(isEmitPending, "There is an unread token pending!");
-
-        emitPending = token;
-        isEmitPending = true;
-
-        if (token.type == Token.TokenType.StartTag) {
-            Token.StartTag startTag = (Token.StartTag) token;
-            lastStartTag = startTag;
-            if (startTag.selfClosing) {
-                selfClosingFlagAcknowledged = false;
-            }
-        } else if (token.type == Token.TokenType.EndTag) {
-            Token.EndTag endTag = (Token.EndTag) token;
-            if (endTag.attributes.size() > 0) {
-                error("Attributes incorrectly present on end tag");
-            }
-        }
-    }
-
-    void emit(String str) {
-        // buffer strings up until last string token found, to emit only one
-        // token for a run of character refs etc.
-        // does not set isEmitPending; read checks that
-        charBuffer.append(str);
-    }
-
-    void emit(char c) {
-        charBuffer.append(c);
-    }
-
-    TokeniserState getState() {
-        return state;
-    }
-
-    void transition(TokeniserState state) {
-        this.state = state;
-    }
-
-    void advanceTransition(TokeniserState state) {
-        reader.advance();
-        this.state = state;
-    }
-
-    void acknowledgeSelfClosingFlag() {
-        selfClosingFlagAcknowledged = true;
-    }
-
-    Character consumeCharacterReference(Character additionalAllowedCharacter,
-            boolean inAttribute) {
-        if (reader.isEmpty()) {
-            return null;
-        }
-        if (additionalAllowedCharacter != null
-                && additionalAllowedCharacter == reader.current()) {
-            return null;
-        }
-        if (reader.matchesAny('\t', '\n', '\f', ' ', '<', '&')) {
-            return null;
-        }
-
-        reader.mark();
-        if (reader.matchConsume("#")) { // numbered
-            boolean isHexMode = reader.matchConsumeIgnoreCase("X");
-            String numRef = isHexMode ? reader.consumeHexSequence() : reader
-                    .consumeDigitSequence();
-            if (numRef.length() == 0) { // didn't match anything
-                characterReferenceError("numeric reference with no numerals");
-                reader.rewindToMark();
-                return null;
-            }
-            if (!reader.matchConsume(";")) {
-                characterReferenceError("missing semicolon"); // missing semi
-            }
-            int charval = -1;
-            try {
-                int base = isHexMode ? 16 : 10;
-                charval = Integer.valueOf(numRef, base);
-            } catch (NumberFormatException e) {
-            } // skip
-            if (charval == -1 || (charval >= 0xD800 && charval <= 0xDFFF)
-                    || charval > 0x10FFFF) {
-                characterReferenceError("character outside of valid range");
-                return replacementChar;
-            } else {
-                // todo: implement number replacement table
-                // todo: check for extra illegal unicode points as parse errors
-                return (char) charval;
-            }
-        } else { // named
-            // get as many letters as possible, and look for matching entities.
-            // unconsume backwards till a match is found
-            String nameRef = reader.consumeLetterThenDigitSequence();
-            String origNameRef = new String(nameRef); // for error reporting.
-                                                      // nameRef gets chomped
-                                                      // looking for matches
-            boolean looksLegit = reader.matches(';');
-            boolean found = false;
-            while (nameRef.length() > 0 && !found) {
-                if (Entities.isNamedEntity(nameRef)) {
-                    found = true;
-                } else {
-                    nameRef = nameRef.substring(0, nameRef.length() - 1);
-                    reader.unconsume();
-                }
-            }
-            if (!found) {
-                if (looksLegit) {
-                    characterReferenceError(String.format(
-                            "invalid named referenece '%s'", origNameRef));
-                }
-                reader.rewindToMark();
-                return null;
-            }
-            if (inAttribute
-                    && (reader.matchesLetter() || reader.matchesDigit() || reader
-                            .matchesAny('=', '-', '_'))) {
-                // don't want that to match
-                reader.rewindToMark();
-                return null;
-            }
-            if (!reader.matchConsume(";")) {
-                characterReferenceError("missing semicolon"); // missing semi
-            }
-            return Entities.getCharacterByName(nameRef);
-        }
-    }
-
-    Token.Tag createTagPending(boolean start) {
-        tagPending = start ? new Token.StartTag() : new Token.EndTag();
-        return tagPending;
-    }
-
-    void emitTagPending() {
-        tagPending.finaliseTag();
-        emit(tagPending);
-    }
-
-    void createCommentPending() {
-        commentPending = new Token.Comment();
-    }
-
-    void emitCommentPending() {
-        emit(commentPending);
-    }
-
-    void createDoctypePending() {
-        doctypePending = new Token.Doctype();
-    }
-
-    void emitDoctypePending() {
-        emit(doctypePending);
-    }
-
-    void createTempBuffer() {
-        dataBuffer = new StringBuilder();
-    }
-
-    boolean isAppropriateEndTagToken() {
-        if (lastStartTag == null) {
-            return false;
-        }
-        return tagPending.tagName.equals(lastStartTag.tagName);
-    }
-
-    String appropriateEndTagName() {
-        return lastStartTag.tagName;
-    }
-
-    void error(TokeniserState state) {
-        if (errors.canAddError()) {
-            errors.add(new ParseError(reader.pos(),
-                    "Unexpected character '%s' in input state [%s]", reader
-                            .current(), state));
-        }
-    }
-
-    void eofError(TokeniserState state) {
-        if (errors.canAddError()) {
-            errors.add(new ParseError(
-                    reader.pos(),
-                    "Unexpectedly reached end of file (EOF) in input state [%s]",
-                    state));
-        }
-    }
-
-    private void characterReferenceError(String message) {
-        if (errors.canAddError()) {
-            errors.add(new ParseError(reader.pos(),
-                    "Invalid character reference: %s", message));
-        }
-    }
-
-    private void error(String errorMsg) {
-        if (errors.canAddError()) {
-            errors.add(new ParseError(reader.pos(), errorMsg));
-        }
-    }
-
-    boolean currentNodeInHtmlNS() {
-        // todo: implement namespaces correctly
-        return true;
-        // Element currentNode = currentNode();
-        // return currentNode != null && currentNode.namespace().equals("HTML");
-    }
-}