summary | refs | log | tree | commit | diff | stats
path: root/server/src/org/jsoup/parser/CharacterReader.java
diff options
context:
space:
mode:
Diffstat (limited to 'server/src/org/jsoup/parser/CharacterReader.java')
-rw-r--r-- server/src/org/jsoup/parser/CharacterReader.java 230
1 files changed, 230 insertions, 0 deletions
diff --git a/server/src/org/jsoup/parser/CharacterReader.java b/server/src/org/jsoup/parser/CharacterReader.java
new file mode 100644
index 0000000000..b549a571a0
--- /dev/null
+++ b/server/src/org/jsoup/parser/CharacterReader.java
@@ -0,0 +1,230 @@
+package org.jsoup.parser;
+
+import org.jsoup.helper.Validate;
+
+/**
+ * CharacterReader consumes tokens off a string. To replace the old TokenQueue.
+ * <p>
+ * The reader holds the input string (with carriage returns normalised to newlines) and a cursor
+ * into it. Methods either inspect the character(s) at the cursor ({@code matches*}) or return
+ * them and move the cursor forward ({@code consume*}). A single position can be remembered with
+ * {@link #mark()} and restored with {@link #rewindToMark()}.
+ */
+class CharacterReader {
+    /** Sentinel character (0xFFFF) returned by {@link #current()} and {@link #consume()} when no input remains. */
+    static final char EOF = (char) -1;
+
+    private final String input;
+    private final int length;
+    private int pos = 0;
+    private int mark = 0;
+
+    /**
+     * Creates a reader over the given input.
+     *
+     * @param input the text to read; validated as non-null. Every {@code "\r\n"} pair and every
+     *              lone {@code '\r'} is replaced by a single {@code '\n'} before reading starts,
+     *              so positions refer to the normalised text.
+     */
+    CharacterReader(String input) {
+        Validate.notNull(input);
+        String normalised = input.replaceAll("\r\n?", "\n"); // normalise carriage returns to newlines
+
+        this.input = normalised;
+        this.length = normalised.length();
+    }
+
+    /**
+     * @return the current cursor position within the normalised input
+     */
+    int pos() {
+        return pos;
+    }
+
+    /**
+     * @return true if the cursor is at or beyond the end of the input
+     */
+    boolean isEmpty() {
+        return pos >= length;
+    }
+
+    /**
+     * Peeks at the character under the cursor without moving it.
+     *
+     * @return the current character, or {@link #EOF} if no input remains
+     */
+    char current() {
+        return isEmpty() ? EOF : input.charAt(pos);
+    }
+
+    /**
+     * Reads the character under the cursor and moves the cursor forward by one.
+     * The cursor is advanced even when the input is exhausted, so that a following
+     * {@link #unconsume()} always restores the position held before this call.
+     *
+     * @return the consumed character, or {@link #EOF} if no input remained
+     */
+    char consume() {
+        char val = current();
+        pos++;
+        return val;
+    }
+
+    /** Moves the cursor back by one position. No bounds check is performed. */
+    void unconsume() {
+        pos--;
+    }
+
+    /** Moves the cursor forward by one position. No bounds check is performed. */
+    void advance() {
+        pos++;
+    }
+
+    /** Remembers the current cursor position for a later {@link #rewindToMark()}. */
+    void mark() {
+        mark = pos;
+    }
+
+    /** Moves the cursor back to the position saved by the last {@link #mark()} (initially 0). */
+    void rewindToMark() {
+        pos = mark;
+    }
+
+    /**
+     * Reads the character under the cursor as a one-character string and moves the cursor
+     * forward by one. Like {@link #consume()}, the cursor is advanced even at the end of input.
+     *
+     * @return the consumed character as a string, or the empty string if no input remained
+     */
+    String consumeAsString() {
+        // The previous form, substring(pos, pos++), evaluated both bounds to the old pos and
+        // therefore always produced "". Capture the start explicitly before advancing.
+        int start = pos++;
+        return start < length ? input.substring(start, start + 1) : "";
+    }
+
+    /**
+     * Consumes up to, but not including, the next occurrence of a character.
+     *
+     * @param c the character to stop before
+     * @return the text between the cursor and the next {@code c}; if {@code c} does not occur,
+     *         all remaining input
+     */
+    String consumeTo(char c) {
+        return consumeUpTo(input.indexOf(c, pos));
+    }
+
+    /**
+     * Consumes up to, but not including, the next occurrence of a sequence (case sensitive).
+     *
+     * @param seq the sequence to stop before
+     * @return the text between the cursor and the next {@code seq}; if {@code seq} does not
+     *         occur, all remaining input
+     */
+    String consumeTo(String seq) {
+        return consumeUpTo(input.indexOf(seq, pos));
+    }
+
+    /**
+     * Consumes up to, but not including, the first character that is one of {@code seq}.
+     *
+     * @param seq the characters to stop before
+     * @return the consumed text (possibly empty); all remaining input if none of the characters occurs
+     */
+    String consumeToAny(char... seq) {
+        int start = pos;
+
+        OUTER: while (!isEmpty()) {
+            char c = input.charAt(pos);
+            for (char seek : seq) {
+                if (seek == c)
+                    break OUTER;
+            }
+            pos++;
+        }
+
+        return pos > start ? input.substring(start, pos) : "";
+    }
+
+    /**
+     * Consumes everything from the cursor to the end of the input.
+     *
+     * @return the remaining input, or the empty string if the reader is already exhausted
+     *         (in which case the cursor is left where it is)
+     */
+    String consumeToEnd() {
+        // The cursor may sit past the end after consume() at EOF; substring would throw there.
+        if (isEmpty())
+            return "";
+        String data = input.substring(pos, length);
+        pos = length;
+        return data;
+    }
+
+    /**
+     * Consumes a run of ASCII letters ({@code A-Z}, {@code a-z}).
+     *
+     * @return the consumed letters, possibly empty
+     */
+    String consumeLetterSequence() {
+        int start = pos;
+        skipLetters();
+        return input.substring(start, pos);
+    }
+
+    /**
+     * Consumes a run of ASCII letters immediately followed by a run of ASCII digits.
+     * Either run may be empty.
+     *
+     * @return the consumed letters and digits, possibly empty
+     */
+    String consumeLetterThenDigitSequence() {
+        int start = pos;
+        skipLetters();
+        skipDigits();
+        return input.substring(start, pos);
+    }
+
+    /**
+     * Consumes a run of hexadecimal digits ({@code 0-9}, {@code A-F}, {@code a-f}).
+     *
+     * @return the consumed hex digits, possibly empty
+     */
+    String consumeHexSequence() {
+        int start = pos;
+        while (!isEmpty() && isHexDigit(input.charAt(pos)))
+            pos++;
+        return input.substring(start, pos);
+    }
+
+    /**
+     * Consumes a run of ASCII digits ({@code 0-9}).
+     *
+     * @return the consumed digits, possibly empty
+     */
+    String consumeDigitSequence() {
+        int start = pos;
+        skipDigits();
+        return input.substring(start, pos);
+    }
+
+    /**
+     * @param c the character to test for
+     * @return true if input remains and the character under the cursor is {@code c}
+     */
+    boolean matches(char c) {
+        return !isEmpty() && input.charAt(pos) == c;
+    }
+
+    /**
+     * @param seq the sequence to test for (case sensitive)
+     * @return true if the input at the cursor starts with {@code seq}
+     */
+    boolean matches(String seq) {
+        return input.startsWith(seq, pos);
+    }
+
+    /**
+     * @param seq the sequence to test for, ignoring case
+     * @return true if the input at the cursor starts with {@code seq}, compared case-insensitively
+     */
+    boolean matchesIgnoreCase(String seq) {
+        return input.regionMatches(true, pos, seq, 0, seq.length());
+    }
+
+    /**
+     * @param seq the candidate characters
+     * @return true if input remains and the character under the cursor is one of {@code seq}
+     */
+    boolean matchesAny(char... seq) {
+        if (isEmpty())
+            return false;
+
+        char c = input.charAt(pos);
+        for (char seek : seq) {
+            if (seek == c)
+                return true;
+        }
+        return false;
+    }
+
+    /**
+     * @return true if input remains and the character under the cursor is an ASCII letter
+     */
+    boolean matchesLetter() {
+        return !isEmpty() && isAsciiLetter(input.charAt(pos));
+    }
+
+    /**
+     * @return true if input remains and the character under the cursor is an ASCII digit
+     */
+    boolean matchesDigit() {
+        return !isEmpty() && isAsciiDigit(input.charAt(pos));
+    }
+
+    /**
+     * Consumes {@code seq} if the input at the cursor starts with it (case sensitive).
+     *
+     * @param seq the sequence to match and consume
+     * @return true if the sequence matched and the cursor was moved past it; false (cursor
+     *         unchanged) otherwise
+     */
+    boolean matchConsume(String seq) {
+        if (!matches(seq))
+            return false;
+        pos += seq.length();
+        return true;
+    }
+
+    /**
+     * Consumes {@code seq} if the input at the cursor starts with it, ignoring case.
+     *
+     * @param seq the sequence to match and consume
+     * @return true if the sequence matched and the cursor was moved past it; false (cursor
+     *         unchanged) otherwise
+     */
+    boolean matchConsumeIgnoreCase(String seq) {
+        if (!matchesIgnoreCase(seq))
+            return false;
+        pos += seq.length();
+        return true;
+    }
+
+    /**
+     * Checks whether {@code seq} occurs anywhere at or after the cursor, in either all-lowercase
+     * or all-uppercase form. Mixed-case occurrences are not found. The cursor is not moved.
+     *
+     * @param seq the sequence to look for
+     * @return true if the lower-cased or the upper-cased form of {@code seq} occurs at or after the cursor
+     */
+    boolean containsIgnoreCase(String seq) {
+        // used to check presence of </title>, </style>. only finds consistent case.
+        // Case conversion is pinned to a fixed locale: with the default locale, a Turkish
+        // environment would map 'i' to a dotted capital I and miss e.g. "</TITLE>".
+        String loScan = seq.toLowerCase(java.util.Locale.ENGLISH);
+        String hiScan = seq.toUpperCase(java.util.Locale.ENGLISH);
+        return (input.indexOf(loScan, pos) > -1) || (input.indexOf(hiScan, pos) > -1);
+    }
+
+    /**
+     * @return the input that has not been consumed yet; the empty string if the reader is exhausted
+     */
+    @Override
+    public String toString() {
+        // Guard against a cursor that was advanced past the end by consume()/advance().
+        return isEmpty() ? "" : input.substring(pos);
+    }
+
+    /** Consumes from the cursor up to {@code offset}; an offset of -1 ("not found") consumes to the end. */
+    private String consumeUpTo(int offset) {
+        if (offset == -1)
+            return consumeToEnd();
+        String consumed = input.substring(pos, offset);
+        pos = offset;
+        return consumed;
+    }
+
+    /** Advances the cursor past any ASCII letters at the current position. */
+    private void skipLetters() {
+        while (!isEmpty() && isAsciiLetter(input.charAt(pos)))
+            pos++;
+    }
+
+    /** Advances the cursor past any ASCII digits at the current position. */
+    private void skipDigits() {
+        while (!isEmpty() && isAsciiDigit(input.charAt(pos)))
+            pos++;
+    }
+
+    /** Tests for {@code A-Z} or {@code a-z}. */
+    private static boolean isAsciiLetter(char c) {
+        return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
+    }
+
+    /** Tests for {@code 0-9}. */
+    private static boolean isAsciiDigit(char c) {
+        return c >= '0' && c <= '9';
+    }
+
+    /** Tests for {@code 0-9}, {@code A-F} or {@code a-f}. */
+    private static boolean isHexDigit(char c) {
+        return isAsciiDigit(c) || (c >= 'A' && c <= 'F') || (c >= 'a' && c <= 'f');
+    }
+}