aboutsummaryrefslogtreecommitdiffstats
path: root/server/src/org/jsoup/parser/TokenQueue.java
diff options
context:
space:
mode:
Diffstat (limited to 'server/src/org/jsoup/parser/TokenQueue.java')
-rw-r--r--server/src/org/jsoup/parser/TokenQueue.java274
1 files changed, 177 insertions, 97 deletions
diff --git a/server/src/org/jsoup/parser/TokenQueue.java b/server/src/org/jsoup/parser/TokenQueue.java
index a2fdfe621a..3e7127e640 100644
--- a/server/src/org/jsoup/parser/TokenQueue.java
+++ b/server/src/org/jsoup/parser/TokenQueue.java
@@ -5,18 +5,20 @@ import org.jsoup.helper.Validate;
/**
* A character queue with parsing helpers.
- *
+ *
* @author Jonathan Hedley
*/
public class TokenQueue {
private String queue;
private int pos = 0;
-
+
private static final char ESC = '\\'; // escape char for chomp balanced.
/**
- Create a new TokenQueue.
- @param data string of data to back queue.
+ * Create a new TokenQueue.
+ *
+ * @param data
+ * string of data to back queue.
*/
public TokenQueue(String data) {
Validate.notNull(data);
@@ -25,18 +27,20 @@ public class TokenQueue {
/**
* Is the queue empty?
+ *
* @return true if no data left in queue.
*/
public boolean isEmpty() {
return remainingLength() == 0;
}
-
+
private int remainingLength() {
return queue.length() - pos;
}
/**
* Retrieves but does not remove the first character from the queue.
+ *
* @return First character, or 0 if empty.
*/
public char peek() {
@@ -44,16 +48,21 @@ public class TokenQueue {
}
/**
- Add a character to the start of the queue (will be the next character retrieved).
- @param c character to add
+ * Add a character to the start of the queue (will be the next character
+ * retrieved).
+ *
+ * @param c
+ * character to add
*/
public void addFirst(Character c) {
addFirst(c.toString());
}
/**
- Add a string to the start of the queue.
- @param seq string to add.
+ * Add a string to the start of the queue.
+ *
+ * @param seq
+ * string to add.
*/
public void addFirst(String seq) {
// not very performant, but an edge case
@@ -62,8 +71,11 @@ public class TokenQueue {
}
/**
- * Tests if the next characters on the queue match the sequence. Case insensitive.
- * @param seq String to check queue for.
+ * Tests if the next characters on the queue match the sequence. Case
+ * insensitive.
+ *
+ * @param seq
+ * String to check queue for.
* @return true if the next characters match.
*/
public boolean matches(String seq) {
@@ -72,47 +84,57 @@ public class TokenQueue {
/**
* Case sensitive match test.
- * @param seq string to case sensitively check for
+ *
+ * @param seq
+ * string to case sensitively check for
* @return true if matched, false if not
*/
public boolean matchesCS(String seq) {
return queue.startsWith(seq, pos);
}
-
/**
- Tests if the next characters match any of the sequences. Case insensitive.
- @param seq list of strings to case insensitively check for
- @return true of any matched, false if none did
+ * Tests if the next characters match any of the sequences. Case
+ * insensitive.
+ *
+ * @param seq
+ * list of strings to case insensitively check for
+ * @return true if any matched, false if none did
*/
public boolean matchesAny(String... seq) {
for (String s : seq) {
- if (matches(s))
+ if (matches(s)) {
return true;
+ }
}
return false;
}
public boolean matchesAny(char... seq) {
- if (isEmpty())
+ if (isEmpty()) {
return false;
+ }
- for (char c: seq) {
- if (queue.charAt(pos) == c)
+ for (char c : seq) {
+ if (queue.charAt(pos) == c) {
return true;
+ }
}
return false;
}
public boolean matchesStartTag() {
// micro opt for matching "<x"
- return (remainingLength() >= 2 && queue.charAt(pos) == '<' && Character.isLetter(queue.charAt(pos+1)));
+ return (remainingLength() >= 2 && queue.charAt(pos) == '<' && Character
+ .isLetter(queue.charAt(pos + 1)));
}
/**
- * Tests if the queue matches the sequence (as with match), and if they do, removes the matched string from the
- * queue.
- * @param seq String to search for, and if found, remove from queue.
+ * Tests if the queue matches the sequence (as with matches), and if it does,
+ * removes the matched string from the queue.
+ *
+ * @param seq
+ * String to search for, and if found, remove from queue.
* @return true if found and removed, false if not found.
*/
public boolean matchChomp(String seq) {
@@ -125,16 +147,18 @@ public class TokenQueue {
}
/**
- Tests if queue starts with a whitespace character.
- @return if starts with whitespace
+ * Tests if queue starts with a whitespace character.
+ *
+ * @return true if the queue starts with a whitespace character
*/
public boolean matchesWhitespace() {
return !isEmpty() && StringUtil.isWhitespace(queue.charAt(pos));
}
/**
- Test if the queue matches a word character (letter or digit).
- @return if matches a word character
+ * Test if the queue matches a word character (letter or digit).
+ *
+ * @return true if the queue starts with a word character
*/
public boolean matchesWord() {
return !isEmpty() && Character.isLetterOrDigit(queue.charAt(pos));
@@ -144,11 +168,14 @@ public class TokenQueue {
* Drops the next character off the queue.
*/
public void advance() {
- if (!isEmpty()) pos++;
+ if (!isEmpty()) {
+ pos++;
+ }
}
/**
* Consume one character off queue.
+ *
* @return first character on queue.
*/
public char consume() {
@@ -156,25 +183,36 @@ public class TokenQueue {
}
/**
- * Consumes the supplied sequence of the queue. If the queue does not start with the supplied sequence, will
- * throw an illegal state exception -- but you should be running match() against that condition.
- <p>
- Case insensitive.
- * @param seq sequence to remove from head of queue.
+ * Consumes the supplied sequence off the queue. If the queue does not start
+ * with the supplied sequence, will throw an illegal state exception -- but
+ * you should be running matches() against that condition.
+ * <p>
+ * Case insensitive.
+ *
+ * @param seq
+ * sequence to remove from head of queue.
*/
public void consume(String seq) {
- if (!matches(seq))
- throw new IllegalStateException("Queue did not match expected sequence");
+ if (!matches(seq)) {
+ throw new IllegalStateException(
+ "Queue did not match expected sequence");
+ }
int len = seq.length();
- if (len > remainingLength())
- throw new IllegalStateException("Queue not long enough to consume sequence");
-
+ if (len > remainingLength()) {
+ throw new IllegalStateException(
+ "Queue not long enough to consume sequence");
+ }
+
pos += len;
}
/**
- * Pulls a string off the queue, up to but exclusive of the match sequence, or to the queue running out.
- * @param seq String to end on (and not include in return, but leave on queue). <b>Case sensitive.</b>
+ * Pulls a string off the queue, up to but exclusive of the match sequence,
+ * or to the queue running out.
+ *
+ * @param seq
+ * String to end on (and not include in return, but leave on
+ * queue). <b>Case sensitive.</b>
* @return The matched data consumed from queue.
*/
public String consumeTo(String seq) {
@@ -187,38 +225,52 @@ public class TokenQueue {
return remainder();
}
}
-
+
public String consumeToIgnoreCase(String seq) {
int start = pos;
String first = seq.substring(0, 1);
- boolean canScan = first.toLowerCase().equals(first.toUpperCase()); // if first is not cased, use index of
+ boolean canScan = first.toLowerCase().equals(first.toUpperCase()); // if
+ // first
+ // is
+ // not
+ // cased,
+ // use
+ // index
+ // of
while (!isEmpty()) {
- if (matches(seq))
+ if (matches(seq)) {
break;
-
+ }
+
if (canScan) {
int skip = queue.indexOf(first, pos) - pos;
- if (skip == 0) // this char is the skip char, but not match, so force advance of pos
+ if (skip == 0) {
pos++;
- else if (skip < 0) // no chance of finding, grab to end
+ } else if (skip < 0) {
pos = queue.length();
- else
+ } else {
pos += skip;
- }
- else
+ }
+ } else {
pos++;
+ }
}
- String data = queue.substring(start, pos);
- return data;
+ String data = queue.substring(start, pos);
+ return data;
}
/**
- Consumes to the first sequence provided, or to the end of the queue. Leaves the terminator on the queue.
- @param seq any number of terminators to consume to. <b>Case insensitive.</b>
- @return consumed string
+ * Consumes to the first sequence provided, or to the end of the queue.
+ * Leaves the terminator on the queue.
+ *
+ * @param seq
+ * any number of terminators to consume to. <b>Case
+ * insensitive.</b>
+ * @return consumed string
*/
- // todo: method name. not good that consumeTo cares for case, and consume to any doesn't. And the only use for this
+ // todo: method name. not good that consumeTo cares for case, and consume to
+ // any doesn't. And the only use for this
// is is a case sensitive time...
public String consumeToAny(String... seq) {
int start = pos;
@@ -226,16 +278,20 @@ public class TokenQueue {
pos++;
}
- String data = queue.substring(start, pos);
- return data;
+ String data = queue.substring(start, pos);
+ return data;
}
/**
- * Pulls a string off the queue (like consumeTo), and then pulls off the matched string (but does not return it).
+ * Pulls a string off the queue (like consumeTo), and then pulls off the
+ * matched string (but does not return it).
* <p>
- * If the queue runs out of characters before finding the seq, will return as much as it can (and queue will go
- * isEmpty() == true).
- * @param seq String to match up to, and not include in return, and to pull off queue. <b>Case sensitive.</b>
+ * If the queue runs out of characters before finding the seq, will return
+ * as much as it can (and queue will go isEmpty() == true).
+ *
+ * @param seq
+ * String to match up to, and not include in return, and to pull
+ * off queue. <b>Case sensitive.</b>
* @return Data matched from queue.
*/
public String chompTo(String seq) {
@@ -243,7 +299,7 @@ public class TokenQueue {
matchChomp(seq);
return data;
}
-
+
public String chompToIgnoreCase(String seq) {
String data = consumeToIgnoreCase(seq); // case insensitive scan
matchChomp(seq);
@@ -251,12 +307,17 @@ public class TokenQueue {
}
/**
- * Pulls a balanced string off the queue. E.g. if queue is "(one (two) three) four", (,) will return "one (two) three",
- * and leave " four" on the queue. Unbalanced openers and closers can be escaped (with \). Those escapes will be left
- * in the returned string, which is suitable for regexes (where we need to preserve the escape), but unsuitable for
+ * Pulls a balanced string off the queue. E.g. if queue is
+ * "(one (two) three) four", (,) will return "one (two) three", and leave
+ * " four" on the queue. Unbalanced openers and closers can be escaped (with
+ * \). Those escapes will be left in the returned string, which is suitable
+ * for regexes (where we need to preserve the escape), but unsuitable for
* contains text strings; use unescape for that.
- * @param open opener
- * @param close closer
+ *
+ * @param open
+ * opener
+ * @param close
+ * closer
* @return data matched from the queue
*/
public String chompBalanced(char open, char close) {
@@ -265,25 +326,32 @@ public class TokenQueue {
char last = 0;
do {
- if (isEmpty()) break;
+ if (isEmpty()) {
+ break;
+ }
Character c = consume();
if (last == 0 || last != ESC) {
- if (c.equals(open))
+ if (c.equals(open)) {
depth++;
- else if (c.equals(close))
+ } else if (c.equals(close)) {
depth--;
+ }
}
- if (depth > 0 && last != 0)
- accum.append(c); // don't include the outer match pair in the return
+ if (depth > 0 && last != 0) {
+ accum.append(c); // don't include the outer match pair in the
+ // return
+ }
last = c;
} while (depth > 0);
return accum.toString();
}
-
+
/**
* Unescaped a \ escaped string.
- * @param in backslash escaped string
+ *
+ * @param in
+ * backslash escaped string
* @return unescaped string
*/
public static String unescape(String in) {
@@ -291,11 +359,12 @@ public class TokenQueue {
char last = 0;
for (char c : in.toCharArray()) {
if (c == ESC) {
- if (last != 0 && last == ESC)
+ if (last != 0 && last == ESC) {
out.append(c);
- }
- else
+ }
+ } else {
out.append(c);
+ }
last = c;
}
return out.toString();
@@ -315,15 +384,17 @@ public class TokenQueue {
/**
* Retrieves the next run of word type (letter or digit) off the queue.
+ *
* @return String of word characters from queue, or empty string if none.
*/
public String consumeWord() {
int start = pos;
- while (matchesWord())
+ while (matchesWord()) {
pos++;
+ }
return queue.substring(start, pos);
}
-
+
/**
* Consume an tag name off the queue (word or :, _, -)
*
@@ -331,53 +402,61 @@ public class TokenQueue {
*/
public String consumeTagName() {
int start = pos;
- while (!isEmpty() && (matchesWord() || matchesAny(':', '_', '-')))
+ while (!isEmpty() && (matchesWord() || matchesAny(':', '_', '-'))) {
pos++;
-
+ }
+
return queue.substring(start, pos);
}
-
+
/**
- * Consume a CSS element selector (tag name, but | instead of : for namespaces, to not conflict with :pseudo selects).
+ * Consume a CSS element selector (tag name, but | instead of : for
+ * namespaces, to not conflict with :pseudo selects).
*
* @return tag name
*/
public String consumeElementSelector() {
int start = pos;
- while (!isEmpty() && (matchesWord() || matchesAny('|', '_', '-')))
+ while (!isEmpty() && (matchesWord() || matchesAny('|', '_', '-'))) {
pos++;
-
+ }
+
return queue.substring(start, pos);
}
/**
- Consume a CSS identifier (ID or class) off the queue (letter, digit, -, _)
- http://www.w3.org/TR/CSS2/syndata.html#value-def-identifier
- @return identifier
+ * Consume a CSS identifier (ID or class) off the queue (letter, digit, -,
+ * _) http://www.w3.org/TR/CSS2/syndata.html#value-def-identifier
+ *
+ * @return identifier
*/
public String consumeCssIdentifier() {
int start = pos;
- while (!isEmpty() && (matchesWord() || matchesAny('-', '_')))
+ while (!isEmpty() && (matchesWord() || matchesAny('-', '_'))) {
pos++;
+ }
return queue.substring(start, pos);
}
/**
- Consume an attribute key off the queue (letter, digit, -, _, :")
- @return attribute key
+ * Consume an attribute key off the queue (letter, digit, -, _, :)
+ *
+ * @return attribute key
*/
public String consumeAttributeKey() {
int start = pos;
- while (!isEmpty() && (matchesWord() || matchesAny('-', '_', ':')))
+ while (!isEmpty() && (matchesWord() || matchesAny('-', '_', ':'))) {
pos++;
-
+ }
+
return queue.substring(start, pos);
}
/**
- Consume and return whatever is left on the queue.
- @return remained of queue.
+ * Consume and return whatever is left on the queue.
+ *
+ * @return remainder of the queue.
*/
public String remainder() {
StringBuilder accum = new StringBuilder();
@@ -386,7 +465,8 @@ public class TokenQueue {
}
return accum.toString();
}
-
+
+ @Override
public String toString() {
return queue.substring(pos);
}