diff options
Diffstat (limited to 'server/src/org/jsoup/select/QueryParser.java')
-rw-r--r-- | server/src/org/jsoup/select/QueryParser.java | 193 |
1 files changed, 117 insertions, 76 deletions
diff --git a/server/src/org/jsoup/select/QueryParser.java b/server/src/org/jsoup/select/QueryParser.java index d3cc36f91c..7a04899d82 100644 --- a/server/src/org/jsoup/select/QueryParser.java +++ b/server/src/org/jsoup/select/QueryParser.java @@ -12,7 +12,7 @@ import org.jsoup.parser.TokenQueue; * Parses a CSS selector into an Evaluator tree. */ class QueryParser { - private final static String[] combinators = {",", ">", "+", "~", " "}; + private final static String[] combinators = { ",", ">", "+", "~", " " }; private TokenQueue tq; private String query; @@ -20,16 +20,20 @@ class QueryParser { /** * Create a new QueryParser. - * @param query CSS query + * + * @param query + * CSS query */ private QueryParser(String query) { this.query = query; - this.tq = new TokenQueue(query); + tq = new TokenQueue(query); } /** * Parse a CSS query into an Evaluator. - * @param query CSS query + * + * @param query + * CSS query * @return Evaluator */ public static Evaluator parse(String query) { @@ -39,12 +43,14 @@ class QueryParser { /** * Parse the query + * * @return Evaluator */ Evaluator parse() { tq.consumeWhitespace(); - if (tq.matchesAny(combinators)) { // if starts with a combinator, use root as elements + if (tq.matchesAny(combinators)) { // if starts with a combinator, use + // root as elements evals.add(new StructuralEvaluator.Root()); combinator(tq.consume()); } else { @@ -64,8 +70,9 @@ class QueryParser { } } - if (evals.size() == 1) + if (evals.size() == 1) { return evals.get(0); + } return new CombiningEvaluator.And(evals); } @@ -75,33 +82,41 @@ class QueryParser { String subQuery = consumeSubQuery(); // support multi > childs Evaluator rootEval; // the new topmost evaluator - Evaluator currentEval; // the evaluator the new eval will be combined to. could be root, or rightmost or. - Evaluator newEval = parse(subQuery); // the evaluator to add into target evaluator + Evaluator currentEval; // the evaluator the new eval will be combined + // to. could be root, or rightmost or. + Evaluator newEval = parse(subQuery); // the evaluator to add into target + // evaluator boolean replaceRightMost = false; if (evals.size() == 1) { rootEval = currentEval = evals.get(0); // make sure OR (,) has precedence: if (rootEval instanceof CombiningEvaluator.Or && combinator != ',') { - currentEval = ((CombiningEvaluator.Or) currentEval).rightMostEvaluator(); + currentEval = ((CombiningEvaluator.Or) currentEval) + .rightMostEvaluator(); replaceRightMost = true; } - } - else { + } else { rootEval = currentEval = new CombiningEvaluator.And(evals); } evals.clear(); - // for most combinators: change the current eval into an AND of the current eval and the new eval - if (combinator == '>') - currentEval = new CombiningEvaluator.And(newEval, new StructuralEvaluator.ImmediateParent(currentEval)); - else if (combinator == ' ') - currentEval = new CombiningEvaluator.And(newEval, new StructuralEvaluator.Parent(currentEval)); - else if (combinator == '+') - currentEval = new CombiningEvaluator.And(newEval, new StructuralEvaluator.ImmediatePreviousSibling(currentEval)); - else if (combinator == '~') - currentEval = new CombiningEvaluator.And(newEval, new StructuralEvaluator.PreviousSibling(currentEval)); - else if (combinator == ',') { // group or. + // for most combinators: change the current eval into an AND of the + // current eval and the new eval + if (combinator == '>') { + currentEval = new CombiningEvaluator.And(newEval, + new StructuralEvaluator.ImmediateParent(currentEval)); + } else if (combinator == ' ') { + currentEval = new CombiningEvaluator.And(newEval, + new StructuralEvaluator.Parent(currentEval)); + } else if (combinator == '+') { + currentEval = new CombiningEvaluator.And(newEval, + new StructuralEvaluator.ImmediatePreviousSibling( + currentEval)); + } else if (combinator == '~') { + currentEval = new CombiningEvaluator.And(newEval, + new StructuralEvaluator.PreviousSibling(currentEval)); + } else if (combinator == ',') { // group or. CombiningEvaluator.Or or; if (currentEval instanceof CombiningEvaluator.Or) { or = (CombiningEvaluator.Or) currentEval; @@ -112,62 +127,70 @@ class QueryParser { or.add(newEval); } currentEval = or; + } else { + throw new Selector.SelectorParseException("Unknown combinator: " + + combinator); } - else - throw new Selector.SelectorParseException("Unknown combinator: " + combinator); - if (replaceRightMost) - ((CombiningEvaluator.Or) rootEval).replaceRightMostEvaluator(currentEval); - else rootEval = currentEval; + if (replaceRightMost) { + ((CombiningEvaluator.Or) rootEval) + .replaceRightMostEvaluator(currentEval); + } else { + rootEval = currentEval; + } evals.add(rootEval); } private String consumeSubQuery() { StringBuilder sq = new StringBuilder(); while (!tq.isEmpty()) { - if (tq.matches("(")) + if (tq.matches("(")) { sq.append("(").append(tq.chompBalanced('(', ')')).append(")"); - else if (tq.matches("[")) + } else if (tq.matches("[")) { sq.append("[").append(tq.chompBalanced('[', ']')).append("]"); - else if (tq.matchesAny(combinators)) + } else if (tq.matchesAny(combinators)) { break; - else + } else { sq.append(tq.consume()); + } } return sq.toString(); } private void findElements() { - if (tq.matchChomp("#")) + if (tq.matchChomp("#")) { byId(); - else if (tq.matchChomp(".")) + } else if (tq.matchChomp(".")) { byClass(); - else if (tq.matchesWord()) + } else if (tq.matchesWord()) { byTag(); - else if (tq.matches("[")) + } else if (tq.matches("[")) { byAttribute(); - else if (tq.matchChomp("*")) + } else if (tq.matchChomp("*")) { allElements(); - else if (tq.matchChomp(":lt(")) + } else if (tq.matchChomp(":lt(")) { indexLessThan(); - else if (tq.matchChomp(":gt(")) + } else if (tq.matchChomp(":gt(")) { indexGreaterThan(); - else if (tq.matchChomp(":eq(")) + } else if (tq.matchChomp(":eq(")) { indexEquals(); - else if (tq.matches(":has(")) + } else if (tq.matches(":has(")) { has(); - else if (tq.matches(":contains(")) + } else if (tq.matches(":contains(")) { contains(false); - else if (tq.matches(":containsOwn(")) + } else if (tq.matches(":containsOwn(")) { contains(true); - else if (tq.matches(":matches(")) + } else if (tq.matches(":matches(")) { matches(false); - else if (tq.matches(":matchesOwn(")) + } else if (tq.matches(":matchesOwn(")) { matches(true); - else if (tq.matches(":not(")) + } else if (tq.matches(":not(")) { not(); - else // unhandled - throw new Selector.SelectorParseException("Could not parse query '%s': unexpected token at '%s'", query, tq.remainder()); + } else { + throw new Selector.SelectorParseException( + "Could not parse query '%s': unexpected token at '%s'", + query, tq.remainder()); + } } @@ -187,44 +210,58 @@ class QueryParser { String tagName = tq.consumeElementSelector(); Validate.notEmpty(tagName); - // namespaces: if element name is "abc:def", selector must be "abc|def", so flip: - if (tagName.contains("|")) + // namespaces: if element name is "abc:def", selector must be "abc|def", + // so flip: + if (tagName.contains("|")) { tagName = tagName.replace("|", ":"); + } evals.add(new Evaluator.Tag(tagName.trim().toLowerCase())); } private void byAttribute() { - TokenQueue cq = new TokenQueue(tq.chompBalanced('[', ']')); // content queue - String key = cq.consumeToAny("=", "!=", "^=", "$=", "*=", "~="); // eq, not, start, end, contain, match, (no val) + TokenQueue cq = new TokenQueue(tq.chompBalanced('[', ']')); // content + // queue + String key = cq.consumeToAny("=", "!=", "^=", "$=", "*=", "~="); // eq, + // not, + // start, + // end, + // contain, + // match, + // (no + // val) Validate.notEmpty(key); cq.consumeWhitespace(); if (cq.isEmpty()) { - if (key.startsWith("^")) + if (key.startsWith("^")) { evals.add(new Evaluator.AttributeStarting(key.substring(1))); - else + } else { evals.add(new Evaluator.Attribute(key)); + } } else { - if (cq.matchChomp("=")) + if (cq.matchChomp("=")) { evals.add(new Evaluator.AttributeWithValue(key, cq.remainder())); - - else if (cq.matchChomp("!=")) - evals.add(new Evaluator.AttributeWithValueNot(key, cq.remainder())); - - else if (cq.matchChomp("^=")) - evals.add(new Evaluator.AttributeWithValueStarting(key, cq.remainder())); - - else if (cq.matchChomp("$=")) - evals.add(new Evaluator.AttributeWithValueEnding(key, cq.remainder())); - - else if (cq.matchChomp("*=")) - evals.add(new Evaluator.AttributeWithValueContaining(key, cq.remainder())); - - else if (cq.matchChomp("~=")) - evals.add(new Evaluator.AttributeWithValueMatching(key, Pattern.compile(cq.remainder()))); - else - throw new Selector.SelectorParseException("Could not parse attribute query '%s': unexpected token at '%s'", query, cq.remainder()); + } else if (cq.matchChomp("!=")) { + evals.add(new Evaluator.AttributeWithValueNot(key, cq + .remainder())); + } else if (cq.matchChomp("^=")) { + evals.add(new Evaluator.AttributeWithValueStarting(key, cq + .remainder())); + } else if (cq.matchChomp("$=")) { + evals.add(new Evaluator.AttributeWithValueEnding(key, cq + .remainder())); + } else if (cq.matchChomp("*=")) { + evals.add(new Evaluator.AttributeWithValueContaining(key, cq + .remainder())); + } else if (cq.matchChomp("~=")) { + evals.add(new Evaluator.AttributeWithValueMatching(key, Pattern + .compile(cq.remainder()))); + } else { + throw new Selector.SelectorParseException( + "Could not parse attribute query '%s': unexpected token at '%s'", + query, cq.remainder()); + } } } @@ -264,29 +301,33 @@ class QueryParser { tq.consume(own ? ":containsOwn" : ":contains"); String searchText = TokenQueue.unescape(tq.chompBalanced('(', ')')); Validate.notEmpty(searchText, ":contains(text) query must not be empty"); - if (own) + if (own) { evals.add(new Evaluator.ContainsOwnText(searchText)); - else + } else { evals.add(new Evaluator.ContainsText(searchText)); + } } // :matches(regex), matchesOwn(regex) private void matches(boolean own) { tq.consume(own ? ":matchesOwn" : ":matches"); - String regex = tq.chompBalanced('(', ')'); // don't unescape, as regex bits will be escaped + String regex = tq.chompBalanced('(', ')'); // don't unescape, as regex + // bits will be escaped Validate.notEmpty(regex, ":matches(regex) query must not be empty"); - if (own) + if (own) { evals.add(new Evaluator.MatchesOwn(Pattern.compile(regex))); - else + } else { evals.add(new Evaluator.Matches(Pattern.compile(regex))); + } } // :not(selector) private void not() { tq.consume(":not"); String subQuery = tq.chompBalanced('(', ')'); - Validate.notEmpty(subQuery, ":not(selector) subselect must not be empty"); + Validate.notEmpty(subQuery, + ":not(selector) subselect must not be empty"); evals.add(new StructuralEvaluator.Not(parse(subQuery))); } |