path: root/server/src/org/jsoup/select/
diff options
author Artur Signell <> 2012-09-09 12:21:22 +0300
committer Artur Signell <> 2012-09-09 12:21:22 +0300
commit 2905256998b69b2da6cc5bfbbe2c9230d7cacd48 (patch)
tree fb5562714c4e59b04f8bfaa1092e45e1cf32e3cb /server/src/org/jsoup/select/
parent 17c0f672543946e0c39b42c37cc5e1d43607e2b4 (diff)
parent 65de3244f6f263a59492d32085057e895f68d1a8 (diff)
Merge branch 'master' into html5-doctype
Diffstat (limited to 'server/src/org/jsoup/select/')
1 files changed, 0 insertions, 334 deletions
diff --git a/server/src/org/jsoup/select/ b/server/src/org/jsoup/select/
deleted file mode 100644
index 7a04899d82..0000000000
--- a/server/src/org/jsoup/select/
+++ /dev/null
@@ -1,334 +0,0 @@
-import java.util.ArrayList;
-import java.util.List;
-import java.util.regex.Pattern;
-import org.jsoup.helper.StringUtil;
-import org.jsoup.helper.Validate;
-import org.jsoup.parser.TokenQueue;
-/**
- * Parses a CSS selector into an Evaluator tree.
- */
-class QueryParser {
- // Tokens that join simple selectors: group/OR (,), child (>), adjacent
- // sibling (+), general sibling (~) and descendant (space).
- private final static String[] combinators = { ",", ">", "+", "~", " " };
- // Queue over the query text; the parse methods consume from it.
- private TokenQueue tq;
- // The query string as given to the constructor; used in parse error messages.
- private String query;
- // Evaluators collected so far; parse() wraps them in an AND when more than
- // one remains.
- private List<Evaluator> evals = new ArrayList<Evaluator>();
- /**
-  * Sets up a parser over a single CSS query. Instances are only created
-  * through the static {@link #parse(String)} entry point.
-  *
-  * @param query
-  *            the CSS query to read
-  */
- private QueryParser(String query) {
-     this.tq = new TokenQueue(query);
-     this.query = query;
- }
- /**
-  * Turns a CSS query string into an Evaluator by running a fresh
-  * QueryParser over it.
-  *
-  * @param query
-  *            CSS query
-  * @return the Evaluator produced for the query
-  */
- public static Evaluator parse(String query) {
-     return new QueryParser(query).parse();
- }
- /**
-  * Runs the parser over the whole token queue.
-  *
-  * @return the only collected evaluator, or an AND over all of them when
-  *         more than one was collected
-  */
- Evaluator parse() {
-     tq.consumeWhitespace();
-     boolean leadingCombinator = tq.matchesAny(combinators);
-     if (!leadingCombinator) {
-         findElements();
-     } else {
-         // a query that opens with a combinator is combined against the root
-         evals.add(new StructuralEvaluator.Root());
-         combinator(tq.consume());
-     }
-     while (!tq.isEmpty()) {
-         // hierarchy and extras
-         boolean skippedWhitespace = tq.consumeWhitespace();
-         if (tq.matchesAny(combinators)) {
-             combinator(tq.consume());
-             continue;
-         }
-         if (skippedWhitespace) {
-             // bare whitespace between selectors is the descendant combinator
-             combinator(' ');
-             continue;
-         }
-         // E.class, E#id, E[attr] etc.: take the next piece off the queue
-         findElements();
-     }
-     return evals.size() == 1 ? evals.get(0) : new CombiningEvaluator.And(evals);
- }
- /**
-  * Applies a combinator: reads the sub-query that follows it from the token
-  * queue, parses that sub-query into its own evaluator, and merges it with
-  * the evaluators collected so far. When the method returns normally, evals
-  * holds exactly one evaluator representing the combined expression.
-  *
-  * @param combinator
-  *            one of ',', '>', '+', '~' or ' '
-  * @throws Selector.SelectorParseException
-  *             if the combinator is none of the characters above
-  */
- private void combinator(char combinator) {
- tq.consumeWhitespace();
- String subQuery = consumeSubQuery(); // support multi > childs
- Evaluator rootEval; // the new topmost evaluator
- Evaluator currentEval; // the evaluator the new eval will be combined
- // to. could be root, or rightmost or.
- Evaluator newEval = parse(subQuery); // the evaluator to add into target
- // evaluator
- boolean replaceRightMost = false;
- if (evals.size() == 1) {
- rootEval = currentEval = evals.get(0);
- // make sure OR (,) has precedence:
- if (rootEval instanceof CombiningEvaluator.Or && combinator != ',') {
- currentEval = ((CombiningEvaluator.Or) currentEval)
- .rightMostEvaluator();
- replaceRightMost = true;
- }
- } else {
- rootEval = currentEval = new CombiningEvaluator.And(evals);
- }
- // everything collected so far now lives in rootEval/currentEval
- evals.clear();
- // for most combinators: change the current eval into an AND of the
- // current eval and the new eval
- if (combinator == '>') {
- currentEval = new CombiningEvaluator.And(newEval,
- new StructuralEvaluator.ImmediateParent(currentEval));
- } else if (combinator == ' ') {
- currentEval = new CombiningEvaluator.And(newEval,
- new StructuralEvaluator.Parent(currentEval));
- } else if (combinator == '+') {
- currentEval = new CombiningEvaluator.And(newEval,
- new StructuralEvaluator.ImmediatePreviousSibling(
- currentEval));
- } else if (combinator == '~') {
- currentEval = new CombiningEvaluator.And(newEval,
- new StructuralEvaluator.PreviousSibling(currentEval));
- } else if (combinator == ',') { // group or.
- CombiningEvaluator.Or or;
- // extend an existing OR rather than nesting a new one inside it
- if (currentEval instanceof CombiningEvaluator.Or) {
- or = (CombiningEvaluator.Or) currentEval;
- or.add(newEval);
- } else {
- or = new CombiningEvaluator.Or();
- or.add(currentEval);
- or.add(newEval);
- }
- currentEval = or;
- } else {
- throw new Selector.SelectorParseException("Unknown combinator: "
- + combinator);
- }
- // when only the right-most OR alternative was combined, put the result
- // back into the existing OR; otherwise the result becomes the new root
- if (replaceRightMost) {
- ((CombiningEvaluator.Or) rootEval)
- .replaceRightMostEvaluator(currentEval);
- } else {
- rootEval = currentEval;
- }
- evals.add(rootEval);
- }
- /**
-  * Collects the text of the next sub-query from the token queue, stopping
-  * at the next combinator or at the end of the queue. Parenthesised and
-  * bracketed sections are copied as balanced units, with their delimiters
-  * re-added, so combinator characters inside them do not end the sub-query.
-  *
-  * @return the sub-query text (without the terminating combinator)
-  */
- private String consumeSubQuery() {
-     StringBuilder buffer = new StringBuilder();
-     boolean atCombinator = false;
-     while (!atCombinator && !tq.isEmpty()) {
-         if (tq.matches("(")) {
-             buffer.append("(");
-             buffer.append(tq.chompBalanced('(', ')'));
-             buffer.append(")");
-         } else if (tq.matches("[")) {
-             buffer.append("[");
-             buffer.append(tq.chompBalanced('[', ']'));
-             buffer.append("]");
-         } else if (tq.matchesAny(combinators)) {
-             atCombinator = true;
-         } else {
-             buffer.append(tq.consume());
-         }
-     }
-     return buffer.toString();
- }
- /**
-  * Dispatches on the next token in the queue and delegates to the handler
-  * that adds the corresponding evaluator. Prefixes tested with matchChomp
-  * are taken off the queue here; those tested with matches are left for
-  * the handler to consume.
-  *
-  * @throws Selector.SelectorParseException
-  *             if the queue does not start with any known selector form
-  */
- private void findElements() {
-     if (tq.matchChomp("#")) {
-         byId();
-         return;
-     }
-     if (tq.matchChomp(".")) {
-         byClass();
-         return;
-     }
-     if (tq.matchesWord()) {
-         byTag();
-         return;
-     }
-     if (tq.matches("[")) {
-         byAttribute();
-         return;
-     }
-     if (tq.matchChomp("*")) {
-         allElements();
-         return;
-     }
-     if (tq.matchChomp(":lt(")) {
-         indexLessThan();
-         return;
-     }
-     if (tq.matchChomp(":gt(")) {
-         indexGreaterThan();
-         return;
-     }
-     if (tq.matchChomp(":eq(")) {
-         indexEquals();
-         return;
-     }
-     if (tq.matches(":has(")) {
-         has();
-         return;
-     }
-     if (tq.matches(":contains(")) {
-         contains(false);
-         return;
-     }
-     if (tq.matches(":containsOwn(")) {
-         contains(true);
-         return;
-     }
-     if (tq.matches(":matches(")) {
-         matches(false);
-         return;
-     }
-     if (tq.matches(":matchesOwn(")) {
-         matches(true);
-         return;
-     }
-     if (tq.matches(":not(")) {
-         not();
-         return;
-     }
-     // nothing recognised
-     throw new Selector.SelectorParseException(
-             "Could not parse query '%s': unexpected token at '%s'",
-             query, tq.remainder());
- }
- /**
-  * Handles an id selector: reads a CSS identifier from the queue, requires
-  * it to be non-empty, and adds an Id evaluator for it.
-  */
- private void byId() {
-     String identifier = tq.consumeCssIdentifier();
-     Validate.notEmpty(identifier);
-     Evaluator idEval = new Evaluator.Id(identifier);
-     evals.add(idEval);
- }
- /**
-  * Handles a class selector: reads a CSS identifier from the queue,
-  * requires it to be non-empty, and adds a Class evaluator for its trimmed,
-  * lower-cased form.
-  */
- private void byClass() {
-     String className = tq.consumeCssIdentifier();
-     Validate.notEmpty(className);
-     // Lower-case with a fixed locale: the no-arg toLowerCase() follows the
-     // JVM default locale, and e.g. under Turkish 'I' becomes a dotless i,
-     // so the normalised name would depend on where the code runs.
-     String normalized = className.trim().toLowerCase(java.util.Locale.ENGLISH);
-     evals.add(new Evaluator.Class(normalized));
- }
- /**
-  * Handles a tag selector: reads an element selector from the queue,
-  * requires it to be non-empty, and adds a Tag evaluator for its trimmed,
-  * lower-cased form.
-  */
- private void byTag() {
-     String tagName = tq.consumeElementSelector();
-     Validate.notEmpty(tagName);
-     // namespaces: if element name is "abc:def", selector must be "abc|def",
-     // so flip (replace is a no-op when there is no '|')
-     tagName = tagName.replace("|", ":");
-     // Lower-case with a fixed locale: the no-arg toLowerCase() follows the
-     // JVM default locale, and e.g. under Turkish "DIV" would not become
-     // "div".
-     String normalized = tagName.trim().toLowerCase(java.util.Locale.ENGLISH);
-     evals.add(new Evaluator.Tag(normalized));
- }
- /**
-  * Handles an attribute selector. The bracket contents are read into their
-  * own queue and split into a key and, optionally, an operator plus value;
-  * the matching attribute evaluator is then added.
-  *
-  * @throws Selector.SelectorParseException
-  *             if text follows the key but does not start with a known
-  *             operator
-  */
- private void byAttribute() {
-     // content queue: just the text between '[' and ']'
-     TokenQueue cq = new TokenQueue(tq.chompBalanced('[', ']'));
-     // operators: eq, not, start, end, contain, match (or no value at all)
-     String key = cq.consumeToAny("=", "!=", "^=", "$=", "*=", "~=");
-     Validate.notEmpty(key);
-     cq.consumeWhitespace();
-     if (cq.isEmpty()) {
-         // key only: [attr], or [^prefix] for an attribute-name prefix
-         Evaluator keyOnly = key.startsWith("^")
-                 ? new Evaluator.AttributeStarting(key.substring(1))
-                 : new Evaluator.Attribute(key);
-         evals.add(keyOnly);
-         return;
-     }
-     // the value is read only after the operator has been taken off the queue
-     Evaluator withValue;
-     if (cq.matchChomp("=")) {
-         withValue = new Evaluator.AttributeWithValue(key, cq.remainder());
-     } else if (cq.matchChomp("!=")) {
-         withValue = new Evaluator.AttributeWithValueNot(key, cq.remainder());
-     } else if (cq.matchChomp("^=")) {
-         withValue = new Evaluator.AttributeWithValueStarting(key, cq.remainder());
-     } else if (cq.matchChomp("$=")) {
-         withValue = new Evaluator.AttributeWithValueEnding(key, cq.remainder());
-     } else if (cq.matchChomp("*=")) {
-         withValue = new Evaluator.AttributeWithValueContaining(key, cq.remainder());
-     } else if (cq.matchChomp("~=")) {
-         withValue = new Evaluator.AttributeWithValueMatching(key,
-                 Pattern.compile(cq.remainder()));
-     } else {
-         throw new Selector.SelectorParseException(
-                 "Could not parse attribute query '%s': unexpected token at '%s'",
-                 query, cq.remainder());
-     }
-     evals.add(withValue);
- }
- /** Adds an AllElements evaluator (the "*" selector). */
- private void allElements() {
-     Evaluator everything = new Evaluator.AllElements();
-     evals.add(everything);
- }
- // pseudo selectors :lt, :gt, :eq
- /** Adds an IndexLessThan evaluator for the index read from the queue (:lt). */
- private void indexLessThan() {
-     int index = consumeIndex();
-     evals.add(new Evaluator.IndexLessThan(index));
- }
- /** Adds an IndexGreaterThan evaluator for the index read from the queue (:gt). */
- private void indexGreaterThan() {
-     int index = consumeIndex();
-     evals.add(new Evaluator.IndexGreaterThan(index));
- }
- /** Adds an IndexEquals evaluator for the index read from the queue (:eq). */
- private void indexEquals() {
-     int index = consumeIndex();
-     evals.add(new Evaluator.IndexEquals(index));
- }
- /**
-  * Reads the argument of an index pseudo selector: the text up to the
-  * closing ')' is trimmed, validated as numeric and converted to an int.
-  *
-  * @return the parsed index
-  */
- private int consumeIndex() {
-     String rawIndex = tq.chompTo(")");
-     String digits = rawIndex.trim();
-     boolean numeric = StringUtil.isNumeric(digits);
-     Validate.isTrue(numeric, "Index must be numeric");
-     return Integer.parseInt(digits);
- }
- /**
-  * Pseudo selector :has(el). The parenthesised sub-query must be non-empty;
-  * it is parsed on its own and wrapped in a Has evaluator.
-  */
- private void has() {
-     tq.consume(":has");
-     String inner = tq.chompBalanced('(', ')');
-     Validate.notEmpty(inner, ":has(el) subselect must not be empty");
-     Evaluator innerEval = parse(inner);
-     evals.add(new StructuralEvaluator.Has(innerEval));
- }
- /**
-  * Pseudo selectors :contains(text) and :containsOwn(text). The
-  * parenthesised text is unescaped and must be non-empty.
-  *
-  * @param own
-  *            true for :containsOwn (adds ContainsOwnText), false for
-  *            :contains (adds ContainsText)
-  */
- private void contains(boolean own) {
-     if (own) {
-         tq.consume(":containsOwn");
-     } else {
-         tq.consume(":contains");
-     }
-     String rawText = tq.chompBalanced('(', ')');
-     String searchText = TokenQueue.unescape(rawText);
-     Validate.notEmpty(searchText, ":contains(text) query must not be empty");
-     Evaluator textEval = own
-             ? new Evaluator.ContainsOwnText(searchText)
-             : new Evaluator.ContainsText(searchText);
-     evals.add(textEval);
- }
- /**
-  * Pseudo selectors :matches(regex) and :matchesOwn(regex). The
-  * parenthesised text must be non-empty and is compiled as a regular
-  * expression.
-  *
-  * @param own
-  *            true for :matchesOwn (adds MatchesOwn), false for :matches
-  *            (adds Matches)
-  */
- private void matches(boolean own) {
-     if (own) {
-         tq.consume(":matchesOwn");
-     } else {
-         tq.consume(":matches");
-     }
-     // don't unescape, as regex bits will be escaped
-     String regex = tq.chompBalanced('(', ')');
-     Validate.notEmpty(regex, ":matches(regex) query must not be empty");
-     Pattern compiled = Pattern.compile(regex);
-     Evaluator regexEval = own
-             ? new Evaluator.MatchesOwn(compiled)
-             : new Evaluator.Matches(compiled);
-     evals.add(regexEval);
- }
- /**
-  * Pseudo selector :not(selector). The parenthesised sub-query must be
-  * non-empty; it is parsed on its own and wrapped in a Not evaluator.
-  */
- private void not() {
-     tq.consume(":not");
-     String inner = tq.chompBalanced('(', ')');
-     Validate.notEmpty(inner, ":not(selector) subselect must not be empty");
-     Evaluator innerEval = parse(inner);
-     evals.add(new StructuralEvaluator.Not(innerEval));
- }