summaryrefslogtreecommitdiffstats
path: root/server/src/org/jsoup/nodes/Element.java
diff options
context:
space:
mode:
Diffstat (limited to 'server/src/org/jsoup/nodes/Element.java')
-rw-r--r--server/src/org/jsoup/nodes/Element.java1119
1 files changed, 1119 insertions, 0 deletions
diff --git a/server/src/org/jsoup/nodes/Element.java b/server/src/org/jsoup/nodes/Element.java
new file mode 100644
index 0000000000..5c1894c934
--- /dev/null
+++ b/server/src/org/jsoup/nodes/Element.java
@@ -0,0 +1,1119 @@
+package org.jsoup.nodes;
+
+import org.jsoup.helper.StringUtil;
+import org.jsoup.helper.Validate;
+import org.jsoup.parser.Parser;
+import org.jsoup.parser.Tag;
+import org.jsoup.select.Collector;
+import org.jsoup.select.Elements;
+import org.jsoup.select.Evaluator;
+import org.jsoup.select.Selector;
+
+import java.util.*;
+import java.util.regex.Pattern;
+import java.util.regex.PatternSyntaxException;
+
+/**
+ * An HTML element consists of a tag name, attributes, and child nodes (including text nodes and
+ * other elements).
+ *
+ * From an Element, you can extract data, traverse the node graph, and manipulate the HTML.
+ *
+ * @author Jonathan Hedley, jonathan@hedley.net
+ */
+public class Element extends Node {
+ private Tag tag;
+ private Set<String> classNames;
+
+ /**
+ * Create a new, standalone Element. (Standalone in that it has no parent.)
+ *
+ * @param tag tag of this element
+ * @param baseUri the base URI
+ * @param attributes initial attributes
+ * @see #appendChild(Node)
+ * @see #appendElement(String)
+ */
+ public Element(Tag tag, String baseUri, Attributes attributes) {
+     // The base URI and attributes are handed to the Node superclass; only the tag is stored here.
+     super(baseUri, attributes);
+     Validate.notNull(tag);
+     this.tag = tag;
+ }
+
+ /**
+ * Create a new Element from a tag and a base URI.
+ *
+ * @param tag element tag
+ * @param baseUri the base URI of this element. It is acceptable for the base URI to be an empty
+ * string, but not null.
+ * @see Tag#valueOf(String)
+ */
+ public Element(Tag tag, String baseUri) {
+ // Delegate to the three-argument constructor with a fresh, empty attribute set.
+ this(tag, baseUri, new Attributes());
+ }
+
+ @Override
+ public String nodeName() {
+     // An element's node name is the name of its tag.
+     final String name = tag.getName();
+     return name;
+ }
+
+ /**
+ * Get the name of the tag for this element. E.g. {@code div}
+ *
+ * @return the tag name
+ */
+ public String tagName() {
+     return this.tag.getName();
+ }
+
+ /**
+ * Change the tag of this element. For example, convert a {@code <span>} to a {@code <div>} with
+ * {@code el.tagName("div");}.
+ *
+ * @param tagName new tag name for this element
+ * @return this element, for chaining
+ */
+ public Element tagName(String tagName) {
+     // Validate first so the current tag is never replaced when the supplied name is rejected.
+     Validate.notEmpty(tagName, "Tag name must not be empty.");
+     final Tag replacement = Tag.valueOf(tagName);
+     this.tag = replacement;
+     return this;
+ }
+
+ /**
+ * Get the Tag for this element.
+ *
+ * @return the tag object
+ */
+ public Tag tag() {
+     return this.tag;
+ }
+
+ /**
+ * Test if this element is a block-level element. (E.g. {@code <div> == true} or an inline element
+ * {@code <span> == false}).
+ *
+ * @return true if block, false if not (and thus inline)
+ */
+ public boolean isBlock() {
+     // Block vs. inline is a property of the tag, not of the individual element.
+     final boolean block = tag.isBlock();
+     return block;
+ }
+
+ /**
+ * Get the {@code id} attribute of this element.
+ *
+ * @return The id attribute, if present, or an empty string if not.
+ */
+ public String id() {
+     final String value = attr("id");
+     if (value != null) {
+         return value;
+     }
+     return ""; // a missing id is reported as the empty string, never null
+ }
+
+ /**
+ * Set an attribute value on this element. If this element already has an attribute with the
+ * key, its value is updated; otherwise, a new attribute is added.
+ *
+ * @return this element
+ */
+ public Element attr(String attributeKey, String attributeValue) {
+ // The superclass performs the update; this method only narrows the return type to Element for chaining.
+ super.attr(attributeKey, attributeValue);
+ return this;
+ }
+
+ /**
+ * Get this element's HTML5 custom data attributes. Each attribute in the element that has a key
+ * starting with "data-" is included in the dataset.
+ * <p>
+ * E.g., the element {@code <div data-package="jsoup" data-language="Java" class="group">...} has the dataset
+ * {@code package=jsoup, language=Java}.
+ * <p>
+ * This map is a filtered view of the element's attribute map. Changes to one map (add, remove, update) are reflected
+ * in the other map.
+ * <p>
+ * You can find elements that have data attributes using the {@code [^data-]} attribute key prefix selector.
+ * @return a map of {@code key=value} custom data attributes.
+ */
+ public Map<String, String> dataset() {
+     // The map comes straight from the attributes object; nothing is copied here.
+     final Map<String, String> view = attributes.dataset();
+     return view;
+ }
+
+ @Override
+ public final Element parent() {
+     // Same node as the inherited parentNode field, narrowed to Element.
+     return (Element) this.parentNode;
+ }
+
+ /**
+ * Get this element's parent and ancestors, up to the document root.
+ * @return this element's stack of parents, closest first.
+ */
+ public Elements parents() {
+     final Elements ancestors = new Elements();
+     // The helper appends closest-first, so the list order is nearest ancestor to furthest.
+     accumulateParents(this, ancestors);
+     return ancestors;
+ }
+
+ private static void accumulateParents(Element el, Elements parents) {
+     // Walk upwards until there is no parent, or the parent's tag name is "#root" (which is not collected).
+     Element ancestor = el.parent();
+     while (ancestor != null && !ancestor.tagName().equals("#root")) {
+         parents.add(ancestor);
+         ancestor = ancestor.parent();
+     }
+ }
+
+ /**
+ * Get a child element of this element, by its 0-based index number.
+ * <p/>
+ * Note that an element can have both mixed Nodes and Elements as children. This method inspects
+ * a filtered list of children that are elements, and the index is based on that filtered list.
+ *
+ * @param index the index number of the element to retrieve
+ * @return the child element; an {@code IndexOutOfBoundsException} is thrown if there is no child element at that index.
+ * @see #childNode(int)
+ */
+ public Element child(int index) {
+     // The index applies to the element-only view, not to the raw child node list.
+     final Elements onlyElements = children();
+     return onlyElements.get(index);
+ }
+
+ /**
+ * Get this element's child elements.
+ * <p/>
+ * This is effectively a filter on {@link #childNodes()} to get Element nodes.
+ * @return child elements. If this element has no children, returns an
+ * empty list.
+ * @see #childNodes()
+ */
+ public Elements children() {
+     // Built fresh on every call rather than maintaining a second list; if this gets slow, memoize and
+     // mark dirty on change.
+     final List<Element> onlyElements = new ArrayList<Element>();
+     for (Node candidate : childNodes) {
+         if (!(candidate instanceof Element)) {
+             continue;
+         }
+         onlyElements.add((Element) candidate);
+     }
+     return new Elements(onlyElements);
+ }
+
+ /**
+ * Get this element's child text nodes. The list is unmodifiable but the text nodes may be manipulated.
+ * <p/>
+ * This is effectively a filter on {@link #childNodes()} to get Text nodes.
+ * @return child text nodes. If this element has no text nodes, returns an
+ * empty list.
+ * <p/>
+ * For example, with the input HTML: {@code <p>One <span>Two</span> Three <br> Four</p>} with the {@code p} element selected:
+ * <ul>
+ * <li>{@code p.text()} = {@code "One Two Three Four"}</li>
+ * <li>{@code p.ownText()} = {@code "One Three Four"}</li>
+ * <li>{@code p.children()} = {@code Elements[<span>, <br>]}</li>
+ * <li>{@code p.childNodes()} = {@code List<Node>["One ", <span>, " Three ", <br>, " Four"]}</li>
+ * <li>{@code p.textNodes()} = {@code List<TextNode>["One ", " Three ", " Four"]}</li>
+ * </ul>
+ */
+ public List<TextNode> textNodes() {
+     final List<TextNode> found = new ArrayList<TextNode>();
+     for (Node candidate : childNodes) {
+         if (!(candidate instanceof TextNode)) {
+             continue;
+         }
+         found.add((TextNode) candidate);
+     }
+     // Callers get a read-only list; the TextNode objects inside it are the live child nodes.
+     return Collections.unmodifiableList(found);
+ }
+
+ /**
+ * Get this element's child data nodes. The list is unmodifiable but the data nodes may be manipulated.
+ * <p/>
+ * This is effectively a filter on {@link #childNodes()} to get Data nodes.
+ * @return child data nodes. If this element has no data nodes, returns an
+ * empty list.
+ * @see #data()
+ */
+ public List<DataNode> dataNodes() {
+     final List<DataNode> found = new ArrayList<DataNode>();
+     for (Node candidate : childNodes) {
+         if (!(candidate instanceof DataNode)) {
+             continue;
+         }
+         found.add((DataNode) candidate);
+     }
+     // Callers get a read-only list; the DataNode objects inside it are the live child nodes.
+     return Collections.unmodifiableList(found);
+ }
+
+ /**
+ * Find elements that match the {@link Selector} CSS query, with this element as the starting context. Matched elements
+ * may include this element, or any of its children.
+ * <p/>
+ * This method is generally more powerful to use than the DOM-type {@code getElementBy*} methods, because
+ * multiple filters can be combined, e.g.:
+ * <ul>
+ * <li>{@code el.select("a[href]")} - finds links ({@code a} tags with {@code href} attributes)
+ * <li>{@code el.select("a[href*=example.com]")} - finds links pointing to example.com (loosely)
+ * </ul>
+ * <p/>
+ * See the query syntax documentation in {@link org.jsoup.select.Selector}.
+ *
+ * @param cssQuery a {@link Selector} CSS-like query
+ * @return elements that match the query (empty if none match)
+ * @see org.jsoup.select.Selector
+ */
+ public Elements select(String cssQuery) {
+     // This element is passed to the selector as the context for the query.
+     final Elements matches = Selector.select(cssQuery, this);
+     return matches;
+ }
+
+ /**
+ * Add a child node to this element.
+ *
+ * @param child node to add. Must not already have a parent.
+ * @return this element, so that you can add more child nodes or elements.
+ */
+ public Element appendChild(Node child) {
+     Validate.notNull(child);
+     // No index given: the inherited helper is called in its index-less form.
+     this.addChildren(child);
+     return this;
+ }
+
+ /**
+ * Add a node to the start of this element's children.
+ *
+ * @param child node to add. Must not already have a parent.
+ * @return this element, so that you can add more child nodes or elements.
+ */
+ public Element prependChild(Node child) {
+     Validate.notNull(child);
+     // Index 0 puts the node ahead of all existing children.
+     this.addChildren(0, child);
+     return this;
+ }
+
+ /**
+ * Create a new element by tag name, and add it as the last child.
+ *
+ * @param tagName the name of the tag (e.g. {@code div}).
+ * @return the new element, to allow you to add content to it, e.g.:
+ * {@code parent.appendElement("h1").attr("id", "header").text("Welcome");}
+ */
+ public Element appendElement(String tagName) {
+     final Tag childTag = Tag.valueOf(tagName);
+     // The new element inherits this element's base URI.
+     final Element created = new Element(childTag, baseUri());
+     appendChild(created);
+     return created; // the child, not this, so callers can keep building on it
+ }
+
+ /**
+ * Create a new element by tag name, and add it as the first child.
+ *
+ * @param tagName the name of the tag (e.g. {@code div}).
+ * @return the new element, to allow you to add content to it, e.g.:
+ * {@code parent.prependElement("h1").attr("id", "header").text("Welcome");}
+ */
+ public Element prependElement(String tagName) {
+     final Tag childTag = Tag.valueOf(tagName);
+     // The new element inherits this element's base URI.
+     final Element created = new Element(childTag, baseUri());
+     prependChild(created);
+     return created; // the child, not this, so callers can keep building on it
+ }
+
+ /**
+ * Create and append a new TextNode to this element.
+ *
+ * @param text the unencoded text to add
+ * @return this element
+ */
+ public Element appendText(String text) {
+     // Wrap the raw text in a TextNode that shares this element's base URI, then add it last.
+     appendChild(new TextNode(text, baseUri()));
+     return this;
+ }
+
+ /**
+ * Create and prepend a new TextNode to this element.
+ *
+ * @param text the unencoded text to add
+ * @return this element
+ */
+ public Element prependText(String text) {
+     // Wrap the raw text in a TextNode that shares this element's base URI, then add it first.
+     prependChild(new TextNode(text, baseUri()));
+     return this;
+ }
+
+ /**
+ * Add inner HTML to this element. The supplied HTML will be parsed, and each node appended to the end of the children.
+ * @param html HTML to add inside this element, after the existing HTML
+ * @return this element
+ * @see #html(String)
+ */
+ public Element append(String html) {
+     Validate.notNull(html);
+     // The fragment is parsed with this element as context, then every resulting node is added at the end.
+     final List<Node> parsed = Parser.parseFragment(html, this, baseUri());
+     final Node[] asArray = parsed.toArray(new Node[parsed.size()]);
+     addChildren(asArray);
+     return this;
+ }
+
+ /**
+ * Add inner HTML into this element. The supplied HTML will be parsed, and each node prepended to the start of the element's children.
+ * @param html HTML to add inside this element, before the existing HTML
+ * @return this element
+ * @see #html(String)
+ */
+ public Element prepend(String html) {
+     Validate.notNull(html);
+     // The fragment is parsed with this element as context, then every resulting node is inserted at index 0.
+     final List<Node> parsed = Parser.parseFragment(html, this, baseUri());
+     final Node[] asArray = parsed.toArray(new Node[parsed.size()]);
+     addChildren(0, asArray);
+     return this;
+ }
+
+ /**
+ * Insert the specified HTML into the DOM before this element (i.e. as a preceding sibling).
+ *
+ * @param html HTML to add before this element
+ * @return this element, for chaining
+ * @see #after(String)
+ */
+ @Override
+ public Element before(String html) {
+ // Behaviour is inherited; the override only narrows the return type to Element for chaining.
+ return (Element) super.before(html);
+ }
+
+ /**
+ * Insert the specified node into the DOM before this node (i.e. as a preceding sibling).
+ * @param node to add before this element
+ * @return this Element, for chaining
+ * @see #after(Node)
+ */
+ @Override
+ public Element before(Node node) {
+ // Behaviour is inherited; the override only narrows the return type to Element for chaining.
+ return (Element) super.before(node);
+ }
+
+ /**
+ * Insert the specified HTML into the DOM after this element (i.e. as a following sibling).
+ *
+ * @param html HTML to add after this element
+ * @return this element, for chaining
+ * @see #before(String)
+ */
+ @Override
+ public Element after(String html) {
+ // Behaviour is inherited; the override only narrows the return type to Element for chaining.
+ return (Element) super.after(html);
+ }
+
+ /**
+ * Insert the specified node into the DOM after this node (i.e. as a following sibling).
+ * @param node to add after this element
+ * @return this element, for chaining
+ * @see #before(Node)
+ */
+ @Override
+ public Element after(Node node) {
+ // Behaviour is inherited; the override only narrows the return type to Element for chaining.
+ return (Element) super.after(node);
+ }
+
+ /**
+ * Remove all of the element's child nodes. Any attributes are left as-is.
+ * @return this element
+ */
+ public Element empty() {
+     // Only the child list is cleared; attributes and the tag are untouched.
+     this.childNodes.clear();
+     return this;
+ }
+
+ /**
+ * Wrap the supplied HTML around this element.
+ *
+ * @param html HTML to wrap around this element, e.g. {@code <div class="head"></div>}. Can be arbitrarily deep.
+ * @return this element, for chaining.
+ */
+ @Override
+ public Element wrap(String html) {
+ // Behaviour is inherited; the override only narrows the return type to Element for chaining.
+ return (Element) super.wrap(html);
+ }
+
+ /**
+ * Get sibling elements. If the element has no sibling elements, returns an empty list. An element is not a sibling
+ * of itself, so will not be included in the returned list.
+ * @return sibling elements
+ */
+ public Elements siblingElements() {
+     if (parentNode == null) {
+         return new Elements(0);
+     }
+     final List<Element> family = parent().children();
+     // Sized for every element child of the parent except this one.
+     final Elements others = new Elements(family.size() - 1);
+     for (Element candidate : family) {
+         if (candidate == this) {
+             continue; // identity check: an element is never its own sibling
+         }
+         others.add(candidate);
+     }
+     return others;
+ }
+
+ /**
+ * Gets the next sibling element of this element. E.g., if a {@code div} contains two {@code p}s,
+ * the {@code nextElementSibling} of the first {@code p} is the second {@code p}.
+ * <p/>
+ * This is similar to {@link #nextSibling()}, but specifically finds only Elements
+ * @return the next element, or null if there is no next element
+ * @see #previousElementSibling()
+ */
+ public Element nextElementSibling() {
+ if (parentNode == null) return null;
+ List<Element> siblings = parent().children();
+ Integer index = indexInList(this, siblings);
+ Validate.notNull(index);
+ if (siblings.size() > index+1)
+ return siblings.get(index+1);
+ else
+ return null;
+ }
+
+ /**
+ * Gets the previous element sibling of this element.
+ * @return the previous element, or null if there is no previous element
+ * @see #nextElementSibling()
+ */
+ public Element previousElementSibling() {
+ if (parentNode == null) return null;
+ List<Element> siblings = parent().children();
+ Integer index = indexInList(this, siblings);
+ Validate.notNull(index);
+ if (index > 0)
+ return siblings.get(index-1);
+ else
+ return null;
+ }
+
+ /**
+  * Gets the first element sibling of this element.
+  * @return the first sibling that is an element (aka the parent's first element child), or null if this
+  * element has no parent or the parent has fewer than two element children
+  */
+ public Element firstElementSibling() {
+     // todo: should firstSibling() exclude this?
+     // A parentless element has no siblings: return null, as nextElementSibling() does, instead of
+     // failing with a NullPointerException on parent().children().
+     if (parentNode == null) return null;
+     List<Element> siblings = parent().children();
+     return siblings.size() > 1 ? siblings.get(0) : null;
+ }
+
+ /**
+ * Get the list index of this element in its element sibling list. I.e. if this is the first element
+ * sibling, returns 0.
+ * @return position in element sibling list
+ */
+ public Integer elementSiblingIndex() {
+     final Element container = parent();
+     // Deliberately an if, not a ternary: mixing the int 0 with the Integer result would unbox a null.
+     if (container == null) {
+         return 0;
+     }
+     return indexInList(this, container.children());
+ }
+
+ /**
+  * Gets the last element sibling of this element
+  * @return the last sibling that is an element (aka the parent's last element child), or null if this
+  * element has no parent or the parent has fewer than two element children
+  */
+ public Element lastElementSibling() {
+     // A parentless element has no siblings: return null, as nextElementSibling() does, instead of
+     // failing with a NullPointerException on parent().children().
+     if (parentNode == null) return null;
+     List<Element> siblings = parent().children();
+     return siblings.size() > 1 ? siblings.get(siblings.size() - 1) : null;
+ }
+
+ private static <E extends Element> Integer indexInList(Element search, List<E> elements) {
+     Validate.notNull(search);
+     Validate.notNull(elements);
+
+     int position = 0;
+     while (position < elements.size()) {
+         if (elements.get(position).equals(search)) {
+             return position;
+         }
+         position++;
+     }
+     return null; // null (not -1) signals that the element is not in the list
+ }
+
+ // DOM type methods
+
+ /**
+ * Finds elements, including and recursively under this element, with the specified tag name.
+ * @param tagName The tag name to search for (case insensitively).
+ * @return a matching unmodifiable list of elements. Will be empty if neither this element nor any of its children match.
+ */
+ public Elements getElementsByTag(String tagName) {
+     Validate.notEmpty(tagName);
+     // The evaluator receives the name lower-cased and trimmed.
+     final String normalised = tagName.toLowerCase().trim();
+     final Evaluator.Tag matcher = new Evaluator.Tag(normalised);
+     return Collector.collect(matcher, this);
+ }
+
+ /**
+ * Find an element by ID, including or under this element.
+ * <p>
+ * Note that this finds the first matching ID, starting with this element. If you search down from a different
+ * starting point, it is possible to find a different element by ID. For unique element by ID within a Document,
+ * use {@link Document#getElementById(String)}
+ * @param id The ID to search for.
+ * @return The first matching element by ID, starting with this element, or null if none found.
+ */
+ public Element getElementById(String id) {
+ Validate.notEmpty(id);
+
+ Elements elements = Collector.collect(new Evaluator.Id(id), this);
+ if (elements.size() > 0)
+ return elements.get(0);
+ else
+ return null;
+ }
+
+ /**
+ * Find elements that have this class, including or under this element. Case insensitive.
+ * <p>
+ * Elements can have multiple classes (e.g. {@code <div class="header round first">}). This method
+ * checks each class, so you can find the above with {@code el.getElementsByClass("header");}.
+ *
+ * @param className the name of the class to search for.
+ * @return elements with the supplied class name, empty if none
+ * @see #hasClass(String)
+ * @see #classNames()
+ */
+ public Elements getElementsByClass(String className) {
+     Validate.notEmpty(className);
+     final Evaluator.Class matcher = new Evaluator.Class(className);
+     return Collector.collect(matcher, this);
+ }
+
+ /**
+ * Find elements that have a named attribute set. Case insensitive.
+ *
+ * @param key name of the attribute, e.g. {@code href}
+ * @return elements that have this attribute, empty if none
+ */
+ public Elements getElementsByAttribute(String key) {
+     Validate.notEmpty(key);
+     // The evaluator receives the key trimmed and lower-cased.
+     final String normalised = key.trim().toLowerCase();
+     final Evaluator.Attribute matcher = new Evaluator.Attribute(normalised);
+     return Collector.collect(matcher, this);
+ }
+
+ /**
+ * Find elements that have an attribute name starting with the supplied prefix. Use {@code data-} to find elements
+ * that have HTML5 datasets.
+ * @param keyPrefix name prefix of the attribute e.g. {@code data-}
+ * @return elements that have attribute names that start with the prefix, empty if none.
+ */
+ public Elements getElementsByAttributeStarting(String keyPrefix) {
+     Validate.notEmpty(keyPrefix);
+     // The evaluator receives the prefix trimmed and lower-cased.
+     final String normalised = keyPrefix.trim().toLowerCase();
+     final Evaluator.AttributeStarting matcher = new Evaluator.AttributeStarting(normalised);
+     return Collector.collect(matcher, this);
+ }
+
+ /**
+ * Find elements that have an attribute with the specific value. Case insensitive.
+ *
+ * @param key name of the attribute
+ * @param value value of the attribute
+ * @return elements that have this attribute with this value, empty if none
+ */
+ public Elements getElementsByAttributeValue(String key, String value) {
+     // Key and value are passed through as given; any normalisation is left to the evaluator.
+     final Evaluator.AttributeWithValue matcher = new Evaluator.AttributeWithValue(key, value);
+     return Collector.collect(matcher, this);
+ }
+
+ /**
+ * Find elements that either do not have this attribute, or have it with a different value. Case insensitive.
+ *
+ * @param key name of the attribute
+ * @param value value of the attribute
+ * @return elements that do not have a matching attribute
+ */
+ public Elements getElementsByAttributeValueNot(String key, String value) {
+     // Key and value are passed through as given; any normalisation is left to the evaluator.
+     final Evaluator.AttributeWithValueNot matcher = new Evaluator.AttributeWithValueNot(key, value);
+     return Collector.collect(matcher, this);
+ }
+
+ /**
+ * Find elements that have attributes that start with the value prefix. Case insensitive.
+ *
+ * @param key name of the attribute
+ * @param valuePrefix start of attribute value
+ * @return elements that have attributes that start with the value prefix
+ */
+ public Elements getElementsByAttributeValueStarting(String key, String valuePrefix) {
+     // Key and prefix are passed through as given; any normalisation is left to the evaluator.
+     final Evaluator.AttributeWithValueStarting matcher = new Evaluator.AttributeWithValueStarting(key, valuePrefix);
+     return Collector.collect(matcher, this);
+ }
+
+ /**
+ * Find elements that have attributes that end with the value suffix. Case insensitive.
+ *
+ * @param key name of the attribute
+ * @param valueSuffix end of the attribute value
+ * @return elements that have attributes that end with the value suffix
+ */
+ public Elements getElementsByAttributeValueEnding(String key, String valueSuffix) {
+     // Key and suffix are passed through as given; any normalisation is left to the evaluator.
+     final Evaluator.AttributeWithValueEnding matcher = new Evaluator.AttributeWithValueEnding(key, valueSuffix);
+     return Collector.collect(matcher, this);
+ }
+
+ /**
+ * Find elements that have attributes whose value contains the match string. Case insensitive.
+ *
+ * @param key name of the attribute
+ * @param match substring of value to search for
+ * @return elements that have attributes containing this text
+ */
+ public Elements getElementsByAttributeValueContaining(String key, String match) {
+     // Key and substring are passed through as given; any normalisation is left to the evaluator.
+     final Evaluator.AttributeWithValueContaining matcher = new Evaluator.AttributeWithValueContaining(key, match);
+     return Collector.collect(matcher, this);
+ }
+
+ /**
+ * Find elements that have attributes whose values match the supplied regular expression.
+ * @param key name of the attribute
+ * @param pattern compiled regular expression to match against attribute values
+ * @return elements that have attributes matching this regular expression
+ */
+ public Elements getElementsByAttributeValueMatching(String key, Pattern pattern) {
+     // The compiled pattern is used as supplied, including whatever flags it was compiled with.
+     final Evaluator.AttributeWithValueMatching matcher = new Evaluator.AttributeWithValueMatching(key, pattern);
+     return Collector.collect(matcher, this);
+ }
+
+ /**
+ * Find elements that have attributes whose values match the supplied regular expression.
+ * @param key name of the attribute
+ * @param regex regular expression to match against attribute values. You can use <a href="http://java.sun.com/docs/books/tutorial/essential/regex/pattern.html#embedded">embedded flags</a> (such as (?i) and (?m)) to control regex options.
+ * @return elements that have attributes matching this regular expression
+ */
+ public Elements getElementsByAttributeValueMatching(String key, String regex) {
+ Pattern pattern;
+ try {
+ pattern = Pattern.compile(regex);
+ } catch (PatternSyntaxException e) {
+ throw new IllegalArgumentException("Pattern syntax error: " + regex, e);
+ }
+ return getElementsByAttributeValueMatching(key, pattern);
+ }
+
+ /**
+ * Find elements whose sibling index is less than the supplied index.
+ * @param index 0-based index
+ * @return elements less than index
+ */
+ public Elements getElementsByIndexLessThan(int index) {
+     final Evaluator.IndexLessThan matcher = new Evaluator.IndexLessThan(index);
+     return Collector.collect(matcher, this);
+ }
+
+ /**
+ * Find elements whose sibling index is greater than the supplied index.
+ * @param index 0-based index
+ * @return elements greater than index
+ */
+ public Elements getElementsByIndexGreaterThan(int index) {
+     final Evaluator.IndexGreaterThan matcher = new Evaluator.IndexGreaterThan(index);
+     return Collector.collect(matcher, this);
+ }
+
+ /**
+ * Find elements whose sibling index is equal to the supplied index.
+ * @param index 0-based index
+ * @return elements equal to index
+ */
+ public Elements getElementsByIndexEquals(int index) {
+     final Evaluator.IndexEquals matcher = new Evaluator.IndexEquals(index);
+     return Collector.collect(matcher, this);
+ }
+
+ /**
+ * Find elements that contain the specified string. The search is case insensitive. The text may appear directly
+ * in the element, or in any of its descendants.
+ * @param searchText to look for in the element's text
+ * @return elements that contain the string, case insensitive.
+ * @see Element#text()
+ */
+ public Elements getElementsContainingText(String searchText) {
+     // The search text is passed through as given; matching rules live in the evaluator.
+     final Evaluator.ContainsText matcher = new Evaluator.ContainsText(searchText);
+     return Collector.collect(matcher, this);
+ }
+
+ /**
+ * Find elements that directly contain the specified string. The search is case insensitive. The text must appear directly
+ * in the element, not in any of its descendants.
+ * @param searchText to look for in the element's own text
+ * @return elements that contain the string, case insensitive.
+ * @see Element#ownText()
+ */
+ public Elements getElementsContainingOwnText(String searchText) {
+     // The search text is passed through as given; matching rules live in the evaluator.
+     final Evaluator.ContainsOwnText matcher = new Evaluator.ContainsOwnText(searchText);
+     return Collector.collect(matcher, this);
+ }
+
+ /**
+ * Find elements whose text matches the supplied regular expression.
+ * @param pattern regular expression to match text against
+ * @return elements matching the supplied regular expression.
+ * @see Element#text()
+ */
+ public Elements getElementsMatchingText(Pattern pattern) {
+     // The compiled pattern is used as supplied, including whatever flags it was compiled with.
+     final Evaluator.Matches matcher = new Evaluator.Matches(pattern);
+     return Collector.collect(matcher, this);
+ }
+
+ /**
+ * Find elements whose text matches the supplied regular expression.
+ * @param regex regular expression to match text against. You can use <a href="http://java.sun.com/docs/books/tutorial/essential/regex/pattern.html#embedded">embedded flags</a> (such as (?i) and (?m)) to control regex options.
+ * @return elements matching the supplied regular expression.
+ * @see Element#text()
+ */
+ public Elements getElementsMatchingText(String regex) {
+ Pattern pattern;
+ try {
+ pattern = Pattern.compile(regex);
+ } catch (PatternSyntaxException e) {
+ throw new IllegalArgumentException("Pattern syntax error: " + regex, e);
+ }
+ return getElementsMatchingText(pattern);
+ }
+
+ /**
+ * Find elements whose own text matches the supplied regular expression.
+ * @param pattern regular expression to match text against
+ * @return elements matching the supplied regular expression.
+ * @see Element#ownText()
+ */
+ public Elements getElementsMatchingOwnText(Pattern pattern) {
+     // The compiled pattern is used as supplied, including whatever flags it was compiled with.
+     final Evaluator.MatchesOwn matcher = new Evaluator.MatchesOwn(pattern);
+     return Collector.collect(matcher, this);
+ }
+
+ /**
+ * Find elements whose own text matches the supplied regular expression.
+ * @param regex regular expression to match text against. You can use <a href="http://java.sun.com/docs/books/tutorial/essential/regex/pattern.html#embedded">embedded flags</a> (such as (?i) and (?m)) to control regex options.
+ * @return elements matching the supplied regular expression.
+ * @see Element#ownText()
+ */
+ public Elements getElementsMatchingOwnText(String regex) {
+ Pattern pattern;
+ try {
+ pattern = Pattern.compile(regex);
+ } catch (PatternSyntaxException e) {
+ throw new IllegalArgumentException("Pattern syntax error: " + regex, e);
+ }
+ return getElementsMatchingOwnText(pattern);
+ }
+
+ /**
+ * Find all elements under this element (including self, and children of children).
+ *
+ * @return all elements
+ */
+ public Elements getAllElements() {
+     final Evaluator.AllElements matchEverything = new Evaluator.AllElements();
+     return Collector.collect(matchEverything, this);
+ }
+
+ /**
+ * Gets the combined text of this element and all its children.
+ * <p>
+ * For example, given HTML {@code <p>Hello <b>there</b> now!</p>}, {@code p.text()} returns {@code "Hello there now!"}
+ *
+ * @return unencoded text, or empty string if none.
+ * @see #ownText()
+ * @see #textNodes()
+ */
+ public String text() {
+     final StringBuilder collected = new StringBuilder();
+     text(collected);
+     // Leading and trailing whitespace left over from accumulation is trimmed from the result.
+     final String joined = collected.toString();
+     return joined.trim();
+ }
+
+ private void text(StringBuilder accum) {
+ appendWhitespaceIfBr(this, accum);
+
+ for (Node child : childNodes) {
+ if (child instanceof TextNode) {
+ TextNode textNode = (TextNode) child;
+ appendNormalisedText(accum, textNode);
+ } else if (child instanceof Element) {
+ Element element = (Element) child;
+ if (accum.length() > 0 && element.isBlock() && !TextNode.lastCharIsWhitespace(accum))
+ accum.append(" ");
+ element.text(accum);
+ }
+ }
+ }
+
+ /**
+ * Gets the text owned by this element only; does not get the combined text of all children.
+ * <p>
+ * For example, given HTML {@code <p>Hello <b>there</b> now!</p>}, {@code p.ownText()} returns {@code "Hello now!"},
+ * whereas {@code p.text()} returns {@code "Hello there now!"}.
+ * Note that the text within the {@code b} element is not returned, as it is not a direct child of the {@code p} element.
+ *
+ * @return unencoded text, or empty string if none.
+ * @see #text()
+ * @see #textNodes()
+ */
+ public String ownText() {
+     final StringBuilder collected = new StringBuilder();
+     ownText(collected);
+     // Leading and trailing whitespace left over from accumulation is trimmed from the result.
+     final String joined = collected.toString();
+     return joined.trim();
+ }
+
+ private void ownText(StringBuilder accum) {
+     for (Node child : childNodes) {
+         if (child instanceof TextNode) {
+             appendNormalisedText(accum, (TextNode) child);
+             continue;
+         }
+         if (child instanceof Element) {
+             // Child elements add no text of their own here; a <br> only contributes a separating space.
+             appendWhitespaceIfBr((Element) child, accum);
+         }
+     }
+ }
+
+ private void appendNormalisedText(StringBuilder accum, TextNode textNode) {
+ String text = textNode.getWholeText();
+
+ if (!preserveWhitespace()) {
+ text = TextNode.normaliseWhitespace(text);
+ if (TextNode.lastCharIsWhitespace(accum))
+ text = TextNode.stripLeadingWhitespace(text);
+ }
+ accum.append(text);
+ }
+
+ private static void appendWhitespaceIfBr(Element element, StringBuilder accum) {
+ if (element.tag.getName().equals("br") && !TextNode.lastCharIsWhitespace(accum))
+ accum.append(" ");
+ }
+
+ boolean preserveWhitespace() {
+ return tag.preserveWhitespace() || parent() != null && parent().preserveWhitespace();
+ }
+
+ /**
+ * Set the text of this element. Any existing contents (text or elements) will be cleared
+ * @param text unencoded text
+ * @return this element
+ */
+ public Element text(String text) {
+     Validate.notNull(text);
+     // Replace all current children with one text node holding the supplied text.
+     empty();
+     appendChild(new TextNode(text, baseUri));
+     return this;
+ }
+
+ /**
+ Test if this element has any text content (that is not just whitespace).
+ @return true if element has non-blank text content.
+ */
+ public boolean hasText() {
+     for (Node child : childNodes) {
+         final boolean found;
+         if (child instanceof TextNode) {
+             found = !((TextNode) child).isBlank();
+         } else if (child instanceof Element) {
+             found = ((Element) child).hasText(); // look through nested elements too
+         } else {
+             found = false;
+         }
+         if (found) {
+             return true; // stop at the first non-blank text
+         }
+     }
+     return false;
+ }
+
+ /**
+ * Get the combined data of this element. Data is e.g. the inside of a {@code script} tag.
+ * @return the data, or empty string if none
+ *
+ * @see #dataNodes()
+ */
+ public String data() {
+     final StringBuilder combined = new StringBuilder();
+     // Concatenate data in document order: direct data nodes plus the data of nested elements.
+     for (Node childNode : childNodes) {
+         if (childNode instanceof DataNode) {
+             combined.append(((DataNode) childNode).getWholeData());
+         } else if (childNode instanceof Element) {
+             combined.append(((Element) childNode).data());
+         }
+     }
+     return combined.toString();
+ }
+
+ /**
+ * Gets the literal value of this element's "class" attribute, which may include multiple class names, space
+ * separated. (E.g. on <code>&lt;div class="header gray"></code> returns, "<code>header gray</code>")
+ * @return The literal class attribute, or <b>empty string</b> if no class attribute set.
+ */
+ public String className() {
+     // The raw attribute value, not split into individual class names.
+     final String literal = attr("class");
+     return literal;
+ }
+
+ /**
+  * Get all of the element's class names. E.g. on element {@code <div class="header gray">},
+  * returns a set of two elements {@code "header", "gray"}. The set is built from the current value of the
+  * {@code class} attribute on every call. Note that modifications to this set are not pushed to
+  * the backing {@code class} attribute; use the {@link #classNames(java.util.Set)} method to persist them.
+  * @return set of classnames, empty if no class attribute
+  */
+ public Set<String> classNames() {
+     // Always derived from the attribute's current value rather than from a cached copy: a cached set
+     // goes stale as soon as the class attribute is changed by some other route, e.g. attr("class", ...).
+     String[] names = className().split("\\s+");
+     Set<String> current = new LinkedHashSet<String>(Arrays.asList(names));
+     // split() yields a "" entry for an empty attribute value or one with leading whitespace; that is
+     // not a class name, and dropping it keeps the "empty if no class attribute" promise.
+     current.remove("");
+     return current;
+ }
+
+ /**
+  Set the element's {@code class} attribute to the supplied class names.
+  @param classNames set of classes, joined with single spaces to form the attribute value
+  @return this element, for chaining
+  */
+ public Element classNames(Set<String> classNames) {
+     Validate.notNull(classNames);
+     attributes.put("class", StringUtil.join(classNames, " "));
+     // Clear the cached class-name field so it can never hold a set derived from the old attribute value.
+     // (this. is required: the parameter shadows the field.)
+     this.classNames = null;
+     return this;
+ }
+
+ /**
+ * Tests if this element has a class. Case insensitive.
+ * @param className name of class to check for
+ * @return true if it does, false if not
+ */
+ public boolean hasClass(String className) {
+     // A linear scan is needed because the comparison ignores case; Set.contains would not.
+     for (String candidate : classNames()) {
+         if (className.equalsIgnoreCase(candidate)) {
+             return true;
+         }
+     }
+     return false;
+ }
+
+ /**
+ Add a class name to this element's {@code class} attribute.
+ @param className class name to add
+ @return this element
+ */
+ public Element addClass(String className) {
+     Validate.notNull(className);
+     final Set<String> updated = classNames();
+     updated.add(className);
+     // Write the modified set back so the class attribute itself changes.
+     classNames(updated);
+     return this;
+ }
+
+ /**
+ Remove a class name from this element's {@code class} attribute.
+ @param className class name to remove
+ @return this element
+ */
+ public Element removeClass(String className) {
+     Validate.notNull(className);
+     final Set<String> updated = classNames();
+     updated.remove(className);
+     // Write the modified set back so the class attribute itself changes.
+     classNames(updated);
+     return this;
+ }
+
+ /**
+ Toggle a class name on this element's {@code class} attribute: if present, remove it; otherwise add it.
+ @param className class name to toggle
+ @return this element
+ */
+ public Element toggleClass(String className) {
+ Validate.notNull(className);
+
+ Set<String> classes = classNames();
+ if (classes.contains(className))
+ classes.remove(className);
+ else
+ classes.add(className);
+ classNames(classes);
+
+ return this;
+ }
+
+ /**
+ * Get the value of a form element (input, textarea, etc).
+ * @return the value of the form element, or empty string if not set.
+ */
+ public String val() {
+ if (tagName().equals("textarea"))
+ return text();
+ else
+ return attr("value");
+ }
+
+ /**
+ * Set the value of a form element (input, textarea, etc).
+ * @param value value to set
+ * @return this element (for chaining)
+ */
+ public Element val(String value) {
+ if (tagName().equals("textarea"))
+ text(value);
+ else
+ attr("value", value);
+ return this;
+ }
+
+ void outerHtmlHead(StringBuilder accum, int depth, Document.OutputSettings out) {
+ if (accum.length() > 0 && out.prettyPrint() && (tag.formatAsBlock() || (parent() != null && parent().tag().formatAsBlock())))
+ indent(accum, depth, out);
+ accum
+ .append("<")
+ .append(tagName());
+ attributes.html(accum, out);
+
+ if (childNodes.isEmpty() && tag.isSelfClosing())
+ accum.append(" />");
+ else
+ accum.append(">");
+ }
+
+ void outerHtmlTail(StringBuilder accum, int depth, Document.OutputSettings out) {
+     if (childNodes.isEmpty() && tag.isSelfClosing()) {
+         return; // outerHtmlHead wrote the " />" form, so there is no closing tag to emit
+     }
+     // The closing tag of a non-empty block element goes on its own indented line when pretty-printing.
+     if (out.prettyPrint() && !childNodes.isEmpty() && tag.formatAsBlock()) {
+         indent(accum, depth, out);
+     }
+     accum.append("</").append(tagName()).append(">");
+ }
+
+ /**
+ * Retrieves the element's inner HTML. E.g. on a {@code <div>} with one empty {@code <p>}, would return
+ * {@code <p></p>}. (Whereas {@link #outerHtml()} would return {@code <div><p></p></div>}.)
+ *
+ * @return String of HTML.
+ * @see #outerHtml()
+ */
+ public String html() {
+     final StringBuilder inner = new StringBuilder();
+     html(inner);
+     // Leading and trailing whitespace is trimmed from the serialised children.
+     final String serialised = inner.toString();
+     return serialised.trim();
+ }
+
+ private void html(StringBuilder accum) {
+     // Inner HTML is the outer HTML of each child, in order.
+     for (Node child : childNodes) {
+         child.outerHtml(accum);
+     }
+ }
+
+ /**
+ * Set this element's inner HTML. Clears the existing HTML first.
+ * @param html HTML to parse and set into this element
+ * @return this element
+ * @see #append(String)
+ */
+ public Element html(String html) {
+     // Clear first, then parse and append: the new markup replaces the old children entirely.
+     empty().append(html);
+     return this;
+ }
+
+ @Override
+ public String toString() {
+     // The string form of an element is its outer HTML.
+     final String rendered = outerHtml();
+     return rendered;
+ }
+
+ @Override
+ public boolean equals(Object o) {
+     // Identity only: two distinct elements are never equal, whatever their tag or content.
+     return o == this;
+ }
+
+ @Override
+ public int hashCode() {
+     // todo: fixup, not very useful
+     final int base = super.hashCode();
+     final int tagHash = (tag == null) ? 0 : tag.hashCode();
+     return 31 * base + tagHash;
+ }
+
+ @Override
+ public Element clone() {
+     Element clone = (Element) super.clone();
+     // Clear any cached class-name set carried over from the source element, so the clone never shares
+     // one mutable set with it; the clone's class names are derived from its own class attribute.
+     clone.classNames = null;
+     return clone;
+ }
+}