summaryrefslogtreecommitdiffstats
path: root/server/src/org/jsoup/nodes/Node.java
diff options
context:
space:
mode:
Diffstat (limited to 'server/src/org/jsoup/nodes/Node.java')
-rw-r--r--server/src/org/jsoup/nodes/Node.java615
1 files changed, 615 insertions, 0 deletions
diff --git a/server/src/org/jsoup/nodes/Node.java b/server/src/org/jsoup/nodes/Node.java
new file mode 100644
index 0000000000..eb2b40ee73
--- /dev/null
+++ b/server/src/org/jsoup/nodes/Node.java
@@ -0,0 +1,615 @@
+package org.jsoup.nodes;
+
+import org.jsoup.helper.StringUtil;
+import org.jsoup.helper.Validate;
+import org.jsoup.parser.Parser;
+import org.jsoup.select.NodeTraversor;
+import org.jsoup.select.NodeVisitor;
+
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+
+/**
+ The base, abstract Node model. Elements, Documents, Comments etc are all Node instances.
+
+ @author Jonathan Hedley, jonathan@hedley.net */
+public abstract class Node implements Cloneable {
+ Node parentNode;
+ List<Node> childNodes;
+ Attributes attributes;
+ String baseUri;
+ int siblingIndex;
+
+ /**
+ Create a new Node.
+ @param baseUri base URI
+ @param attributes attributes (not null, but may be empty)
+ */
+ protected Node(String baseUri, Attributes attributes) {
+ Validate.notNull(baseUri);
+ Validate.notNull(attributes);
+
+ childNodes = new ArrayList<Node>(4);
+ this.baseUri = baseUri.trim();
+ this.attributes = attributes;
+ }
+
+ protected Node(String baseUri) {
+ this(baseUri, new Attributes());
+ }
+
+ /**
+ * Default constructor. Doesn't setup base uri, children, or attributes; use with caution.
+ */
+ protected Node() {
+ childNodes = Collections.emptyList();
+ attributes = null;
+ }
+
+ /**
+ Get the node name of this node. Use for debugging purposes and not logic switching (for that, use instanceof).
+ @return node name
+ */
+ public abstract String nodeName();
+
+ /**
+ * Get an attribute's value by its key.
+ * <p/>
+ * To get an absolute URL from an attribute that may be a relative URL, prefix the key with <code><b>abs</b></code>,
+ * which is a shortcut to the {@link #absUrl} method.
+ * E.g.: <blockquote><code>String url = a.attr("abs:href");</code></blockquote>
+ * @param attributeKey The attribute key.
+ * @return The attribute, or empty string if not present (to avoid nulls).
+ * @see #attributes()
+ * @see #hasAttr(String)
+ * @see #absUrl(String)
+ */
+ public String attr(String attributeKey) {
+ Validate.notNull(attributeKey);
+
+ if (attributes.hasKey(attributeKey))
+ return attributes.get(attributeKey);
+ else if (attributeKey.toLowerCase().startsWith("abs:"))
+ return absUrl(attributeKey.substring("abs:".length()));
+ else return "";
+ }
+
+ /**
+ * Get all of the element's attributes.
+ * @return attributes (which implements iterable, in same order as presented in original HTML).
+ */
+ public Attributes attributes() {
+ return attributes;
+ }
+
+ /**
+ * Set an attribute (key=value). If the attribute already exists, it is replaced.
+ * @param attributeKey The attribute key.
+ * @param attributeValue The attribute value.
+ * @return this (for chaining)
+ */
+ public Node attr(String attributeKey, String attributeValue) {
+ attributes.put(attributeKey, attributeValue);
+ return this;
+ }
+
+ /**
+ * Test if this element has an attribute.
+ * @param attributeKey The attribute key to check.
+ * @return true if the attribute exists, false if not.
+ */
+ public boolean hasAttr(String attributeKey) {
+ Validate.notNull(attributeKey);
+
+ if (attributeKey.toLowerCase().startsWith("abs:")) {
+ String key = attributeKey.substring("abs:".length());
+ if (attributes.hasKey(key) && !absUrl(key).equals(""))
+ return true;
+ }
+ return attributes.hasKey(attributeKey);
+ }
+
+ /**
+ * Remove an attribute from this element.
+ * @param attributeKey The attribute to remove.
+ * @return this (for chaining)
+ */
+ public Node removeAttr(String attributeKey) {
+ Validate.notNull(attributeKey);
+ attributes.remove(attributeKey);
+ return this;
+ }
+
+ /**
+ Get the base URI of this node.
+ @return base URI
+ */
+ public String baseUri() {
+ return baseUri;
+ }
+
+ /**
+ Update the base URI of this node and all of its descendants.
+ @param baseUri base URI to set
+ */
+ public void setBaseUri(final String baseUri) {
+ Validate.notNull(baseUri);
+
+ traverse(new NodeVisitor() {
+ public void head(Node node, int depth) {
+ node.baseUri = baseUri;
+ }
+
+ public void tail(Node node, int depth) {
+ }
+ });
+ }
+
+ /**
+ * Get an absolute URL from a URL attribute that may be relative (i.e. an <code>&lt;a href></code> or
+ * <code>&lt;img src></code>).
+ * <p/>
+ * E.g.: <code>String absUrl = linkEl.absUrl("href");</code>
+ * <p/>
+ * If the attribute value is already absolute (i.e. it starts with a protocol, like
+ * <code>http://</code> or <code>https://</code> etc), and it successfully parses as a URL, the attribute is
+ * returned directly. Otherwise, it is treated as a URL relative to the element's {@link #baseUri}, and made
+ * absolute using that.
+ * <p/>
+ * As an alternate, you can use the {@link #attr} method with the <code>abs:</code> prefix, e.g.:
+ * <code>String absUrl = linkEl.attr("abs:href");</code>
+ *
+ * @param attributeKey The attribute key
+ * @return An absolute URL if one could be made, or an empty string (not null) if the attribute was missing or
+ * could not be made successfully into a URL.
+ * @see #attr
+ * @see java.net.URL#URL(java.net.URL, String)
+ */
+ public String absUrl(String attributeKey) {
+ Validate.notEmpty(attributeKey);
+
+ String relUrl = attr(attributeKey);
+ if (!hasAttr(attributeKey)) {
+ return ""; // nothing to make absolute with
+ } else {
+ URL base;
+ try {
+ try {
+ base = new URL(baseUri);
+ } catch (MalformedURLException e) {
+ // the base is unsuitable, but the attribute may be abs on its own, so try that
+ URL abs = new URL(relUrl);
+ return abs.toExternalForm();
+ }
+ // workaround: java resolves '//path/file + ?foo' to '//path/?foo', not '//path/file?foo' as desired
+ if (relUrl.startsWith("?"))
+ relUrl = base.getPath() + relUrl;
+ URL abs = new URL(base, relUrl);
+ return abs.toExternalForm();
+ } catch (MalformedURLException e) {
+ return "";
+ }
+ }
+ }
+
+ /**
+ Get a child node by index
+ @param index index of child node
+ @return the child node at this index.
+ */
+ public Node childNode(int index) {
+ return childNodes.get(index);
+ }
+
+ /**
+ Get this node's children. Presented as an unmodifiable list: new children can not be added, but the child nodes
+ themselves can be manipulated.
+ @return list of children. If no children, returns an empty list.
+ */
+ public List<Node> childNodes() {
+ return Collections.unmodifiableList(childNodes);
+ }
+
+ protected Node[] childNodesAsArray() {
+ return childNodes.toArray(new Node[childNodes().size()]);
+ }
+
+ /**
+ Gets this node's parent node.
+ @return parent node; or null if no parent.
+ */
+ public Node parent() {
+ return parentNode;
+ }
+
+ /**
+ * Gets the Document associated with this Node.
+ * @return the Document associated with this Node, or null if there is no such Document.
+ */
+ public Document ownerDocument() {
+ if (this instanceof Document)
+ return (Document) this;
+ else if (parentNode == null)
+ return null;
+ else
+ return parentNode.ownerDocument();
+ }
+
+ /**
+ * Remove (delete) this node from the DOM tree. If this node has children, they are also removed.
+ */
+ public void remove() {
+ Validate.notNull(parentNode);
+ parentNode.removeChild(this);
+ }
+
+ /**
+ * Insert the specified HTML into the DOM before this node (i.e. as a preceding sibling).
+ * @param html HTML to add before this node
+ * @return this node, for chaining
+ * @see #after(String)
+ */
+ public Node before(String html) {
+ addSiblingHtml(siblingIndex(), html);
+ return this;
+ }
+
+ /**
+ * Insert the specified node into the DOM before this node (i.e. as a preceding sibling).
+ * @param node to add before this node
+ * @return this node, for chaining
+ * @see #after(Node)
+ */
+ public Node before(Node node) {
+ Validate.notNull(node);
+ Validate.notNull(parentNode);
+
+ parentNode.addChildren(siblingIndex(), node);
+ return this;
+ }
+
+ /**
+ * Insert the specified HTML into the DOM after this node (i.e. as a following sibling).
+ * @param html HTML to add after this node
+ * @return this node, for chaining
+ * @see #before(String)
+ */
+ public Node after(String html) {
+ addSiblingHtml(siblingIndex()+1, html);
+ return this;
+ }
+
+ /**
+ * Insert the specified node into the DOM after this node (i.e. as a following sibling).
+ * @param node to add after this node
+ * @return this node, for chaining
+ * @see #before(Node)
+ */
+ public Node after(Node node) {
+ Validate.notNull(node);
+ Validate.notNull(parentNode);
+
+ parentNode.addChildren(siblingIndex()+1, node);
+ return this;
+ }
+
+ private void addSiblingHtml(int index, String html) {
+ Validate.notNull(html);
+ Validate.notNull(parentNode);
+
+ Element context = parent() instanceof Element ? (Element) parent() : null;
+ List<Node> nodes = Parser.parseFragment(html, context, baseUri());
+ parentNode.addChildren(index, nodes.toArray(new Node[nodes.size()]));
+ }
+
+ /**
+ Wrap the supplied HTML around this node.
+ @param html HTML to wrap around this element, e.g. {@code <div class="head"></div>}. Can be arbitrarily deep.
+ @return this node, for chaining.
+ */
+ public Node wrap(String html) {
+ Validate.notEmpty(html);
+
+ Element context = parent() instanceof Element ? (Element) parent() : null;
+ List<Node> wrapChildren = Parser.parseFragment(html, context, baseUri());
+ Node wrapNode = wrapChildren.get(0);
+ if (wrapNode == null || !(wrapNode instanceof Element)) // nothing to wrap with; noop
+ return null;
+
+ Element wrap = (Element) wrapNode;
+ Element deepest = getDeepChild(wrap);
+ parentNode.replaceChild(this, wrap);
+ deepest.addChildren(this);
+
+ // remainder (unbalanced wrap, like <div></div><p></p> -- The <p> is remainder
+ if (wrapChildren.size() > 0) {
+ for (int i = 0; i < wrapChildren.size(); i++) {
+ Node remainder = wrapChildren.get(i);
+ remainder.parentNode.removeChild(remainder);
+ wrap.appendChild(remainder);
+ }
+ }
+ return this;
+ }
+
+ /**
+ * Removes this node from the DOM, and moves its children up into the node's parent. This has the effect of dropping
+ * the node but keeping its children.
+ * <p/>
+ * For example, with the input html:<br/>
+ * {@code <div>One <span>Two <b>Three</b></span></div>}<br/>
+ * Calling {@code element.unwrap()} on the {@code span} element will result in the html:<br/>
+ * {@code <div>One Two <b>Three</b></div>}<br/>
+ * and the {@code "Two "} {@link TextNode} being returned.
+ * @return the first child of this node, after the node has been unwrapped. Null if the node had no children.
+ * @see #remove()
+ * @see #wrap(String)
+ */
+ public Node unwrap() {
+ Validate.notNull(parentNode);
+
+ int index = siblingIndex;
+ Node firstChild = childNodes.size() > 0 ? childNodes.get(0) : null;
+ parentNode.addChildren(index, this.childNodesAsArray());
+ this.remove();
+
+ return firstChild;
+ }
+
+ private Element getDeepChild(Element el) {
+ List<Element> children = el.children();
+ if (children.size() > 0)
+ return getDeepChild(children.get(0));
+ else
+ return el;
+ }
+
+ /**
+ * Replace this node in the DOM with the supplied node.
+ * @param in the node that will will replace the existing node.
+ */
+ public void replaceWith(Node in) {
+ Validate.notNull(in);
+ Validate.notNull(parentNode);
+ parentNode.replaceChild(this, in);
+ }
+
+ protected void setParentNode(Node parentNode) {
+ if (this.parentNode != null)
+ this.parentNode.removeChild(this);
+ this.parentNode = parentNode;
+ }
+
+ protected void replaceChild(Node out, Node in) {
+ Validate.isTrue(out.parentNode == this);
+ Validate.notNull(in);
+ if (in.parentNode != null)
+ in.parentNode.removeChild(in);
+
+ Integer index = out.siblingIndex();
+ childNodes.set(index, in);
+ in.parentNode = this;
+ in.setSiblingIndex(index);
+ out.parentNode = null;
+ }
+
+ protected void removeChild(Node out) {
+ Validate.isTrue(out.parentNode == this);
+ int index = out.siblingIndex();
+ childNodes.remove(index);
+ reindexChildren();
+ out.parentNode = null;
+ }
+
+ protected void addChildren(Node... children) {
+ //most used. short circuit addChildren(int), which hits reindex children and array copy
+ for (Node child: children) {
+ reparentChild(child);
+ childNodes.add(child);
+ child.setSiblingIndex(childNodes.size()-1);
+ }
+ }
+
+ protected void addChildren(int index, Node... children) {
+ Validate.noNullElements(children);
+ for (int i = children.length - 1; i >= 0; i--) {
+ Node in = children[i];
+ reparentChild(in);
+ childNodes.add(index, in);
+ }
+ reindexChildren();
+ }
+
+ private void reparentChild(Node child) {
+ if (child.parentNode != null)
+ child.parentNode.removeChild(child);
+ child.setParentNode(this);
+ }
+
+ private void reindexChildren() {
+ for (int i = 0; i < childNodes.size(); i++) {
+ childNodes.get(i).setSiblingIndex(i);
+ }
+ }
+
+ /**
+ Retrieves this node's sibling nodes. Similar to {@link #childNodes() node.parent.childNodes()}, but does not
+ include this node (a node is not a sibling of itself).
+ @return node siblings. If the node has no parent, returns an empty list.
+ */
+ public List<Node> siblingNodes() {
+ if (parentNode == null)
+ return Collections.emptyList();
+
+ List<Node> nodes = parentNode.childNodes;
+ List<Node> siblings = new ArrayList<Node>(nodes.size() - 1);
+ for (Node node: nodes)
+ if (node != this)
+ siblings.add(node);
+ return siblings;
+ }
+
+ /**
+ Get this node's next sibling.
+ @return next sibling, or null if this is the last sibling
+ */
+ public Node nextSibling() {
+ if (parentNode == null)
+ return null; // root
+
+ List<Node> siblings = parentNode.childNodes;
+ Integer index = siblingIndex();
+ Validate.notNull(index);
+ if (siblings.size() > index+1)
+ return siblings.get(index+1);
+ else
+ return null;
+ }
+
+ /**
+ Get this node's previous sibling.
+ @return the previous sibling, or null if this is the first sibling
+ */
+ public Node previousSibling() {
+ if (parentNode == null)
+ return null; // root
+
+ List<Node> siblings = parentNode.childNodes;
+ Integer index = siblingIndex();
+ Validate.notNull(index);
+ if (index > 0)
+ return siblings.get(index-1);
+ else
+ return null;
+ }
+
+ /**
+ * Get the list index of this node in its node sibling list. I.e. if this is the first node
+ * sibling, returns 0.
+ * @return position in node sibling list
+ * @see org.jsoup.nodes.Element#elementSiblingIndex()
+ */
+ public int siblingIndex() {
+ return siblingIndex;
+ }
+
+ protected void setSiblingIndex(int siblingIndex) {
+ this.siblingIndex = siblingIndex;
+ }
+
+ /**
+ * Perform a depth-first traversal through this node and its descendants.
+ * @param nodeVisitor the visitor callbacks to perform on each node
+ * @return this node, for chaining
+ */
+ public Node traverse(NodeVisitor nodeVisitor) {
+ Validate.notNull(nodeVisitor);
+ NodeTraversor traversor = new NodeTraversor(nodeVisitor);
+ traversor.traverse(this);
+ return this;
+ }
+
+ /**
+ Get the outer HTML of this node.
+ @return HTML
+ */
+ public String outerHtml() {
+ StringBuilder accum = new StringBuilder(128);
+ outerHtml(accum);
+ return accum.toString();
+ }
+
+ protected void outerHtml(StringBuilder accum) {
+ new NodeTraversor(new OuterHtmlVisitor(accum, getOutputSettings())).traverse(this);
+ }
+
+ // if this node has no document (or parent), retrieve the default output settings
+ private Document.OutputSettings getOutputSettings() {
+ return ownerDocument() != null ? ownerDocument().outputSettings() : (new Document("")).outputSettings();
+ }
+
+ /**
+ Get the outer HTML of this node.
+ @param accum accumulator to place HTML into
+ */
+ abstract void outerHtmlHead(StringBuilder accum, int depth, Document.OutputSettings out);
+
+ abstract void outerHtmlTail(StringBuilder accum, int depth, Document.OutputSettings out);
+
+ public String toString() {
+ return outerHtml();
+ }
+
+ protected void indent(StringBuilder accum, int depth, Document.OutputSettings out) {
+ accum.append("\n").append(StringUtil.padding(depth * out.indentAmount()));
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ if (this == o) return true;
+ // todo: have nodes hold a child index, compare against that and parent (not children)
+ return false;
+ }
+
+ @Override
+ public int hashCode() {
+ int result = parentNode != null ? parentNode.hashCode() : 0;
+ // not children, or will block stack as they go back up to parent)
+ result = 31 * result + (attributes != null ? attributes.hashCode() : 0);
+ return result;
+ }
+
+ /**
+ * Create a stand-alone, deep copy of this node, and all of its children. The cloned node will have no siblings or
+ * parent node. As a stand-alone object, any changes made to the clone or any of its children will not impact the
+ * original node.
+ * <p>
+ * The cloned node may be adopted into another Document or node structure using {@link Element#appendChild(Node)}.
+ * @return stand-alone cloned node
+ */
+ @Override
+ public Node clone() {
+ return doClone(null); // splits for orphan
+ }
+
+ protected Node doClone(Node parent) {
+ Node clone;
+ try {
+ clone = (Node) super.clone();
+ } catch (CloneNotSupportedException e) {
+ throw new RuntimeException(e);
+ }
+
+ clone.parentNode = parent; // can be null, to create an orphan split
+ clone.siblingIndex = parent == null ? 0 : siblingIndex;
+ clone.attributes = attributes != null ? attributes.clone() : null;
+ clone.baseUri = baseUri;
+ clone.childNodes = new ArrayList<Node>(childNodes.size());
+ for (Node child: childNodes)
+ clone.childNodes.add(child.doClone(clone)); // clone() creates orphans, doClone() keeps parent
+
+ return clone;
+ }
+
+ private static class OuterHtmlVisitor implements NodeVisitor {
+ private StringBuilder accum;
+ private Document.OutputSettings out;
+
+ OuterHtmlVisitor(StringBuilder accum, Document.OutputSettings out) {
+ this.accum = accum;
+ this.out = out;
+ }
+
+ public void head(Node node, int depth) {
+ node.outerHtmlHead(accum, depth, out);
+ }
+
+ public void tail(Node node, int depth) {
+ if (!node.nodeName().equals("#text")) // saves a void hit.
+ node.outerHtmlTail(accum, depth, out);
+ }
+ }
+}