summaryrefslogtreecommitdiffstats
path: root/server/src/org/jsoup/nodes
diff options
context:
space:
mode:
Diffstat (limited to 'server/src/org/jsoup/nodes')
-rw-r--r--server/src/org/jsoup/nodes/Attribute.java131
-rw-r--r--server/src/org/jsoup/nodes/Attributes.java249
-rw-r--r--server/src/org/jsoup/nodes/Comment.java46
-rw-r--r--server/src/org/jsoup/nodes/DataNode.java62
-rw-r--r--server/src/org/jsoup/nodes/Document.java350
-rw-r--r--server/src/org/jsoup/nodes/DocumentType.java46
-rw-r--r--server/src/org/jsoup/nodes/Element.java1119
-rw-r--r--server/src/org/jsoup/nodes/Entities.java184
-rw-r--r--server/src/org/jsoup/nodes/Node.java615
-rw-r--r--server/src/org/jsoup/nodes/TextNode.java175
-rw-r--r--server/src/org/jsoup/nodes/XmlDeclaration.java48
-rw-r--r--server/src/org/jsoup/nodes/entities-base.properties106
-rw-r--r--server/src/org/jsoup/nodes/entities-full.properties2032
-rw-r--r--server/src/org/jsoup/nodes/package-info.java4
14 files changed, 5167 insertions, 0 deletions
diff --git a/server/src/org/jsoup/nodes/Attribute.java b/server/src/org/jsoup/nodes/Attribute.java
new file mode 100644
index 0000000000..02eb29db83
--- /dev/null
+++ b/server/src/org/jsoup/nodes/Attribute.java
@@ -0,0 +1,131 @@
+package org.jsoup.nodes;
+
+import org.jsoup.helper.Validate;
+
+import java.util.Map;
+
+/**
+ One attribute of a node: a key paired with a value. The key is trimmed and normalised to lower-case.
+
+ @author Jonathan Hedley, jonathan@hedley.net */
+public class Attribute implements Map.Entry<String, String>, Cloneable {
+    private String key;
+    private String value;
+
+    /**
+     * Build an attribute from an unencoded (raw) key and value.
+     * @param key attribute key; checked with {@code Validate.notEmpty}, then trimmed and lower-cased
+     * @param value attribute value; checked with {@code Validate.notNull}
+     * @see #createFromEncoded
+     */
+    public Attribute(String key, String value) {
+        Validate.notEmpty(key);
+        Validate.notNull(value);
+        final String normalisedKey = key.trim().toLowerCase();
+        this.key = normalisedKey;
+        this.value = value;
+    }
+
+    /**
+     Get the attribute key.
+     @return the (normalised) attribute key
+     */
+    public String getKey() {
+        return this.key;
+    }
+
+    /**
+     Replace the attribute key. The new key is normalised the same way the constructor does it.
+     @param key the new key; checked with {@code Validate.notEmpty}
+     */
+    public void setKey(String key) {
+        Validate.notEmpty(key);
+        final String normalisedKey = key.trim().toLowerCase();
+        this.key = normalisedKey;
+    }
+
+    /**
+     Get the attribute value.
+     @return the attribute value
+     */
+    public String getValue() {
+        return this.value;
+    }
+
+    /**
+     Replace the attribute value.
+     @param value the new attribute value; checked with {@code Validate.notNull}
+     @return the value held before this call
+     */
+    public String setValue(String value) {
+        Validate.notNull(value);
+        final String previous = this.value;
+        this.value = value;
+        return previous;
+    }
+
+    /**
+     Render this attribute as HTML, e.g. {@code href="index.html"}. The value is escaped using the output settings of
+     a freshly created, empty document.
+     @return HTML
+     */
+    public String html() {
+        Document.OutputSettings defaults = (new Document("")).outputSettings();
+        String escaped = Entities.escape(value, defaults);
+        return key + "=\"" + escaped + "\"";
+    }
+
+    /**
+     Append the HTML form of this attribute ({@code key="escaped value"}) to an accumulator.
+     @param accum builder to append to
+     @param out output settings handed to the value escaper
+     */
+    protected void html(StringBuilder accum, Document.OutputSettings out) {
+        accum.append(key);
+        accum.append("=\"");
+        accum.append(Entities.escape(value, out));
+        accum.append("\"");
+    }
+
+    /**
+     Get the string form of this attribute, which is the same as {@link #html()}.
+     @return string
+     */
+    public String toString() {
+        return html();
+    }
+
+    /**
+     * Build an Attribute from an unencoded key and an HTML attribute encoded value.
+     * @param unencodedKey key, taken as-is (not decoded)
+     * @param encodedValue HTML attribute encoded value; it is unescaped before being stored
+     * @return attribute
+     */
+    public static Attribute createFromEncoded(String unencodedKey, String encodedValue) {
+        return new Attribute(unencodedKey, Entities.unescape(encodedValue, true));
+    }
+
+    /**
+     Tests whether the key starts with the data prefix and has at least one more character after it.
+     @return true if this is a custom data attribute
+     */
+    protected boolean isDataAttribute() {
+        final String prefix = Attributes.dataPrefix;
+        return key.length() > prefix.length() && key.startsWith(prefix);
+    }
+
+    @Override
+    public boolean equals(Object o) {
+        if (this == o) return true;
+        if (!(o instanceof Attribute)) return false;
+
+        Attribute that = (Attribute) o;
+
+        boolean keysMatch = (key == null) ? that.key == null : key.equals(that.key);
+        if (!keysMatch) return false;
+
+        return (value == null) ? that.value == null : value.equals(that.value);
+    }
+
+    @Override
+    public int hashCode() {
+        int keyHash = (key == null) ? 0 : key.hashCode();
+        int valueHash = (value == null) ? 0 : value.hashCode();
+        return 31 * keyHash + valueHash;
+    }
+
+    @Override
+    public Attribute clone() {
+        Attribute copy;
+        try {
+            // key and value are immutable strings, so the shallow copy is already a full copy
+            copy = (Attribute) super.clone();
+        } catch (CloneNotSupportedException e) {
+            throw new RuntimeException(e);
+        }
+        return copy;
+    }
+}
diff --git a/server/src/org/jsoup/nodes/Attributes.java b/server/src/org/jsoup/nodes/Attributes.java
new file mode 100644
index 0000000000..9436750fc9
--- /dev/null
+++ b/server/src/org/jsoup/nodes/Attributes.java
@@ -0,0 +1,249 @@
+package org.jsoup.nodes;
+
+import org.jsoup.helper.Validate;
+
+import java.util.*;
+
+/**
+ * The attributes of an Element.
+ * <p>
+ * Attributes are treated as a map: there can be only one value associated with an attribute key.
+ * <p>
+ * Attribute keys are looked up case insensitively, and keys are normalised to lower-case.
+ *
+ * @author Jonathan Hedley, jonathan@hedley.net
+ */
+public class Attributes implements Iterable<Attribute>, Cloneable {
+    protected static final String dataPrefix = "data-";
+
+    // linked hash map to preserve insertion order.
+    // null by default as so many elements have no attributes -- saves a good chunk of memory
+    private LinkedHashMap<String, Attribute> attributes = null;
+
+    /**
+     Get an attribute value by key.
+     @param key the attribute key; lower-cased before the lookup
+     @return the attribute value if set; or empty string if not set.
+     @see #hasKey(String)
+     */
+    public String get(String key) {
+        Validate.notEmpty(key);
+
+        if (attributes == null)
+            return "";
+
+        Attribute attr = attributes.get(key.toLowerCase());
+        return attr != null ? attr.getValue() : "";
+    }
+
+    /**
+     Set a new attribute, or replace an existing one by key.
+     @param key attribute key
+     @param value attribute value
+     */
+    public void put(String key, String value) {
+        Attribute attr = new Attribute(key, value);
+        put(attr);
+    }
+
+    /**
+     Set a new attribute, or replace an existing one by key.
+     @param attribute attribute; stored under its own (already normalised) key
+     */
+    public void put(Attribute attribute) {
+        Validate.notNull(attribute);
+        if (attributes == null)
+            attributes = new LinkedHashMap<String, Attribute>(2);
+        attributes.put(attribute.getKey(), attribute);
+    }
+
+    /**
+     Remove an attribute by key.
+     @param key attribute key to remove; lower-cased before the lookup
+     */
+    public void remove(String key) {
+        Validate.notEmpty(key);
+        if (attributes == null)
+            return;
+        attributes.remove(key.toLowerCase());
+    }
+
+    /**
+     Tests if these attributes contain an attribute with this key.
+     @param key key to check for; lower-cased before the lookup
+     @return true if key exists, false otherwise
+     */
+    public boolean hasKey(String key) {
+        return attributes != null && attributes.containsKey(key.toLowerCase());
+    }
+
+    /**
+     Get the number of attributes in this set.
+     @return size
+     */
+    public int size() {
+        if (attributes == null)
+            return 0;
+        return attributes.size();
+    }
+
+    /**
+     Add all the attributes from the incoming set to this set. The Attribute objects themselves are shared with the
+     incoming set, not copied.
+     @param incoming attributes to add to these attributes.
+     */
+    public void addAll(Attributes incoming) {
+        if (incoming.size() == 0)
+            return;
+        if (attributes == null)
+            attributes = new LinkedHashMap<String, Attribute>(incoming.size());
+        attributes.putAll(incoming.attributes);
+    }
+
+    /**
+     Iterate over a snapshot of the attributes, as produced by {@link #asList()}.
+     @return iterator over the attributes, in insertion order
+     */
+    public Iterator<Attribute> iterator() {
+        return asList().iterator();
+    }
+
+    /**
+     Get the attributes as a List, for iteration. The list itself is an unmodifiable snapshot, but it holds the live
+     Attribute objects. Do not modify the keys of the attributes via this list, as changes to keys will not be
+     recognised in the containing set.
+     @return an unmodifiable list of the attributes.
+     */
+    public List<Attribute> asList() {
+        if (attributes == null)
+            return Collections.emptyList();
+
+        List<Attribute> list = new ArrayList<Attribute>(attributes.size());
+        for (Map.Entry<String, Attribute> entry : attributes.entrySet()) {
+            list.add(entry.getValue());
+        }
+        return Collections.unmodifiableList(list);
+    }
+
+    /**
+     * Retrieves a filtered view of attributes that are HTML5 custom data attributes; that is, attributes with keys
+     * starting with {@code data-}. Keys in the returned map have the {@code data-} prefix stripped.
+     * @return map of custom data attributes.
+     */
+    public Map<String, String> dataset() {
+        return new Dataset();
+    }
+
+    /**
+     Get the HTML representation of these attributes.
+     @return HTML
+     */
+    public String html() {
+        StringBuilder accum = new StringBuilder();
+        html(accum, (new Document("")).outputSettings()); // output settings a bit funky, but this html() seldom used
+        return accum.toString();
+    }
+
+    /**
+     Append every attribute, each preceded by a single space, to the accumulator.
+     @param accum builder to append to
+     @param out output settings passed on to each attribute
+     */
+    void html(StringBuilder accum, Document.OutputSettings out) {
+        if (attributes == null)
+            return;
+
+        for (Map.Entry<String, Attribute> entry : attributes.entrySet()) {
+            Attribute attribute = entry.getValue();
+            accum.append(" ");
+            attribute.html(accum, out);
+        }
+    }
+
+    @Override
+    public String toString() {
+        return html();
+    }
+
+    @Override
+    public boolean equals(Object o) {
+        if (this == o) return true;
+        if (!(o instanceof Attributes)) return false;
+
+        Attributes that = (Attributes) o;
+
+        if (attributes != null ? !attributes.equals(that.attributes) : that.attributes != null) return false;
+
+        return true;
+    }
+
+    @Override
+    public int hashCode() {
+        return attributes != null ? attributes.hashCode() : 0;
+    }
+
+    /**
+     Deep copy: the clone gets its own map holding clones of each attribute.
+     @return independent copy of these attributes
+     */
+    @Override
+    public Attributes clone() {
+        if (attributes == null)
+            return new Attributes();
+
+        Attributes clone;
+        try {
+            clone = (Attributes) super.clone();
+        } catch (CloneNotSupportedException e) {
+            throw new RuntimeException(e);
+        }
+        clone.attributes = new LinkedHashMap<String, Attribute>(attributes.size());
+        for (Attribute attribute: this)
+            clone.attributes.put(attribute.getKey(), attribute.clone());
+        return clone;
+    }
+
+    /**
+     Live map view over the data attributes of the enclosing Attributes. Keys are exposed without the data prefix;
+     writes go straight through to the backing attribute map.
+     */
+    private class Dataset extends AbstractMap<String, String> {
+
+        private Dataset() {
+            // the view reads the backing map directly, so make sure it exists
+            if (attributes == null)
+                attributes = new LinkedHashMap<String, Attribute>(2);
+        }
+
+        @Override
+        public Set<Entry<String, String>> entrySet() {
+            return new EntrySet();
+        }
+
+        /**
+         Set a data attribute.
+         @param key key without the data prefix
+         @param value new value
+         @return the previous value for this key, or null if there was none
+         */
+        @Override
+        public String put(String key, String value) {
+            Attribute attr = new Attribute(dataKey(key), value);
+            // Store under the attribute's own normalised key (trimmed, lower-cased) so that the map key always
+            // agrees with Attribute.getKey() and with the lower-casing done by get/hasKey/remove.
+            Attribute previous = attributes.put(attr.getKey(), attr);
+            return previous != null ? previous.getValue() : null;
+        }
+
+        private class EntrySet extends AbstractSet<Map.Entry<String, String>> {
+            @Override
+            public Iterator<Map.Entry<String, String>> iterator() {
+                return new DatasetIterator();
+            }
+
+            @Override
+            public int size() {
+                int count = 0;
+                for (Attribute attribute : attributes.values()) {
+                    if (attribute.isDataAttribute())
+                        count++;
+                }
+                return count;
+            }
+        }
+
+        /**
+         Iterator over the data attributes only. Looks ahead in the backing iterator to find the next data attribute,
+         so {@code hasNext()} may be called any number of times without skipping elements.
+         */
+        private class DatasetIterator implements Iterator<Map.Entry<String, String>> {
+            private final Iterator<Attribute> attrIter = attributes.values().iterator();
+            private Attribute pending;      // next data attribute found by look-ahead; null if none buffered
+            private boolean canRemove;      // true only while attrIter still points at the element next() returned
+
+            public boolean hasNext() {
+                while (pending == null && attrIter.hasNext()) {
+                    Attribute candidate = attrIter.next();
+                    canRemove = false; // backing iterator has moved past the last returned element
+                    if (candidate.isDataAttribute())
+                        pending = candidate;
+                }
+                return pending != null;
+            }
+
+            public Map.Entry<String, String> next() {
+                if (!hasNext())
+                    throw new NoSuchElementException();
+                Attribute current = pending;
+                pending = null;
+                canRemove = true;
+                return new Attribute(current.getKey().substring(dataPrefix.length()), current.getValue());
+            }
+
+            /**
+             Remove the element last returned by {@code next()}. Must be called before the next {@code hasNext()},
+             because the look-ahead moves the backing iterator.
+             @throws IllegalStateException if there is no element to remove at this point
+             */
+            public void remove() {
+                if (!canRemove)
+                    throw new IllegalStateException();
+                // remove through the backing iterator; removing via the map would invalidate it
+                attrIter.remove();
+                canRemove = false;
+            }
+        }
+    }
+
+    private static String dataKey(String key) {
+        return dataPrefix + key;
+    }
+}
diff --git a/server/src/org/jsoup/nodes/Comment.java b/server/src/org/jsoup/nodes/Comment.java
new file mode 100644
index 0000000000..37fd4368fa
--- /dev/null
+++ b/server/src/org/jsoup/nodes/Comment.java
@@ -0,0 +1,46 @@
+package org.jsoup.nodes;
+
+/**
+ A node holding an HTML comment.
+
+ @author Jonathan Hedley, jonathan@hedley.net */
+public class Comment extends Node {
+    private static final String COMMENT_KEY = "comment";
+
+    /**
+     Build a comment node. The comment text is kept in this node's attributes.
+     @param data The contents of the comment
+     @param baseUri base URI
+     */
+    public Comment(String data, String baseUri) {
+        super(baseUri);
+        this.attributes.put(COMMENT_KEY, data);
+    }
+
+    public String nodeName() {
+        return "#comment";
+    }
+
+    /**
+     Get the text held by this comment.
+     @return comment content
+     */
+    public String getData() {
+        return this.attributes.get(COMMENT_KEY);
+    }
+
+    void outerHtmlHead(StringBuilder accum, int depth, Document.OutputSettings out) {
+        if (out.prettyPrint()) {
+            indent(accum, depth, out);
+        }
+        // the comment data is written out as-is between the comment delimiters
+        accum.append("<!--");
+        accum.append(getData());
+        accum.append("-->");
+    }
+
+    void outerHtmlTail(StringBuilder accum, int depth, Document.OutputSettings out) {
+        // nothing to close for a comment
+    }
+
+    public String toString() {
+        return outerHtml();
+    }
+}
diff --git a/server/src/org/jsoup/nodes/DataNode.java b/server/src/org/jsoup/nodes/DataNode.java
new file mode 100644
index 0000000000..a64f56f0a4
--- /dev/null
+++ b/server/src/org/jsoup/nodes/DataNode.java
@@ -0,0 +1,62 @@
+package org.jsoup.nodes;
+
+/**
+ A data node, used for the contents of style, script tags etc, where the contents should not show in text().
+
+ @author Jonathan Hedley, jonathan@hedley.net */
+public class DataNode extends Node {
+    private static final String DATA_KEY = "data";
+
+    /**
+     Build a DataNode. The data is kept in this node's attributes.
+     @param data data contents
+     @param baseUri base URI
+     */
+    public DataNode(String data, String baseUri) {
+        super(baseUri);
+        this.attributes.put(DATA_KEY, data);
+    }
+
+    public String nodeName() {
+        return "#data";
+    }
+
+    /**
+     Get the data contents of this node, exactly as stored.
+     @return data
+     */
+    public String getWholeData() {
+        return this.attributes.get(DATA_KEY);
+    }
+
+    /**
+     * Replace the data contents of this node.
+     * @param data unencoded data
+     * @return this node, for chaining
+     */
+    public DataNode setWholeData(String data) {
+        this.attributes.put(DATA_KEY, data);
+        return this;
+    }
+
+    void outerHtmlHead(StringBuilder accum, int depth, Document.OutputSettings out) {
+        // data nodes are written out unescaped, so a " inside script or style stays plain
+        final String data = getWholeData();
+        accum.append(data);
+    }
+
+    void outerHtmlTail(StringBuilder accum, int depth, Document.OutputSettings out) {
+        // no closing output for a data node
+    }
+
+    public String toString() {
+        return outerHtml();
+    }
+
+    /**
+     Build a DataNode from HTML encoded data; the data is unescaped before it is stored.
+     @param encodedData encoded data
+     @param baseUri base URI
+     @return new DataNode
+     */
+    public static DataNode createFromEncoded(String encodedData, String baseUri) {
+        return new DataNode(Entities.unescape(encodedData), baseUri);
+    }
+}
diff --git a/server/src/org/jsoup/nodes/Document.java b/server/src/org/jsoup/nodes/Document.java
new file mode 100644
index 0000000000..adb371ce14
--- /dev/null
+++ b/server/src/org/jsoup/nodes/Document.java
@@ -0,0 +1,350 @@
+package org.jsoup.nodes;
+
+import org.jsoup.helper.Validate;
+import org.jsoup.parser.Tag;
+import org.jsoup.select.Elements;
+
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetEncoder;
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ The root of a parsed HTML document.
+
+ @author Jonathan Hedley, jonathan@hedley.net */
+public class Document extends Element {
+    private OutputSettings outputSettings = new OutputSettings();
+    private QuirksMode quirksMode = QuirksMode.noQuirks;
+
+    /**
+     Build a new, empty Document whose root tag is {@code #root}.
+     @param baseUri base URI of document
+     @see org.jsoup.Jsoup#parse
+     @see #createShell
+     */
+    public Document(String baseUri) {
+        super(Tag.valueOf("#root"), baseUri);
+    }
+
+    /**
+     Build an empty document skeleton that further elements can be added to.
+     @param baseUri baseUri of document
+     @return document with html, head, and body elements.
+     */
+    public static Document createShell(String baseUri) {
+        Validate.notNull(baseUri);
+
+        Document shell = new Document(baseUri);
+        Element htmlEl = shell.appendElement("html");
+        htmlEl.appendElement("head");
+        htmlEl.appendElement("body");
+        return shell;
+    }
+
+    /**
+     Get the first {@code head} element found in this document.
+     @return {@code head}, or null if there is none
+     */
+    public Element head() {
+        return findFirstElementByTagName("head", this);
+    }
+
+    /**
+     Get the first {@code body} element found in this document.
+     @return {@code body}, or null if there is none
+     */
+    public Element body() {
+        return findFirstElementByTagName("body", this);
+    }
+
+    /**
+     Get the text of the document's first {@code title} element.
+     @return Trimmed title, or empty string if none set.
+     */
+    public String title() {
+        Element titleEl = getElementsByTag("title").first();
+        if (titleEl == null)
+            return "";
+        return titleEl.text().trim();
+    }
+
+    /**
+     Set the document's {@code title}. An existing {@code title} element gets its text replaced; otherwise a new
+     {@code title} is appended to {@code head}.
+     @param title string to set as title
+     */
+    public void title(String title) {
+        Validate.notNull(title);
+        Element titleEl = getElementsByTag("title").first();
+        if (titleEl != null) {
+            titleEl.text(title);
+            return;
+        }
+        // no title yet: add one to head
+        head().appendElement("title").text(title);
+    }
+
+    /**
+     Build a new Element carrying this document's base uri. The element is not attached to this document.
+     @param tagName element tag name (e.g. {@code a})
+     @return new element
+     */
+    public Element createElement(String tagName) {
+        Tag newTag = Tag.valueOf(tagName);
+        return new Element(newTag, this.baseUri());
+    }
+
+    /**
+     Normalise the document: make sure html, head and body exist, move non-blank text found directly under the root,
+     html, or head into the body, and merge duplicate head / body elements.
+     @return this document after normalisation
+     */
+    public Document normalise() {
+        Element htmlEl = findFirstElementByTagName("html", this);
+        if (htmlEl == null) {
+            htmlEl = appendElement("html");
+        }
+        if (head() == null) {
+            htmlEl.prependElement("head");
+        }
+        if (body() == null) {
+            htmlEl.appendElement("body");
+        }
+
+        // Text nodes are pulled out of head, html, and root (in that order) and pushed to the front of body.
+        // The inverse order is deliberate: it keeps the text in document order once it is all in body.
+        normaliseTextNodes(head());
+        normaliseTextNodes(htmlEl);
+        normaliseTextNodes(this);
+
+        normaliseStructure("head", htmlEl);
+        normaliseStructure("body", htmlEl);
+
+        return this;
+    }
+
+    // moves the non-blank text nodes that are direct children of element to the front of body. does not recurse.
+    private void normaliseTextNodes(Element element) {
+        List<Node> movers = new ArrayList<Node>();
+        for (Node child : element.childNodes) {
+            if (!(child instanceof TextNode))
+                continue;
+            TextNode textNode = (TextNode) child;
+            if (!textNode.isBlank())
+                movers.add(textNode);
+        }
+
+        // walk backwards, as each node is prepended
+        int index = movers.size();
+        while (--index >= 0) {
+            Node mover = movers.get(index);
+            element.removeChild(mover);
+            body().prependChild(new TextNode(" ", ""));
+            body().prependChild(mover);
+        }
+    }
+
+    // merge multiple <head> or <body> contents into one, delete the remainder, and ensure they are owned by <html>
+    private void normaliseStructure(String tag, Element htmlEl) {
+        Elements matches = this.getElementsByTag(tag);
+        Element master = matches.first(); // normalise() has created one by now if it was missing
+
+        if (matches.size() > 1) { // duplicates: collect their children, drop them, re-home children under master
+            List<Node> orphans = new ArrayList<Node>();
+            for (int i = 1; i < matches.size(); i++) {
+                Node duplicate = matches.get(i);
+                for (Node child : duplicate.childNodes) {
+                    orphans.add(child);
+                }
+                duplicate.remove();
+            }
+
+            for (Node orphan : orphans) {
+                master.appendChild(orphan);
+            }
+        }
+
+        // ensure parented by <html>
+        boolean ownedByHtml = master.parent().equals(htmlEl);
+        if (!ownedByHtml) {
+            htmlEl.appendChild(master); // includes remove()
+        }
+    }
+
+    // depth-first search for the first node with this name; used for the html, head, body finders
+    private Element findFirstElementByTagName(String tag, Node node) {
+        if (node.nodeName().equals(tag))
+            return (Element) node;
+
+        for (Node child : node.childNodes) {
+            Element match = findFirstElementByTagName(tag, child);
+            if (match != null)
+                return match;
+        }
+        return null;
+    }
+
+    @Override
+    public String outerHtml() {
+        // the document root has no wrapper tag of its own, so the outer HTML is the inner HTML
+        return super.html();
+    }
+
+    /**
+     Set the text of the {@code body} of this document.
+     @param text unencoded text
+     @return this document
+     */
+    @Override
+    public Element text(String text) {
+        // delegated to body so that the document structure itself is left alone
+        body().text(text);
+        return this;
+    }
+
+    @Override
+    public String nodeName() {
+        return "#document";
+    }
+
+    @Override
+    public Document clone() {
+        Document copy = (Document) super.clone();
+        copy.outputSettings = this.outputSettings.clone();
+        return copy;
+    }
+
+    /**
+     * A Document's output settings control the form of the text() and html() methods.
+     */
+    public static class OutputSettings implements Cloneable {
+        private Entities.EscapeMode escapeMode = Entities.EscapeMode.base;
+        private Charset charset = Charset.forName("UTF-8");
+        private CharsetEncoder charsetEncoder = charset.newEncoder();
+        private boolean prettyPrint = true;
+        private int indentAmount = 1;
+
+        public OutputSettings() {}
+
+        /**
+         * Get the current HTML escape mode. The default is <code>base</code>.
+         * @return the current escape mode
+         */
+        public Entities.EscapeMode escapeMode() {
+            return this.escapeMode;
+        }
+
+        /**
+         * Set the escape mode.
+         * @param escapeMode the new escape mode to use
+         * @return these output settings, for chaining
+         */
+        public OutputSettings escapeMode(Entities.EscapeMode escapeMode) {
+            this.escapeMode = escapeMode;
+            return this;
+        }
+
+        /**
+         * Get the current output charset. The default is UTF-8.
+         * @return the current charset.
+         */
+        public Charset charset() {
+            return this.charset;
+        }
+
+        /**
+         * Update the output charset. The charset encoder is replaced with a new one for the given charset.
+         * @param charset the new charset to use.
+         * @return these output settings, for chaining
+         */
+        public OutputSettings charset(Charset charset) {
+            // todo: this should probably update the doc's meta charset
+            this.charset = charset;
+            this.charsetEncoder = charset.newEncoder();
+            return this;
+        }
+
+        /**
+         * Update the output charset, looked up by name.
+         * @param charset the new charset (by name) to use.
+         * @return these output settings, for chaining
+         */
+        public OutputSettings charset(String charset) {
+            Charset resolved = Charset.forName(charset);
+            charset(resolved);
+            return this;
+        }
+
+        CharsetEncoder encoder() {
+            return this.charsetEncoder;
+        }
+
+        /**
+         * Get if pretty printing is enabled. Default is true.
+         * @return if pretty printing is enabled.
+         */
+        public boolean prettyPrint() {
+            return this.prettyPrint;
+        }
+
+        /**
+         * Enable or disable pretty printing.
+         * @param pretty new pretty print setting
+         * @return this, for chaining
+         */
+        public OutputSettings prettyPrint(boolean pretty) {
+            this.prettyPrint = pretty;
+            return this;
+        }
+
+        /**
+         * Get the current indent amount. Default is 1.
+         * @return the current indent amount
+         */
+        public int indentAmount() {
+            return this.indentAmount;
+        }
+
+        /**
+         * Set the indent amount.
+         * @param indentAmount the new indent amount. Must be >= 0.
+         * @return this, for chaining
+         */
+        public OutputSettings indentAmount(int indentAmount) {
+            Validate.isTrue(indentAmount >= 0);
+            this.indentAmount = indentAmount;
+            return this;
+        }
+
+        @Override
+        public OutputSettings clone() {
+            OutputSettings copy;
+            try {
+                copy = (OutputSettings) super.clone();
+            } catch (CloneNotSupportedException e) {
+                throw new RuntimeException(e);
+            }
+            // give the copy its own charset and charset encoder
+            copy.charset(charset.name());
+            copy.escapeMode = Entities.EscapeMode.valueOf(escapeMode.name());
+            // indentAmount and prettyPrint are primitives, already carried over by Object.clone()
+            return copy;
+        }
+    }
+
+    /**
+     * Get the document's current output settings.
+     * @return the document's current output settings.
+     */
+    public OutputSettings outputSettings() {
+        return this.outputSettings;
+    }
+
+    public enum QuirksMode {
+        noQuirks, quirks, limitedQuirks
+    }
+
+    public QuirksMode quirksMode() {
+        return this.quirksMode;
+    }
+
+    public Document quirksMode(QuirksMode quirksMode) {
+        this.quirksMode = quirksMode;
+        return this;
+    }
+}
+
diff --git a/server/src/org/jsoup/nodes/DocumentType.java b/server/src/org/jsoup/nodes/DocumentType.java
new file mode 100644
index 0000000000..f8c79f0d18
--- /dev/null
+++ b/server/src/org/jsoup/nodes/DocumentType.java
@@ -0,0 +1,46 @@
+package org.jsoup.nodes;
+
+import org.jsoup.helper.StringUtil;
+import org.jsoup.helper.Validate;
+
+/**
+ * A {@code <!DOCTYPE>} node.
+ */
+public class DocumentType extends Node {
+    // todo: quirk mode from publicId and systemId
+
+    /**
+     * Build a new doctype node. The name, public ID and system ID are stored as attributes of the node.
+     * @param name the doctype's name; checked with {@code Validate.notEmpty}
+     * @param publicId the doctype's public ID
+     * @param systemId the doctype's system ID
+     * @param baseUri the doctype's base URI
+     */
+    public DocumentType(String name, String publicId, String systemId, String baseUri) {
+        super(baseUri);
+
+        Validate.notEmpty(name);
+        attr("name", name);
+        attr("publicId", publicId);
+        attr("systemId", systemId);
+    }
+
+    @Override
+    public String nodeName() {
+        return "#doctype";
+    }
+
+    @Override
+    void outerHtmlHead(StringBuilder accum, int depth, Document.OutputSettings out) {
+        accum.append("<!DOCTYPE ");
+        accum.append(attr("name"));
+
+        // the public and system identifiers are only written when they are not blank
+        final String publicId = attr("publicId");
+        if (!StringUtil.isBlank(publicId)) {
+            accum.append(" PUBLIC \"").append(publicId).append("\"");
+        }
+
+        final String systemId = attr("systemId");
+        if (!StringUtil.isBlank(systemId)) {
+            accum.append(" \"").append(systemId).append("\"");
+        }
+
+        accum.append('>');
+    }
+
+    @Override
+    void outerHtmlTail(StringBuilder accum, int depth, Document.OutputSettings out) {
+        // a doctype has no closing output
+    }
+}
diff --git a/server/src/org/jsoup/nodes/Element.java b/server/src/org/jsoup/nodes/Element.java
new file mode 100644
index 0000000000..5c1894c934
--- /dev/null
+++ b/server/src/org/jsoup/nodes/Element.java
@@ -0,0 +1,1119 @@
+package org.jsoup.nodes;
+
+import org.jsoup.helper.StringUtil;
+import org.jsoup.helper.Validate;
+import org.jsoup.parser.Parser;
+import org.jsoup.parser.Tag;
+import org.jsoup.select.Collector;
+import org.jsoup.select.Elements;
+import org.jsoup.select.Evaluator;
+import org.jsoup.select.Selector;
+
+import java.util.*;
+import java.util.regex.Pattern;
+import java.util.regex.PatternSyntaxException;
+
+/**
+ * A HTML element consists of a tag name, attributes, and child nodes (including text nodes and
+ * other elements).
+ *
+ * From an Element, you can extract data, traverse the node graph, and manipulate the HTML.
+ *
+ * @author Jonathan Hedley, jonathan@hedley.net
+ */
+public class Element extends Node {
+ private Tag tag;
+ private Set<String> classNames;
+
+ /**
+ * Create a new, standalone Element. (Standalone in that is has no parent.)
+ *
+ * @param tag tag of this element
+ * @param baseUri the base URI
+ * @param attributes initial attributes
+ * @see #appendChild(Node)
+ * @see #appendElement(String)
+ */
+ public Element(Tag tag, String baseUri, Attributes attributes) {
+ super(baseUri, attributes);
+
+ Validate.notNull(tag);
+ this.tag = tag;
+ }
+
+ /**
+ * Create a new Element from a tag and a base URI.
+ *
+ * @param tag element tag
+ * @param baseUri the base URI of this element. It is acceptable for the base URI to be an empty
+ * string, but not null.
+ * @see Tag#valueOf(String)
+ */
+ public Element(Tag tag, String baseUri) {
+ this(tag, baseUri, new Attributes());
+ }
+
+ @Override
+ public String nodeName() {
+ return tag.getName();
+ }
+
+ /**
+ * Get the name of the tag for this element. E.g. {@code div}
+ *
+ * @return the tag name
+ */
+ public String tagName() {
+ return tag.getName();
+ }
+
+ /**
+ * Change the tag of this element. For example, convert a {@code <span>} to a {@code <div>} with
+ * {@code el.tagName("div");}.
+ *
+ * @param tagName new tag name for this element
+ * @return this element, for chaining
+ */
+ public Element tagName(String tagName) {
+ Validate.notEmpty(tagName, "Tag name must not be empty.");
+ tag = Tag.valueOf(tagName);
+ return this;
+ }
+
+ /**
+ * Get the Tag for this element.
+ *
+ * @return the tag object
+ */
+ public Tag tag() {
+ return tag;
+ }
+
+ /**
+ * Test if this element is a block-level element. (E.g. {@code <div> == true} or an inline element
+ * {@code <span> == false}). The answer is delegated to this element's {@link Tag}.
+ *
+ * @return true if block, false if not (and thus inline)
+ */
+ public boolean isBlock() {
+ return tag.isBlock();
+ }
+
+ /**
+ * Get the {@code id} attribute of this element.
+ *
+ * @return The id attribute, if present, or an empty string if not.
+ */
+ public String id() {
+ String id = attr("id");
+ return id == null ? "" : id;
+ }
+
+ /**
+ * Set an attribute value on this element. If this element already has an attribute with the
+ * key, its value is updated; otherwise, a new attribute is added.
+ *
+ * @param attributeKey the attribute key
+ * @param attributeValue the attribute value
+ * @return this element
+ */
+ public Element attr(String attributeKey, String attributeValue) {
+ super.attr(attributeKey, attributeValue);
+ return this;
+ }
+
+ /**
+ * Get this element's HTML5 custom data attributes. Each attribute in the element that has a key
+ * starting with "data-" is included in the dataset.
+ * <p>
+ * E.g., the element {@code <div data-package="jsoup" data-language="Java" class="group">...} has the dataset
+ * {@code package=jsoup, language=Java}.
+ * <p>
+ * This map is a filtered view of the element's attribute map. Changes to one map (add, remove, update) are reflected
+ * in the other map.
+ * <p>
+ * You can find elements that have data attributes using the {@code [^data-]} attribute key prefix selector.
+ * @return a map of {@code key=value} custom data attributes.
+ */
+ public Map<String, String> dataset() {
+ return attributes.dataset();
+ }
+
+ @Override
+ public final Element parent() {
+ return (Element) parentNode;
+ }
+
+ /**
+ * Get this element's parent and ancestors, up to the document root.
+ * @return this element's stack of parents, closest first.
+ */
+ public Elements parents() {
+ Elements parents = new Elements();
+ accumulateParents(this, parents);
+ return parents;
+ }
+
+ private static void accumulateParents(Element el, Elements parents) {
+ Element parent = el.parent();
+ if (parent != null && !parent.tagName().equals("#root")) {
+ parents.add(parent);
+ accumulateParents(parent, parents);
+ }
+ }
+
+ /**
+ * Get a child element of this element, by its 0-based index number.
+ * <p/>
+ * Note that an element can have both mixed Nodes and Elements as children. This method inspects
+ * a filtered list of children that are elements, and the index is based on that filtered list.
+ *
+ * @param index the index number of the element to retrieve
+ * @return the child element at that position in the filtered list
+ * @throws IndexOutOfBoundsException if there is no child element at {@code index}, as raised by
+ * {@code children().get(index)}
+ * @see #childNode(int)
+ */
+ public Element child(int index) {
+ return children().get(index);
+ }
+
+    /**
+     * Get the child nodes of this element that are themselves elements.
+     * <p/>
+     * The result is built afresh on every call by filtering {@link #childNodes()} down to Element nodes.
+     * @return the child elements, in document order; an empty list if this element has no element children.
+     * @see #childNodes()
+     */
+    public Elements children() {
+        // built on demand instead of keeping a second list in sync; memoize (and mark dirty on change) if this gets slow
+        List<Element> found = new ArrayList<Element>();
+        for (Node candidate : childNodes) {
+            if (!(candidate instanceof Element))
+                continue;
+            found.add((Element) candidate);
+        }
+        return new Elements(found);
+    }
+
+ /**
+ * Get this element's child text nodes. The list is unmodifiable but the text nodes may be manipulated.
+ * <p/>
+ * This is effectively a filter on {@link #childNodes()} to get Text nodes.
+ * @return child text nodes. If this element has no text nodes, returns an
+ * empty list.
+ * <p/>
+ * For example, with the input HTML: {@code <p>One <span>Two</span> Three <br> Four</p>} with the {@code p} element selected:
+ * <ul>
+ * <li>{@code p.text()} = {@code "One Two Three Four"}</li>
+ * <li>{@code p.ownText()} = {@code "One Three Four"}</li>
+ * <li>{@code p.children()} = {@code Elements[<span>, <br>]}</li>
+ * <li>{@code p.childNodes()} = {@code List<Node>["One ", <span>, " Three ", <br>, " Four"]}</li>
+ * <li>{@code p.textNodes()} = {@code List<TextNode>["One ", " Three ", " Four"]}</li>
+ * </ul>
+ */
+ public List<TextNode> textNodes() {
+ List<TextNode> textNodes = new ArrayList<TextNode>();
+ for (Node node : childNodes) {
+ if (node instanceof TextNode)
+ textNodes.add((TextNode) node);
+ }
+ return Collections.unmodifiableList(textNodes);
+ }
+
+ /**
+ * Get this element's child data nodes. The list is unmodifiable but the data nodes may be manipulated.
+ * <p/>
+ * This is effectively a filter on {@link #childNodes()} to get Data nodes.
+ * @return child data nodes. If this element has no data nodes, returns an
+ * empty list.
+ * @see #data()
+ */
+ public List<DataNode> dataNodes() {
+ List<DataNode> dataNodes = new ArrayList<DataNode>();
+ for (Node node : childNodes) {
+ if (node instanceof DataNode)
+ dataNodes.add((DataNode) node);
+ }
+ return Collections.unmodifiableList(dataNodes);
+ }
+
+ /**
+ * Find elements that match the {@link Selector} CSS query, with this element as the starting context. Matched elements
+ * may include this element, or any of its children.
+ * <p/>
+ * This method is generally more powerful to use than the DOM-type {@code getElementBy*} methods, because
+ * multiple filters can be combined, e.g.:
+ * <ul>
+ * <li>{@code el.select("a[href]")} - finds links ({@code a} tags with {@code href} attributes)
+ * <li>{@code el.select("a[href*=example.com]")} - finds links pointing to example.com (loosely)
+ * </ul>
+ * <p/>
+ * See the query syntax documentation in {@link org.jsoup.select.Selector}.
+ *
+ * @param cssQuery a {@link Selector} CSS-like query
+ * @return elements that match the query (empty if none match)
+ * @see org.jsoup.select.Selector
+ */
+ public Elements select(String cssQuery) {
+ return Selector.select(cssQuery, this);
+ }
+
+ /**
+ * Add a node child node to this element.
+ *
+ * @param child node to add. Must not already have a parent.
+ * @return this element, so that you can add more child nodes or elements.
+ */
+ public Element appendChild(Node child) {
+ Validate.notNull(child);
+
+ addChildren(child);
+ return this;
+ }
+
+ /**
+ * Add a node to the start of this element's children.
+ *
+ * @param child node to add. Must not already have a parent.
+ * @return this element, so that you can add more child nodes or elements.
+ */
+ public Element prependChild(Node child) {
+ Validate.notNull(child);
+
+ addChildren(0, child);
+ return this;
+ }
+
+ /**
+ * Create a new element by tag name, and add it as the last child.
+ *
+ * @param tagName the name of the tag (e.g. {@code div}).
+ * @return the new element, to allow you to add content to it, e.g.:
+ * {@code parent.appendElement("h1").attr("id", "header").text("Welcome");}
+ */
+ public Element appendElement(String tagName) {
+ Element child = new Element(Tag.valueOf(tagName), baseUri());
+ appendChild(child);
+ return child;
+ }
+
+ /**
+ * Create a new element by tag name, and add it as the first child.
+ *
+ * @param tagName the name of the tag (e.g. {@code div}).
+ * @return the new element, to allow you to add content to it, e.g.:
+ * {@code parent.prependElement("h1").attr("id", "header").text("Welcome");}
+ */
+ public Element prependElement(String tagName) {
+ Element child = new Element(Tag.valueOf(tagName), baseUri());
+ prependChild(child);
+ return child;
+ }
+
+ /**
+ * Create and append a new TextNode to this element.
+ *
+ * @param text the unencoded text to add
+ * @return this element
+ */
+ public Element appendText(String text) {
+ TextNode node = new TextNode(text, baseUri());
+ appendChild(node);
+ return this;
+ }
+
+ /**
+ * Create and prepend a new TextNode to this element.
+ *
+ * @param text the unencoded text to add
+ * @return this element
+ */
+ public Element prependText(String text) {
+ TextNode node = new TextNode(text, baseUri());
+ prependChild(node);
+ return this;
+ }
+
+ /**
+ * Add inner HTML to this element. The supplied HTML will be parsed, and each node appended to the end of the children.
+ * @param html HTML to add inside this element, after the existing HTML
+ * @return this element
+ * @see #html(String)
+ */
+ public Element append(String html) {
+ Validate.notNull(html);
+
+ List<Node> nodes = Parser.parseFragment(html, this, baseUri());
+ addChildren(nodes.toArray(new Node[nodes.size()]));
+ return this;
+ }
+
+ /**
+ * Add inner HTML into this element. The supplied HTML will be parsed, and each node prepended to the start of the element's children.
+ * @param html HTML to add inside this element, before the existing HTML
+ * @return this element
+ * @see #html(String)
+ */
+ public Element prepend(String html) {
+ Validate.notNull(html);
+
+ List<Node> nodes = Parser.parseFragment(html, this, baseUri());
+ addChildren(0, nodes.toArray(new Node[nodes.size()]));
+ return this;
+ }
+
+ /**
+ * Insert the specified HTML into the DOM before this element (i.e. as a preceding sibling).
+ *
+ * @param html HTML to add before this element
+ * @return this element, for chaining
+ * @see #after(String)
+ */
+ @Override
+ public Element before(String html) {
+ return (Element) super.before(html);
+ }
+
+ /**
+ * Insert the specified node into the DOM before this node (i.e. as a preceding sibling).
+ * @param node to add before this element
+ * @return this Element, for chaining
+ * @see #after(Node)
+ */
+ @Override
+ public Element before(Node node) {
+ return (Element) super.before(node);
+ }
+
+ /**
+ * Insert the specified HTML into the DOM after this element (i.e. as a following sibling).
+ *
+ * @param html HTML to add after this element
+ * @return this element, for chaining
+ * @see #before(String)
+ */
+ @Override
+ public Element after(String html) {
+ return (Element) super.after(html);
+ }
+
+ /**
+ * Insert the specified node into the DOM after this node (i.e. as a following sibling).
+ * @param node to add after this element
+ * @return this element, for chaining
+ * @see #before(Node)
+ */
+ @Override
+ public Element after(Node node) {
+ return (Element) super.after(node);
+ }
+
+ /**
+ * Remove all of the element's child nodes. Any attributes are left as-is.
+ * @return this element
+ */
+ public Element empty() {
+ childNodes.clear();
+ return this;
+ }
+
+ /**
+ * Wrap the supplied HTML around this element.
+ *
+ * @param html HTML to wrap around this element, e.g. {@code <div class="head"></div>}. Can be arbitrarily deep.
+ * @return this element, for chaining.
+ */
+ @Override
+ public Element wrap(String html) {
+ return (Element) super.wrap(html);
+ }
+
+ /**
+ * Get sibling elements. If the element has no sibling elements, returns an empty list. An element is not a sibling
+ * of itself, so will not be included in the returned list.
+ * @return sibling elements
+ */
+ public Elements siblingElements() {
+ if (parentNode == null)
+ return new Elements(0);
+
+ List<Element> elements = parent().children();
+ Elements siblings = new Elements(elements.size() - 1);
+ for (Element el: elements)
+ if (el != this)
+ siblings.add(el);
+ return siblings;
+ }
+
+ /**
+ * Gets the next sibling element of this element. E.g., if a {@code div} contains two {@code p}s,
+ * the {@code nextElementSibling} of the first {@code p} is the second {@code p}.
+ * <p/>
+ * This is similar to {@link #nextSibling()}, but specifically finds only Elements
+ * @return the next element, or null if there is no next element
+ * @see #previousElementSibling()
+ */
+ public Element nextElementSibling() {
+ if (parentNode == null) return null;
+ List<Element> siblings = parent().children();
+ Integer index = indexInList(this, siblings);
+ Validate.notNull(index);
+ if (siblings.size() > index+1)
+ return siblings.get(index+1);
+ else
+ return null;
+ }
+
+ /**
+ * Gets the previous element sibling of this element.
+ * @return the previous element, or null if there is no previous element
+ * @see #nextElementSibling()
+ */
+ public Element previousElementSibling() {
+ if (parentNode == null) return null;
+ List<Element> siblings = parent().children();
+ Integer index = indexInList(this, siblings);
+ Validate.notNull(index);
+ if (index > 0)
+ return siblings.get(index-1);
+ else
+ return null;
+ }
+
+    /**
+     * Gets the first element sibling of this element.
+     * @return the first sibling that is an element (aka the parent's first element child), or {@code null} if this
+     * element has no parent or is its parent's only element child
+     */
+    public Element firstElementSibling() {
+        // an orphaned element has no siblings; guard as nextElementSibling() does instead of failing on a null parent
+        if (parentNode == null) return null;
+        // todo: should firstSibling() exclude this?
+        List<Element> siblings = parent().children();
+        return siblings.size() > 1 ? siblings.get(0) : null;
+    }
+
+ /**
+ * Get the list index of this element in its element sibling list. I.e. if this is the first element
+ * sibling, returns 0.
+ * @return position in element sibling list
+ */
+ public Integer elementSiblingIndex() {
+ if (parent() == null) return 0;
+ return indexInList(this, parent().children());
+ }
+
+    /**
+     * Gets the last element sibling of this element
+     * @return the last sibling that is an element (aka the parent's last element child), or {@code null} if this
+     * element has no parent or is its parent's only element child
+     */
+    public Element lastElementSibling() {
+        // an orphaned element has no siblings; guard as nextElementSibling() does instead of failing on a null parent
+        if (parentNode == null) return null;
+        List<Element> siblings = parent().children();
+        return siblings.size() > 1 ? siblings.get(siblings.size() - 1) : null;
+    }
+
+ private static <E extends Element> Integer indexInList(Element search, List<E> elements) {
+ Validate.notNull(search);
+ Validate.notNull(elements);
+
+ for (int i = 0; i < elements.size(); i++) {
+ E element = elements.get(i);
+ if (element.equals(search))
+ return i;
+ }
+ return null;
+ }
+
+ // DOM type methods
+
+ /**
+ * Finds elements, including and recursively under this element, with the specified tag name.
+ * @param tagName The tag name to search for (case insensitively).
+ * @return a matching unmodifiable list of elements. Will be empty if this element and none of its children match.
+ */
+ public Elements getElementsByTag(String tagName) {
+ Validate.notEmpty(tagName);
+ tagName = tagName.toLowerCase().trim();
+
+ return Collector.collect(new Evaluator.Tag(tagName), this);
+ }
+
+ /**
+ * Find an element by ID, including or under this element.
+ * <p>
+ * Note that this finds the first matching ID, starting with this element. If you search down from a different
+ * starting point, it is possible to find a different element by ID. For unique element by ID within a Document,
+ * use {@link Document#getElementById(String)}
+ * @param id The ID to search for.
+ * @return The first matching element by ID, starting with this element, or null if none found.
+ */
+ public Element getElementById(String id) {
+ Validate.notEmpty(id);
+
+ Elements elements = Collector.collect(new Evaluator.Id(id), this);
+ if (elements.size() > 0)
+ return elements.get(0);
+ else
+ return null;
+ }
+
+ /**
+ * Find elements that have this class, including or under this element. Case insensitive.
+ * <p>
+ * Elements can have multiple classes (e.g. {@code <div class="header round first">}. This method
+ * checks each class, so you can find the above with {@code el.getElementsByClass("header");}.
+ *
+ * @param className the name of the class to search for.
+ * @return elements with the supplied class name, empty if none
+ * @see #hasClass(String)
+ * @see #classNames()
+ */
+ public Elements getElementsByClass(String className) {
+ Validate.notEmpty(className);
+
+ return Collector.collect(new Evaluator.Class(className), this);
+ }
+
+ /**
+ * Find elements that have a named attribute set. Case insensitive.
+ *
+ * @param key name of the attribute, e.g. {@code href}
+ * @return elements that have this attribute, empty if none
+ */
+ public Elements getElementsByAttribute(String key) {
+ Validate.notEmpty(key);
+ key = key.trim().toLowerCase();
+
+ return Collector.collect(new Evaluator.Attribute(key), this);
+ }
+
+ /**
+ * Find elements that have an attribute name starting with the supplied prefix. Use {@code data-} to find elements
+ * that have HTML5 datasets.
+ * @param keyPrefix name prefix of the attribute e.g. {@code data-}
+ * @return elements that have attribute names that start with with the prefix, empty if none.
+ */
+ public Elements getElementsByAttributeStarting(String keyPrefix) {
+ Validate.notEmpty(keyPrefix);
+ keyPrefix = keyPrefix.trim().toLowerCase();
+
+ return Collector.collect(new Evaluator.AttributeStarting(keyPrefix), this);
+ }
+
+ /**
+ * Find elements that have an attribute with the specific value. Case insensitive.
+ *
+ * @param key name of the attribute
+ * @param value value of the attribute
+ * @return elements that have this attribute with this value, empty if none
+ */
+ public Elements getElementsByAttributeValue(String key, String value) {
+ return Collector.collect(new Evaluator.AttributeWithValue(key, value), this);
+ }
+
+ /**
+ * Find elements that either do not have this attribute, or have it with a different value. Case insensitive.
+ *
+ * @param key name of the attribute
+ * @param value value of the attribute
+ * @return elements that do not have a matching attribute
+ */
+ public Elements getElementsByAttributeValueNot(String key, String value) {
+ return Collector.collect(new Evaluator.AttributeWithValueNot(key, value), this);
+ }
+
+ /**
+ * Find elements that have attributes that start with the value prefix. Case insensitive.
+ *
+ * @param key name of the attribute
+ * @param valuePrefix start of attribute value
+ * @return elements that have attributes that start with the value prefix
+ */
+ public Elements getElementsByAttributeValueStarting(String key, String valuePrefix) {
+ return Collector.collect(new Evaluator.AttributeWithValueStarting(key, valuePrefix), this);
+ }
+
+ /**
+ * Find elements that have attributes that end with the value suffix. Case insensitive.
+ *
+ * @param key name of the attribute
+ * @param valueSuffix end of the attribute value
+ * @return elements that have attributes that end with the value suffix
+ */
+ public Elements getElementsByAttributeValueEnding(String key, String valueSuffix) {
+ return Collector.collect(new Evaluator.AttributeWithValueEnding(key, valueSuffix), this);
+ }
+
+ /**
+ * Find elements that have attributes whose value contains the match string. Case insensitive.
+ *
+ * @param key name of the attribute
+ * @param match substring of value to search for
+ * @return elements that have attributes containing this text
+ */
+ public Elements getElementsByAttributeValueContaining(String key, String match) {
+ return Collector.collect(new Evaluator.AttributeWithValueContaining(key, match), this);
+ }
+
+ /**
+ * Find elements that have attributes whose values match the supplied regular expression.
+ * @param key name of the attribute
+ * @param pattern compiled regular expression to match against attribute values
+ * @return elements that have attributes matching this regular expression
+ */
+ public Elements getElementsByAttributeValueMatching(String key, Pattern pattern) {
+ return Collector.collect(new Evaluator.AttributeWithValueMatching(key, pattern), this);
+
+ }
+
+ /**
+ * Find elements that have attributes whose values match the supplied regular expression.
+ * @param key name of the attribute
+ * @param regex regular expression to match against attribute values. You can use <a href="http://java.sun.com/docs/books/tutorial/essential/regex/pattern.html#embedded">embedded flags</a> (such as (?i) and (?m) to control regex options.
+ * @return elements that have attributes matching this regular expression
+ */
+ public Elements getElementsByAttributeValueMatching(String key, String regex) {
+ Pattern pattern;
+ try {
+ pattern = Pattern.compile(regex);
+ } catch (PatternSyntaxException e) {
+ throw new IllegalArgumentException("Pattern syntax error: " + regex, e);
+ }
+ return getElementsByAttributeValueMatching(key, pattern);
+ }
+
+ /**
+ * Find elements whose sibling index is less than the supplied index.
+ * @param index 0-based index
+ * @return elements less than index
+ */
+ public Elements getElementsByIndexLessThan(int index) {
+ return Collector.collect(new Evaluator.IndexLessThan(index), this);
+ }
+
+ /**
+ * Find elements whose sibling index is greater than the supplied index.
+ * @param index 0-based index
+ * @return elements greater than index
+ */
+ public Elements getElementsByIndexGreaterThan(int index) {
+ return Collector.collect(new Evaluator.IndexGreaterThan(index), this);
+ }
+
+ /**
+ * Find elements whose sibling index is equal to the supplied index.
+ * @param index 0-based index
+ * @return elements equal to index
+ */
+ public Elements getElementsByIndexEquals(int index) {
+ return Collector.collect(new Evaluator.IndexEquals(index), this);
+ }
+
+ /**
+ * Find elements that contain the specified string. The search is case insensitive. The text may appear directly
+ * in the element, or in any of its descendants.
+ * @param searchText to look for in the element's text
+ * @return elements that contain the string, case insensitive.
+ * @see Element#text()
+ */
+ public Elements getElementsContainingText(String searchText) {
+ return Collector.collect(new Evaluator.ContainsText(searchText), this);
+ }
+
+ /**
+ * Find elements that directly contain the specified string. The search is case insensitive. The text must appear directly
+ * in the element, not in any of its descendants.
+ * @param searchText to look for in the element's own text
+ * @return elements that contain the string, case insensitive.
+ * @see Element#ownText()
+ */
+ public Elements getElementsContainingOwnText(String searchText) {
+ return Collector.collect(new Evaluator.ContainsOwnText(searchText), this);
+ }
+
+ /**
+ * Find elements whose text matches the supplied regular expression.
+ * @param pattern regular expression to match text against
+ * @return elements matching the supplied regular expression.
+ * @see Element#text()
+ */
+ public Elements getElementsMatchingText(Pattern pattern) {
+ return Collector.collect(new Evaluator.Matches(pattern), this);
+ }
+
+ /**
+ * Find elements whose text matches the supplied regular expression.
+ * @param regex regular expression to match text against. You can use <a href="http://java.sun.com/docs/books/tutorial/essential/regex/pattern.html#embedded">embedded flags</a> (such as (?i) and (?m) to control regex options.
+ * @return elements matching the supplied regular expression.
+ * @see Element#text()
+ */
+ public Elements getElementsMatchingText(String regex) {
+ Pattern pattern;
+ try {
+ pattern = Pattern.compile(regex);
+ } catch (PatternSyntaxException e) {
+ throw new IllegalArgumentException("Pattern syntax error: " + regex, e);
+ }
+ return getElementsMatchingText(pattern);
+ }
+
+ /**
+ * Find elements whose own text matches the supplied regular expression.
+ * @param pattern regular expression to match text against
+ * @return elements matching the supplied regular expression.
+ * @see Element#ownText()
+ */
+ public Elements getElementsMatchingOwnText(Pattern pattern) {
+ return Collector.collect(new Evaluator.MatchesOwn(pattern), this);
+ }
+
+ /**
+ * Find elements whose text matches the supplied regular expression.
+ * @param regex regular expression to match text against. You can use <a href="http://java.sun.com/docs/books/tutorial/essential/regex/pattern.html#embedded">embedded flags</a> (such as (?i) and (?m) to control regex options.
+ * @return elements matching the supplied regular expression.
+ * @see Element#ownText()
+ */
+ public Elements getElementsMatchingOwnText(String regex) {
+ Pattern pattern;
+ try {
+ pattern = Pattern.compile(regex);
+ } catch (PatternSyntaxException e) {
+ throw new IllegalArgumentException("Pattern syntax error: " + regex, e);
+ }
+ return getElementsMatchingOwnText(pattern);
+ }
+
+ /**
+ * Find all elements under this element (including self, and children of children).
+ *
+ * @return all elements
+ */
+ public Elements getAllElements() {
+ return Collector.collect(new Evaluator.AllElements(), this);
+ }
+
+ /**
+ * Gets the combined text of this element and all its children.
+ * <p>
+ * For example, given HTML {@code <p>Hello <b>there</b> now!</p>}, {@code p.text()} returns {@code "Hello there now!"}
+ *
+ * @return unencoded text, or empty string if none.
+ * @see #ownText()
+ * @see #textNodes()
+ */
+ public String text() {
+ StringBuilder sb = new StringBuilder();
+ text(sb);
+ return sb.toString().trim();
+ }
+
+ private void text(StringBuilder accum) {
+ appendWhitespaceIfBr(this, accum);
+
+ for (Node child : childNodes) {
+ if (child instanceof TextNode) {
+ TextNode textNode = (TextNode) child;
+ appendNormalisedText(accum, textNode);
+ } else if (child instanceof Element) {
+ Element element = (Element) child;
+ if (accum.length() > 0 && element.isBlock() && !TextNode.lastCharIsWhitespace(accum))
+ accum.append(" ");
+ element.text(accum);
+ }
+ }
+ }
+
+ /**
+ * Gets the text owned by this element only; does not get the combined text of all children.
+ * <p>
+ * For example, given HTML {@code <p>Hello <b>there</b> now!</p>}, {@code p.ownText()} returns {@code "Hello now!"},
+ * whereas {@code p.text()} returns {@code "Hello there now!"}.
+ * Note that the text within the {@code b} element is not returned, as it is not a direct child of the {@code p} element.
+ *
+ * @return unencoded text, or empty string if none.
+ * @see #text()
+ * @see #textNodes()
+ */
+ public String ownText() {
+ StringBuilder sb = new StringBuilder();
+ ownText(sb);
+ return sb.toString().trim();
+ }
+
+ private void ownText(StringBuilder accum) {
+ for (Node child : childNodes) {
+ if (child instanceof TextNode) {
+ TextNode textNode = (TextNode) child;
+ appendNormalisedText(accum, textNode);
+ } else if (child instanceof Element) {
+ appendWhitespaceIfBr((Element) child, accum);
+ }
+ }
+ }
+
+ private void appendNormalisedText(StringBuilder accum, TextNode textNode) {
+ String text = textNode.getWholeText();
+
+ if (!preserveWhitespace()) {
+ text = TextNode.normaliseWhitespace(text);
+ if (TextNode.lastCharIsWhitespace(accum))
+ text = TextNode.stripLeadingWhitespace(text);
+ }
+ accum.append(text);
+ }
+
+ private static void appendWhitespaceIfBr(Element element, StringBuilder accum) {
+ if (element.tag.getName().equals("br") && !TextNode.lastCharIsWhitespace(accum))
+ accum.append(" ");
+ }
+
+ boolean preserveWhitespace() {
+ return tag.preserveWhitespace() || parent() != null && parent().preserveWhitespace();
+ }
+
+ /**
+ * Set the text of this element. Any existing contents (text or elements) will be cleared
+ * @param text unencoded text
+ * @return this element
+ */
+ public Element text(String text) {
+ Validate.notNull(text);
+
+ empty();
+ TextNode textNode = new TextNode(text, baseUri);
+ appendChild(textNode);
+
+ return this;
+ }
+
+ /**
+ Test if this element has any text content (that is not just whitespace).
+ @return true if element has non-blank text content.
+ */
+ public boolean hasText() {
+ for (Node child: childNodes) {
+ if (child instanceof TextNode) {
+ TextNode textNode = (TextNode) child;
+ if (!textNode.isBlank())
+ return true;
+ } else if (child instanceof Element) {
+ Element el = (Element) child;
+ if (el.hasText())
+ return true;
+ }
+ }
+ return false;
+ }
+
+ /**
+ * Get the combined data of this element. Data is e.g. the inside of a {@code script} tag.
+ * @return the data, or empty string if none
+ *
+ * @see #dataNodes()
+ */
+ public String data() {
+ StringBuilder sb = new StringBuilder();
+
+ for (Node childNode : childNodes) {
+ if (childNode instanceof DataNode) {
+ DataNode data = (DataNode) childNode;
+ sb.append(data.getWholeData());
+ } else if (childNode instanceof Element) {
+ Element element = (Element) childNode;
+ String elementData = element.data();
+ sb.append(elementData);
+ }
+ }
+ return sb.toString();
+ }
+
+ /**
+ * Gets the literal value of this element's "class" attribute, which may include multiple class names, space
+ * separated. (E.g. on <code>&lt;div class="header gray"></code> returns, "<code>header gray</code>")
+ * @return The literal class attribute, or <b>empty string</b> if no class attribute set.
+ */
+ public String className() {
+ return attr("class");
+ }
+
+    /**
+     * Get all of the element's class names. E.g. on element {@code <div class="header gray">},
+     * returns a set of two elements {@code "header", "gray"}. Note that modifications to this set are not pushed to
+     * the backing {@code class} attribute; use the {@link #classNames(java.util.Set)} method to persist them.
+     * <p>
+     * The set is derived from the {@code class} attribute on first use and then cached on the element.
+     * @return set of classnames, empty if no class attribute
+     */
+    public Set<String> classNames() {
+        if (classNames == null) {
+            String[] names = className().split("\\s+");
+            classNames = new LinkedHashSet<String>(Arrays.asList(names));
+            // splitting an empty or whitespace-led attribute yields an empty token, which is not a class name
+            classNames.remove("");
+        }
+        return classNames;
+    }
+
+    /**
+     Set the element's {@code class} attribute to the supplied class names.
+     @param classNames set of classes; must not be null
+     @return this element, for chaining
+     */
+    public Element classNames(Set<String> classNames) {
+        Validate.notNull(classNames);
+        attributes.put("class", StringUtil.join(classNames, " "));
+        // drop the cached set so that classNames() and hasClass() re-derive it from the attribute just written,
+        // rather than continuing to report whatever was cached before this call
+        this.classNames = null;
+        return this;
+    }
+
+    /**
+     * Tests whether any of this element's class names equals the supplied name, ignoring case.
+     * @param className name of class to check for
+     * @return true if it does, false if not
+     */
+    public boolean hasClass(String className) {
+        for (String candidate : classNames())
+            if (className.equalsIgnoreCase(candidate))
+                return true;
+        return false;
+    }
+
+ /**
+ Add a class name to this element's {@code class} attribute.
+ @param className class name to add
+ @return this element
+ */
+ public Element addClass(String className) {
+ Validate.notNull(className);
+
+ Set<String> classes = classNames();
+ classes.add(className);
+ classNames(classes);
+
+ return this;
+ }
+
+ /**
+ Remove a class name from this element's {@code class} attribute.
+ @param className class name to remove
+ @return this element
+ */
+ public Element removeClass(String className) {
+ Validate.notNull(className);
+
+ Set<String> classes = classNames();
+ classes.remove(className);
+ classNames(classes);
+
+ return this;
+ }
+
+ /**
+ Toggle a class name on this element's {@code class} attribute: if present, remove it; otherwise add it.
+ @param className class name to toggle
+ @return this element
+ */
+ public Element toggleClass(String className) {
+ Validate.notNull(className);
+
+ Set<String> classes = classNames();
+ if (classes.contains(className))
+ classes.remove(className);
+ else
+ classes.add(className);
+ classNames(classes);
+
+ return this;
+ }
+
+ /**
+ * Get the value of a form element (input, textarea, etc).
+ * @return the value of the form element, or empty string if not set.
+ */
+ public String val() {
+ if (tagName().equals("textarea"))
+ return text();
+ else
+ return attr("value");
+ }
+
+ /**
+ * Set the value of a form element (input, textarea, etc).
+ * @param value value to set
+ * @return this element (for chaining)
+ */
+ public Element val(String value) {
+ if (tagName().equals("textarea"))
+ text(value);
+ else
+ attr("value", value);
+ return this;
+ }
+
+    /**
+     * Write this element's opening tag (name and attributes) to the accumulator. When pretty-printing, and unless
+     * this is the very start of the output, the tag is indented first if this element or its parent is formatted as
+     * a block. A childless self-closing tag is written as {@code <name />}.
+     *
+     * @param accum builder receiving the HTML
+     * @param depth current nesting depth, used for indentation
+     * @param out output settings controlling pretty-printing and attribute rendering
+     */
+    @Override
+    void outerHtmlHead(StringBuilder accum, int depth, Document.OutputSettings out) {
+        if (accum.length() > 0 && out.prettyPrint() && (tag.formatAsBlock() || (parent() != null && parent().tag().formatAsBlock())))
+            indent(accum, depth, out);
+        accum
+                .append("<")
+                .append(tagName());
+        attributes.html(accum, out);
+
+        if (childNodes.isEmpty() && tag.isSelfClosing())
+            accum.append(" />");
+        else
+            accum.append(">");
+    }
+
+    /**
+     * Write this element's closing tag to the accumulator. Nothing is written for a childless self-closing tag,
+     * because the opening tag already closed it. When pretty-printing, the closing tag of a block-formatted element
+     * that has children is indented first.
+     *
+     * @param accum builder receiving the HTML
+     * @param depth current nesting depth, used for indentation
+     * @param out output settings controlling pretty-printing
+     */
+    @Override
+    void outerHtmlTail(StringBuilder accum, int depth, Document.OutputSettings out) {
+        if (!(childNodes.isEmpty() && tag.isSelfClosing())) {
+            if (out.prettyPrint() && !childNodes.isEmpty() && tag.formatAsBlock())
+                indent(accum, depth, out);
+            accum.append("</").append(tagName()).append(">");
+        }
+    }
+
+ /**
+ * Retrieves the element's inner HTML. E.g. on a {@code <div>} with one empty {@code <p>}, would return
+ * {@code <p></p>}. (Whereas {@link #outerHtml()} would return {@code <div><p></p></div>}.)
+ *
+ * @return String of HTML.
+ * @see #outerHtml()
+ */
+ public String html() {
+ StringBuilder accum = new StringBuilder();
+ html(accum);
+ return accum.toString().trim();
+ }
+
+ private void html(StringBuilder accum) {
+ for (Node node : childNodes)
+ node.outerHtml(accum);
+ }
+
+ /**
+ * Set this element's inner HTML. Clears the existing HTML first.
+ * @param html HTML to parse and set into this element
+ * @return this element
+ * @see #append(String)
+ */
+ public Element html(String html) {
+ empty();
+ append(html);
+ return this;
+ }
+
+    /**
+     * Gets this element's outer HTML.
+     * @return the same string as {@link #outerHtml()}
+     */
+    @Override
+    public String toString() {
+        return outerHtml();
+    }
+
+ @Override
+ public boolean equals(Object o) {
+ return this == o;
+ }
+
+ @Override
+ public int hashCode() {
+ // todo: fixup, not very useful
+ int result = super.hashCode();
+ result = 31 * result + (tag != null ? tag.hashCode() : 0);
+ return result;
+ }
+
+    /**
+     * Create a copy of this element. The copy does not share this element's cached class-name set; it derives its
+     * own from its {@code class} attribute the first time {@link #classNames()} is called on it.
+     * @return the cloned element
+     */
+    @Override
+    public Element clone() {
+        Element clone = (Element) super.clone();
+        // the copied field would otherwise point at this element's mutable set, so that adding a class to the clone
+        // would also change what this element reports; reset it so the clone derives its own set on first use
+        clone.classNames = null;
+        return clone;
+    }
+}
diff --git a/server/src/org/jsoup/nodes/Entities.java b/server/src/org/jsoup/nodes/Entities.java
new file mode 100644
index 0000000000..0ae83e1fc0
--- /dev/null
+++ b/server/src/org/jsoup/nodes/Entities.java
@@ -0,0 +1,184 @@
+package org.jsoup.nodes;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.charset.CharsetEncoder;
+import java.util.*;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * HTML entities, and escape routines.
+ * Source: <a href="http://www.w3.org/TR/html5/named-character-references.html#named-character-references">W3C HTML
+ * named character references</a>.
+ */
+public class Entities {
+    /** Selects which character-to-entity-name table is consulted when escaping output. */
+    public enum EscapeMode {
+        /** Restricted entities suitable for XHTML output: lt, gt, amp, apos, and quot only. */
+        xhtml(xhtmlByVal),
+        /** Default HTML output entities. */
+        base(baseByVal),
+        /** Complete HTML entities. */
+        extended(fullByVal);
+
+        private final Map<Character, String> map;
+
+        EscapeMode(Map<Character, String> map) {
+            this.map = map;
+        }
+
+        /**
+         * @return the map from character to entity name (without the surrounding {@code &} and {@code ;}) for this mode
+         */
+        public Map<Character, String> getMap() {
+            return map;
+        }
+    }
+
+ private static final Map<String, Character> full;
+ private static final Map<Character, String> xhtmlByVal;
+ private static final Map<Character, String> baseByVal;
+ private static final Map<Character, String> fullByVal;
+ private static final Pattern unescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);?");
+ private static final Pattern strictUnescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);");
+
+ private Entities() {}
+
+ /**
+ * Check if the input is a known named entity
+ * @param name the possible entity name (e.g. "lt" or "amp")
+ * @return true if a known named entity
+ */
+ public static boolean isNamedEntity(String name) {
+ return full.containsKey(name);
+ }
+
+ /**
+ * Get the Character value of the named entity
+ * @param name named entity (e.g. "lt" or "amp")
+ * @return the Character value of the named entity (e.g. '<' or '&')
+ */
+ public static Character getCharacterByName(String name) {
+ return full.get(name);
+ }
+
+ static String escape(String string, Document.OutputSettings out) {
+ return escape(string, out.encoder(), out.escapeMode());
+ }
+
+    /**
+     * Escape a string for output, walking it by Unicode code point.
+     * <p>
+     * A character that has a name in the escape mode's map is written as {@code &name;}. Otherwise it is written
+     * literally if the encoder can represent it, or as a decimal numeric reference ({@code &#NNN;}) if not.
+     * A surrogate pair is treated as one code point, so an unencodable supplementary character becomes a single
+     * numeric reference instead of two references to its surrogate halves.
+     *
+     * @param string text to escape (not null)
+     * @param encoder encoder of the output charset, used to test whether a character can be written as-is
+     * @param escapeMode supplies the character-to-entity-name map
+     * @return the escaped string
+     */
+    static String escape(String string, CharsetEncoder encoder, EscapeMode escapeMode) {
+        StringBuilder accum = new StringBuilder(string.length() * 2);
+        Map<Character, String> map = escapeMode.getMap();
+        final int length = string.length();
+
+        for (int pos = 0; pos < length; ) {
+            final int codePoint = string.codePointAt(pos);
+            final int width = Character.charCount(codePoint);
+
+            if (width == 1) {
+                // BMP character (or an unpaired surrogate, which can only ever be written numerically)
+                final char c = (char) codePoint;
+                final String name = map.get(Character.valueOf(c)); // single lookup instead of containsKey + get
+                if (name != null)
+                    accum.append('&').append(name).append(';');
+                else if (encoder.canEncode(c))
+                    accum.append(c);
+                else
+                    accum.append("&#").append(codePoint).append(';');
+            } else {
+                // supplementary character: the entity maps are keyed by single chars, so only encodability matters.
+                // canEncode(char) is false for each half of a pair, hence the CharSequence overload.
+                final String pair = string.substring(pos, pos + width);
+                if (encoder.canEncode(pair))
+                    accum.append(pair);
+                else
+                    accum.append("&#").append(codePoint).append(';');
+            }
+            pos += width;
+        }
+
+        return accum.toString();
+    }
+
+ static String unescape(String string) {
+ return unescape(string, false);
+ }
+
+    /**
+     * Unescape the input string, replacing numeric ({@code &#38;}, {@code &#x26;}) and known named
+     * ({@code &amp;}) character references with the characters they denote. References that are unknown,
+     * unparseable, or outside the Unicode code point range are copied to the output exactly as they appeared.
+     *
+     * @param string text that may contain character references
+     * @param strict if true, a reference must end with ';' to be recognised; otherwise the trailing ';' is optional
+     * @return the unescaped string (the input itself when it contains no '&amp;')
+     */
+    static String unescape(String string, boolean strict) {
+        // todo: change this method to use Tokeniser.consumeCharacterReference
+        if (!string.contains("&"))
+            return string;
+
+        Matcher m = strict ? strictUnescapePattern.matcher(string) : unescapePattern.matcher(string);
+        StringBuffer accum = new StringBuffer(string.length()); // pity matcher can't use stringbuilder, avoid syncs
+        // todo: replace m.appendReplacement with own impl, so StringBuilder and quoteReplacement not required
+
+        while (m.find()) {
+            int charval = -1; // -1 means "could not resolve this reference"
+            String num = m.group(3);
+            if (num != null) {
+                try {
+                    int base = m.group(2) != null ? 16 : 10; // 2 is hex indicator
+                    charval = Integer.parseInt(num, base);
+                } catch (NumberFormatException ignored) {
+                    // value overflows int, or hex digits appear in a decimal reference: leave charval at -1
+                }
+            } else {
+                Character named = full.get(m.group(1));
+                if (named != null)
+                    charval = named.charValue();
+            }
+
+            // Only substitute when the value is a real code point. The previous test (!= -1 || > 0xFFFF) let
+            // values above 0xFFFF through to a (char) cast, which silently truncated them to a different character.
+            String replacement;
+            if (charval != -1 && Character.isValidCodePoint(charval))
+                replacement = new String(Character.toChars(charval)); // one char for the BMP, a surrogate pair above it
+            else
+                replacement = m.group(0); // replace with original string
+            m.appendReplacement(accum, Matcher.quoteReplacement(replacement));
+        }
+        m.appendTail(accum);
+        return accum.toString();
+    }
+
+ // xhtml has restricted entities
+ private static final Object[][] xhtmlArray = {
+ {"quot", 0x00022},
+ {"amp", 0x00026},
+ {"apos", 0x00027},
+ {"lt", 0x0003C},
+ {"gt", 0x0003E}
+ };
+
+ // Builds the lookup tables once, when the class is initialised. The by-character maps for the base and full
+ // sets are produced by inverting the name-to-character maps loaded from the two properties resources; the
+ // xhtml by-character map is filled from xhtmlArray, which must therefore be declared above this block.
+ static {
+ xhtmlByVal = new HashMap<Character, String>();
+ baseByVal = toCharacterKey(loadEntities("entities-base.properties")); // most common / default
+ full = loadEntities("entities-full.properties"); // extended and overblown.
+ fullByVal = toCharacterKey(full);
+
+ for (Object[] entity : xhtmlArray) {
+ // each row is {name, code point}; narrow the Integer code point to a char key
+ Character c = Character.valueOf((char) ((Integer) entity[1]).intValue());
+ xhtmlByVal.put(c, ((String) entity[0]));
+ }
+ }
+
+    /**
+     * Load an entity table from a properties resource, where each entry is {@code name=hex code point}.
+     *
+     * @param filename resource name, resolved relative to this class
+     * @return map of entity name to character
+     * @throws MissingResourceException if the resource cannot be found or cannot be read
+     */
+    private static Map<String, Character> loadEntities(String filename) {
+        InputStream in = Entities.class.getResourceAsStream(filename);
+        if (in == null) // a missing resource is reported as null, not as an IOException
+            throw new MissingResourceException("Error loading entities resource: not found", "Entities", filename);
+
+        Properties properties = new Properties();
+        try {
+            try {
+                properties.load(in);
+            } finally {
+                in.close(); // release the stream even when load() fails
+            }
+        } catch (IOException e) {
+            throw new MissingResourceException("Error loading entities resource: " + e.getMessage(), "Entities", filename);
+        }
+
+        Map<String, Character> entities = new HashMap<String, Character>();
+        for (Map.Entry<Object, Object> entry : properties.entrySet()) {
+            // NOTE(review): the (char) cast keeps only the low 16 bits, so a value above FFFF is stored as a
+            // different BMP character. Representing those correctly needs a wider value type than Character.
+            Character val = Character.valueOf((char) Integer.parseInt((String) entry.getValue(), 16));
+            String name = (String) entry.getKey();
+            entities.put(name, val);
+        }
+        return entities;
+    }
+
+    /**
+     * Invert a name-to-character map into a character-to-name map. When several names share one character, an
+     * all-lower-case name replaces whatever was stored before it; any other name is kept only if it arrived first.
+     *
+     * @param inMap entity name to character
+     * @return character to entity name
+     */
+    private static Map<Character, String> toCharacterKey(Map<String, Character> inMap) {
+        Map<Character, String> byChar = new HashMap<Character, String>();
+        for (Map.Entry<String, Character> entry : inMap.entrySet()) {
+            String name = entry.getKey();
+            Character key = entry.getValue();
+
+            boolean firstForChar = !byChar.containsKey(key);
+            // dupe: prefer the lower case version
+            if (firstForChar || name.toLowerCase().equals(name))
+                byChar.put(key, name);
+        }
+        return byChar;
+    }
+}
diff --git a/server/src/org/jsoup/nodes/Node.java b/server/src/org/jsoup/nodes/Node.java
new file mode 100644
index 0000000000..eb2b40ee73
--- /dev/null
+++ b/server/src/org/jsoup/nodes/Node.java
@@ -0,0 +1,615 @@
+package org.jsoup.nodes;
+
+import org.jsoup.helper.StringUtil;
+import org.jsoup.helper.Validate;
+import org.jsoup.parser.Parser;
+import org.jsoup.select.NodeTraversor;
+import org.jsoup.select.NodeVisitor;
+
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+
+/**
+ The base, abstract Node model. Elements, Documents, Comments etc are all Node instances.
+
+ @author Jonathan Hedley, jonathan@hedley.net */
+public abstract class Node implements Cloneable {
+ Node parentNode;
+ List<Node> childNodes;
+ Attributes attributes;
+ String baseUri;
+ int siblingIndex;
+
+ /**
+ Create a new Node.
+ @param baseUri base URI
+ @param attributes attributes (not null, but may be empty)
+ */
+ protected Node(String baseUri, Attributes attributes) {
+ Validate.notNull(baseUri);
+ Validate.notNull(attributes);
+
+ childNodes = new ArrayList<Node>(4);
+ this.baseUri = baseUri.trim();
+ this.attributes = attributes;
+ }
+
+ protected Node(String baseUri) {
+ this(baseUri, new Attributes());
+ }
+
+ /**
+ * Default constructor. Doesn't setup base uri, children, or attributes; use with caution.
+ */
+ protected Node() {
+ childNodes = Collections.emptyList();
+ attributes = null;
+ }
+
+ /**
+ Get the node name of this node. Use for debugging purposes and not logic switching (for that, use instanceof).
+ @return node name
+ */
+ public abstract String nodeName();
+
+    /**
+     * Get an attribute's value by its key.
+     * <p/>
+     * If no attribute is stored under the key and the key starts with <code><b>abs:</b></code> (in any case), the
+     * remainder of the key is passed to {@link #absUrl}, giving a shortcut for resolving a possibly relative URL.
+     * E.g.: <blockquote><code>String url = a.attr("abs:href");</code></blockquote>
+     * @param attributeKey The attribute key.
+     * @return The attribute, or empty string if not present (to avoid nulls).
+     * @see #attributes()
+     * @see #hasAttr(String)
+     * @see #absUrl(String)
+     */
+    public String attr(String attributeKey) {
+        Validate.notNull(attributeKey);
+
+        if (attributes.hasKey(attributeKey))
+            return attributes.get(attributeKey);
+
+        boolean wantsAbsolute = attributeKey.toLowerCase().startsWith("abs:");
+        return wantsAbsolute ? absUrl(attributeKey.substring("abs:".length())) : "";
+    }
+
+ /**
+ * Get all of the element's attributes.
+ * @return attributes (which implements iterable, in same order as presented in original HTML).
+ */
+ public Attributes attributes() {
+ return attributes;
+ }
+
+ /**
+ * Set an attribute (key=value). If the attribute already exists, it is replaced.
+ * @param attributeKey The attribute key.
+ * @param attributeValue The attribute value.
+ * @return this (for chaining)
+ */
+ public Node attr(String attributeKey, String attributeValue) {
+ attributes.put(attributeKey, attributeValue);
+ return this;
+ }
+
+    /**
+     * Test if this element has an attribute. A key prefixed with {@code abs:} also counts as present when the
+     * un-prefixed attribute exists and {@link #absUrl} yields a non-empty URL for it.
+     * @param attributeKey The attribute key to check.
+     * @return true if the attribute exists, false if not.
+     */
+    public boolean hasAttr(String attributeKey) {
+        Validate.notNull(attributeKey);
+
+        boolean absPrefixed = attributeKey.toLowerCase().startsWith("abs:");
+        if (absPrefixed) {
+            String bareKey = attributeKey.substring("abs:".length());
+            boolean resolves = attributes.hasKey(bareKey) && !absUrl(bareKey).equals("");
+            if (resolves)
+                return true;
+        }
+        return attributes.hasKey(attributeKey);
+    }
+
+ /**
+ * Remove an attribute from this element.
+ * @param attributeKey The attribute to remove.
+ * @return this (for chaining)
+ */
+ public Node removeAttr(String attributeKey) {
+ Validate.notNull(attributeKey);
+ attributes.remove(attributeKey);
+ return this;
+ }
+
+ /**
+ Get the base URI of this node.
+ @return base URI
+ */
+ public String baseUri() {
+ return baseUri;
+ }
+
+ /**
+ Update the base URI of this node and all of its descendants.
+ @param baseUri base URI to set
+ */
+ public void setBaseUri(final String baseUri) {
+ Validate.notNull(baseUri);
+
+ traverse(new NodeVisitor() {
+ public void head(Node node, int depth) {
+ node.baseUri = baseUri;
+ }
+
+ public void tail(Node node, int depth) {
+ }
+ });
+ }
+
+    /**
+     * Get an absolute URL from a URL attribute that may be relative (i.e. an <code>&lt;a href></code> or
+     * <code>&lt;img src></code>).
+     * <p/>
+     * E.g.: <code>String absUrl = linkEl.absUrl("href");</code>
+     * <p/>
+     * The attribute value is resolved against the node's {@link #baseUri}. If the base URI itself does not parse
+     * as a URL, the attribute value is tried on its own, so an already absolute value (one starting with a
+     * protocol such as <code>http://</code> or <code>https://</code>) is still returned.
+     * <p/>
+     * As an alternate, you can use the {@link #attr} method with the <code>abs:</code> prefix, e.g.:
+     * <code>String absUrl = linkEl.attr("abs:href");</code>
+     *
+     * @param attributeKey The attribute key
+     * @return An absolute URL if one could be made, or an empty string (not null) if the attribute was missing or
+     * could not be made successfully into a URL.
+     * @see #attr
+     * @see java.net.URL#URL(java.net.URL, String)
+     */
+    public String absUrl(String attributeKey) {
+        Validate.notEmpty(attributeKey);
+
+        String relUrl = attr(attributeKey);
+        if (!hasAttr(attributeKey))
+            return ""; // nothing to make absolute with
+
+        try {
+            URL base;
+            try {
+                base = new URL(baseUri);
+            } catch (MalformedURLException baseUnusable) {
+                // the base is unsuitable, but the attribute may be abs on its own, so try that
+                return new URL(relUrl).toExternalForm();
+            }
+            // workaround: java resolves '//path/file + ?foo' to '//path/?foo', not '//path/file?foo' as desired
+            String target = relUrl.startsWith("?") ? base.getPath() + relUrl : relUrl;
+            return new URL(base, target).toExternalForm();
+        } catch (MalformedURLException unresolvable) {
+            return "";
+        }
+    }
+
+ /**
+ Get a child node by index
+ @param index index of child node
+ @return the child node at this index.
+ */
+ public Node childNode(int index) {
+ return childNodes.get(index);
+ }
+
+ /**
+ Get this node's children. Presented as an unmodifiable list: new children can not be added, but the child nodes
+ themselves can be manipulated.
+ @return list of children. If no children, returns an empty list.
+ */
+ public List<Node> childNodes() {
+ return Collections.unmodifiableList(childNodes);
+ }
+
+ protected Node[] childNodesAsArray() {
+ return childNodes.toArray(new Node[childNodes().size()]);
+ }
+
+ /**
+ Gets this node's parent node.
+ @return parent node; or null if no parent.
+ */
+ public Node parent() {
+ return parentNode;
+ }
+
+    /**
+     * Gets the Document associated with this Node, found by walking up the parent chain.
+     * @return this node if it is a Document, otherwise the owner of its parent; null if there is no such Document.
+     */
+    public Document ownerDocument() {
+        if (this instanceof Document)
+            return (Document) this;
+        return (parentNode == null) ? null : parentNode.ownerDocument();
+    }
+
+ /**
+ * Remove (delete) this node from the DOM tree. If this node has children, they are also removed.
+ */
+ public void remove() {
+ Validate.notNull(parentNode);
+ parentNode.removeChild(this);
+ }
+
+ /**
+ * Insert the specified HTML into the DOM before this node (i.e. as a preceding sibling).
+ * @param html HTML to add before this node
+ * @return this node, for chaining
+ * @see #after(String)
+ */
+ public Node before(String html) {
+ addSiblingHtml(siblingIndex(), html);
+ return this;
+ }
+
+ /**
+ * Insert the specified node into the DOM before this node (i.e. as a preceding sibling).
+ * @param node to add before this node
+ * @return this node, for chaining
+ * @see #after(Node)
+ */
+ public Node before(Node node) {
+ Validate.notNull(node);
+ Validate.notNull(parentNode);
+
+ parentNode.addChildren(siblingIndex(), node);
+ return this;
+ }
+
+ /**
+ * Insert the specified HTML into the DOM after this node (i.e. as a following sibling).
+ * @param html HTML to add after this node
+ * @return this node, for chaining
+ * @see #before(String)
+ */
+ public Node after(String html) {
+ addSiblingHtml(siblingIndex()+1, html);
+ return this;
+ }
+
+ /**
+ * Insert the specified node into the DOM after this node (i.e. as a following sibling).
+ * @param node to add after this node
+ * @return this node, for chaining
+ * @see #before(Node)
+ */
+ public Node after(Node node) {
+ Validate.notNull(node);
+ Validate.notNull(parentNode);
+
+ parentNode.addChildren(siblingIndex()+1, node);
+ return this;
+ }
+
+ private void addSiblingHtml(int index, String html) {
+ Validate.notNull(html);
+ Validate.notNull(parentNode);
+
+ Element context = parent() instanceof Element ? (Element) parent() : null;
+ List<Node> nodes = Parser.parseFragment(html, context, baseUri());
+ parentNode.addChildren(index, nodes.toArray(new Node[nodes.size()]));
+ }
+
+ /**
+ Wrap the supplied HTML around this node: the first element parsed from the HTML takes this node's place in its
+ parent, and this node is then added to that element's deepest first-child descendant.
+ @param html HTML to wrap around this element, e.g. {@code <div class="head"></div>}. Can be arbitrarily deep.
+ @return this node, for chaining; or null if the first parsed node is not an Element (nothing to wrap with).
+ */
+ public Node wrap(String html) {
+ Validate.notEmpty(html);
+
+ Element context = parent() instanceof Element ? (Element) parent() : null;
+ List<Node> wrapChildren = Parser.parseFragment(html, context, baseUri());
+ // NOTE(review): get(0) throws on an empty list, before the "noop" check below can run -- confirm whether
+ // parseFragment can return an empty list for non-empty input.
+ Node wrapNode = wrapChildren.get(0);
+ if (wrapNode == null || !(wrapNode instanceof Element)) // nothing to wrap with; noop
+ return null;
+
+ Element wrap = (Element) wrapNode;
+ Element deepest = getDeepChild(wrap);
+ // NOTE(review): parentNode is not validated in this method, so calling wrap on a parentless node fails
+ // with a NullPointerException on the next line.
+ parentNode.replaceChild(this, wrap);
+ deepest.addChildren(this);
+
+ // remainder (unbalanced wrap, like <div></div><p></p> -- The <p> is remainder
+ // NOTE(review): the loop starts at index 0, so it relies on wrapChildren no longer containing the wrapper
+ // here (presumably a live view of the parsed fragment's children -- verify against Parser.parseFragment).
+ // If it is a live view, each removeChild also shrinks it while i advances, which would skip entries -- confirm.
+ if (wrapChildren.size() > 0) {
+ for (int i = 0; i < wrapChildren.size(); i++) {
+ Node remainder = wrapChildren.get(i);
+ remainder.parentNode.removeChild(remainder);
+ wrap.appendChild(remainder);
+ }
+ }
+ return this;
+ }
+
+ /**
+ * Removes this node from the DOM, and moves its children up into the node's parent. This has the effect of dropping
+ * the node but keeping its children.
+ * <p/>
+ * For example, with the input html:<br/>
+ * {@code <div>One <span>Two <b>Three</b></span></div>}<br/>
+ * Calling {@code element.unwrap()} on the {@code span} element will result in the html:<br/>
+ * {@code <div>One Two <b>Three</b></div>}<br/>
+ * and the {@code "Two "} {@link TextNode} being returned.
+ * @return the first child of this node, after the node has been unwrapped. Null if the node had no children.
+ * @see #remove()
+ * @see #wrap(String)
+ */
+ public Node unwrap() {
+ Validate.notNull(parentNode);
+
+ int index = siblingIndex;
+ Node firstChild = childNodes.size() > 0 ? childNodes.get(0) : null;
+ parentNode.addChildren(index, this.childNodesAsArray());
+ this.remove();
+
+ return firstChild;
+ }
+
+    // follows the first element child at each level, returning the element that has no element children
+    private Element getDeepChild(Element el) {
+        Element deepest = el;
+        List<Element> children = deepest.children();
+        while (children.size() > 0) {
+            deepest = children.get(0);
+            children = deepest.children();
+        }
+        return deepest;
+    }
+
+ /**
+ * Replace this node in the DOM with the supplied node.
+ * @param in the node that will replace the existing node.
+ */
+ public void replaceWith(Node in) {
+ Validate.notNull(in);
+ Validate.notNull(parentNode);
+ parentNode.replaceChild(this, in);
+ }
+
+ protected void setParentNode(Node parentNode) {
+ if (this.parentNode != null)
+ this.parentNode.removeChild(this);
+ this.parentNode = parentNode;
+ }
+
+    /**
+     * Puts {@code in} into the child slot currently held by {@code out}, detaching {@code in} from any previous
+     * parent first and leaving {@code out} without a parent.
+     * @param out existing child of this node
+     * @param in replacement node (not null)
+     */
+    protected void replaceChild(Node out, Node in) {
+        Validate.isTrue(out.parentNode == this);
+        Validate.notNull(in);
+        if (in.parentNode != null)
+            in.parentNode.removeChild(in);
+
+        // read the slot only after the detach above, which may have reindexed this node's children
+        final int slot = out.siblingIndex();
+        childNodes.set(slot, in);
+        in.parentNode = this;
+        in.setSiblingIndex(slot);
+        out.parentNode = null;
+    }
+
+ protected void removeChild(Node out) {
+ Validate.isTrue(out.parentNode == this);
+ int index = out.siblingIndex();
+ childNodes.remove(index);
+ reindexChildren();
+ out.parentNode = null;
+ }
+
+ protected void addChildren(Node... children) {
+ //most used. short circuit addChildren(int), which hits reindex children and array copy
+ for (Node child: children) {
+ reparentChild(child);
+ childNodes.add(child);
+ child.setSiblingIndex(childNodes.size()-1);
+ }
+ }
+
+ protected void addChildren(int index, Node... children) {
+ Validate.noNullElements(children);
+ for (int i = children.length - 1; i >= 0; i--) {
+ Node in = children[i];
+ reparentChild(in);
+ childNodes.add(index, in);
+ }
+ reindexChildren();
+ }
+
+ private void reparentChild(Node child) {
+ if (child.parentNode != null)
+ child.parentNode.removeChild(child);
+ child.setParentNode(this);
+ }
+
+ private void reindexChildren() {
+ for (int i = 0; i < childNodes.size(); i++) {
+ childNodes.get(i).setSiblingIndex(i);
+ }
+ }
+
+    /**
+     Retrieves this node's sibling nodes: every child of this node's parent except this node itself (a node is
+     not a sibling of itself). The result is a new list, in document order.
+     @return node siblings. If the node has no parent, returns an empty list.
+     */
+    public List<Node> siblingNodes() {
+        if (parentNode == null)
+            return Collections.emptyList();
+
+        List<Node> all = parentNode.childNodes;
+        List<Node> others = new ArrayList<Node>(all.size() - 1);
+        for (Node candidate : all) {
+            if (candidate == this)
+                continue;
+            others.add(candidate);
+        }
+        return others;
+    }
+
+    /**
+     Get this node's next sibling.
+     @return next sibling, or null if this is the last sibling (or this node has no parent)
+     */
+    public Node nextSibling() {
+        if (parentNode == null)
+            return null; // root
+
+        // siblingIndex() is a primitive int, so there is nothing to box or null-check
+        List<Node> siblings = parentNode.childNodes;
+        int next = siblingIndex() + 1;
+        return next < siblings.size() ? siblings.get(next) : null;
+    }
+
+    /**
+     Get this node's previous sibling.
+     @return the previous sibling, or null if this is the first sibling (or this node has no parent)
+     */
+    public Node previousSibling() {
+        if (parentNode == null)
+            return null; // root
+
+        // siblingIndex() is a primitive int, so there is nothing to box or null-check
+        int index = siblingIndex();
+        return index > 0 ? parentNode.childNodes.get(index - 1) : null;
+    }
+
+ /**
+ * Get the list index of this node in its node sibling list. I.e. if this is the first node
+ * sibling, returns 0.
+ * @return position in node sibling list
+ * @see org.jsoup.nodes.Element#elementSiblingIndex()
+ */
+ public int siblingIndex() {
+ return siblingIndex;
+ }
+
+ protected void setSiblingIndex(int siblingIndex) {
+ this.siblingIndex = siblingIndex;
+ }
+
+ /**
+ * Perform a depth-first traversal through this node and its descendants.
+ * @param nodeVisitor the visitor callbacks to perform on each node
+ * @return this node, for chaining
+ */
+ public Node traverse(NodeVisitor nodeVisitor) {
+ Validate.notNull(nodeVisitor);
+ NodeTraversor traversor = new NodeTraversor(nodeVisitor);
+ traversor.traverse(this);
+ return this;
+ }
+
+ /**
+ Get the outer HTML of this node.
+ @return HTML
+ */
+ public String outerHtml() {
+ StringBuilder accum = new StringBuilder(128);
+ outerHtml(accum);
+ return accum.toString();
+ }
+
+ protected void outerHtml(StringBuilder accum) {
+ new NodeTraversor(new OuterHtmlVisitor(accum, getOutputSettings())).traverse(this);
+ }
+
+    // uses the owner document's output settings; a node without a document falls back to those of a new, empty Document
+    private Document.OutputSettings getOutputSettings() {
+        Document owner = ownerDocument();
+        if (owner == null)
+            owner = new Document("");
+        return owner.outputSettings();
+    }
+
+ /**
+ Get the outer HTML of this node.
+ @param accum accumulator to place HTML into
+ */
+ abstract void outerHtmlHead(StringBuilder accum, int depth, Document.OutputSettings out);
+
+ abstract void outerHtmlTail(StringBuilder accum, int depth, Document.OutputSettings out);
+
+ public String toString() {
+ return outerHtml();
+ }
+
+ protected void indent(StringBuilder accum, int depth, Document.OutputSettings out) {
+ accum.append("\n").append(StringUtil.padding(depth * out.indentAmount()));
+ }
+
+    /** Nodes are compared by identity only. */
+    @Override
+    public boolean equals(Object o) {
+        // todo: have nodes hold a child index, compare against that and parent (not children)
+        return this == o;
+    }
+
+    @Override
+    public int hashCode() {
+        int parentHash = (parentNode == null) ? 0 : parentNode.hashCode();
+        // not children, or will block stack as they go back up to parent)
+        int attributesHash = (attributes == null) ? 0 : attributes.hashCode();
+        return 31 * parentHash + attributesHash;
+    }
+
+ /**
+ * Create a stand-alone, deep copy of this node, and all of its children. The cloned node will have no siblings or
+ * parent node. As a stand-alone object, any changes made to the clone or any of its children will not impact the
+ * original node.
+ * <p>
+ * The cloned node may be adopted into another Document or node structure using {@link Element#appendChild(Node)}.
+ * @return stand-alone cloned node
+ */
+ @Override
+ public Node clone() {
+ return doClone(null); // splits for orphan
+ }
+
+    /**
+     * Deep-copies this node and its descendants, attaching the copy to the given parent.
+     * @param parent parent for the copy; null produces an orphan with sibling index 0
+     * @return the copied node
+     */
+    protected Node doClone(Node parent) {
+        final Node copy;
+        try {
+            copy = (Node) super.clone();
+        } catch (CloneNotSupportedException e) {
+            throw new RuntimeException(e);
+        }
+
+        copy.parentNode = parent; // can be null, to create an orphan split
+        copy.siblingIndex = (parent != null) ? siblingIndex : 0;
+        copy.attributes = (attributes == null) ? null : attributes.clone();
+        copy.baseUri = baseUri;
+
+        // the shallow clone still shares this node's child list, so give the copy its own list of copied children
+        List<Node> copiedChildren = new ArrayList<Node>(childNodes.size());
+        copy.childNodes = copiedChildren;
+        for (Node child : childNodes)
+            copiedChildren.add(child.doClone(copy)); // clone() creates orphans, doClone() keeps parent
+
+        return copy;
+    }
+
+    /** Visitor that renders each visited node's head and tail HTML into a shared accumulator. */
+    private static class OuterHtmlVisitor implements NodeVisitor {
+        private final StringBuilder accum;
+        private final Document.OutputSettings out;
+
+        OuterHtmlVisitor(StringBuilder accum, Document.OutputSettings out) {
+            this.accum = accum;
+            this.out = out;
+        }
+
+        public void head(Node node, int depth) {
+            node.outerHtmlHead(accum, depth, out);
+        }
+
+        public void tail(Node node, int depth) {
+            boolean isText = node.nodeName().equals("#text");
+            if (isText)
+                return; // saves a void hit.
+            node.outerHtmlTail(accum, depth, out);
+        }
+    }
+}
diff --git a/server/src/org/jsoup/nodes/TextNode.java b/server/src/org/jsoup/nodes/TextNode.java
new file mode 100644
index 0000000000..9fd0feac8f
--- /dev/null
+++ b/server/src/org/jsoup/nodes/TextNode.java
@@ -0,0 +1,175 @@
+package org.jsoup.nodes;
+
+import org.jsoup.helper.StringUtil;
+import org.jsoup.helper.Validate;
+
+/**
+ A text node.
+
+ @author Jonathan Hedley, jonathan@hedley.net */
+public class TextNode extends Node {
+ /*
+ TextNode is a node, and so by default comes with attributes and children. The attributes are seldom used, but use
+ memory, and the child nodes are never used. So we don't have them, and override accessors to attributes to create
+ them as needed on the fly.
+ */
+ private static final String TEXT_KEY = "text";
+ String text;
+
+ /**
+ Create a new TextNode representing the supplied (unencoded) text.
+
+ @param text raw text
+ @param baseUri base uri
+ @see #createFromEncoded(String, String)
+ */
+ public TextNode(String text, String baseUri) {
+ this.baseUri = baseUri;
+ this.text = text;
+ }
+
+ public String nodeName() {
+ return "#text";
+ }
+
+ /**
+ * Get the text content of this text node.
+ * @return Unencoded, normalised text.
+ * @see TextNode#getWholeText()
+ */
+ public String text() {
+ return normaliseWhitespace(getWholeText());
+ }
+
+ /**
+ * Set the text content of this text node.
+ * @param text unencoded text
+ * @return this, for chaining
+ */
+ public TextNode text(String text) {
+ this.text = text;
+ if (attributes != null)
+ attributes.put(TEXT_KEY, text);
+ return this;
+ }
+
+ /**
+ Get the (unencoded) text of this text node, including any newlines and spaces present in the original.
+ @return text
+ */
+ public String getWholeText() {
+ return attributes == null ? text : attributes.get(TEXT_KEY);
+ }
+
+ /**
+ Test if this text node is blank -- that is, empty or only whitespace (including newlines).
+ @return true if this text node is empty or only whitespace, false if it contains any text content.
+ */
+ public boolean isBlank() {
+ return StringUtil.isBlank(getWholeText());
+ }
+
+    /**
+     * Split this text node into two nodes at the specified string offset. After splitting, this node will contain the
+     * original text up to the offset, and will have a new text node sibling containing the text after the offset.
+     * If this node has no parent, the new node is created but not attached anywhere.
+     * @param offset string offset point to split node at; must be at least 0 and less than the text length.
+     * @return the newly created text node containing the text after the offset.
+     */
+    public TextNode splitText(int offset) {
+        // Validate against the same string that is split below. The text field alone can be out of date once the
+        // text is held in (and changed through) the attributes, which is where getWholeText() then reads from.
+        final String whole = getWholeText();
+        Validate.isTrue(offset >= 0, "Split offset must be not be negative");
+        Validate.isTrue(offset < whole.length(), "Split offset must not be greater than current text length");
+
+        String head = whole.substring(0, offset);
+        String tail = whole.substring(offset);
+        text(head);
+        TextNode tailNode = new TextNode(tail, this.baseUri());
+        if (parent() != null)
+            parent().addChildren(siblingIndex() + 1, tailNode);
+
+        return tailNode;
+    }
+
+    /**
+     * Appends this node's escaped text. With pretty-printing on, whitespace is normalised unless the parent
+     * element preserves whitespace, and a non-blank first child of a block-formatted element is indented first.
+     */
+    void outerHtmlHead(StringBuilder accum, int depth, Document.OutputSettings out) {
+        String html = Entities.escape(getWholeText(), out);
+
+        boolean collapse = out.prettyPrint()
+            && parent() instanceof Element
+            && !((Element) parent()).preserveWhitespace();
+        if (collapse)
+            html = normaliseWhitespace(html);
+
+        boolean startsBlock = out.prettyPrint()
+            && siblingIndex() == 0
+            && parentNode instanceof Element
+            && ((Element) parentNode).tag().formatAsBlock()
+            && !isBlank();
+        if (startsBlock)
+            indent(accum, depth, out);
+
+        accum.append(html);
+    }
+
+ void outerHtmlTail(StringBuilder accum, int depth, Document.OutputSettings out) {}
+
+ public String toString() {
+ return outerHtml();
+ }
+
+    /**
+     * Create a new TextNode from HTML encoded (aka escaped) data.
+     * @param encodedText Text containing encoded HTML (e.g. &amp;lt;)
+     * @param baseUri base uri given to the new node
+     * @return TextNode containing unencoded data (e.g. &lt;)
+     */
+    public static TextNode createFromEncoded(String encodedText, String baseUri) {
+        return new TextNode(Entities.unescape(encodedText), baseUri);
+    }
+
+    // thin delegate to StringUtil.normaliseWhitespace
+    static String normaliseWhitespace(String text) {
+        return StringUtil.normaliseWhitespace(text);
+    }
+
+ static String stripLeadingWhitespace(String text) {
+ return text.replaceFirst("^\\s+", "");
+ }
+
+    // true when the builder is non-empty and ends with a plain space; other whitespace characters do not count
+    static boolean lastCharIsWhitespace(StringBuilder sb) {
+        int length = sb.length();
+        if (length == 0)
+            return false;
+        return sb.charAt(length - 1) == ' ';
+    }
+
+ // attribute fiddling. create on first access.
+ private void ensureAttributes() {
+ if (attributes == null) {
+ attributes = new Attributes();
+ attributes.put(TEXT_KEY, text);
+ }
+ }
+
+ @Override
+ public String attr(String attributeKey) {
+ ensureAttributes();
+ return super.attr(attributeKey);
+ }
+
+ @Override
+ public Attributes attributes() {
+ ensureAttributes();
+ return super.attributes();
+ }
+
+ @Override
+ public Node attr(String attributeKey, String attributeValue) {
+ ensureAttributes();
+ return super.attr(attributeKey, attributeValue);
+ }
+
+ @Override
+ public boolean hasAttr(String attributeKey) {
+ ensureAttributes();
+ return super.hasAttr(attributeKey);
+ }
+
+ @Override
+ public Node removeAttr(String attributeKey) {
+ ensureAttributes();
+ return super.removeAttr(attributeKey);
+ }
+
+ @Override
+ public String absUrl(String attributeKey) {
+ ensureAttributes();
+ return super.absUrl(attributeKey);
+ }
+}
diff --git a/server/src/org/jsoup/nodes/XmlDeclaration.java b/server/src/org/jsoup/nodes/XmlDeclaration.java
new file mode 100644
index 0000000000..80d4a0152f
--- /dev/null
+++ b/server/src/org/jsoup/nodes/XmlDeclaration.java
@@ -0,0 +1,48 @@
+package org.jsoup.nodes;
+
+/**
+ An XML Declaration.
+
+ @author Jonathan Hedley, jonathan@hedley.net */
+public class XmlDeclaration extends Node {
+    private static final String DECL_KEY = "declaration";
+    // picks the opening delimiter on output: "<!" if true, "<?" if false (and last data char should be ?)
+    private final boolean isProcessingInstruction;
+
+    /**
+     Create a new XML declaration. The data is stored as an attribute of the node.
+     @param data data
+     @param baseUri base uri
+     @param isProcessingInstruction is processing instruction
+     */
+    public XmlDeclaration(String data, String baseUri, boolean isProcessingInstruction) {
+        super(baseUri);
+        this.isProcessingInstruction = isProcessingInstruction;
+        attributes.put(DECL_KEY, data);
+    }
+
+    @Override
+    public String nodeName() {
+        return "#declaration";
+    }
+
+    /**
+     Get the unencoded XML declaration.
+     @return XML declaration
+     */
+    public String getWholeDeclaration() {
+        return attributes.get(DECL_KEY);
+    }
+
+    @Override
+    void outerHtmlHead(StringBuilder accum, int depth, Document.OutputSettings out) {
+        String opener = isProcessingInstruction ? "<!" : "<?";
+        accum.append(opener).append(getWholeDeclaration()).append(">");
+    }
+
+    // a declaration has no closing part
+    @Override
+    void outerHtmlTail(StringBuilder accum, int depth, Document.OutputSettings out) {}
+
+    @Override
+    public String toString() {
+        return outerHtml();
+    }
+}
diff --git a/server/src/org/jsoup/nodes/entities-base.properties b/server/src/org/jsoup/nodes/entities-base.properties
new file mode 100644
index 0000000000..3d1d11e6c4
--- /dev/null
+++ b/server/src/org/jsoup/nodes/entities-base.properties
@@ -0,0 +1,106 @@
+AElig=000C6
+AMP=00026
+Aacute=000C1
+Acirc=000C2
+Agrave=000C0
+Aring=000C5
+Atilde=000C3
+Auml=000C4
+COPY=000A9
+Ccedil=000C7
+ETH=000D0
+Eacute=000C9
+Ecirc=000CA
+Egrave=000C8
+Euml=000CB
+GT=0003E
+Iacute=000CD
+Icirc=000CE
+Igrave=000CC
+Iuml=000CF
+LT=0003C
+Ntilde=000D1
+Oacute=000D3
+Ocirc=000D4
+Ograve=000D2
+Oslash=000D8
+Otilde=000D5
+Ouml=000D6
+QUOT=00022
+REG=000AE
+THORN=000DE
+Uacute=000DA
+Ucirc=000DB
+Ugrave=000D9
+Uuml=000DC
+Yacute=000DD
+aacute=000E1
+acirc=000E2
+acute=000B4
+aelig=000E6
+agrave=000E0
+amp=00026
+aring=000E5
+atilde=000E3
+auml=000E4
+brvbar=000A6
+ccedil=000E7
+cedil=000B8
+cent=000A2
+copy=000A9
+curren=000A4
+deg=000B0
+divide=000F7
+eacute=000E9
+ecirc=000EA
+egrave=000E8
+eth=000F0
+euml=000EB
+frac12=000BD
+frac14=000BC
+frac34=000BE
+gt=0003E
+iacute=000ED
+icirc=000EE
+iexcl=000A1
+igrave=000EC
+iquest=000BF
+iuml=000EF
+laquo=000AB
+lt=0003C
+macr=000AF
+micro=000B5
+middot=000B7
+nbsp=000A0
+not=000AC
+ntilde=000F1
+oacute=000F3
+ocirc=000F4
+ograve=000F2
+ordf=000AA
+ordm=000BA
+oslash=000F8
+otilde=000F5
+ouml=000F6
+para=000B6
+plusmn=000B1
+pound=000A3
+quot=00022
+raquo=000BB
+reg=000AE
+sect=000A7
+shy=000AD
+sup1=000B9
+sup2=000B2
+sup3=000B3
+szlig=000DF
+thorn=000FE
+times=000D7
+uacute=000FA
+ucirc=000FB
+ugrave=000F9
+uml=000A8
+uuml=000FC
+yacute=000FD
+yen=000A5
+yuml=000FF
diff --git a/server/src/org/jsoup/nodes/entities-full.properties b/server/src/org/jsoup/nodes/entities-full.properties
new file mode 100644
index 0000000000..92f124f408
--- /dev/null
+++ b/server/src/org/jsoup/nodes/entities-full.properties
@@ -0,0 +1,2032 @@
+AElig=000C6
+AMP=00026
+Aacute=000C1
+Abreve=00102
+Acirc=000C2
+Acy=00410
+Afr=1D504
+Agrave=000C0
+Alpha=00391
+Amacr=00100
+And=02A53
+Aogon=00104
+Aopf=1D538
+ApplyFunction=02061
+Aring=000C5
+Ascr=1D49C
+Assign=02254
+Atilde=000C3
+Auml=000C4
+Backslash=02216
+Barv=02AE7
+Barwed=02306
+Bcy=00411
+Because=02235
+Bernoullis=0212C
+Beta=00392
+Bfr=1D505
+Bopf=1D539
+Breve=002D8
+Bscr=0212C
+Bumpeq=0224E
+CHcy=00427
+COPY=000A9
+Cacute=00106
+Cap=022D2
+CapitalDifferentialD=02145
+Cayleys=0212D
+Ccaron=0010C
+Ccedil=000C7
+Ccirc=00108
+Cconint=02230
+Cdot=0010A
+Cedilla=000B8
+CenterDot=000B7
+Cfr=0212D
+Chi=003A7
+CircleDot=02299
+CircleMinus=02296
+CirclePlus=02295
+CircleTimes=02297
+ClockwiseContourIntegral=02232
+CloseCurlyDoubleQuote=0201D
+CloseCurlyQuote=02019
+Colon=02237
+Colone=02A74
+Congruent=02261
+Conint=0222F
+ContourIntegral=0222E
+Copf=02102
+Coproduct=02210
+CounterClockwiseContourIntegral=02233
+Cross=02A2F
+Cscr=1D49E
+Cup=022D3
+CupCap=0224D
+DD=02145
+DDotrahd=02911
+DJcy=00402
+DScy=00405
+DZcy=0040F
+Dagger=02021
+Darr=021A1
+Dashv=02AE4
+Dcaron=0010E
+Dcy=00414
+Del=02207
+Delta=00394
+Dfr=1D507
+DiacriticalAcute=000B4
+DiacriticalDot=002D9
+DiacriticalDoubleAcute=002DD
+DiacriticalGrave=00060
+DiacriticalTilde=002DC
+Diamond=022C4
+DifferentialD=02146
+Dopf=1D53B
+Dot=000A8
+DotDot=020DC
+DotEqual=02250
+DoubleContourIntegral=0222F
+DoubleDot=000A8
+DoubleDownArrow=021D3
+DoubleLeftArrow=021D0
+DoubleLeftRightArrow=021D4
+DoubleLeftTee=02AE4
+DoubleLongLeftArrow=027F8
+DoubleLongLeftRightArrow=027FA
+DoubleLongRightArrow=027F9
+DoubleRightArrow=021D2
+DoubleRightTee=022A8
+DoubleUpArrow=021D1
+DoubleUpDownArrow=021D5
+DoubleVerticalBar=02225
+DownArrow=02193
+DownArrowBar=02913
+DownArrowUpArrow=021F5
+DownBreve=00311
+DownLeftRightVector=02950
+DownLeftTeeVector=0295E
+DownLeftVector=021BD
+DownLeftVectorBar=02956
+DownRightTeeVector=0295F
+DownRightVector=021C1
+DownRightVectorBar=02957
+DownTee=022A4
+DownTeeArrow=021A7
+Downarrow=021D3
+Dscr=1D49F
+Dstrok=00110
+ENG=0014A
+ETH=000D0
+Eacute=000C9
+Ecaron=0011A
+Ecirc=000CA
+Ecy=0042D
+Edot=00116
+Efr=1D508
+Egrave=000C8
+Element=02208
+Emacr=00112
+EmptySmallSquare=025FB
+EmptyVerySmallSquare=025AB
+Eogon=00118
+Eopf=1D53C
+Epsilon=00395
+Equal=02A75
+EqualTilde=02242
+Equilibrium=021CC
+Escr=02130
+Esim=02A73
+Eta=00397
+Euml=000CB
+Exists=02203
+ExponentialE=02147
+Fcy=00424
+Ffr=1D509
+FilledSmallSquare=025FC
+FilledVerySmallSquare=025AA
+Fopf=1D53D
+ForAll=02200
+Fouriertrf=02131
+Fscr=02131
+GJcy=00403
+GT=0003E
+Gamma=00393
+Gammad=003DC
+Gbreve=0011E
+Gcedil=00122
+Gcirc=0011C
+Gcy=00413
+Gdot=00120
+Gfr=1D50A
+Gg=022D9
+Gopf=1D53E
+GreaterEqual=02265
+GreaterEqualLess=022DB
+GreaterFullEqual=02267
+GreaterGreater=02AA2
+GreaterLess=02277
+GreaterSlantEqual=02A7E
+GreaterTilde=02273
+Gscr=1D4A2
+Gt=0226B
+HARDcy=0042A
+Hacek=002C7
+Hat=0005E
+Hcirc=00124
+Hfr=0210C
+HilbertSpace=0210B
+Hopf=0210D
+HorizontalLine=02500
+Hscr=0210B
+Hstrok=00126
+HumpDownHump=0224E
+HumpEqual=0224F
+IEcy=00415
+IJlig=00132
+IOcy=00401
+Iacute=000CD
+Icirc=000CE
+Icy=00418
+Idot=00130
+Ifr=02111
+Igrave=000CC
+Im=02111
+Imacr=0012A
+ImaginaryI=02148
+Implies=021D2
+Int=0222C
+Integral=0222B
+Intersection=022C2
+InvisibleComma=02063
+InvisibleTimes=02062
+Iogon=0012E
+Iopf=1D540
+Iota=00399
+Iscr=02110
+Itilde=00128
+Iukcy=00406
+Iuml=000CF
+Jcirc=00134
+Jcy=00419
+Jfr=1D50D
+Jopf=1D541
+Jscr=1D4A5
+Jsercy=00408
+Jukcy=00404
+KHcy=00425
+KJcy=0040C
+Kappa=0039A
+Kcedil=00136
+Kcy=0041A
+Kfr=1D50E
+Kopf=1D542
+Kscr=1D4A6
+LJcy=00409
+LT=0003C
+Lacute=00139
+Lambda=0039B
+Lang=027EA
+Laplacetrf=02112
+Larr=0219E
+Lcaron=0013D
+Lcedil=0013B
+Lcy=0041B
+LeftAngleBracket=027E8
+LeftArrow=02190
+LeftArrowBar=021E4
+LeftArrowRightArrow=021C6
+LeftCeiling=02308
+LeftDoubleBracket=027E6
+LeftDownTeeVector=02961
+LeftDownVector=021C3
+LeftDownVectorBar=02959
+LeftFloor=0230A
+LeftRightArrow=02194
+LeftRightVector=0294E
+LeftTee=022A3
+LeftTeeArrow=021A4
+LeftTeeVector=0295A
+LeftTriangle=022B2
+LeftTriangleBar=029CF
+LeftTriangleEqual=022B4
+LeftUpDownVector=02951
+LeftUpTeeVector=02960
+LeftUpVector=021BF
+LeftUpVectorBar=02958
+LeftVector=021BC
+LeftVectorBar=02952
+Leftarrow=021D0
+Leftrightarrow=021D4
+LessEqualGreater=022DA
+LessFullEqual=02266
+LessGreater=02276
+LessLess=02AA1
+LessSlantEqual=02A7D
+LessTilde=02272
+Lfr=1D50F
+Ll=022D8
+Lleftarrow=021DA
+Lmidot=0013F
+LongLeftArrow=027F5
+LongLeftRightArrow=027F7
+LongRightArrow=027F6
+Longleftarrow=027F8
+Longleftrightarrow=027FA
+Longrightarrow=027F9
+Lopf=1D543
+LowerLeftArrow=02199
+LowerRightArrow=02198
+Lscr=02112
+Lsh=021B0
+Lstrok=00141
+Lt=0226A
+Map=02905
+Mcy=0041C
+MediumSpace=0205F
+Mellintrf=02133
+Mfr=1D510
+MinusPlus=02213
+Mopf=1D544
+Mscr=02133
+Mu=0039C
+NJcy=0040A
+Nacute=00143
+Ncaron=00147
+Ncedil=00145
+Ncy=0041D
+NegativeMediumSpace=0200B
+NegativeThickSpace=0200B
+NegativeThinSpace=0200B
+NegativeVeryThinSpace=0200B
+NestedGreaterGreater=0226B
+NestedLessLess=0226A
+NewLine=0000A
+Nfr=1D511
+NoBreak=02060
+NonBreakingSpace=000A0
+Nopf=02115
+Not=02AEC
+NotCongruent=02262
+NotCupCap=0226D
+NotDoubleVerticalBar=02226
+NotElement=02209
+NotEqual=02260
+NotExists=02204
+NotGreater=0226F
+NotGreaterEqual=02271
+NotGreaterLess=02279
+NotGreaterTilde=02275
+NotLeftTriangle=022EA
+NotLeftTriangleEqual=022EC
+NotLess=0226E
+NotLessEqual=02270
+NotLessGreater=02278
+NotLessTilde=02274
+NotPrecedes=02280
+NotPrecedesSlantEqual=022E0
+NotReverseElement=0220C
+NotRightTriangle=022EB
+NotRightTriangleEqual=022ED
+NotSquareSubsetEqual=022E2
+NotSquareSupersetEqual=022E3
+NotSubsetEqual=02288
+NotSucceeds=02281
+NotSucceedsSlantEqual=022E1
+NotSupersetEqual=02289
+NotTilde=02241
+NotTildeEqual=02244
+NotTildeFullEqual=02247
+NotTildeTilde=02249
+NotVerticalBar=02224
+Nscr=1D4A9
+Ntilde=000D1
+Nu=0039D
+OElig=00152
+Oacute=000D3
+Ocirc=000D4
+Ocy=0041E
+Odblac=00150
+Ofr=1D512
+Ograve=000D2
+Omacr=0014C
+Omega=003A9
+Omicron=0039F
+Oopf=1D546
+OpenCurlyDoubleQuote=0201C
+OpenCurlyQuote=02018
+Or=02A54
+Oscr=1D4AA
+Oslash=000D8
+Otilde=000D5
+Otimes=02A37
+Ouml=000D6
+OverBar=0203E
+OverBrace=023DE
+OverBracket=023B4
+OverParenthesis=023DC
+PartialD=02202
+Pcy=0041F
+Pfr=1D513
+Phi=003A6
+Pi=003A0
+PlusMinus=000B1
+Poincareplane=0210C
+Popf=02119
+Pr=02ABB
+Precedes=0227A
+PrecedesEqual=02AAF
+PrecedesSlantEqual=0227C
+PrecedesTilde=0227E
+Prime=02033
+Product=0220F
+Proportion=02237
+Proportional=0221D
+Pscr=1D4AB
+Psi=003A8
+QUOT=00022
+Qfr=1D514
+Qopf=0211A
+Qscr=1D4AC
+RBarr=02910
+REG=000AE
+Racute=00154
+Rang=027EB
+Rarr=021A0
+Rarrtl=02916
+Rcaron=00158
+Rcedil=00156
+Rcy=00420
+Re=0211C
+ReverseElement=0220B
+ReverseEquilibrium=021CB
+ReverseUpEquilibrium=0296F
+Rfr=0211C
+Rho=003A1
+RightAngleBracket=027E9
+RightArrow=02192
+RightArrowBar=021E5
+RightArrowLeftArrow=021C4
+RightCeiling=02309
+RightDoubleBracket=027E7
+RightDownTeeVector=0295D
+RightDownVector=021C2
+RightDownVectorBar=02955
+RightFloor=0230B
+RightTee=022A2
+RightTeeArrow=021A6
+RightTeeVector=0295B
+RightTriangle=022B3
+RightTriangleBar=029D0
+RightTriangleEqual=022B5
+RightUpDownVector=0294F
+RightUpTeeVector=0295C
+RightUpVector=021BE
+RightUpVectorBar=02954
+RightVector=021C0
+RightVectorBar=02953
+Rightarrow=021D2
+Ropf=0211D
+RoundImplies=02970
+Rrightarrow=021DB
+Rscr=0211B
+Rsh=021B1
+RuleDelayed=029F4
+SHCHcy=00429
+SHcy=00428
+SOFTcy=0042C
+Sacute=0015A
+Sc=02ABC
+Scaron=00160
+Scedil=0015E
+Scirc=0015C
+Scy=00421
+Sfr=1D516
+ShortDownArrow=02193
+ShortLeftArrow=02190
+ShortRightArrow=02192
+ShortUpArrow=02191
+Sigma=003A3
+SmallCircle=02218
+Sopf=1D54A
+Sqrt=0221A
+Square=025A1
+SquareIntersection=02293
+SquareSubset=0228F
+SquareSubsetEqual=02291
+SquareSuperset=02290
+SquareSupersetEqual=02292
+SquareUnion=02294
+Sscr=1D4AE
+Star=022C6
+Sub=022D0
+Subset=022D0
+SubsetEqual=02286
+Succeeds=0227B
+SucceedsEqual=02AB0
+SucceedsSlantEqual=0227D
+SucceedsTilde=0227F
+SuchThat=0220B
+Sum=02211
+Sup=022D1
+Superset=02283
+SupersetEqual=02287
+Supset=022D1
+THORN=000DE
+TRADE=02122
+TSHcy=0040B
+TScy=00426
+Tab=00009
+Tau=003A4
+Tcaron=00164
+Tcedil=00162
+Tcy=00422
+Tfr=1D517
+Therefore=02234
+Theta=00398
+ThinSpace=02009
+Tilde=0223C
+TildeEqual=02243
+TildeFullEqual=02245
+TildeTilde=02248
+Topf=1D54B
+TripleDot=020DB
+Tscr=1D4AF
+Tstrok=00166
+Uacute=000DA
+Uarr=0219F
+Uarrocir=02949
+Ubrcy=0040E
+Ubreve=0016C
+Ucirc=000DB
+Ucy=00423
+Udblac=00170
+Ufr=1D518
+Ugrave=000D9
+Umacr=0016A
+UnderBar=0005F
+UnderBrace=023DF
+UnderBracket=023B5
+UnderParenthesis=023DD
+Union=022C3
+UnionPlus=0228E
+Uogon=00172
+Uopf=1D54C
+UpArrow=02191
+UpArrowBar=02912
+UpArrowDownArrow=021C5
+UpDownArrow=02195
+UpEquilibrium=0296E
+UpTee=022A5
+UpTeeArrow=021A5
+Uparrow=021D1
+Updownarrow=021D5
+UpperLeftArrow=02196
+UpperRightArrow=02197
+Upsi=003D2
+Upsilon=003A5
+Uring=0016E
+Uscr=1D4B0
+Utilde=00168
+Uuml=000DC
+VDash=022AB
+Vbar=02AEB
+Vcy=00412
+Vdash=022A9
+Vdashl=02AE6
+Vee=022C1
+Verbar=02016
+Vert=02016
+VerticalBar=02223
+VerticalLine=0007C
+VerticalSeparator=02758
+VerticalTilde=02240
+VeryThinSpace=0200A
+Vfr=1D519
+Vopf=1D54D
+Vscr=1D4B1
+Vvdash=022AA
+Wcirc=00174
+Wedge=022C0
+Wfr=1D51A
+Wopf=1D54E
+Wscr=1D4B2
+Xfr=1D51B
+Xi=0039E
+Xopf=1D54F
+Xscr=1D4B3
+YAcy=0042F
+YIcy=00407
+YUcy=0042E
+Yacute=000DD
+Ycirc=00176
+Ycy=0042B
+Yfr=1D51C
+Yopf=1D550
+Yscr=1D4B4
+Yuml=00178
+ZHcy=00416
+Zacute=00179
+Zcaron=0017D
+Zcy=00417
+Zdot=0017B
+ZeroWidthSpace=0200B
+Zeta=00396
+Zfr=02128
+Zopf=02124
+Zscr=1D4B5
+aacute=000E1
+abreve=00103
+ac=0223E
+acd=0223F
+acirc=000E2
+acute=000B4
+acy=00430
+aelig=000E6
+af=02061
+afr=1D51E
+agrave=000E0
+alefsym=02135
+aleph=02135
+alpha=003B1
+amacr=00101
+amalg=02A3F
+amp=00026
+and=02227
+andand=02A55
+andd=02A5C
+andslope=02A58
+andv=02A5A
+ang=02220
+ange=029A4
+angle=02220
+angmsd=02221
+angmsdaa=029A8
+angmsdab=029A9
+angmsdac=029AA
+angmsdad=029AB
+angmsdae=029AC
+angmsdaf=029AD
+angmsdag=029AE
+angmsdah=029AF
+angrt=0221F
+angrtvb=022BE
+angrtvbd=0299D
+angsph=02222
+angst=000C5
+angzarr=0237C
+aogon=00105
+aopf=1D552
+ap=02248
+apE=02A70
+apacir=02A6F
+ape=0224A
+apid=0224B
+apos=00027
+approx=02248
+approxeq=0224A
+aring=000E5
+ascr=1D4B6
+ast=0002A
+asymp=02248
+asympeq=0224D
+atilde=000E3
+auml=000E4
+awconint=02233
+awint=02A11
+bNot=02AED
+backcong=0224C
+backepsilon=003F6
+backprime=02035
+backsim=0223D
+backsimeq=022CD
+barvee=022BD
+barwed=02305
+barwedge=02305
+bbrk=023B5
+bbrktbrk=023B6
+bcong=0224C
+bcy=00431
+bdquo=0201E
+becaus=02235
+because=02235
+bemptyv=029B0
+bepsi=003F6
+bernou=0212C
+beta=003B2
+beth=02136
+between=0226C
+bfr=1D51F
+bigcap=022C2
+bigcirc=025EF
+bigcup=022C3
+bigodot=02A00
+bigoplus=02A01
+bigotimes=02A02
+bigsqcup=02A06
+bigstar=02605
+bigtriangledown=025BD
+bigtriangleup=025B3
+biguplus=02A04
+bigvee=022C1
+bigwedge=022C0
+bkarow=0290D
+blacklozenge=029EB
+blacksquare=025AA
+blacktriangle=025B4
+blacktriangledown=025BE
+blacktriangleleft=025C2
+blacktriangleright=025B8
+blank=02423
+blk12=02592
+blk14=02591
+blk34=02593
+block=02588
+bnot=02310
+bopf=1D553
+bot=022A5
+bottom=022A5
+bowtie=022C8
+boxDL=02557
+boxDR=02554
+boxDl=02556
+boxDr=02553
+boxH=02550
+boxHD=02566
+boxHU=02569
+boxHd=02564
+boxHu=02567
+boxUL=0255D
+boxUR=0255A
+boxUl=0255C
+boxUr=02559
+boxV=02551
+boxVH=0256C
+boxVL=02563
+boxVR=02560
+boxVh=0256B
+boxVl=02562
+boxVr=0255F
+boxbox=029C9
+boxdL=02555
+boxdR=02552
+boxdl=02510
+boxdr=0250C
+boxh=02500
+boxhD=02565
+boxhU=02568
+boxhd=0252C
+boxhu=02534
+boxminus=0229F
+boxplus=0229E
+boxtimes=022A0
+boxuL=0255B
+boxuR=02558
+boxul=02518
+boxur=02514
+boxv=02502
+boxvH=0256A
+boxvL=02561
+boxvR=0255E
+boxvh=0253C
+boxvl=02524
+boxvr=0251C
+bprime=02035
+breve=002D8
+brvbar=000A6
+bscr=1D4B7
+bsemi=0204F
+bsim=0223D
+bsime=022CD
+bsol=0005C
+bsolb=029C5
+bsolhsub=027C8
+bull=02022
+bullet=02022
+bump=0224E
+bumpE=02AAE
+bumpe=0224F
+bumpeq=0224F
+cacute=00107
+cap=02229
+capand=02A44
+capbrcup=02A49
+capcap=02A4B
+capcup=02A47
+capdot=02A40
+caret=02041
+caron=002C7
+ccaps=02A4D
+ccaron=0010D
+ccedil=000E7
+ccirc=00109
+ccups=02A4C
+ccupssm=02A50
+cdot=0010B
+cedil=000B8
+cemptyv=029B2
+cent=000A2
+centerdot=000B7
+cfr=1D520
+chcy=00447
+check=02713
+checkmark=02713
+chi=003C7
+cir=025CB
+cirE=029C3
+circ=002C6
+circeq=02257
+circlearrowleft=021BA
+circlearrowright=021BB
+circledR=000AE
+circledS=024C8
+circledast=0229B
+circledcirc=0229A
+circleddash=0229D
+cire=02257
+cirfnint=02A10
+cirmid=02AEF
+cirscir=029C2
+clubs=02663
+clubsuit=02663
+colon=0003A
+colone=02254
+coloneq=02254
+comma=0002C
+commat=00040
+comp=02201
+compfn=02218
+complement=02201
+complexes=02102
+cong=02245
+congdot=02A6D
+conint=0222E
+copf=1D554
+coprod=02210
+copy=000A9
+copysr=02117
+crarr=021B5
+cross=02717
+cscr=1D4B8
+csub=02ACF
+csube=02AD1
+csup=02AD0
+csupe=02AD2
+ctdot=022EF
+cudarrl=02938
+cudarrr=02935
+cuepr=022DE
+cuesc=022DF
+cularr=021B6
+cularrp=0293D
+cup=0222A
+cupbrcap=02A48
+cupcap=02A46
+cupcup=02A4A
+cupdot=0228D
+cupor=02A45
+curarr=021B7
+curarrm=0293C
+curlyeqprec=022DE
+curlyeqsucc=022DF
+curlyvee=022CE
+curlywedge=022CF
+curren=000A4
+curvearrowleft=021B6
+curvearrowright=021B7
+cuvee=022CE
+cuwed=022CF
+cwconint=02232
+cwint=02231
+cylcty=0232D
+dArr=021D3
+dHar=02965
+dagger=02020
+daleth=02138
+darr=02193
+dash=02010
+dashv=022A3
+dbkarow=0290F
+dblac=002DD
+dcaron=0010F
+dcy=00434
+dd=02146
+ddagger=02021
+ddarr=021CA
+ddotseq=02A77
+deg=000B0
+delta=003B4
+demptyv=029B1
+dfisht=0297F
+dfr=1D521
+dharl=021C3
+dharr=021C2
+diam=022C4
+diamond=022C4
+diamondsuit=02666
+diams=02666
+die=000A8
+digamma=003DD
+disin=022F2
+div=000F7
+divide=000F7
+divideontimes=022C7
+divonx=022C7
+djcy=00452
+dlcorn=0231E
+dlcrop=0230D
+dollar=00024
+dopf=1D555
+dot=002D9
+doteq=02250
+doteqdot=02251
+dotminus=02238
+dotplus=02214
+dotsquare=022A1
+doublebarwedge=02306
+downarrow=02193
+downdownarrows=021CA
+downharpoonleft=021C3
+downharpoonright=021C2
+drbkarow=02910
+drcorn=0231F
+drcrop=0230C
+dscr=1D4B9
+dscy=00455
+dsol=029F6
+dstrok=00111
+dtdot=022F1
+dtri=025BF
+dtrif=025BE
+duarr=021F5
+duhar=0296F
+dwangle=029A6
+dzcy=0045F
+dzigrarr=027FF
+eDDot=02A77
+eDot=02251
+eacute=000E9
+easter=02A6E
+ecaron=0011B
+ecir=02256
+ecirc=000EA
+ecolon=02255
+ecy=0044D
+edot=00117
+ee=02147
+efDot=02252
+efr=1D522
+eg=02A9A
+egrave=000E8
+egs=02A96
+egsdot=02A98
+el=02A99
+elinters=023E7
+ell=02113
+els=02A95
+elsdot=02A97
+emacr=00113
+empty=02205
+emptyset=02205
+emptyv=02205
+emsp13=02004
+emsp14=02005
+emsp=02003
+eng=0014B
+ensp=02002
+eogon=00119
+eopf=1D556
+epar=022D5
+eparsl=029E3
+eplus=02A71
+epsi=003B5
+epsilon=003B5
+epsiv=003F5
+eqcirc=02256
+eqcolon=02255
+eqsim=02242
+eqslantgtr=02A96
+eqslantless=02A95
+equals=0003D
+equest=0225F
+equiv=02261
+equivDD=02A78
+eqvparsl=029E5
+erDot=02253
+erarr=02971
+escr=0212F
+esdot=02250
+esim=02242
+eta=003B7
+eth=000F0
+euml=000EB
+euro=020AC
+excl=00021
+exist=02203
+expectation=02130
+exponentiale=02147
+fallingdotseq=02252
+fcy=00444
+female=02640
+ffilig=0FB03
+fflig=0FB00
+ffllig=0FB04
+ffr=1D523
+filig=0FB01
+flat=0266D
+fllig=0FB02
+fltns=025B1
+fnof=00192
+fopf=1D557
+forall=02200
+fork=022D4
+forkv=02AD9
+fpartint=02A0D
+frac12=000BD
+frac13=02153
+frac14=000BC
+frac15=02155
+frac16=02159
+frac18=0215B
+frac23=02154
+frac25=02156
+frac34=000BE
+frac35=02157
+frac38=0215C
+frac45=02158
+frac56=0215A
+frac58=0215D
+frac78=0215E
+frasl=02044
+frown=02322
+fscr=1D4BB
+gE=02267
+gEl=02A8C
+gacute=001F5
+gamma=003B3
+gammad=003DD
+gap=02A86
+gbreve=0011F
+gcirc=0011D
+gcy=00433
+gdot=00121
+ge=02265
+gel=022DB
+geq=02265
+geqq=02267
+geqslant=02A7E
+ges=02A7E
+gescc=02AA9
+gesdot=02A80
+gesdoto=02A82
+gesdotol=02A84
+gesles=02A94
+gfr=1D524
+gg=0226B
+ggg=022D9
+gimel=02137
+gjcy=00453
+gl=02277
+glE=02A92
+gla=02AA5
+glj=02AA4
+gnE=02269
+gnap=02A8A
+gnapprox=02A8A
+gne=02A88
+gneq=02A88
+gneqq=02269
+gnsim=022E7
+gopf=1D558
+grave=00060
+gscr=0210A
+gsim=02273
+gsime=02A8E
+gsiml=02A90
+gt=0003E
+gtcc=02AA7
+gtcir=02A7A
+gtdot=022D7
+gtlPar=02995
+gtquest=02A7C
+gtrapprox=02A86
+gtrarr=02978
+gtrdot=022D7
+gtreqless=022DB
+gtreqqless=02A8C
+gtrless=02277
+gtrsim=02273
+hArr=021D4
+hairsp=0200A
+half=000BD
+hamilt=0210B
+hardcy=0044A
+harr=02194
+harrcir=02948
+harrw=021AD
+hbar=0210F
+hcirc=00125
+hearts=02665
+heartsuit=02665
+hellip=02026
+hercon=022B9
+hfr=1D525
+hksearow=02925
+hkswarow=02926
+hoarr=021FF
+homtht=0223B
+hookleftarrow=021A9
+hookrightarrow=021AA
+hopf=1D559
+horbar=02015
+hscr=1D4BD
+hslash=0210F
+hstrok=00127
+hybull=02043
+hyphen=02010
+iacute=000ED
+ic=02063
+icirc=000EE
+icy=00438
+iecy=00435
+iexcl=000A1
+iff=021D4
+ifr=1D526
+igrave=000EC
+ii=02148
+iiiint=02A0C
+iiint=0222D
+iinfin=029DC
+iiota=02129
+ijlig=00133
+imacr=0012B
+image=02111
+imagline=02110
+imagpart=02111
+imath=00131
+imof=022B7
+imped=001B5
+in=02208
+incare=02105
+infin=0221E
+infintie=029DD
+inodot=00131
+int=0222B
+intcal=022BA
+integers=02124
+intercal=022BA
+intlarhk=02A17
+intprod=02A3C
+iocy=00451
+iogon=0012F
+iopf=1D55A
+iota=003B9
+iprod=02A3C
+iquest=000BF
+iscr=1D4BE
+isin=02208
+isinE=022F9
+isindot=022F5
+isins=022F4
+isinsv=022F3
+isinv=02208
+it=02062
+itilde=00129
+iukcy=00456
+iuml=000EF
+jcirc=00135
+jcy=00439
+jfr=1D527
+jmath=00237
+jopf=1D55B
+jscr=1D4BF
+jsercy=00458
+jukcy=00454
+kappa=003BA
+kappav=003F0
+kcedil=00137
+kcy=0043A
+kfr=1D528
+kgreen=00138
+khcy=00445
+kjcy=0045C
+kopf=1D55C
+kscr=1D4C0
+lAarr=021DA
+lArr=021D0
+lAtail=0291B
+lBarr=0290E
+lE=02266
+lEg=02A8B
+lHar=02962
+lacute=0013A
+laemptyv=029B4
+lagran=02112
+lambda=003BB
+lang=027E8
+langd=02991
+langle=027E8
+lap=02A85
+laquo=000AB
+larr=02190
+larrb=021E4
+larrbfs=0291F
+larrfs=0291D
+larrhk=021A9
+larrlp=021AB
+larrpl=02939
+larrsim=02973
+larrtl=021A2
+lat=02AAB
+latail=02919
+late=02AAD
+lbarr=0290C
+lbbrk=02772
+lbrace=0007B
+lbrack=0005B
+lbrke=0298B
+lbrksld=0298F
+lbrkslu=0298D
+lcaron=0013E
+lcedil=0013C
+lceil=02308
+lcub=0007B
+lcy=0043B
+ldca=02936
+ldquo=0201C
+ldquor=0201E
+ldrdhar=02967
+ldrushar=0294B
+ldsh=021B2
+le=02264
+leftarrow=02190
+leftarrowtail=021A2
+leftharpoondown=021BD
+leftharpoonup=021BC
+leftleftarrows=021C7
+leftrightarrow=02194
+leftrightarrows=021C6
+leftrightharpoons=021CB
+leftrightsquigarrow=021AD
+leftthreetimes=022CB
+leg=022DA
+leq=02264
+leqq=02266
+leqslant=02A7D
+les=02A7D
+lescc=02AA8
+lesdot=02A7F
+lesdoto=02A81
+lesdotor=02A83
+lesges=02A93
+lessapprox=02A85
+lessdot=022D6
+lesseqgtr=022DA
+lesseqqgtr=02A8B
+lessgtr=02276
+lesssim=02272
+lfisht=0297C
+lfloor=0230A
+lfr=1D529
+lg=02276
+lgE=02A91
+lhard=021BD
+lharu=021BC
+lharul=0296A
+lhblk=02584
+ljcy=00459
+ll=0226A
+llarr=021C7
+llcorner=0231E
+llhard=0296B
+lltri=025FA
+lmidot=00140
+lmoust=023B0
+lmoustache=023B0
+lnE=02268
+lnap=02A89
+lnapprox=02A89
+lne=02A87
+lneq=02A87
+lneqq=02268
+lnsim=022E6
+loang=027EC
+loarr=021FD
+lobrk=027E6
+longleftarrow=027F5
+longleftrightarrow=027F7
+longmapsto=027FC
+longrightarrow=027F6
+looparrowleft=021AB
+looparrowright=021AC
+lopar=02985
+lopf=1D55D
+loplus=02A2D
+lotimes=02A34
+lowast=02217
+lowbar=0005F
+loz=025CA
+lozenge=025CA
+lozf=029EB
+lpar=00028
+lparlt=02993
+lrarr=021C6
+lrcorner=0231F
+lrhar=021CB
+lrhard=0296D
+lrm=0200E
+lrtri=022BF
+lsaquo=02039
+lscr=1D4C1
+lsh=021B0
+lsim=02272
+lsime=02A8D
+lsimg=02A8F
+lsqb=0005B
+lsquo=02018
+lsquor=0201A
+lstrok=00142
+lt=0003C
+ltcc=02AA6
+ltcir=02A79
+ltdot=022D6
+lthree=022CB
+ltimes=022C9
+ltlarr=02976
+ltquest=02A7B
+ltrPar=02996
+ltri=025C3
+ltrie=022B4
+ltrif=025C2
+lurdshar=0294A
+luruhar=02966
+mDDot=0223A
+macr=000AF
+male=02642
+malt=02720
+maltese=02720
+map=021A6
+mapsto=021A6
+mapstodown=021A7
+mapstoleft=021A4
+mapstoup=021A5
+marker=025AE
+mcomma=02A29
+mcy=0043C
+mdash=02014
+measuredangle=02221
+mfr=1D52A
+mho=02127
+micro=000B5
+mid=02223
+midast=0002A
+midcir=02AF0
+middot=000B7
+minus=02212
+minusb=0229F
+minusd=02238
+minusdu=02A2A
+mlcp=02ADB
+mldr=02026
+mnplus=02213
+models=022A7
+mopf=1D55E
+mp=02213
+mscr=1D4C2
+mstpos=0223E
+mu=003BC
+multimap=022B8
+mumap=022B8
+nLeftarrow=021CD
+nLeftrightarrow=021CE
+nRightarrow=021CF
+nVDash=022AF
+nVdash=022AE
+nabla=02207
+nacute=00144
+nap=02249
+napos=00149
+napprox=02249
+natur=0266E
+natural=0266E
+naturals=02115
+nbsp=000A0
+ncap=02A43
+ncaron=00148
+ncedil=00146
+ncong=02247
+ncup=02A42
+ncy=0043D
+ndash=02013
+ne=02260
+neArr=021D7
+nearhk=02924
+nearr=02197
+nearrow=02197
+nequiv=02262
+nesear=02928
+nexist=02204
+nexists=02204
+nfr=1D52B
+nge=02271
+ngeq=02271
+ngsim=02275
+ngt=0226F
+ngtr=0226F
+nhArr=021CE
+nharr=021AE
+nhpar=02AF2
+ni=0220B
+nis=022FC
+nisd=022FA
+niv=0220B
+njcy=0045A
+nlArr=021CD
+nlarr=0219A
+nldr=02025
+nle=02270
+nleftarrow=0219A
+nleftrightarrow=021AE
+nleq=02270
+nless=0226E
+nlsim=02274
+nlt=0226E
+nltri=022EA
+nltrie=022EC
+nmid=02224
+nopf=1D55F
+not=000AC
+notin=02209
+notinva=02209
+notinvb=022F7
+notinvc=022F6
+notni=0220C
+notniva=0220C
+notnivb=022FE
+notnivc=022FD
+npar=02226
+nparallel=02226
+npolint=02A14
+npr=02280
+nprcue=022E0
+nprec=02280
+nrArr=021CF
+nrarr=0219B
+nrightarrow=0219B
+nrtri=022EB
+nrtrie=022ED
+nsc=02281
+nsccue=022E1
+nscr=1D4C3
+nshortmid=02224
+nshortparallel=02226
+nsim=02241
+nsime=02244
+nsimeq=02244
+nsmid=02224
+nspar=02226
+nsqsube=022E2
+nsqsupe=022E3
+nsub=02284
+nsube=02288
+nsubseteq=02288
+nsucc=02281
+nsup=02285
+nsupe=02289
+nsupseteq=02289
+ntgl=02279
+ntilde=000F1
+ntlg=02278
+ntriangleleft=022EA
+ntrianglelefteq=022EC
+ntriangleright=022EB
+ntrianglerighteq=022ED
+nu=003BD
+num=00023
+numero=02116
+numsp=02007
+nvDash=022AD
+nvHarr=02904
+nvdash=022AC
+nvinfin=029DE
+nvlArr=02902
+nvrArr=02903
+nwArr=021D6
+nwarhk=02923
+nwarr=02196
+nwarrow=02196
+nwnear=02927
+oS=024C8
+oacute=000F3
+oast=0229B
+ocir=0229A
+ocirc=000F4
+ocy=0043E
+odash=0229D
+odblac=00151
+odiv=02A38
+odot=02299
+odsold=029BC
+oelig=00153
+ofcir=029BF
+ofr=1D52C
+ogon=002DB
+ograve=000F2
+ogt=029C1
+ohbar=029B5
+ohm=003A9
+oint=0222E
+olarr=021BA
+olcir=029BE
+olcross=029BB
+oline=0203E
+olt=029C0
+omacr=0014D
+omega=003C9
+omicron=003BF
+omid=029B6
+ominus=02296
+oopf=1D560
+opar=029B7
+operp=029B9
+oplus=02295
+or=02228
+orarr=021BB
+ord=02A5D
+order=02134
+orderof=02134
+ordf=000AA
+ordm=000BA
+origof=022B6
+oror=02A56
+orslope=02A57
+orv=02A5B
+oscr=02134
+oslash=000F8
+osol=02298
+otilde=000F5
+otimes=02297
+otimesas=02A36
+ouml=000F6
+ovbar=0233D
+par=02225
+para=000B6
+parallel=02225
+parsim=02AF3
+parsl=02AFD
+part=02202
+pcy=0043F
+percnt=00025
+period=0002E
+permil=02030
+perp=022A5
+pertenk=02031
+pfr=1D52D
+phi=003C6
+phiv=003D5
+phmmat=02133
+phone=0260E
+pi=003C0
+pitchfork=022D4
+piv=003D6
+planck=0210F
+planckh=0210E
+plankv=0210F
+plus=0002B
+plusacir=02A23
+plusb=0229E
+pluscir=02A22
+plusdo=02214
+plusdu=02A25
+pluse=02A72
+plusmn=000B1
+plussim=02A26
+plustwo=02A27
+pm=000B1
+pointint=02A15
+popf=1D561
+pound=000A3
+pr=0227A
+prE=02AB3
+prap=02AB7
+prcue=0227C
+pre=02AAF
+prec=0227A
+precapprox=02AB7
+preccurlyeq=0227C
+preceq=02AAF
+precnapprox=02AB9
+precneqq=02AB5
+precnsim=022E8
+precsim=0227E
+prime=02032
+primes=02119
+prnE=02AB5
+prnap=02AB9
+prnsim=022E8
+prod=0220F
+profalar=0232E
+profline=02312
+profsurf=02313
+prop=0221D
+propto=0221D
+prsim=0227E
+prurel=022B0
+pscr=1D4C5
+psi=003C8
+puncsp=02008
+qfr=1D52E
+qint=02A0C
+qopf=1D562
+qprime=02057
+qscr=1D4C6
+quaternions=0210D
+quatint=02A16
+quest=0003F
+questeq=0225F
+quot=00022
+rAarr=021DB
+rArr=021D2
+rAtail=0291C
+rBarr=0290F
+rHar=02964
+racute=00155
+radic=0221A
+raemptyv=029B3
+rang=027E9
+rangd=02992
+range=029A5
+rangle=027E9
+raquo=000BB
+rarr=02192
+rarrap=02975
+rarrb=021E5
+rarrbfs=02920
+rarrc=02933
+rarrfs=0291E
+rarrhk=021AA
+rarrlp=021AC
+rarrpl=02945
+rarrsim=02974
+rarrtl=021A3
+rarrw=0219D
+ratail=0291A
+ratio=02236
+rationals=0211A
+rbarr=0290D
+rbbrk=02773
+rbrace=0007D
+rbrack=0005D
+rbrke=0298C
+rbrksld=0298E
+rbrkslu=02990
+rcaron=00159
+rcedil=00157
+rceil=02309
+rcub=0007D
+rcy=00440
+rdca=02937
+rdldhar=02969
+rdquo=0201D
+rdquor=0201D
+rdsh=021B3
+real=0211C
+realine=0211B
+realpart=0211C
+reals=0211D
+rect=025AD
+reg=000AE
+rfisht=0297D
+rfloor=0230B
+rfr=1D52F
+rhard=021C1
+rharu=021C0
+rharul=0296C
+rho=003C1
+rhov=003F1
+rightarrow=02192
+rightarrowtail=021A3
+rightharpoondown=021C1
+rightharpoonup=021C0
+rightleftarrows=021C4
+rightleftharpoons=021CC
+rightrightarrows=021C9
+rightsquigarrow=0219D
+rightthreetimes=022CC
+ring=002DA
+risingdotseq=02253
+rlarr=021C4
+rlhar=021CC
+rlm=0200F
+rmoust=023B1
+rmoustache=023B1
+rnmid=02AEE
+roang=027ED
+roarr=021FE
+robrk=027E7
+ropar=02986
+ropf=1D563
+roplus=02A2E
+rotimes=02A35
+rpar=00029
+rpargt=02994
+rppolint=02A12
+rrarr=021C9
+rsaquo=0203A
+rscr=1D4C7
+rsh=021B1
+rsqb=0005D
+rsquo=02019
+rsquor=02019
+rthree=022CC
+rtimes=022CA
+rtri=025B9
+rtrie=022B5
+rtrif=025B8
+rtriltri=029CE
+ruluhar=02968
+rx=0211E
+sacute=0015B
+sbquo=0201A
+sc=0227B
+scE=02AB4
+scap=02AB8
+scaron=00161
+sccue=0227D
+sce=02AB0
+scedil=0015F
+scirc=0015D
+scnE=02AB6
+scnap=02ABA
+scnsim=022E9
+scpolint=02A13
+scsim=0227F
+scy=00441
+sdot=022C5
+sdotb=022A1
+sdote=02A66
+seArr=021D8
+searhk=02925
+searr=02198
+searrow=02198
+sect=000A7
+semi=0003B
+seswar=02929
+setminus=02216
+setmn=02216
+sext=02736
+sfr=1D530
+sfrown=02322
+sharp=0266F
+shchcy=00449
+shcy=00448
+shortmid=02223
+shortparallel=02225
+shy=000AD
+sigma=003C3
+sigmaf=003C2
+sigmav=003C2
+sim=0223C
+simdot=02A6A
+sime=02243
+simeq=02243
+simg=02A9E
+simgE=02AA0
+siml=02A9D
+simlE=02A9F
+simne=02246
+simplus=02A24
+simrarr=02972
+slarr=02190
+smallsetminus=02216
+smashp=02A33
+smeparsl=029E4
+smid=02223
+smile=02323
+smt=02AAA
+smte=02AAC
+softcy=0044C
+sol=0002F
+solb=029C4
+solbar=0233F
+sopf=1D564
+spades=02660
+spadesuit=02660
+spar=02225
+sqcap=02293
+sqcup=02294
+sqsub=0228F
+sqsube=02291
+sqsubset=0228F
+sqsubseteq=02291
+sqsup=02290
+sqsupe=02292
+sqsupset=02290
+sqsupseteq=02292
+squ=025A1
+square=025A1
+squarf=025AA
+squf=025AA
+srarr=02192
+sscr=1D4C8
+ssetmn=02216
+ssmile=02323
+sstarf=022C6
+star=02606
+starf=02605
+straightepsilon=003F5
+straightphi=003D5
+strns=000AF
+sub=02282
+subE=02AC5
+subdot=02ABD
+sube=02286
+subedot=02AC3
+submult=02AC1
+subnE=02ACB
+subne=0228A
+subplus=02ABF
+subrarr=02979
+subset=02282
+subseteq=02286
+subseteqq=02AC5
+subsetneq=0228A
+subsetneqq=02ACB
+subsim=02AC7
+subsub=02AD5
+subsup=02AD3
+succ=0227B
+succapprox=02AB8
+succcurlyeq=0227D
+succeq=02AB0
+succnapprox=02ABA
+succneqq=02AB6
+succnsim=022E9
+succsim=0227F
+sum=02211
+sung=0266A
+sup1=000B9
+sup2=000B2
+sup3=000B3
+sup=02283
+supE=02AC6
+supdot=02ABE
+supdsub=02AD8
+supe=02287
+supedot=02AC4
+suphsol=027C9
+suphsub=02AD7
+suplarr=0297B
+supmult=02AC2
+supnE=02ACC
+supne=0228B
+supplus=02AC0
+supset=02283
+supseteq=02287
+supseteqq=02AC6
+supsetneq=0228B
+supsetneqq=02ACC
+supsim=02AC8
+supsub=02AD4
+supsup=02AD6
+swArr=021D9
+swarhk=02926
+swarr=02199
+swarrow=02199
+swnwar=0292A
+szlig=000DF
+target=02316
+tau=003C4
+tbrk=023B4
+tcaron=00165
+tcedil=00163
+tcy=00442
+tdot=020DB
+telrec=02315
+tfr=1D531
+there4=02234
+therefore=02234
+theta=003B8
+thetasym=003D1
+thetav=003D1
+thickapprox=02248
+thicksim=0223C
+thinsp=02009
+thkap=02248
+thksim=0223C
+thorn=000FE
+tilde=002DC
+times=000D7
+timesb=022A0
+timesbar=02A31
+timesd=02A30
+tint=0222D
+toea=02928
+top=022A4
+topbot=02336
+topcir=02AF1
+topf=1D565
+topfork=02ADA
+tosa=02929
+tprime=02034
+trade=02122
+triangle=025B5
+triangledown=025BF
+triangleleft=025C3
+trianglelefteq=022B4
+triangleq=0225C
+triangleright=025B9
+trianglerighteq=022B5
+tridot=025EC
+trie=0225C
+triminus=02A3A
+triplus=02A39
+trisb=029CD
+tritime=02A3B
+trpezium=023E2
+tscr=1D4C9
+tscy=00446
+tshcy=0045B
+tstrok=00167
+twixt=0226C
+twoheadleftarrow=0219E
+twoheadrightarrow=021A0
+uArr=021D1
+uHar=02963
+uacute=000FA
+uarr=02191
+ubrcy=0045E
+ubreve=0016D
+ucirc=000FB
+ucy=00443
+udarr=021C5
+udblac=00171
+udhar=0296E
+ufisht=0297E
+ufr=1D532
+ugrave=000F9
+uharl=021BF
+uharr=021BE
+uhblk=02580
+ulcorn=0231C
+ulcorner=0231C
+ulcrop=0230F
+ultri=025F8
+umacr=0016B
+uml=000A8
+uogon=00173
+uopf=1D566
+uparrow=02191
+updownarrow=02195
+upharpoonleft=021BF
+upharpoonright=021BE
+uplus=0228E
+upsi=003C5
+upsih=003D2
+upsilon=003C5
+upuparrows=021C8
+urcorn=0231D
+urcorner=0231D
+urcrop=0230E
+uring=0016F
+urtri=025F9
+uscr=1D4CA
+utdot=022F0
+utilde=00169
+utri=025B5
+utrif=025B4
+uuarr=021C8
+uuml=000FC
+uwangle=029A7
+vArr=021D5
+vBar=02AE8
+vBarv=02AE9
+vDash=022A8
+vangrt=0299C
+varepsilon=003F5
+varkappa=003F0
+varnothing=02205
+varphi=003D5
+varpi=003D6
+varpropto=0221D
+varr=02195
+varrho=003F1
+varsigma=003C2
+vartheta=003D1
+vartriangleleft=022B2
+vartriangleright=022B3
+vcy=00432
+vdash=022A2
+vee=02228
+veebar=022BB
+veeeq=0225A
+vellip=022EE
+verbar=0007C
+vert=0007C
+vfr=1D533
+vltri=022B2
+vopf=1D567
+vprop=0221D
+vrtri=022B3
+vscr=1D4CB
+vzigzag=0299A
+wcirc=00175
+wedbar=02A5F
+wedge=02227
+wedgeq=02259
+weierp=02118
+wfr=1D534
+wopf=1D568
+wp=02118
+wr=02240
+wreath=02240
+wscr=1D4CC
+xcap=022C2
+xcirc=025EF
+xcup=022C3
+xdtri=025BD
+xfr=1D535
+xhArr=027FA
+xharr=027F7
+xi=003BE
+xlArr=027F8
+xlarr=027F5
+xmap=027FC
+xnis=022FB
+xodot=02A00
+xopf=1D569
+xoplus=02A01
+xotime=02A02
+xrArr=027F9
+xrarr=027F6
+xscr=1D4CD
+xsqcup=02A06
+xuplus=02A04
+xutri=025B3
+xvee=022C1
+xwedge=022C0
+yacute=000FD
+yacy=0044F
+ycirc=00177
+ycy=0044B
+yen=000A5
+yfr=1D536
+yicy=00457
+yopf=1D56A
+yscr=1D4CE
+yucy=0044E
+yuml=000FF
+zacute=0017A
+zcaron=0017E
+zcy=00437
+zdot=0017C
+zeetrf=02128
+zeta=003B6
+zfr=1D537
+zhcy=00436
+zigrarr=021DD
+zopf=1D56B
+zscr=1D4CF
+zwj=0200D
+zwnj=0200C
diff --git a/server/src/org/jsoup/nodes/package-info.java b/server/src/org/jsoup/nodes/package-info.java
new file mode 100644
index 0000000000..24b12803ff
--- /dev/null
+++ b/server/src/org/jsoup/nodes/package-info.java
@@ -0,0 +1,4 @@
+/**
+ HTML document structure nodes.
+ */
+package org.jsoup.nodes; \ No newline at end of file