diff options
author | Leif Åstrand <leif@vaadin.com> | 2012-08-09 16:25:06 +0300 |
---|---|---|
committer | Leif Åstrand <leif@vaadin.com> | 2012-08-09 16:39:36 +0300 |
commit | bfaf549a7814cfabbc6ce95b6c865d0fff0b5ead (patch) | |
tree | 548bbddcff0eef2a41ae526f1e1a8b77c003b974 /src/org/jsoup/nodes | |
parent | f673c7b2655620fb1be87dd90d2edfb412f9fe5c (diff) | |
download | vaadin-framework-bfaf549a7814cfabbc6ce95b6c865d0fff0b5ead.tar.gz vaadin-framework-bfaf549a7814cfabbc6ce95b6c865d0fff0b5ead.zip |
Include jsoup library for modifying bootstrap page DOM (#9274)
Diffstat (limited to 'src/org/jsoup/nodes')
-rw-r--r-- | src/org/jsoup/nodes/Attribute.java | 131 | ||||
-rw-r--r-- | src/org/jsoup/nodes/Attributes.java | 249 | ||||
-rw-r--r-- | src/org/jsoup/nodes/Comment.java | 46 | ||||
-rw-r--r-- | src/org/jsoup/nodes/DataNode.java | 62 | ||||
-rw-r--r-- | src/org/jsoup/nodes/Document.java | 350 | ||||
-rw-r--r-- | src/org/jsoup/nodes/DocumentType.java | 46 | ||||
-rw-r--r-- | src/org/jsoup/nodes/Element.java | 1119 | ||||
-rw-r--r-- | src/org/jsoup/nodes/Entities.java | 184 | ||||
-rw-r--r-- | src/org/jsoup/nodes/Node.java | 615 | ||||
-rw-r--r-- | src/org/jsoup/nodes/TextNode.java | 175 | ||||
-rw-r--r-- | src/org/jsoup/nodes/XmlDeclaration.java | 48 | ||||
-rw-r--r-- | src/org/jsoup/nodes/entities-base.properties | 106 | ||||
-rw-r--r-- | src/org/jsoup/nodes/entities-full.properties | 2032 | ||||
-rw-r--r-- | src/org/jsoup/nodes/package-info.java | 4 |
14 files changed, 5167 insertions, 0 deletions
diff --git a/src/org/jsoup/nodes/Attribute.java b/src/org/jsoup/nodes/Attribute.java new file mode 100644 index 0000000000..02eb29db83 --- /dev/null +++ b/src/org/jsoup/nodes/Attribute.java @@ -0,0 +1,131 @@ +package org.jsoup.nodes; + +import org.jsoup.helper.Validate; + +import java.util.Map; + +/** + A single key + value attribute. Keys are trimmed and normalised to lower-case. + + @author Jonathan Hedley, jonathan@hedley.net */ +public class Attribute implements Map.Entry<String, String>, Cloneable { + private String key; + private String value; + + /** + * Create a new attribute from unencoded (raw) key and value. + * @param key attribute key + * @param value attribute value + * @see #createFromEncoded + */ + public Attribute(String key, String value) { + Validate.notEmpty(key); + Validate.notNull(value); + this.key = key.trim().toLowerCase(); + this.value = value; + } + + /** + Get the attribute key. + @return the attribute key + */ + public String getKey() { + return key; + } + + /** + Set the attribute key. Gets normalised as per the constructor method. + @param key the new key; must not be null + */ + public void setKey(String key) { + Validate.notEmpty(key); + this.key = key.trim().toLowerCase(); + } + + /** + Get the attribute value. + @return the attribute value + */ + public String getValue() { + return value; + } + + /** + Set the attribute value. + @param value the new attribute value; must not be null + */ + public String setValue(String value) { + Validate.notNull(value); + String old = this.value; + this.value = value; + return old; + } + + /** + Get the HTML representation of this attribute; e.g. {@code href="index.html"}. 
+ @return HTML + */ + public String html() { + return key + "=\"" + Entities.escape(value, (new Document("")).outputSettings()) + "\""; + } + + protected void html(StringBuilder accum, Document.OutputSettings out) { + accum + .append(key) + .append("=\"") + .append(Entities.escape(value, out)) + .append("\""); + } + + /** + Get the string representation of this attribute, implemented as {@link #html()}. + @return string + */ + public String toString() { + return html(); + } + + /** + * Create a new Attribute from an unencoded key and a HTML attribute encoded value. + * @param unencodedKey assumes the key is not encoded, as can be only run of simple \w chars. + * @param encodedValue HTML attribute encoded value + * @return attribute + */ + public static Attribute createFromEncoded(String unencodedKey, String encodedValue) { + String value = Entities.unescape(encodedValue, true); + return new Attribute(unencodedKey, value); + } + + protected boolean isDataAttribute() { + return key.startsWith(Attributes.dataPrefix) && key.length() > Attributes.dataPrefix.length(); + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (!(o instanceof Attribute)) return false; + + Attribute attribute = (Attribute) o; + + if (key != null ? !key.equals(attribute.key) : attribute.key != null) return false; + if (value != null ? !value.equals(attribute.value) : attribute.value != null) return false; + + return true; + } + + @Override + public int hashCode() { + int result = key != null ? key.hashCode() : 0; + result = 31 * result + (value != null ? 
value.hashCode() : 0); + return result; + } + + @Override + public Attribute clone() { + try { + return (Attribute) super.clone(); // only fields are immutable strings key and value, so no more deep copy required + } catch (CloneNotSupportedException e) { + throw new RuntimeException(e); + } + } +} diff --git a/src/org/jsoup/nodes/Attributes.java b/src/org/jsoup/nodes/Attributes.java new file mode 100644 index 0000000000..9436750fc9 --- /dev/null +++ b/src/org/jsoup/nodes/Attributes.java @@ -0,0 +1,249 @@ +package org.jsoup.nodes; + +import org.jsoup.helper.Validate; + +import java.util.*; + +/** + * The attributes of an Element. + * <p/> + * Attributes are treated as a map: there can be only one value associated with an attribute key. + * <p/> + * Attribute key and value comparisons are done case insensitively, and keys are normalised to + * lower-case. + * + * @author Jonathan Hedley, jonathan@hedley.net + */ +public class Attributes implements Iterable<Attribute>, Cloneable { + protected static final String dataPrefix = "data-"; + + private LinkedHashMap<String, Attribute> attributes = null; + // linked hash map to preserve insertion order. + // null be default as so many elements have no attributes -- saves a good chunk of memory + + /** + Get an attribute value by key. + @param key the attribute key + @return the attribute value if set; or empty string if not set. + @see #hasKey(String) + */ + public String get(String key) { + Validate.notEmpty(key); + + if (attributes == null) + return ""; + + Attribute attr = attributes.get(key.toLowerCase()); + return attr != null ? attr.getValue() : ""; + } + + /** + Set a new attribute, or replace an existing one by key. + @param key attribute key + @param value attribute value + */ + public void put(String key, String value) { + Attribute attr = new Attribute(key, value); + put(attr); + } + + /** + Set a new attribute, or replace an existing one by key. 
+ @param attribute attribute + */ + public void put(Attribute attribute) { + Validate.notNull(attribute); + if (attributes == null) + attributes = new LinkedHashMap<String, Attribute>(2); + attributes.put(attribute.getKey(), attribute); + } + + /** + Remove an attribute by key. + @param key attribute key to remove + */ + public void remove(String key) { + Validate.notEmpty(key); + if (attributes == null) + return; + attributes.remove(key.toLowerCase()); + } + + /** + Tests if these attributes contain an attribute with this key. + @param key key to check for + @return true if key exists, false otherwise + */ + public boolean hasKey(String key) { + return attributes != null && attributes.containsKey(key.toLowerCase()); + } + + /** + Get the number of attributes in this set. + @return size + */ + public int size() { + if (attributes == null) + return 0; + return attributes.size(); + } + + /** + Add all the attributes from the incoming set to this set. + @param incoming attributes to add to these attributes. + */ + public void addAll(Attributes incoming) { + if (incoming.size() == 0) + return; + if (attributes == null) + attributes = new LinkedHashMap<String, Attribute>(incoming.size()); + attributes.putAll(incoming.attributes); + } + + public Iterator<Attribute> iterator() { + return asList().iterator(); + } + + /** + Get the attributes as a List, for iteration. Do not modify the keys of the attributes via this view, as changes + to keys will not be recognised in the containing set. + @return an view of the attributes as a List. 
+ */ + public List<Attribute> asList() { + if (attributes == null) + return Collections.emptyList(); + + List<Attribute> list = new ArrayList<Attribute>(attributes.size()); + for (Map.Entry<String, Attribute> entry : attributes.entrySet()) { + list.add(entry.getValue()); + } + return Collections.unmodifiableList(list); + } + + /** + * Retrieves a filtered view of attributes that are HTML5 custom data attributes; that is, attributes with keys + * starting with {@code data-}. + * @return map of custom data attributes. + */ + public Map<String, String> dataset() { + return new Dataset(); + } + + /** + Get the HTML representation of these attributes. + @return HTML + */ + public String html() { + StringBuilder accum = new StringBuilder(); + html(accum, (new Document("")).outputSettings()); // output settings a bit funky, but this html() seldom used + return accum.toString(); + } + + void html(StringBuilder accum, Document.OutputSettings out) { + if (attributes == null) + return; + + for (Map.Entry<String, Attribute> entry : attributes.entrySet()) { + Attribute attribute = entry.getValue(); + accum.append(" "); + attribute.html(accum, out); + } + } + + public String toString() { + return html(); + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (!(o instanceof Attributes)) return false; + + Attributes that = (Attributes) o; + + if (attributes != null ? !attributes.equals(that.attributes) : that.attributes != null) return false; + + return true; + } + + @Override + public int hashCode() { + return attributes != null ? 
attributes.hashCode() : 0; + } + + @Override + public Attributes clone() { + if (attributes == null) + return new Attributes(); + + Attributes clone; + try { + clone = (Attributes) super.clone(); + } catch (CloneNotSupportedException e) { + throw new RuntimeException(e); + } + clone.attributes = new LinkedHashMap<String, Attribute>(attributes.size()); + for (Attribute attribute: this) + clone.attributes.put(attribute.getKey(), attribute.clone()); + return clone; + } + + private class Dataset extends AbstractMap<String, String> { + + private Dataset() { + if (attributes == null) + attributes = new LinkedHashMap<String, Attribute>(2); + } + + public Set<Entry<String, String>> entrySet() { + return new EntrySet(); + } + + @Override + public String put(String key, String value) { + String dataKey = dataKey(key); + String oldValue = hasKey(dataKey) ? attributes.get(dataKey).getValue() : null; + Attribute attr = new Attribute(dataKey, value); + attributes.put(dataKey, attr); + return oldValue; + } + + private class EntrySet extends AbstractSet<Map.Entry<String, String>> { + public Iterator<Map.Entry<String, String>> iterator() { + return new DatasetIterator(); + } + + public int size() { + int count = 0; + Iterator iter = new DatasetIterator(); + while (iter.hasNext()) + count++; + return count; + } + } + + private class DatasetIterator implements Iterator<Map.Entry<String, String>> { + private Iterator<Attribute> attrIter = attributes.values().iterator(); + private Attribute attr; + public boolean hasNext() { + while (attrIter.hasNext()) { + attr = attrIter.next(); + if (attr.isDataAttribute()) return true; + } + return false; + } + + public Entry<String, String> next() { + return new Attribute(attr.getKey().substring(dataPrefix.length()), attr.getValue()); + } + + public void remove() { + attributes.remove(attr.getKey()); + } + } + } + + private static String dataKey(String key) { + return dataPrefix + key; + } +} diff --git a/src/org/jsoup/nodes/Comment.java 
b/src/org/jsoup/nodes/Comment.java new file mode 100644 index 0000000000..37fd4368fa --- /dev/null +++ b/src/org/jsoup/nodes/Comment.java @@ -0,0 +1,46 @@ +package org.jsoup.nodes; + +/** + A comment node. + + @author Jonathan Hedley, jonathan@hedley.net */ +public class Comment extends Node { + private static final String COMMENT_KEY = "comment"; + + /** + Create a new comment node. + @param data The contents of the comment + @param baseUri base URI + */ + public Comment(String data, String baseUri) { + super(baseUri); + attributes.put(COMMENT_KEY, data); + } + + public String nodeName() { + return "#comment"; + } + + /** + Get the contents of the comment. + @return comment content + */ + public String getData() { + return attributes.get(COMMENT_KEY); + } + + void outerHtmlHead(StringBuilder accum, int depth, Document.OutputSettings out) { + if (out.prettyPrint()) + indent(accum, depth, out); + accum + .append("<!--") + .append(getData()) + .append("-->"); + } + + void outerHtmlTail(StringBuilder accum, int depth, Document.OutputSettings out) {} + + public String toString() { + return outerHtml(); + } +} diff --git a/src/org/jsoup/nodes/DataNode.java b/src/org/jsoup/nodes/DataNode.java new file mode 100644 index 0000000000..a64f56f0a4 --- /dev/null +++ b/src/org/jsoup/nodes/DataNode.java @@ -0,0 +1,62 @@ +package org.jsoup.nodes; + +/** + A data node, for contents of style, script tags etc, where contents should not show in text(). + + @author Jonathan Hedley, jonathan@hedley.net */ +public class DataNode extends Node{ + private static final String DATA_KEY = "data"; + + /** + Create a new DataNode. + @param data data contents + @param baseUri base URI + */ + public DataNode(String data, String baseUri) { + super(baseUri); + attributes.put(DATA_KEY, data); + } + + public String nodeName() { + return "#data"; + } + + /** + Get the data contents of this node. Will be unescaped and with original new lines, space etc. 
+ @return data + */ + public String getWholeData() { + return attributes.get(DATA_KEY); + } + + /** + * Set the data contents of this node. + * @param data unencoded data + * @return this node, for chaining + */ + public DataNode setWholeData(String data) { + attributes.put(DATA_KEY, data); + return this; + } + + void outerHtmlHead(StringBuilder accum, int depth, Document.OutputSettings out) { + accum.append(getWholeData()); // data is not escaped in return from data nodes, so " in script, style is plain + } + + void outerHtmlTail(StringBuilder accum, int depth, Document.OutputSettings out) {} + + public String toString() { + return outerHtml(); + } + + /** + Create a new DataNode from HTML encoded data. + @param encodedData encoded data + @param baseUri bass URI + @return new DataNode + */ + public static DataNode createFromEncoded(String encodedData, String baseUri) { + String data = Entities.unescape(encodedData); + return new DataNode(data, baseUri); + } +} diff --git a/src/org/jsoup/nodes/Document.java b/src/org/jsoup/nodes/Document.java new file mode 100644 index 0000000000..adb371ce14 --- /dev/null +++ b/src/org/jsoup/nodes/Document.java @@ -0,0 +1,350 @@ +package org.jsoup.nodes; + +import org.jsoup.helper.Validate; +import org.jsoup.parser.Tag; +import org.jsoup.select.Elements; + +import java.nio.charset.Charset; +import java.nio.charset.CharsetEncoder; +import java.util.ArrayList; +import java.util.List; + +/** + A HTML Document. + + @author Jonathan Hedley, jonathan@hedley.net */ +public class Document extends Element { + private OutputSettings outputSettings = new OutputSettings(); + private QuirksMode quirksMode = QuirksMode.noQuirks; + + /** + Create a new, empty Document. + @param baseUri base URI of document + @see org.jsoup.Jsoup#parse + @see #createShell + */ + public Document(String baseUri) { + super(Tag.valueOf("#root"), baseUri); + } + + /** + Create a valid, empty shell of a document, suitable for adding more elements to. 
+ @param baseUri baseUri of document + @return document with html, head, and body elements. + */ + static public Document createShell(String baseUri) { + Validate.notNull(baseUri); + + Document doc = new Document(baseUri); + Element html = doc.appendElement("html"); + html.appendElement("head"); + html.appendElement("body"); + + return doc; + } + + /** + Accessor to the document's {@code head} element. + @return {@code head} + */ + public Element head() { + return findFirstElementByTagName("head", this); + } + + /** + Accessor to the document's {@code body} element. + @return {@code body} + */ + public Element body() { + return findFirstElementByTagName("body", this); + } + + /** + Get the string contents of the document's {@code title} element. + @return Trimmed title, or empty string if none set. + */ + public String title() { + Element titleEl = getElementsByTag("title").first(); + return titleEl != null ? titleEl.text().trim() : ""; + } + + /** + Set the document's {@code title} element. Updates the existing element, or adds {@code title} to {@code head} if + not present + @param title string to set as title + */ + public void title(String title) { + Validate.notNull(title); + Element titleEl = getElementsByTag("title").first(); + if (titleEl == null) { // add to head + head().appendElement("title").text(title); + } else { + titleEl.text(title); + } + } + + /** + Create a new Element, with this document's base uri. Does not make the new element a child of this document. + @param tagName element tag name (e.g. {@code a}) + @return new element + */ + public Element createElement(String tagName) { + return new Element(Tag.valueOf(tagName), this.baseUri()); + } + + /** + Normalise the document. This happens after the parse phase so generally does not need to be called. + Moves any text content that is not in the body element into the body. 
+ @return this document after normalisation + */ + public Document normalise() { + Element htmlEl = findFirstElementByTagName("html", this); + if (htmlEl == null) + htmlEl = appendElement("html"); + if (head() == null) + htmlEl.prependElement("head"); + if (body() == null) + htmlEl.appendElement("body"); + + // pull text nodes out of root, html, and head els, and push into body. non-text nodes are already taken care + // of. do in inverse order to maintain text order. + normaliseTextNodes(head()); + normaliseTextNodes(htmlEl); + normaliseTextNodes(this); + + normaliseStructure("head", htmlEl); + normaliseStructure("body", htmlEl); + + return this; + } + + // does not recurse. + private void normaliseTextNodes(Element element) { + List<Node> toMove = new ArrayList<Node>(); + for (Node node: element.childNodes) { + if (node instanceof TextNode) { + TextNode tn = (TextNode) node; + if (!tn.isBlank()) + toMove.add(tn); + } + } + + for (int i = toMove.size()-1; i >= 0; i--) { + Node node = toMove.get(i); + element.removeChild(node); + body().prependChild(new TextNode(" ", "")); + body().prependChild(node); + } + } + + // merge multiple <head> or <body> contents into one, delete the remainder, and ensure they are owned by <html> + private void normaliseStructure(String tag, Element htmlEl) { + Elements elements = this.getElementsByTag(tag); + Element master = elements.first(); // will always be available as created above if not existent + if (elements.size() > 1) { // dupes, move contents to master + List<Node> toMove = new ArrayList<Node>(); + for (int i = 1; i < elements.size(); i++) { + Node dupe = elements.get(i); + for (Node node : dupe.childNodes) + toMove.add(node); + dupe.remove(); + } + + for (Node dupe : toMove) + master.appendChild(dupe); + } + // ensure parented by <html> + if (!master.parent().equals(htmlEl)) { + htmlEl.appendChild(master); // includes remove() + } + } + + // fast method to get first by tag name, used for html, head, body finders + private 
Element findFirstElementByTagName(String tag, Node node) { + if (node.nodeName().equals(tag)) + return (Element) node; + else { + for (Node child: node.childNodes) { + Element found = findFirstElementByTagName(tag, child); + if (found != null) + return found; + } + } + return null; + } + + @Override + public String outerHtml() { + return super.html(); // no outer wrapper tag + } + + /** + Set the text of the {@code body} of this document. Any existing nodes within the body will be cleared. + @param text unencoded text + @return this document + */ + @Override + public Element text(String text) { + body().text(text); // overridden to not nuke doc structure + return this; + } + + @Override + public String nodeName() { + return "#document"; + } + + @Override + public Document clone() { + Document clone = (Document) super.clone(); + clone.outputSettings = this.outputSettings.clone(); + return clone; + } + + /** + * A Document's output settings control the form of the text() and html() methods. + */ + public static class OutputSettings implements Cloneable { + private Entities.EscapeMode escapeMode = Entities.EscapeMode.base; + private Charset charset = Charset.forName("UTF-8"); + private CharsetEncoder charsetEncoder = charset.newEncoder(); + private boolean prettyPrint = true; + private int indentAmount = 1; + + public OutputSettings() {} + + /** + * Get the document's current HTML escape mode: <code>base</code>, which provides a limited set of named HTML + * entities and escapes other characters as numbered entities for maximum compatibility; or <code>extended</code>, + * which uses the complete set of HTML named entities. + * <p> + * The default escape mode is <code>base</code>. 
+ * @return the document's current escape mode + */ + public Entities.EscapeMode escapeMode() { + return escapeMode; + } + + /** + * Set the document's escape mode + * @param escapeMode the new escape mode to use + * @return the document's output settings, for chaining + */ + public OutputSettings escapeMode(Entities.EscapeMode escapeMode) { + this.escapeMode = escapeMode; + return this; + } + + /** + * Get the document's current output charset, which is used to control which characters are escaped when + * generating HTML (via the <code>html()</code> methods), and which are kept intact. + * <p> + * Where possible (when parsing from a URL or File), the document's output charset is automatically set to the + * input charset. Otherwise, it defaults to UTF-8. + * @return the document's current charset. + */ + public Charset charset() { + return charset; + } + + /** + * Update the document's output charset. + * @param charset the new charset to use. + * @return the document's output settings, for chaining + */ + public OutputSettings charset(Charset charset) { + // todo: this should probably update the doc's meta charset + this.charset = charset; + charsetEncoder = charset.newEncoder(); + return this; + } + + /** + * Update the document's output charset. + * @param charset the new charset (by name) to use. + * @return the document's output settings, for chaining + */ + public OutputSettings charset(String charset) { + charset(Charset.forName(charset)); + return this; + } + + CharsetEncoder encoder() { + return charsetEncoder; + } + + /** + * Get if pretty printing is enabled. Default is true. If disabled, the HTML output methods will not re-format + * the output, and the output will generally look like the input. + * @return if pretty printing is enabled. + */ + public boolean prettyPrint() { + return prettyPrint; + } + + /** + * Enable or disable pretty printing. 
+ * @param pretty new pretty print setting + * @return this, for chaining + */ + public OutputSettings prettyPrint(boolean pretty) { + prettyPrint = pretty; + return this; + } + + /** + * Get the current tag indent amount, used when pretty printing. + * @return the current indent amount + */ + public int indentAmount() { + return indentAmount; + } + + /** + * Set the indent amount for pretty printing + * @param indentAmount number of spaces to use for indenting each level. Must be >= 0. + * @return this, for chaining + */ + public OutputSettings indentAmount(int indentAmount) { + Validate.isTrue(indentAmount >= 0); + this.indentAmount = indentAmount; + return this; + } + + @Override + public OutputSettings clone() { + OutputSettings clone; + try { + clone = (OutputSettings) super.clone(); + } catch (CloneNotSupportedException e) { + throw new RuntimeException(e); + } + clone.charset(charset.name()); // new charset and charset encoder + clone.escapeMode = Entities.EscapeMode.valueOf(escapeMode.name()); + // indentAmount, prettyPrint are primitives so object.clone() will handle + return clone; + } + } + + /** + * Get the document's current output settings. + * @return the document's current output settings. + */ + public OutputSettings outputSettings() { + return outputSettings; + } + + public enum QuirksMode { + noQuirks, quirks, limitedQuirks; + } + + public QuirksMode quirksMode() { + return quirksMode; + } + + public Document quirksMode(QuirksMode quirksMode) { + this.quirksMode = quirksMode; + return this; + } +} + diff --git a/src/org/jsoup/nodes/DocumentType.java b/src/org/jsoup/nodes/DocumentType.java new file mode 100644 index 0000000000..f8c79f0d18 --- /dev/null +++ b/src/org/jsoup/nodes/DocumentType.java @@ -0,0 +1,46 @@ +package org.jsoup.nodes; + +import org.jsoup.helper.StringUtil; +import org.jsoup.helper.Validate; + +/** + * A {@code <!DOCTPYE>} node. 
+ */ +public class DocumentType extends Node { + // todo: quirk mode from publicId and systemId + + /** + * Create a new doctype element. + * @param name the doctype's name + * @param publicId the doctype's public ID + * @param systemId the doctype's system ID + * @param baseUri the doctype's base URI + */ + public DocumentType(String name, String publicId, String systemId, String baseUri) { + super(baseUri); + + Validate.notEmpty(name); + attr("name", name); + attr("publicId", publicId); + attr("systemId", systemId); + } + + @Override + public String nodeName() { + return "#doctype"; + } + + @Override + void outerHtmlHead(StringBuilder accum, int depth, Document.OutputSettings out) { + accum.append("<!DOCTYPE ").append(attr("name")); + if (!StringUtil.isBlank(attr("publicId"))) + accum.append(" PUBLIC \"").append(attr("publicId")).append("\""); + if (!StringUtil.isBlank(attr("systemId"))) + accum.append(" \"").append(attr("systemId")).append("\""); + accum.append('>'); + } + + @Override + void outerHtmlTail(StringBuilder accum, int depth, Document.OutputSettings out) { + } +} diff --git a/src/org/jsoup/nodes/Element.java b/src/org/jsoup/nodes/Element.java new file mode 100644 index 0000000000..5c1894c934 --- /dev/null +++ b/src/org/jsoup/nodes/Element.java @@ -0,0 +1,1119 @@ +package org.jsoup.nodes; + +import org.jsoup.helper.StringUtil; +import org.jsoup.helper.Validate; +import org.jsoup.parser.Parser; +import org.jsoup.parser.Tag; +import org.jsoup.select.Collector; +import org.jsoup.select.Elements; +import org.jsoup.select.Evaluator; +import org.jsoup.select.Selector; + +import java.util.*; +import java.util.regex.Pattern; +import java.util.regex.PatternSyntaxException; + +/** + * A HTML element consists of a tag name, attributes, and child nodes (including text nodes and + * other elements). + * + * From an Element, you can extract data, traverse the node graph, and manipulate the HTML. 
+ * + * @author Jonathan Hedley, jonathan@hedley.net + */ +public class Element extends Node { + private Tag tag; + private Set<String> classNames; + + /** + * Create a new, standalone Element. (Standalone in that is has no parent.) + * + * @param tag tag of this element + * @param baseUri the base URI + * @param attributes initial attributes + * @see #appendChild(Node) + * @see #appendElement(String) + */ + public Element(Tag tag, String baseUri, Attributes attributes) { + super(baseUri, attributes); + + Validate.notNull(tag); + this.tag = tag; + } + + /** + * Create a new Element from a tag and a base URI. + * + * @param tag element tag + * @param baseUri the base URI of this element. It is acceptable for the base URI to be an empty + * string, but not null. + * @see Tag#valueOf(String) + */ + public Element(Tag tag, String baseUri) { + this(tag, baseUri, new Attributes()); + } + + @Override + public String nodeName() { + return tag.getName(); + } + + /** + * Get the name of the tag for this element. E.g. {@code div} + * + * @return the tag name + */ + public String tagName() { + return tag.getName(); + } + + /** + * Change the tag of this element. For example, convert a {@code <span>} to a {@code <div>} with + * {@code el.tagName("div");}. + * + * @param tagName new tag name for this element + * @return this element, for chaining + */ + public Element tagName(String tagName) { + Validate.notEmpty(tagName, "Tag name must not be empty."); + tag = Tag.valueOf(tagName); + return this; + } + + /** + * Get the Tag for this element. + * + * @return the tag object + */ + public Tag tag() { + return tag; + } + + /** + * Test if this element is a block-level element. (E.g. {@code <div> == true} or an inline element + * {@code <p> == false}). + * + * @return true if block, false if not (and thus inline) + */ + public boolean isBlock() { + return tag.isBlock(); + } + + /** + * Get the {@code id} attribute of this element. 
+ * + * @return The id attribute, if present, or an empty string if not. + */ + public String id() { + String id = attr("id"); + return id == null ? "" : id; + } + + /** + * Set an attribute value on this element. If this element already has an attribute with the + * key, its value is updated; otherwise, a new attribute is added. + * + * @return this element + */ + public Element attr(String attributeKey, String attributeValue) { + super.attr(attributeKey, attributeValue); + return this; + } + + /** + * Get this element's HTML5 custom data attributes. Each attribute in the element that has a key + * starting with "data-" is included the dataset. + * <p> + * E.g., the element {@code <div data-package="jsoup" data-language="Java" class="group">...} has the dataset + * {@code package=jsoup, language=java}. + * <p> + * This map is a filtered view of the element's attribute map. Changes to one map (add, remove, update) are reflected + * in the other map. + * <p> + * You can find elements that have data attributes using the {@code [^data-]} attribute key prefix selector. + * @return a map of {@code key=value} custom data attributes. + */ + public Map<String, String> dataset() { + return attributes.dataset(); + } + + @Override + public final Element parent() { + return (Element) parentNode; + } + + /** + * Get this element's parent and ancestors, up to the document root. + * @return this element's stack of parents, closest first. + */ + public Elements parents() { + Elements parents = new Elements(); + accumulateParents(this, parents); + return parents; + } + + private static void accumulateParents(Element el, Elements parents) { + Element parent = el.parent(); + if (parent != null && !parent.tagName().equals("#root")) { + parents.add(parent); + accumulateParents(parent, parents); + } + } + + /** + * Get a child element of this element, by its 0-based index number. + * <p/> + * Note that an element can have both mixed Nodes and Elements as children. 
This method inspects + * a filtered list of children that are elements, and the index is based on that filtered list. + * + * @param index the index number of the element to retrieve + * @return the child element, if it exists, or {@code null} if absent. + * @see #childNode(int) + */ + public Element child(int index) { + return children().get(index); + } + + /** + * Get this element's child elements. + * <p/> + * This is effectively a filter on {@link #childNodes()} to get Element nodes. + * @return child elements. If this element has no children, returns an + * empty list. + * @see #childNodes() + */ + public Elements children() { + // create on the fly rather than maintaining two lists. if gets slow, memoize, and mark dirty on change + List<Element> elements = new ArrayList<Element>(); + for (Node node : childNodes) { + if (node instanceof Element) + elements.add((Element) node); + } + return new Elements(elements); + } + + /** + * Get this element's child text nodes. The list is unmodifiable but the text nodes may be manipulated. + * <p/> + * This is effectively a filter on {@link #childNodes()} to get Text nodes. + * @return child text nodes. If this element has no text nodes, returns an + * empty list. 
+ * <p/> + * For example, with the input HTML: {@code <p>One <span>Two</span> Three <br> Four</p>} with the {@code p} element selected: + * <ul> + * <li>{@code p.text()} = {@code "One Two Three Four"}</li> + * <li>{@code p.ownText()} = {@code "One Three Four"}</li> + * <li>{@code p.children()} = {@code Elements[<span>, <br>]}</li> + * <li>{@code p.childNodes()} = {@code List<Node>["One ", <span>, " Three ", <br>, " Four"]}</li> + * <li>{@code p.textNodes()} = {@code List<TextNode>["One ", " Three ", " Four"]}</li> + * </ul> + */ + public List<TextNode> textNodes() { + List<TextNode> textNodes = new ArrayList<TextNode>(); + for (Node node : childNodes) { + if (node instanceof TextNode) + textNodes.add((TextNode) node); + } + return Collections.unmodifiableList(textNodes); + } + + /** + * Get this element's child data nodes. The list is unmodifiable but the data nodes may be manipulated. + * <p/> + * This is effectively a filter on {@link #childNodes()} to get Data nodes. + * @return child data nodes. If this element has no data nodes, returns an + * empty list. + * @see #data() + */ + public List<DataNode> dataNodes() { + List<DataNode> dataNodes = new ArrayList<DataNode>(); + for (Node node : childNodes) { + if (node instanceof DataNode) + dataNodes.add((DataNode) node); + } + return Collections.unmodifiableList(dataNodes); + } + + /** + * Find elements that match the {@link Selector} CSS query, with this element as the starting context. Matched elements + * may include this element, or any of its children. + * <p/> + * This method is generally more powerful to use than the DOM-type {@code getElementBy*} methods, because + * multiple filters can be combined, e.g.: + * <ul> + * <li>{@code el.select("a[href]")} - finds links ({@code a} tags with {@code href} attributes) + * <li>{@code el.select("a[href*=example.com]")} - finds links pointing to example.com (loosely) + * </ul> + * <p/> + * See the query syntax documentation in {@link org.jsoup.select.Selector}. 
+ * + * @param cssQuery a {@link Selector} CSS-like query + * @return elements that match the query (empty if none match) + * @see org.jsoup.select.Selector + */ + public Elements select(String cssQuery) { + return Selector.select(cssQuery, this); + } + + /** + * Add a node child node to this element. + * + * @param child node to add. Must not already have a parent. + * @return this element, so that you can add more child nodes or elements. + */ + public Element appendChild(Node child) { + Validate.notNull(child); + + addChildren(child); + return this; + } + + /** + * Add a node to the start of this element's children. + * + * @param child node to add. Must not already have a parent. + * @return this element, so that you can add more child nodes or elements. + */ + public Element prependChild(Node child) { + Validate.notNull(child); + + addChildren(0, child); + return this; + } + + /** + * Create a new element by tag name, and add it as the last child. + * + * @param tagName the name of the tag (e.g. {@code div}). + * @return the new element, to allow you to add content to it, e.g.: + * {@code parent.appendElement("h1").attr("id", "header").text("Welcome");} + */ + public Element appendElement(String tagName) { + Element child = new Element(Tag.valueOf(tagName), baseUri()); + appendChild(child); + return child; + } + + /** + * Create a new element by tag name, and add it as the first child. + * + * @param tagName the name of the tag (e.g. {@code div}). + * @return the new element, to allow you to add content to it, e.g.: + * {@code parent.prependElement("h1").attr("id", "header").text("Welcome");} + */ + public Element prependElement(String tagName) { + Element child = new Element(Tag.valueOf(tagName), baseUri()); + prependChild(child); + return child; + } + + /** + * Create and append a new TextNode to this element. 
+ * + * @param text the unencoded text to add + * @return this element + */ + public Element appendText(String text) { + TextNode node = new TextNode(text, baseUri()); + appendChild(node); + return this; + } + + /** + * Create and prepend a new TextNode to this element. + * + * @param text the unencoded text to add + * @return this element + */ + public Element prependText(String text) { + TextNode node = new TextNode(text, baseUri()); + prependChild(node); + return this; + } + + /** + * Add inner HTML to this element. The supplied HTML will be parsed, and each node appended to the end of the children. + * @param html HTML to add inside this element, after the existing HTML + * @return this element + * @see #html(String) + */ + public Element append(String html) { + Validate.notNull(html); + + List<Node> nodes = Parser.parseFragment(html, this, baseUri()); + addChildren(nodes.toArray(new Node[nodes.size()])); + return this; + } + + /** + * Add inner HTML into this element. The supplied HTML will be parsed, and each node prepended to the start of the element's children. + * @param html HTML to add inside this element, before the existing HTML + * @return this element + * @see #html(String) + */ + public Element prepend(String html) { + Validate.notNull(html); + + List<Node> nodes = Parser.parseFragment(html, this, baseUri()); + addChildren(0, nodes.toArray(new Node[nodes.size()])); + return this; + } + + /** + * Insert the specified HTML into the DOM before this element (i.e. as a preceding sibling). + * + * @param html HTML to add before this element + * @return this element, for chaining + * @see #after(String) + */ + @Override + public Element before(String html) { + return (Element) super.before(html); + } + + /** + * Insert the specified node into the DOM before this node (i.e. as a preceding sibling). 
+ * @param node to add before this element + * @return this Element, for chaining + * @see #after(Node) + */ + @Override + public Element before(Node node) { + return (Element) super.before(node); + } + + /** + * Insert the specified HTML into the DOM after this element (i.e. as a following sibling). + * + * @param html HTML to add after this element + * @return this element, for chaining + * @see #before(String) + */ + @Override + public Element after(String html) { + return (Element) super.after(html); + } + + /** + * Insert the specified node into the DOM after this node (i.e. as a following sibling). + * @param node to add after this element + * @return this element, for chaining + * @see #before(Node) + */ + @Override + public Element after(Node node) { + return (Element) super.after(node); + } + + /** + * Remove all of the element's child nodes. Any attributes are left as-is. + * @return this element + */ + public Element empty() { + childNodes.clear(); + return this; + } + + /** + * Wrap the supplied HTML around this element. + * + * @param html HTML to wrap around this element, e.g. {@code <div class="head"></div>}. Can be arbitrarily deep. + * @return this element, for chaining. + */ + @Override + public Element wrap(String html) { + return (Element) super.wrap(html); + } + + /** + * Get sibling elements. If the element has no sibling elements, returns an empty list. An element is not a sibling + * of itself, so will not be included in the returned list. + * @return sibling elements + */ + public Elements siblingElements() { + if (parentNode == null) + return new Elements(0); + + List<Element> elements = parent().children(); + Elements siblings = new Elements(elements.size() - 1); + for (Element el: elements) + if (el != this) + siblings.add(el); + return siblings; + } + + /** + * Gets the next sibling element of this element. E.g., if a {@code div} contains two {@code p}s, + * the {@code nextElementSibling} of the first {@code p} is the second {@code p}. 
+ * <p/> + * This is similar to {@link #nextSibling()}, but specifically finds only Elements + * @return the next element, or null if there is no next element + * @see #previousElementSibling() + */ + public Element nextElementSibling() { + if (parentNode == null) return null; + List<Element> siblings = parent().children(); + Integer index = indexInList(this, siblings); + Validate.notNull(index); + if (siblings.size() > index+1) + return siblings.get(index+1); + else + return null; + } + + /** + * Gets the previous element sibling of this element. + * @return the previous element, or null if there is no previous element + * @see #nextElementSibling() + */ + public Element previousElementSibling() { + if (parentNode == null) return null; + List<Element> siblings = parent().children(); + Integer index = indexInList(this, siblings); + Validate.notNull(index); + if (index > 0) + return siblings.get(index-1); + else + return null; + } + + /** + * Gets the first element sibling of this element. + * @return the first sibling that is an element (aka the parent's first element child) + */ + public Element firstElementSibling() { + // todo: should firstSibling() exclude this? + List<Element> siblings = parent().children(); + return siblings.size() > 1 ? siblings.get(0) : null; + } + + /** + * Get the list index of this element in its element sibling list. I.e. if this is the first element + * sibling, returns 0. + * @return position in element sibling list + */ + public Integer elementSiblingIndex() { + if (parent() == null) return 0; + return indexInList(this, parent().children()); + } + + /** + * Gets the last element sibling of this element + * @return the last sibling that is an element (aka the parent's last element child) + */ + public Element lastElementSibling() { + List<Element> siblings = parent().children(); + return siblings.size() > 1 ? 
siblings.get(siblings.size() - 1) : null; + } + + private static <E extends Element> Integer indexInList(Element search, List<E> elements) { + Validate.notNull(search); + Validate.notNull(elements); + + for (int i = 0; i < elements.size(); i++) { + E element = elements.get(i); + if (element.equals(search)) + return i; + } + return null; + } + + // DOM type methods + + /** + * Finds elements, including and recursively under this element, with the specified tag name. + * @param tagName The tag name to search for (case insensitively). + * @return a matching unmodifiable list of elements. Will be empty if this element and none of its children match. + */ + public Elements getElementsByTag(String tagName) { + Validate.notEmpty(tagName); + tagName = tagName.toLowerCase().trim(); + + return Collector.collect(new Evaluator.Tag(tagName), this); + } + + /** + * Find an element by ID, including or under this element. + * <p> + * Note that this finds the first matching ID, starting with this element. If you search down from a different + * starting point, it is possible to find a different element by ID. For unique element by ID within a Document, + * use {@link Document#getElementById(String)} + * @param id The ID to search for. + * @return The first matching element by ID, starting with this element, or null if none found. + */ + public Element getElementById(String id) { + Validate.notEmpty(id); + + Elements elements = Collector.collect(new Evaluator.Id(id), this); + if (elements.size() > 0) + return elements.get(0); + else + return null; + } + + /** + * Find elements that have this class, including or under this element. Case insensitive. + * <p> + * Elements can have multiple classes (e.g. {@code <div class="header round first">}. This method + * checks each class, so you can find the above with {@code el.getElementsByClass("header");}. + * + * @param className the name of the class to search for. 
+ * @return elements with the supplied class name, empty if none + * @see #hasClass(String) + * @see #classNames() + */ + public Elements getElementsByClass(String className) { + Validate.notEmpty(className); + + return Collector.collect(new Evaluator.Class(className), this); + } + + /** + * Find elements that have a named attribute set. Case insensitive. + * + * @param key name of the attribute, e.g. {@code href} + * @return elements that have this attribute, empty if none + */ + public Elements getElementsByAttribute(String key) { + Validate.notEmpty(key); + key = key.trim().toLowerCase(); + + return Collector.collect(new Evaluator.Attribute(key), this); + } + + /** + * Find elements that have an attribute name starting with the supplied prefix. Use {@code data-} to find elements + * that have HTML5 datasets. + * @param keyPrefix name prefix of the attribute e.g. {@code data-} + * @return elements that have attribute names that start with with the prefix, empty if none. + */ + public Elements getElementsByAttributeStarting(String keyPrefix) { + Validate.notEmpty(keyPrefix); + keyPrefix = keyPrefix.trim().toLowerCase(); + + return Collector.collect(new Evaluator.AttributeStarting(keyPrefix), this); + } + + /** + * Find elements that have an attribute with the specific value. Case insensitive. + * + * @param key name of the attribute + * @param value value of the attribute + * @return elements that have this attribute with this value, empty if none + */ + public Elements getElementsByAttributeValue(String key, String value) { + return Collector.collect(new Evaluator.AttributeWithValue(key, value), this); + } + + /** + * Find elements that either do not have this attribute, or have it with a different value. Case insensitive. 
+ * + * @param key name of the attribute + * @param value value of the attribute + * @return elements that do not have a matching attribute + */ + public Elements getElementsByAttributeValueNot(String key, String value) { + return Collector.collect(new Evaluator.AttributeWithValueNot(key, value), this); + } + + /** + * Find elements that have attributes that start with the value prefix. Case insensitive. + * + * @param key name of the attribute + * @param valuePrefix start of attribute value + * @return elements that have attributes that start with the value prefix + */ + public Elements getElementsByAttributeValueStarting(String key, String valuePrefix) { + return Collector.collect(new Evaluator.AttributeWithValueStarting(key, valuePrefix), this); + } + + /** + * Find elements that have attributes that end with the value suffix. Case insensitive. + * + * @param key name of the attribute + * @param valueSuffix end of the attribute value + * @return elements that have attributes that end with the value suffix + */ + public Elements getElementsByAttributeValueEnding(String key, String valueSuffix) { + return Collector.collect(new Evaluator.AttributeWithValueEnding(key, valueSuffix), this); + } + + /** + * Find elements that have attributes whose value contains the match string. Case insensitive. + * + * @param key name of the attribute + * @param match substring of value to search for + * @return elements that have attributes containing this text + */ + public Elements getElementsByAttributeValueContaining(String key, String match) { + return Collector.collect(new Evaluator.AttributeWithValueContaining(key, match), this); + } + + /** + * Find elements that have attributes whose values match the supplied regular expression. 
+ * @param key name of the attribute + * @param pattern compiled regular expression to match against attribute values + * @return elements that have attributes matching this regular expression + */ + public Elements getElementsByAttributeValueMatching(String key, Pattern pattern) { + return Collector.collect(new Evaluator.AttributeWithValueMatching(key, pattern), this); + + } + + /** + * Find elements that have attributes whose values match the supplied regular expression. + * @param key name of the attribute + * @param regex regular expression to match against attribute values. You can use <a href="http://java.sun.com/docs/books/tutorial/essential/regex/pattern.html#embedded">embedded flags</a> (such as (?i) and (?m) to control regex options. + * @return elements that have attributes matching this regular expression + */ + public Elements getElementsByAttributeValueMatching(String key, String regex) { + Pattern pattern; + try { + pattern = Pattern.compile(regex); + } catch (PatternSyntaxException e) { + throw new IllegalArgumentException("Pattern syntax error: " + regex, e); + } + return getElementsByAttributeValueMatching(key, pattern); + } + + /** + * Find elements whose sibling index is less than the supplied index. + * @param index 0-based index + * @return elements less than index + */ + public Elements getElementsByIndexLessThan(int index) { + return Collector.collect(new Evaluator.IndexLessThan(index), this); + } + + /** + * Find elements whose sibling index is greater than the supplied index. + * @param index 0-based index + * @return elements greater than index + */ + public Elements getElementsByIndexGreaterThan(int index) { + return Collector.collect(new Evaluator.IndexGreaterThan(index), this); + } + + /** + * Find elements whose sibling index is equal to the supplied index. 
+ * @param index 0-based index + * @return elements equal to index + */ + public Elements getElementsByIndexEquals(int index) { + return Collector.collect(new Evaluator.IndexEquals(index), this); + } + + /** + * Find elements that contain the specified string. The search is case insensitive. The text may appear directly + * in the element, or in any of its descendants. + * @param searchText to look for in the element's text + * @return elements that contain the string, case insensitive. + * @see Element#text() + */ + public Elements getElementsContainingText(String searchText) { + return Collector.collect(new Evaluator.ContainsText(searchText), this); + } + + /** + * Find elements that directly contain the specified string. The search is case insensitive. The text must appear directly + * in the element, not in any of its descendants. + * @param searchText to look for in the element's own text + * @return elements that contain the string, case insensitive. + * @see Element#ownText() + */ + public Elements getElementsContainingOwnText(String searchText) { + return Collector.collect(new Evaluator.ContainsOwnText(searchText), this); + } + + /** + * Find elements whose text matches the supplied regular expression. + * @param pattern regular expression to match text against + * @return elements matching the supplied regular expression. + * @see Element#text() + */ + public Elements getElementsMatchingText(Pattern pattern) { + return Collector.collect(new Evaluator.Matches(pattern), this); + } + + /** + * Find elements whose text matches the supplied regular expression. + * @param regex regular expression to match text against. You can use <a href="http://java.sun.com/docs/books/tutorial/essential/regex/pattern.html#embedded">embedded flags</a> (such as (?i) and (?m) to control regex options. + * @return elements matching the supplied regular expression. 
+ * @see Element#text() + */ + public Elements getElementsMatchingText(String regex) { + Pattern pattern; + try { + pattern = Pattern.compile(regex); + } catch (PatternSyntaxException e) { + throw new IllegalArgumentException("Pattern syntax error: " + regex, e); + } + return getElementsMatchingText(pattern); + } + + /** + * Find elements whose own text matches the supplied regular expression. + * @param pattern regular expression to match text against + * @return elements matching the supplied regular expression. + * @see Element#ownText() + */ + public Elements getElementsMatchingOwnText(Pattern pattern) { + return Collector.collect(new Evaluator.MatchesOwn(pattern), this); + } + + /** + * Find elements whose text matches the supplied regular expression. + * @param regex regular expression to match text against. You can use <a href="http://java.sun.com/docs/books/tutorial/essential/regex/pattern.html#embedded">embedded flags</a> (such as (?i) and (?m) to control regex options. + * @return elements matching the supplied regular expression. + * @see Element#ownText() + */ + public Elements getElementsMatchingOwnText(String regex) { + Pattern pattern; + try { + pattern = Pattern.compile(regex); + } catch (PatternSyntaxException e) { + throw new IllegalArgumentException("Pattern syntax error: " + regex, e); + } + return getElementsMatchingOwnText(pattern); + } + + /** + * Find all elements under this element (including self, and children of children). + * + * @return all elements + */ + public Elements getAllElements() { + return Collector.collect(new Evaluator.AllElements(), this); + } + + /** + * Gets the combined text of this element and all its children. + * <p> + * For example, given HTML {@code <p>Hello <b>there</b> now!</p>}, {@code p.text()} returns {@code "Hello there now!"} + * + * @return unencoded text, or empty string if none. 
+ * @see #ownText() + * @see #textNodes() + */ + public String text() { + StringBuilder sb = new StringBuilder(); + text(sb); + return sb.toString().trim(); + } + + private void text(StringBuilder accum) { + appendWhitespaceIfBr(this, accum); + + for (Node child : childNodes) { + if (child instanceof TextNode) { + TextNode textNode = (TextNode) child; + appendNormalisedText(accum, textNode); + } else if (child instanceof Element) { + Element element = (Element) child; + if (accum.length() > 0 && element.isBlock() && !TextNode.lastCharIsWhitespace(accum)) + accum.append(" "); + element.text(accum); + } + } + } + + /** + * Gets the text owned by this element only; does not get the combined text of all children. + * <p> + * For example, given HTML {@code <p>Hello <b>there</b> now!</p>}, {@code p.ownText()} returns {@code "Hello now!"}, + * whereas {@code p.text()} returns {@code "Hello there now!"}. + * Note that the text within the {@code b} element is not returned, as it is not a direct child of the {@code p} element. + * + * @return unencoded text, or empty string if none. 
+ * @see #text() + * @see #textNodes() + */ + public String ownText() { + StringBuilder sb = new StringBuilder(); + ownText(sb); + return sb.toString().trim(); + } + + private void ownText(StringBuilder accum) { + for (Node child : childNodes) { + if (child instanceof TextNode) { + TextNode textNode = (TextNode) child; + appendNormalisedText(accum, textNode); + } else if (child instanceof Element) { + appendWhitespaceIfBr((Element) child, accum); + } + } + } + + private void appendNormalisedText(StringBuilder accum, TextNode textNode) { + String text = textNode.getWholeText(); + + if (!preserveWhitespace()) { + text = TextNode.normaliseWhitespace(text); + if (TextNode.lastCharIsWhitespace(accum)) + text = TextNode.stripLeadingWhitespace(text); + } + accum.append(text); + } + + private static void appendWhitespaceIfBr(Element element, StringBuilder accum) { + if (element.tag.getName().equals("br") && !TextNode.lastCharIsWhitespace(accum)) + accum.append(" "); + } + + boolean preserveWhitespace() { + return tag.preserveWhitespace() || parent() != null && parent().preserveWhitespace(); + } + + /** + * Set the text of this element. Any existing contents (text or elements) will be cleared + * @param text unencoded text + * @return this element + */ + public Element text(String text) { + Validate.notNull(text); + + empty(); + TextNode textNode = new TextNode(text, baseUri); + appendChild(textNode); + + return this; + } + + /** + Test if this element has any text content (that is not just whitespace). + @return true if element has non-blank text content. + */ + public boolean hasText() { + for (Node child: childNodes) { + if (child instanceof TextNode) { + TextNode textNode = (TextNode) child; + if (!textNode.isBlank()) + return true; + } else if (child instanceof Element) { + Element el = (Element) child; + if (el.hasText()) + return true; + } + } + return false; + } + + /** + * Get the combined data of this element. Data is e.g. the inside of a {@code script} tag. 
+ * @return the data, or empty string if none + * + * @see #dataNodes() + */ + public String data() { + StringBuilder sb = new StringBuilder(); + + for (Node childNode : childNodes) { + if (childNode instanceof DataNode) { + DataNode data = (DataNode) childNode; + sb.append(data.getWholeData()); + } else if (childNode instanceof Element) { + Element element = (Element) childNode; + String elementData = element.data(); + sb.append(elementData); + } + } + return sb.toString(); + } + + /** + * Gets the literal value of this element's "class" attribute, which may include multiple class names, space + * separated. (E.g. on <code><div class="header gray"></code> returns, "<code>header gray</code>") + * @return The literal class attribute, or <b>empty string</b> if no class attribute set. + */ + public String className() { + return attr("class"); + } + + /** + * Get all of the element's class names. E.g. on element {@code <div class="header gray"}>}, + * returns a set of two elements {@code "header", "gray"}. Note that modifications to this set are not pushed to + * the backing {@code class} attribute; use the {@link #classNames(java.util.Set)} method to persist them. + * @return set of classnames, empty if no class attribute + */ + public Set<String> classNames() { + if (classNames == null) { + String[] names = className().split("\\s+"); + classNames = new LinkedHashSet<String>(Arrays.asList(names)); + } + return classNames; + } + + /** + Set the element's {@code class} attribute to the supplied class names. + @param classNames set of classes + @return this element, for chaining + */ + public Element classNames(Set<String> classNames) { + Validate.notNull(classNames); + attributes.put("class", StringUtil.join(classNames, " ")); + return this; + } + + /** + * Tests if this element has a class. Case insensitive. 
+ * @param className name of class to check for + * @return true if it does, false if not + */ + public boolean hasClass(String className) { + Set<String> classNames = classNames(); + for (String name : classNames) { + if (className.equalsIgnoreCase(name)) + return true; + } + return false; + } + + /** + Add a class name to this element's {@code class} attribute. + @param className class name to add + @return this element + */ + public Element addClass(String className) { + Validate.notNull(className); + + Set<String> classes = classNames(); + classes.add(className); + classNames(classes); + + return this; + } + + /** + Remove a class name from this element's {@code class} attribute. + @param className class name to remove + @return this element + */ + public Element removeClass(String className) { + Validate.notNull(className); + + Set<String> classes = classNames(); + classes.remove(className); + classNames(classes); + + return this; + } + + /** + Toggle a class name on this element's {@code class} attribute: if present, remove it; otherwise add it. + @param className class name to toggle + @return this element + */ + public Element toggleClass(String className) { + Validate.notNull(className); + + Set<String> classes = classNames(); + if (classes.contains(className)) + classes.remove(className); + else + classes.add(className); + classNames(classes); + + return this; + } + + /** + * Get the value of a form element (input, textarea, etc). + * @return the value of the form element, or empty string if not set. + */ + public String val() { + if (tagName().equals("textarea")) + return text(); + else + return attr("value"); + } + + /** + * Set the value of a form element (input, textarea, etc). 
+ * @param value value to set + * @return this element (for chaining) + */ + public Element val(String value) { + if (tagName().equals("textarea")) + text(value); + else + attr("value", value); + return this; + } + + void outerHtmlHead(StringBuilder accum, int depth, Document.OutputSettings out) { + if (accum.length() > 0 && out.prettyPrint() && (tag.formatAsBlock() || (parent() != null && parent().tag().formatAsBlock()))) + indent(accum, depth, out); + accum + .append("<") + .append(tagName()); + attributes.html(accum, out); + + if (childNodes.isEmpty() && tag.isSelfClosing()) + accum.append(" />"); + else + accum.append(">"); + } + + void outerHtmlTail(StringBuilder accum, int depth, Document.OutputSettings out) { + if (!(childNodes.isEmpty() && tag.isSelfClosing())) { + if (out.prettyPrint() && !childNodes.isEmpty() && tag.formatAsBlock()) + indent(accum, depth, out); + accum.append("</").append(tagName()).append(">"); + } + } + + /** + * Retrieves the element's inner HTML. E.g. on a {@code <div>} with one empty {@code <p>}, would return + * {@code <p></p>}. (Whereas {@link #outerHtml()} would return {@code <div><p></p></div>}.) + * + * @return String of HTML. + * @see #outerHtml() + */ + public String html() { + StringBuilder accum = new StringBuilder(); + html(accum); + return accum.toString().trim(); + } + + private void html(StringBuilder accum) { + for (Node node : childNodes) + node.outerHtml(accum); + } + + /** + * Set this element's inner HTML. Clears the existing HTML first. + * @param html HTML to parse and set into this element + * @return this element + * @see #append(String) + */ + public Element html(String html) { + empty(); + append(html); + return this; + } + + public String toString() { + return outerHtml(); + } + + @Override + public boolean equals(Object o) { + return this == o; + } + + @Override + public int hashCode() { + // todo: fixup, not very useful + int result = super.hashCode(); + result = 31 * result + (tag != null ? 
tag.hashCode() : 0); + return result; + } + + @Override + public Element clone() { + Element clone = (Element) super.clone(); + clone.classNames(); // creates linked set of class names from class attribute + return clone; + } +} diff --git a/src/org/jsoup/nodes/Entities.java b/src/org/jsoup/nodes/Entities.java new file mode 100644 index 0000000000..0ae83e1fc0 --- /dev/null +++ b/src/org/jsoup/nodes/Entities.java @@ -0,0 +1,184 @@ +package org.jsoup.nodes; + +import java.io.IOException; +import java.io.InputStream; +import java.nio.charset.CharsetEncoder; +import java.util.*; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +/** + * HTML entities, and escape routines. + * Source: <a href="http://www.w3.org/TR/html5/named-character-references.html#named-character-references">W3C HTML + * named character references</a>. + */ +public class Entities { + public enum EscapeMode { + /** Restricted entities suitable for XHTML output: lt, gt, amp, apos, and quot only. */ + xhtml(xhtmlByVal), + /** Default HTML output entities. */ + base(baseByVal), + /** Complete HTML entities. */ + extended(fullByVal); + + private Map<Character, String> map; + + EscapeMode(Map<Character, String> map) { + this.map = map; + } + + public Map<Character, String> getMap() { + return map; + } + } + + private static final Map<String, Character> full; + private static final Map<Character, String> xhtmlByVal; + private static final Map<Character, String> baseByVal; + private static final Map<Character, String> fullByVal; + private static final Pattern unescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);?"); + private static final Pattern strictUnescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);"); + + private Entities() {} + + /** + * Check if the input is a known named entity + * @param name the possible entity name (e.g. 
"lt" or "amp" + * @return true if a known named entity + */ + public static boolean isNamedEntity(String name) { + return full.containsKey(name); + } + + /** + * Get the Character value of the named entity + * @param name named entity (e.g. "lt" or "amp") + * @return the Character value of the named entity (e.g. '<' or '&') + */ + public static Character getCharacterByName(String name) { + return full.get(name); + } + + static String escape(String string, Document.OutputSettings out) { + return escape(string, out.encoder(), out.escapeMode()); + } + + static String escape(String string, CharsetEncoder encoder, EscapeMode escapeMode) { + StringBuilder accum = new StringBuilder(string.length() * 2); + Map<Character, String> map = escapeMode.getMap(); + + for (int pos = 0; pos < string.length(); pos++) { + Character c = string.charAt(pos); + if (map.containsKey(c)) + accum.append('&').append(map.get(c)).append(';'); + else if (encoder.canEncode(c)) + accum.append(c.charValue()); + else + accum.append("&#").append((int) c).append(';'); + } + + return accum.toString(); + } + + static String unescape(String string) { + return unescape(string, false); + } + + /** + * Unescape the input string. + * @param string + * @param strict if "strict" (that is, requires trailing ';' char, otherwise that's optional) + * @return + */ + static String unescape(String string, boolean strict) { + // todo: change this method to use Tokeniser.consumeCharacterReference + if (!string.contains("&")) + return string; + + Matcher m = strict? strictUnescapePattern.matcher(string) : unescapePattern.matcher(string); // &(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]\\d*);? 
+ StringBuffer accum = new StringBuffer(string.length()); // pity matcher can't use stringbuilder, avoid syncs + // todo: replace m.appendReplacement with own impl, so StringBuilder and quoteReplacement not required + + while (m.find()) { + int charval = -1; + String num = m.group(3); + if (num != null) { + try { + int base = m.group(2) != null ? 16 : 10; // 2 is hex indicator + charval = Integer.valueOf(num, base); + } catch (NumberFormatException e) { + } // skip + } else { + String name = m.group(1); + if (full.containsKey(name)) + charval = full.get(name); + } + + if (charval != -1 || charval > 0xFFFF) { // out of range + String c = Character.toString((char) charval); + m.appendReplacement(accum, Matcher.quoteReplacement(c)); + } else { + m.appendReplacement(accum, Matcher.quoteReplacement(m.group(0))); // replace with original string + } + } + m.appendTail(accum); + return accum.toString(); + } + + // xhtml has restricted entities + private static final Object[][] xhtmlArray = { + {"quot", 0x00022}, + {"amp", 0x00026}, + {"apos", 0x00027}, + {"lt", 0x0003C}, + {"gt", 0x0003E} + }; + + static { + xhtmlByVal = new HashMap<Character, String>(); + baseByVal = toCharacterKey(loadEntities("entities-base.properties")); // most common / default + full = loadEntities("entities-full.properties"); // extended and overblown. 
+ fullByVal = toCharacterKey(full); + + for (Object[] entity : xhtmlArray) { + Character c = Character.valueOf((char) ((Integer) entity[1]).intValue()); + xhtmlByVal.put(c, ((String) entity[0])); + } + } + + private static Map<String, Character> loadEntities(String filename) { + Properties properties = new Properties(); + Map<String, Character> entities = new HashMap<String, Character>(); + try { + InputStream in = Entities.class.getResourceAsStream(filename); + properties.load(in); + in.close(); + } catch (IOException e) { + throw new MissingResourceException("Error loading entities resource: " + e.getMessage(), "Entities", filename); + } + + for (Map.Entry entry: properties.entrySet()) { + Character val = Character.valueOf((char) Integer.parseInt((String) entry.getValue(), 16)); + String name = (String) entry.getKey(); + entities.put(name, val); + } + return entities; + } + + private static Map<Character, String> toCharacterKey(Map<String, Character> inMap) { + Map<Character, String> outMap = new HashMap<Character, String>(); + for (Map.Entry<String, Character> entry: inMap.entrySet()) { + Character character = entry.getValue(); + String name = entry.getKey(); + + if (outMap.containsKey(character)) { + // dupe, prefer the lower case version + if (name.toLowerCase().equals(name)) + outMap.put(character, name); + } else { + outMap.put(character, name); + } + } + return outMap; + } +} diff --git a/src/org/jsoup/nodes/Node.java b/src/org/jsoup/nodes/Node.java new file mode 100644 index 0000000000..eb2b40ee73 --- /dev/null +++ b/src/org/jsoup/nodes/Node.java @@ -0,0 +1,615 @@ +package org.jsoup.nodes; + +import org.jsoup.helper.StringUtil; +import org.jsoup.helper.Validate; +import org.jsoup.parser.Parser; +import org.jsoup.select.NodeTraversor; +import org.jsoup.select.NodeVisitor; + +import java.net.MalformedURLException; +import java.net.URL; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; + +/** + The base, abstract Node model. 
Elements, Documents, Comments etc are all Node instances. + + @author Jonathan Hedley, jonathan@hedley.net */ +public abstract class Node implements Cloneable { + Node parentNode; + List<Node> childNodes; + Attributes attributes; + String baseUri; + int siblingIndex; + + /** + Create a new Node. + @param baseUri base URI + @param attributes attributes (not null, but may be empty) + */ + protected Node(String baseUri, Attributes attributes) { + Validate.notNull(baseUri); + Validate.notNull(attributes); + + childNodes = new ArrayList<Node>(4); + this.baseUri = baseUri.trim(); + this.attributes = attributes; + } + + protected Node(String baseUri) { + this(baseUri, new Attributes()); + } + + /** + * Default constructor. Doesn't setup base uri, children, or attributes; use with caution. + */ + protected Node() { + childNodes = Collections.emptyList(); + attributes = null; + } + + /** + Get the node name of this node. Use for debugging purposes and not logic switching (for that, use instanceof). + @return node name + */ + public abstract String nodeName(); + + /** + * Get an attribute's value by its key. + * <p/> + * To get an absolute URL from an attribute that may be a relative URL, prefix the key with <code><b>abs</b></code>, + * which is a shortcut to the {@link #absUrl} method. + * E.g.: <blockquote><code>String url = a.attr("abs:href");</code></blockquote> + * @param attributeKey The attribute key. + * @return The attribute, or empty string if not present (to avoid nulls). + * @see #attributes() + * @see #hasAttr(String) + * @see #absUrl(String) + */ + public String attr(String attributeKey) { + Validate.notNull(attributeKey); + + if (attributes.hasKey(attributeKey)) + return attributes.get(attributeKey); + else if (attributeKey.toLowerCase().startsWith("abs:")) + return absUrl(attributeKey.substring("abs:".length())); + else return ""; + } + + /** + * Get all of the element's attributes. 
+ * @return attributes (which implements iterable, in same order as presented in original HTML). + */ + public Attributes attributes() { + return attributes; + } + + /** + * Set an attribute (key=value). If the attribute already exists, it is replaced. + * @param attributeKey The attribute key. + * @param attributeValue The attribute value. + * @return this (for chaining) + */ + public Node attr(String attributeKey, String attributeValue) { + attributes.put(attributeKey, attributeValue); + return this; + } + + /** + * Test if this element has an attribute. + * @param attributeKey The attribute key to check. + * @return true if the attribute exists, false if not. + */ + public boolean hasAttr(String attributeKey) { + Validate.notNull(attributeKey); + + if (attributeKey.toLowerCase().startsWith("abs:")) { + String key = attributeKey.substring("abs:".length()); + if (attributes.hasKey(key) && !absUrl(key).equals("")) + return true; + } + return attributes.hasKey(attributeKey); + } + + /** + * Remove an attribute from this element. + * @param attributeKey The attribute to remove. + * @return this (for chaining) + */ + public Node removeAttr(String attributeKey) { + Validate.notNull(attributeKey); + attributes.remove(attributeKey); + return this; + } + + /** + Get the base URI of this node. + @return base URI + */ + public String baseUri() { + return baseUri; + } + + /** + Update the base URI of this node and all of its descendants. + @param baseUri base URI to set + */ + public void setBaseUri(final String baseUri) { + Validate.notNull(baseUri); + + traverse(new NodeVisitor() { + public void head(Node node, int depth) { + node.baseUri = baseUri; + } + + public void tail(Node node, int depth) { + } + }); + } + + /** + * Get an absolute URL from a URL attribute that may be relative (i.e. an <code><a href></code> or + * <code><img src></code>). 
+ * <p/> + * E.g.: <code>String absUrl = linkEl.absUrl("href");</code> + * <p/> + * If the attribute value is already absolute (i.e. it starts with a protocol, like + * <code>http://</code> or <code>https://</code> etc), and it successfully parses as a URL, the attribute is + * returned directly. Otherwise, it is treated as a URL relative to the element's {@link #baseUri}, and made + * absolute using that. + * <p/> + * As an alternate, you can use the {@link #attr} method with the <code>abs:</code> prefix, e.g.: + * <code>String absUrl = linkEl.attr("abs:href");</code> + * + * @param attributeKey The attribute key + * @return An absolute URL if one could be made, or an empty string (not null) if the attribute was missing or + * could not be made successfully into a URL. + * @see #attr + * @see java.net.URL#URL(java.net.URL, String) + */ + public String absUrl(String attributeKey) { + Validate.notEmpty(attributeKey); + + String relUrl = attr(attributeKey); + if (!hasAttr(attributeKey)) { + return ""; // nothing to make absolute with + } else { + URL base; + try { + try { + base = new URL(baseUri); + } catch (MalformedURLException e) { + // the base is unsuitable, but the attribute may be abs on its own, so try that + URL abs = new URL(relUrl); + return abs.toExternalForm(); + } + // workaround: java resolves '//path/file + ?foo' to '//path/?foo', not '//path/file?foo' as desired + if (relUrl.startsWith("?")) + relUrl = base.getPath() + relUrl; + URL abs = new URL(base, relUrl); + return abs.toExternalForm(); + } catch (MalformedURLException e) { + return ""; + } + } + } + + /** + Get a child node by index + @param index index of child node + @return the child node at this index. + */ + public Node childNode(int index) { + return childNodes.get(index); + } + + /** + Get this node's children. Presented as an unmodifiable list: new children can not be added, but the child nodes + themselves can be manipulated. + @return list of children. 
If no children, returns an empty list. + */ + public List<Node> childNodes() { + return Collections.unmodifiableList(childNodes); + } + + protected Node[] childNodesAsArray() { + return childNodes.toArray(new Node[childNodes().size()]); + } + + /** + Gets this node's parent node. + @return parent node; or null if no parent. + */ + public Node parent() { + return parentNode; + } + + /** + * Gets the Document associated with this Node. + * @return the Document associated with this Node, or null if there is no such Document. + */ + public Document ownerDocument() { + if (this instanceof Document) + return (Document) this; + else if (parentNode == null) + return null; + else + return parentNode.ownerDocument(); + } + + /** + * Remove (delete) this node from the DOM tree. If this node has children, they are also removed. + */ + public void remove() { + Validate.notNull(parentNode); + parentNode.removeChild(this); + } + + /** + * Insert the specified HTML into the DOM before this node (i.e. as a preceding sibling). + * @param html HTML to add before this node + * @return this node, for chaining + * @see #after(String) + */ + public Node before(String html) { + addSiblingHtml(siblingIndex(), html); + return this; + } + + /** + * Insert the specified node into the DOM before this node (i.e. as a preceding sibling). + * @param node to add before this node + * @return this node, for chaining + * @see #after(Node) + */ + public Node before(Node node) { + Validate.notNull(node); + Validate.notNull(parentNode); + + parentNode.addChildren(siblingIndex(), node); + return this; + } + + /** + * Insert the specified HTML into the DOM after this node (i.e. as a following sibling). + * @param html HTML to add after this node + * @return this node, for chaining + * @see #before(String) + */ + public Node after(String html) { + addSiblingHtml(siblingIndex()+1, html); + return this; + } + + /** + * Insert the specified node into the DOM after this node (i.e. as a following sibling). 
+ * @param node to add after this node + * @return this node, for chaining + * @see #before(Node) + */ + public Node after(Node node) { + Validate.notNull(node); + Validate.notNull(parentNode); + + parentNode.addChildren(siblingIndex()+1, node); + return this; + } + + private void addSiblingHtml(int index, String html) { + Validate.notNull(html); + Validate.notNull(parentNode); + + Element context = parent() instanceof Element ? (Element) parent() : null; + List<Node> nodes = Parser.parseFragment(html, context, baseUri()); + parentNode.addChildren(index, nodes.toArray(new Node[nodes.size()])); + } + + /** + Wrap the supplied HTML around this node. + @param html HTML to wrap around this element, e.g. {@code <div class="head"></div>}. Can be arbitrarily deep. + @return this node, for chaining. + */ + public Node wrap(String html) { + Validate.notEmpty(html); + + Element context = parent() instanceof Element ? (Element) parent() : null; + List<Node> wrapChildren = Parser.parseFragment(html, context, baseUri()); + Node wrapNode = wrapChildren.get(0); + if (wrapNode == null || !(wrapNode instanceof Element)) // nothing to wrap with; noop + return null; + + Element wrap = (Element) wrapNode; + Element deepest = getDeepChild(wrap); + parentNode.replaceChild(this, wrap); + deepest.addChildren(this); + + // remainder (unbalanced wrap, like <div></div><p></p> -- The <p> is remainder + if (wrapChildren.size() > 0) { + for (int i = 0; i < wrapChildren.size(); i++) { + Node remainder = wrapChildren.get(i); + remainder.parentNode.removeChild(remainder); + wrap.appendChild(remainder); + } + } + return this; + } + + /** + * Removes this node from the DOM, and moves its children up into the node's parent. This has the effect of dropping + * the node but keeping its children. 
+ * <p/> + * For example, with the input html:<br/> + * {@code <div>One <span>Two <b>Three</b></span></div>}<br/> + * Calling {@code element.unwrap()} on the {@code span} element will result in the html:<br/> + * {@code <div>One Two <b>Three</b></div>}<br/> + * and the {@code "Two "} {@link TextNode} being returned. + * @return the first child of this node, after the node has been unwrapped. Null if the node had no children. + * @see #remove() + * @see #wrap(String) + */ + public Node unwrap() { + Validate.notNull(parentNode); + + int index = siblingIndex; + Node firstChild = childNodes.size() > 0 ? childNodes.get(0) : null; + parentNode.addChildren(index, this.childNodesAsArray()); + this.remove(); + + return firstChild; + } + + private Element getDeepChild(Element el) { + List<Element> children = el.children(); + if (children.size() > 0) + return getDeepChild(children.get(0)); + else + return el; + } + + /** + * Replace this node in the DOM with the supplied node. + * @param in the node that will will replace the existing node. + */ + public void replaceWith(Node in) { + Validate.notNull(in); + Validate.notNull(parentNode); + parentNode.replaceChild(this, in); + } + + protected void setParentNode(Node parentNode) { + if (this.parentNode != null) + this.parentNode.removeChild(this); + this.parentNode = parentNode; + } + + protected void replaceChild(Node out, Node in) { + Validate.isTrue(out.parentNode == this); + Validate.notNull(in); + if (in.parentNode != null) + in.parentNode.removeChild(in); + + Integer index = out.siblingIndex(); + childNodes.set(index, in); + in.parentNode = this; + in.setSiblingIndex(index); + out.parentNode = null; + } + + protected void removeChild(Node out) { + Validate.isTrue(out.parentNode == this); + int index = out.siblingIndex(); + childNodes.remove(index); + reindexChildren(); + out.parentNode = null; + } + + protected void addChildren(Node... children) { + //most used. 
short circuit addChildren(int), which hits reindex children and array copy + for (Node child: children) { + reparentChild(child); + childNodes.add(child); + child.setSiblingIndex(childNodes.size()-1); + } + } + + protected void addChildren(int index, Node... children) { + Validate.noNullElements(children); + for (int i = children.length - 1; i >= 0; i--) { + Node in = children[i]; + reparentChild(in); + childNodes.add(index, in); + } + reindexChildren(); + } + + private void reparentChild(Node child) { + if (child.parentNode != null) + child.parentNode.removeChild(child); + child.setParentNode(this); + } + + private void reindexChildren() { + for (int i = 0; i < childNodes.size(); i++) { + childNodes.get(i).setSiblingIndex(i); + } + } + + /** + Retrieves this node's sibling nodes. Similar to {@link #childNodes() node.parent.childNodes()}, but does not + include this node (a node is not a sibling of itself). + @return node siblings. If the node has no parent, returns an empty list. + */ + public List<Node> siblingNodes() { + if (parentNode == null) + return Collections.emptyList(); + + List<Node> nodes = parentNode.childNodes; + List<Node> siblings = new ArrayList<Node>(nodes.size() - 1); + for (Node node: nodes) + if (node != this) + siblings.add(node); + return siblings; + } + + /** + Get this node's next sibling. + @return next sibling, or null if this is the last sibling + */ + public Node nextSibling() { + if (parentNode == null) + return null; // root + + List<Node> siblings = parentNode.childNodes; + Integer index = siblingIndex(); + Validate.notNull(index); + if (siblings.size() > index+1) + return siblings.get(index+1); + else + return null; + } + + /** + Get this node's previous sibling. 
+ @return the previous sibling, or null if this is the first sibling + */ + public Node previousSibling() { + if (parentNode == null) + return null; // root + + List<Node> siblings = parentNode.childNodes; + Integer index = siblingIndex(); + Validate.notNull(index); + if (index > 0) + return siblings.get(index-1); + else + return null; + } + + /** + * Get the list index of this node in its node sibling list. I.e. if this is the first node + * sibling, returns 0. + * @return position in node sibling list + * @see org.jsoup.nodes.Element#elementSiblingIndex() + */ + public int siblingIndex() { + return siblingIndex; + } + + protected void setSiblingIndex(int siblingIndex) { + this.siblingIndex = siblingIndex; + } + + /** + * Perform a depth-first traversal through this node and its descendants. + * @param nodeVisitor the visitor callbacks to perform on each node + * @return this node, for chaining + */ + public Node traverse(NodeVisitor nodeVisitor) { + Validate.notNull(nodeVisitor); + NodeTraversor traversor = new NodeTraversor(nodeVisitor); + traversor.traverse(this); + return this; + } + + /** + Get the outer HTML of this node. + @return HTML + */ + public String outerHtml() { + StringBuilder accum = new StringBuilder(128); + outerHtml(accum); + return accum.toString(); + } + + protected void outerHtml(StringBuilder accum) { + new NodeTraversor(new OuterHtmlVisitor(accum, getOutputSettings())).traverse(this); + } + + // if this node has no document (or parent), retrieve the default output settings + private Document.OutputSettings getOutputSettings() { + return ownerDocument() != null ? ownerDocument().outputSettings() : (new Document("")).outputSettings(); + } + + /** + Get the outer HTML of this node. 
+ @param accum accumulator to place HTML into + */ + abstract void outerHtmlHead(StringBuilder accum, int depth, Document.OutputSettings out); + + abstract void outerHtmlTail(StringBuilder accum, int depth, Document.OutputSettings out); + + public String toString() { + return outerHtml(); + } + + protected void indent(StringBuilder accum, int depth, Document.OutputSettings out) { + accum.append("\n").append(StringUtil.padding(depth * out.indentAmount())); + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + // todo: have nodes hold a child index, compare against that and parent (not children) + return false; + } + + @Override + public int hashCode() { + int result = parentNode != null ? parentNode.hashCode() : 0; + // not children, or will block stack as they go back up to parent) + result = 31 * result + (attributes != null ? attributes.hashCode() : 0); + return result; + } + + /** + * Create a stand-alone, deep copy of this node, and all of its children. The cloned node will have no siblings or + * parent node. As a stand-alone object, any changes made to the clone or any of its children will not impact the + * original node. + * <p> + * The cloned node may be adopted into another Document or node structure using {@link Element#appendChild(Node)}. + * @return stand-alone cloned node + */ + @Override + public Node clone() { + return doClone(null); // splits for orphan + } + + protected Node doClone(Node parent) { + Node clone; + try { + clone = (Node) super.clone(); + } catch (CloneNotSupportedException e) { + throw new RuntimeException(e); + } + + clone.parentNode = parent; // can be null, to create an orphan split + clone.siblingIndex = parent == null ? 0 : siblingIndex; + clone.attributes = attributes != null ? 
attributes.clone() : null; + clone.baseUri = baseUri; + clone.childNodes = new ArrayList<Node>(childNodes.size()); + for (Node child: childNodes) + clone.childNodes.add(child.doClone(clone)); // clone() creates orphans, doClone() keeps parent + + return clone; + } + + private static class OuterHtmlVisitor implements NodeVisitor { + private StringBuilder accum; + private Document.OutputSettings out; + + OuterHtmlVisitor(StringBuilder accum, Document.OutputSettings out) { + this.accum = accum; + this.out = out; + } + + public void head(Node node, int depth) { + node.outerHtmlHead(accum, depth, out); + } + + public void tail(Node node, int depth) { + if (!node.nodeName().equals("#text")) // saves a void hit. + node.outerHtmlTail(accum, depth, out); + } + } +} diff --git a/src/org/jsoup/nodes/TextNode.java b/src/org/jsoup/nodes/TextNode.java new file mode 100644 index 0000000000..9fd0feac8f --- /dev/null +++ b/src/org/jsoup/nodes/TextNode.java @@ -0,0 +1,175 @@ +package org.jsoup.nodes; + +import org.jsoup.helper.StringUtil; +import org.jsoup.helper.Validate; + +/** + A text node. + + @author Jonathan Hedley, jonathan@hedley.net */ +public class TextNode extends Node { + /* + TextNode is a node, and so by default comes with attributes and children. The attributes are seldom used, but use + memory, and the child nodes are never used. So we don't have them, and override accessors to attributes to create + them as needed on the fly. + */ + private static final String TEXT_KEY = "text"; + String text; + + /** + Create a new TextNode representing the supplied (unencoded) text). + + @param text raw text + @param baseUri base uri + @see #createFromEncoded(String, String) + */ + public TextNode(String text, String baseUri) { + this.baseUri = baseUri; + this.text = text; + } + + public String nodeName() { + return "#text"; + } + + /** + * Get the text content of this text node. + * @return Unencoded, normalised text. 
+ * @see TextNode#getWholeText() + */ + public String text() { + return normaliseWhitespace(getWholeText()); + } + + /** + * Set the text content of this text node. + * @param text unencoded text + * @return this, for chaining + */ + public TextNode text(String text) { + this.text = text; + if (attributes != null) + attributes.put(TEXT_KEY, text); + return this; + } + + /** + Get the (unencoded) text of this text node, including any newlines and spaces present in the original. + @return text + */ + public String getWholeText() { + return attributes == null ? text : attributes.get(TEXT_KEY); + } + + /** + Test if this text node is blank -- that is, empty or only whitespace (including newlines). + @return true if this document is empty or only whitespace, false if it contains any text content. + */ + public boolean isBlank() { + return StringUtil.isBlank(getWholeText()); + } + + /** + * Split this text node into two nodes at the specified string offset. After splitting, this node will contain the + * original text up to the offset, and will have a new text node sibling containing the text after the offset. + * @param offset string offset point to split node at. + * @return the newly created text node containing the text after the offset. 
+ */ + public TextNode splitText(int offset) { + Validate.isTrue(offset >= 0, "Split offset must be not be negative"); + Validate.isTrue(offset < text.length(), "Split offset must not be greater than current text length"); + + String head = getWholeText().substring(0, offset); + String tail = getWholeText().substring(offset); + text(head); + TextNode tailNode = new TextNode(tail, this.baseUri()); + if (parent() != null) + parent().addChildren(siblingIndex()+1, tailNode); + + return tailNode; + } + + void outerHtmlHead(StringBuilder accum, int depth, Document.OutputSettings out) { + String html = Entities.escape(getWholeText(), out); + if (out.prettyPrint() && parent() instanceof Element && !((Element) parent()).preserveWhitespace()) { + html = normaliseWhitespace(html); + } + + if (out.prettyPrint() && siblingIndex() == 0 && parentNode instanceof Element && ((Element) parentNode).tag().formatAsBlock() && !isBlank()) + indent(accum, depth, out); + accum.append(html); + } + + void outerHtmlTail(StringBuilder accum, int depth, Document.OutputSettings out) {} + + public String toString() { + return outerHtml(); + } + + /** + * Create a new TextNode from HTML encoded (aka escaped) data. + * @param encodedText Text containing encoded HTML (e.g. &lt;) + * @return TextNode containing unencoded data (e.g. <) + */ + public static TextNode createFromEncoded(String encodedText, String baseUri) { + String text = Entities.unescape(encodedText); + return new TextNode(text, baseUri); + } + + static String normaliseWhitespace(String text) { + text = StringUtil.normaliseWhitespace(text); + return text; + } + + static String stripLeadingWhitespace(String text) { + return text.replaceFirst("^\\s+", ""); + } + + static boolean lastCharIsWhitespace(StringBuilder sb) { + return sb.length() != 0 && sb.charAt(sb.length() - 1) == ' '; + } + + // attribute fiddling. create on first access. 
+ private void ensureAttributes() { + if (attributes == null) { + attributes = new Attributes(); + attributes.put(TEXT_KEY, text); + } + } + + @Override + public String attr(String attributeKey) { + ensureAttributes(); + return super.attr(attributeKey); + } + + @Override + public Attributes attributes() { + ensureAttributes(); + return super.attributes(); + } + + @Override + public Node attr(String attributeKey, String attributeValue) { + ensureAttributes(); + return super.attr(attributeKey, attributeValue); + } + + @Override + public boolean hasAttr(String attributeKey) { + ensureAttributes(); + return super.hasAttr(attributeKey); + } + + @Override + public Node removeAttr(String attributeKey) { + ensureAttributes(); + return super.removeAttr(attributeKey); + } + + @Override + public String absUrl(String attributeKey) { + ensureAttributes(); + return super.absUrl(attributeKey); + } +} diff --git a/src/org/jsoup/nodes/XmlDeclaration.java b/src/org/jsoup/nodes/XmlDeclaration.java new file mode 100644 index 0000000000..80d4a0152f --- /dev/null +++ b/src/org/jsoup/nodes/XmlDeclaration.java @@ -0,0 +1,48 @@ +package org.jsoup.nodes; + +/** + An XML Declaration. + + @author Jonathan Hedley, jonathan@hedley.net */ +public class XmlDeclaration extends Node { + private static final String DECL_KEY = "declaration"; + private final boolean isProcessingInstruction; // <! if true, <? if false, declaration (and last data char should be ?) + + /** + Create a new XML declaration + @param data data + @param baseUri base uri + @param isProcessingInstruction is processing instruction + */ + public XmlDeclaration(String data, String baseUri, boolean isProcessingInstruction) { + super(baseUri); + attributes.put(DECL_KEY, data); + this.isProcessingInstruction = isProcessingInstruction; + } + + public String nodeName() { + return "#declaration"; + } + + /** + Get the unencoded XML declaration. 
+ @return XML declaration + */ + public String getWholeDeclaration() { + return attributes.get(DECL_KEY); + } + + void outerHtmlHead(StringBuilder accum, int depth, Document.OutputSettings out) { + accum + .append("<") + .append(isProcessingInstruction ? "!" : "?") + .append(getWholeDeclaration()) + .append(">"); + } + + void outerHtmlTail(StringBuilder accum, int depth, Document.OutputSettings out) {} + + public String toString() { + return outerHtml(); + } +} diff --git a/src/org/jsoup/nodes/entities-base.properties b/src/org/jsoup/nodes/entities-base.properties new file mode 100644 index 0000000000..3d1d11e6c4 --- /dev/null +++ b/src/org/jsoup/nodes/entities-base.properties @@ -0,0 +1,106 @@ +AElig=000C6 +AMP=00026 +Aacute=000C1 +Acirc=000C2 +Agrave=000C0 +Aring=000C5 +Atilde=000C3 +Auml=000C4 +COPY=000A9 +Ccedil=000C7 +ETH=000D0 +Eacute=000C9 +Ecirc=000CA +Egrave=000C8 +Euml=000CB +GT=0003E +Iacute=000CD +Icirc=000CE +Igrave=000CC +Iuml=000CF +LT=0003C +Ntilde=000D1 +Oacute=000D3 +Ocirc=000D4 +Ograve=000D2 +Oslash=000D8 +Otilde=000D5 +Ouml=000D6 +QUOT=00022 +REG=000AE +THORN=000DE +Uacute=000DA +Ucirc=000DB +Ugrave=000D9 +Uuml=000DC +Yacute=000DD +aacute=000E1 +acirc=000E2 +acute=000B4 +aelig=000E6 +agrave=000E0 +amp=00026 +aring=000E5 +atilde=000E3 +auml=000E4 +brvbar=000A6 +ccedil=000E7 +cedil=000B8 +cent=000A2 +copy=000A9 +curren=000A4 +deg=000B0 +divide=000F7 +eacute=000E9 +ecirc=000EA +egrave=000E8 +eth=000F0 +euml=000EB +frac12=000BD +frac14=000BC +frac34=000BE +gt=0003E +iacute=000ED +icirc=000EE +iexcl=000A1 +igrave=000EC +iquest=000BF +iuml=000EF +laquo=000AB +lt=0003C +macr=000AF +micro=000B5 +middot=000B7 +nbsp=000A0 +not=000AC +ntilde=000F1 +oacute=000F3 +ocirc=000F4 +ograve=000F2 +ordf=000AA +ordm=000BA +oslash=000F8 +otilde=000F5 +ouml=000F6 +para=000B6 +plusmn=000B1 +pound=000A3 +quot=00022 +raquo=000BB +reg=000AE +sect=000A7 +shy=000AD +sup1=000B9 +sup2=000B2 +sup3=000B3 +szlig=000DF +thorn=000FE +times=000D7 +uacute=000FA +ucirc=000FB 
+ugrave=000F9 +uml=000A8 +uuml=000FC +yacute=000FD +yen=000A5 +yuml=000FF diff --git a/src/org/jsoup/nodes/entities-full.properties b/src/org/jsoup/nodes/entities-full.properties new file mode 100644 index 0000000000..92f124f408 --- /dev/null +++ b/src/org/jsoup/nodes/entities-full.properties @@ -0,0 +1,2032 @@ +AElig=000C6 +AMP=00026 +Aacute=000C1 +Abreve=00102 +Acirc=000C2 +Acy=00410 +Afr=1D504 +Agrave=000C0 +Alpha=00391 +Amacr=00100 +And=02A53 +Aogon=00104 +Aopf=1D538 +ApplyFunction=02061 +Aring=000C5 +Ascr=1D49C +Assign=02254 +Atilde=000C3 +Auml=000C4 +Backslash=02216 +Barv=02AE7 +Barwed=02306 +Bcy=00411 +Because=02235 +Bernoullis=0212C +Beta=00392 +Bfr=1D505 +Bopf=1D539 +Breve=002D8 +Bscr=0212C +Bumpeq=0224E +CHcy=00427 +COPY=000A9 +Cacute=00106 +Cap=022D2 +CapitalDifferentialD=02145 +Cayleys=0212D +Ccaron=0010C +Ccedil=000C7 +Ccirc=00108 +Cconint=02230 +Cdot=0010A +Cedilla=000B8 +CenterDot=000B7 +Cfr=0212D +Chi=003A7 +CircleDot=02299 +CircleMinus=02296 +CirclePlus=02295 +CircleTimes=02297 +ClockwiseContourIntegral=02232 +CloseCurlyDoubleQuote=0201D +CloseCurlyQuote=02019 +Colon=02237 +Colone=02A74 +Congruent=02261 +Conint=0222F +ContourIntegral=0222E +Copf=02102 +Coproduct=02210 +CounterClockwiseContourIntegral=02233 +Cross=02A2F +Cscr=1D49E +Cup=022D3 +CupCap=0224D +DD=02145 +DDotrahd=02911 +DJcy=00402 +DScy=00405 +DZcy=0040F +Dagger=02021 +Darr=021A1 +Dashv=02AE4 +Dcaron=0010E +Dcy=00414 +Del=02207 +Delta=00394 +Dfr=1D507 +DiacriticalAcute=000B4 +DiacriticalDot=002D9 +DiacriticalDoubleAcute=002DD +DiacriticalGrave=00060 +DiacriticalTilde=002DC +Diamond=022C4 +DifferentialD=02146 +Dopf=1D53B +Dot=000A8 +DotDot=020DC +DotEqual=02250 +DoubleContourIntegral=0222F +DoubleDot=000A8 +DoubleDownArrow=021D3 +DoubleLeftArrow=021D0 +DoubleLeftRightArrow=021D4 +DoubleLeftTee=02AE4 +DoubleLongLeftArrow=027F8 +DoubleLongLeftRightArrow=027FA +DoubleLongRightArrow=027F9 +DoubleRightArrow=021D2 +DoubleRightTee=022A8 +DoubleUpArrow=021D1 +DoubleUpDownArrow=021D5 
+DoubleVerticalBar=02225 +DownArrow=02193 +DownArrowBar=02913 +DownArrowUpArrow=021F5 +DownBreve=00311 +DownLeftRightVector=02950 +DownLeftTeeVector=0295E +DownLeftVector=021BD +DownLeftVectorBar=02956 +DownRightTeeVector=0295F +DownRightVector=021C1 +DownRightVectorBar=02957 +DownTee=022A4 +DownTeeArrow=021A7 +Downarrow=021D3 +Dscr=1D49F +Dstrok=00110 +ENG=0014A +ETH=000D0 +Eacute=000C9 +Ecaron=0011A +Ecirc=000CA +Ecy=0042D +Edot=00116 +Efr=1D508 +Egrave=000C8 +Element=02208 +Emacr=00112 +EmptySmallSquare=025FB +EmptyVerySmallSquare=025AB +Eogon=00118 +Eopf=1D53C +Epsilon=00395 +Equal=02A75 +EqualTilde=02242 +Equilibrium=021CC +Escr=02130 +Esim=02A73 +Eta=00397 +Euml=000CB +Exists=02203 +ExponentialE=02147 +Fcy=00424 +Ffr=1D509 +FilledSmallSquare=025FC +FilledVerySmallSquare=025AA +Fopf=1D53D +ForAll=02200 +Fouriertrf=02131 +Fscr=02131 +GJcy=00403 +GT=0003E +Gamma=00393 +Gammad=003DC +Gbreve=0011E +Gcedil=00122 +Gcirc=0011C +Gcy=00413 +Gdot=00120 +Gfr=1D50A +Gg=022D9 +Gopf=1D53E +GreaterEqual=02265 +GreaterEqualLess=022DB +GreaterFullEqual=02267 +GreaterGreater=02AA2 +GreaterLess=02277 +GreaterSlantEqual=02A7E +GreaterTilde=02273 +Gscr=1D4A2 +Gt=0226B +HARDcy=0042A +Hacek=002C7 +Hat=0005E +Hcirc=00124 +Hfr=0210C +HilbertSpace=0210B +Hopf=0210D +HorizontalLine=02500 +Hscr=0210B +Hstrok=00126 +HumpDownHump=0224E +HumpEqual=0224F +IEcy=00415 +IJlig=00132 +IOcy=00401 +Iacute=000CD +Icirc=000CE +Icy=00418 +Idot=00130 +Ifr=02111 +Igrave=000CC +Im=02111 +Imacr=0012A +ImaginaryI=02148 +Implies=021D2 +Int=0222C +Integral=0222B +Intersection=022C2 +InvisibleComma=02063 +InvisibleTimes=02062 +Iogon=0012E +Iopf=1D540 +Iota=00399 +Iscr=02110 +Itilde=00128 +Iukcy=00406 +Iuml=000CF +Jcirc=00134 +Jcy=00419 +Jfr=1D50D +Jopf=1D541 +Jscr=1D4A5 +Jsercy=00408 +Jukcy=00404 +KHcy=00425 +KJcy=0040C +Kappa=0039A +Kcedil=00136 +Kcy=0041A +Kfr=1D50E +Kopf=1D542 +Kscr=1D4A6 +LJcy=00409 +LT=0003C +Lacute=00139 +Lambda=0039B +Lang=027EA +Laplacetrf=02112 +Larr=0219E +Lcaron=0013D +Lcedil=0013B 
+Lcy=0041B +LeftAngleBracket=027E8 +LeftArrow=02190 +LeftArrowBar=021E4 +LeftArrowRightArrow=021C6 +LeftCeiling=02308 +LeftDoubleBracket=027E6 +LeftDownTeeVector=02961 +LeftDownVector=021C3 +LeftDownVectorBar=02959 +LeftFloor=0230A +LeftRightArrow=02194 +LeftRightVector=0294E +LeftTee=022A3 +LeftTeeArrow=021A4 +LeftTeeVector=0295A +LeftTriangle=022B2 +LeftTriangleBar=029CF +LeftTriangleEqual=022B4 +LeftUpDownVector=02951 +LeftUpTeeVector=02960 +LeftUpVector=021BF +LeftUpVectorBar=02958 +LeftVector=021BC +LeftVectorBar=02952 +Leftarrow=021D0 +Leftrightarrow=021D4 +LessEqualGreater=022DA +LessFullEqual=02266 +LessGreater=02276 +LessLess=02AA1 +LessSlantEqual=02A7D +LessTilde=02272 +Lfr=1D50F +Ll=022D8 +Lleftarrow=021DA +Lmidot=0013F +LongLeftArrow=027F5 +LongLeftRightArrow=027F7 +LongRightArrow=027F6 +Longleftarrow=027F8 +Longleftrightarrow=027FA +Longrightarrow=027F9 +Lopf=1D543 +LowerLeftArrow=02199 +LowerRightArrow=02198 +Lscr=02112 +Lsh=021B0 +Lstrok=00141 +Lt=0226A +Map=02905 +Mcy=0041C +MediumSpace=0205F +Mellintrf=02133 +Mfr=1D510 +MinusPlus=02213 +Mopf=1D544 +Mscr=02133 +Mu=0039C +NJcy=0040A +Nacute=00143 +Ncaron=00147 +Ncedil=00145 +Ncy=0041D +NegativeMediumSpace=0200B +NegativeThickSpace=0200B +NegativeThinSpace=0200B +NegativeVeryThinSpace=0200B +NestedGreaterGreater=0226B +NestedLessLess=0226A +NewLine=0000A +Nfr=1D511 +NoBreak=02060 +NonBreakingSpace=000A0 +Nopf=02115 +Not=02AEC +NotCongruent=02262 +NotCupCap=0226D +NotDoubleVerticalBar=02226 +NotElement=02209 +NotEqual=02260 +NotExists=02204 +NotGreater=0226F +NotGreaterEqual=02271 +NotGreaterLess=02279 +NotGreaterTilde=02275 +NotLeftTriangle=022EA +NotLeftTriangleEqual=022EC +NotLess=0226E +NotLessEqual=02270 +NotLessGreater=02278 +NotLessTilde=02274 +NotPrecedes=02280 +NotPrecedesSlantEqual=022E0 +NotReverseElement=0220C +NotRightTriangle=022EB +NotRightTriangleEqual=022ED +NotSquareSubsetEqual=022E2 +NotSquareSupersetEqual=022E3 +NotSubsetEqual=02288 +NotSucceeds=02281 +NotSucceedsSlantEqual=022E1 
+NotSupersetEqual=02289 +NotTilde=02241 +NotTildeEqual=02244 +NotTildeFullEqual=02247 +NotTildeTilde=02249 +NotVerticalBar=02224 +Nscr=1D4A9 +Ntilde=000D1 +Nu=0039D +OElig=00152 +Oacute=000D3 +Ocirc=000D4 +Ocy=0041E +Odblac=00150 +Ofr=1D512 +Ograve=000D2 +Omacr=0014C +Omega=003A9 +Omicron=0039F +Oopf=1D546 +OpenCurlyDoubleQuote=0201C +OpenCurlyQuote=02018 +Or=02A54 +Oscr=1D4AA +Oslash=000D8 +Otilde=000D5 +Otimes=02A37 +Ouml=000D6 +OverBar=0203E +OverBrace=023DE +OverBracket=023B4 +OverParenthesis=023DC +PartialD=02202 +Pcy=0041F +Pfr=1D513 +Phi=003A6 +Pi=003A0 +PlusMinus=000B1 +Poincareplane=0210C +Popf=02119 +Pr=02ABB +Precedes=0227A +PrecedesEqual=02AAF +PrecedesSlantEqual=0227C +PrecedesTilde=0227E +Prime=02033 +Product=0220F +Proportion=02237 +Proportional=0221D +Pscr=1D4AB +Psi=003A8 +QUOT=00022 +Qfr=1D514 +Qopf=0211A +Qscr=1D4AC +RBarr=02910 +REG=000AE +Racute=00154 +Rang=027EB +Rarr=021A0 +Rarrtl=02916 +Rcaron=00158 +Rcedil=00156 +Rcy=00420 +Re=0211C +ReverseElement=0220B +ReverseEquilibrium=021CB +ReverseUpEquilibrium=0296F +Rfr=0211C +Rho=003A1 +RightAngleBracket=027E9 +RightArrow=02192 +RightArrowBar=021E5 +RightArrowLeftArrow=021C4 +RightCeiling=02309 +RightDoubleBracket=027E7 +RightDownTeeVector=0295D +RightDownVector=021C2 +RightDownVectorBar=02955 +RightFloor=0230B +RightTee=022A2 +RightTeeArrow=021A6 +RightTeeVector=0295B +RightTriangle=022B3 +RightTriangleBar=029D0 +RightTriangleEqual=022B5 +RightUpDownVector=0294F +RightUpTeeVector=0295C +RightUpVector=021BE +RightUpVectorBar=02954 +RightVector=021C0 +RightVectorBar=02953 +Rightarrow=021D2 +Ropf=0211D +RoundImplies=02970 +Rrightarrow=021DB +Rscr=0211B +Rsh=021B1 +RuleDelayed=029F4 +SHCHcy=00429 +SHcy=00428 +SOFTcy=0042C +Sacute=0015A +Sc=02ABC +Scaron=00160 +Scedil=0015E +Scirc=0015C +Scy=00421 +Sfr=1D516 +ShortDownArrow=02193 +ShortLeftArrow=02190 +ShortRightArrow=02192 +ShortUpArrow=02191 +Sigma=003A3 +SmallCircle=02218 +Sopf=1D54A +Sqrt=0221A +Square=025A1 +SquareIntersection=02293 
+SquareSubset=0228F +SquareSubsetEqual=02291 +SquareSuperset=02290 +SquareSupersetEqual=02292 +SquareUnion=02294 +Sscr=1D4AE +Star=022C6 +Sub=022D0 +Subset=022D0 +SubsetEqual=02286 +Succeeds=0227B +SucceedsEqual=02AB0 +SucceedsSlantEqual=0227D +SucceedsTilde=0227F +SuchThat=0220B +Sum=02211 +Sup=022D1 +Superset=02283 +SupersetEqual=02287 +Supset=022D1 +THORN=000DE +TRADE=02122 +TSHcy=0040B +TScy=00426 +Tab=00009 +Tau=003A4 +Tcaron=00164 +Tcedil=00162 +Tcy=00422 +Tfr=1D517 +Therefore=02234 +Theta=00398 +ThinSpace=02009 +Tilde=0223C +TildeEqual=02243 +TildeFullEqual=02245 +TildeTilde=02248 +Topf=1D54B +TripleDot=020DB +Tscr=1D4AF +Tstrok=00166 +Uacute=000DA +Uarr=0219F +Uarrocir=02949 +Ubrcy=0040E +Ubreve=0016C +Ucirc=000DB +Ucy=00423 +Udblac=00170 +Ufr=1D518 +Ugrave=000D9 +Umacr=0016A +UnderBar=0005F +UnderBrace=023DF +UnderBracket=023B5 +UnderParenthesis=023DD +Union=022C3 +UnionPlus=0228E +Uogon=00172 +Uopf=1D54C +UpArrow=02191 +UpArrowBar=02912 +UpArrowDownArrow=021C5 +UpDownArrow=02195 +UpEquilibrium=0296E +UpTee=022A5 +UpTeeArrow=021A5 +Uparrow=021D1 +Updownarrow=021D5 +UpperLeftArrow=02196 +UpperRightArrow=02197 +Upsi=003D2 +Upsilon=003A5 +Uring=0016E +Uscr=1D4B0 +Utilde=00168 +Uuml=000DC +VDash=022AB +Vbar=02AEB +Vcy=00412 +Vdash=022A9 +Vdashl=02AE6 +Vee=022C1 +Verbar=02016 +Vert=02016 +VerticalBar=02223 +VerticalLine=0007C +VerticalSeparator=02758 +VerticalTilde=02240 +VeryThinSpace=0200A +Vfr=1D519 +Vopf=1D54D +Vscr=1D4B1 +Vvdash=022AA +Wcirc=00174 +Wedge=022C0 +Wfr=1D51A +Wopf=1D54E +Wscr=1D4B2 +Xfr=1D51B +Xi=0039E +Xopf=1D54F +Xscr=1D4B3 +YAcy=0042F +YIcy=00407 +YUcy=0042E +Yacute=000DD +Ycirc=00176 +Ycy=0042B +Yfr=1D51C +Yopf=1D550 +Yscr=1D4B4 +Yuml=00178 +ZHcy=00416 +Zacute=00179 +Zcaron=0017D +Zcy=00417 +Zdot=0017B +ZeroWidthSpace=0200B +Zeta=00396 +Zfr=02128 +Zopf=02124 +Zscr=1D4B5 +aacute=000E1 +abreve=00103 +ac=0223E +acd=0223F +acirc=000E2 +acute=000B4 +acy=00430 +aelig=000E6 +af=02061 +afr=1D51E +agrave=000E0 +alefsym=02135 +aleph=02135 
+alpha=003B1 +amacr=00101 +amalg=02A3F +amp=00026 +and=02227 +andand=02A55 +andd=02A5C +andslope=02A58 +andv=02A5A +ang=02220 +ange=029A4 +angle=02220 +angmsd=02221 +angmsdaa=029A8 +angmsdab=029A9 +angmsdac=029AA +angmsdad=029AB +angmsdae=029AC +angmsdaf=029AD +angmsdag=029AE +angmsdah=029AF +angrt=0221F +angrtvb=022BE +angrtvbd=0299D +angsph=02222 +angst=000C5 +angzarr=0237C +aogon=00105 +aopf=1D552 +ap=02248 +apE=02A70 +apacir=02A6F +ape=0224A +apid=0224B +apos=00027 +approx=02248 +approxeq=0224A +aring=000E5 +ascr=1D4B6 +ast=0002A +asymp=02248 +asympeq=0224D +atilde=000E3 +auml=000E4 +awconint=02233 +awint=02A11 +bNot=02AED +backcong=0224C +backepsilon=003F6 +backprime=02035 +backsim=0223D +backsimeq=022CD +barvee=022BD +barwed=02305 +barwedge=02305 +bbrk=023B5 +bbrktbrk=023B6 +bcong=0224C +bcy=00431 +bdquo=0201E +becaus=02235 +because=02235 +bemptyv=029B0 +bepsi=003F6 +bernou=0212C +beta=003B2 +beth=02136 +between=0226C +bfr=1D51F +bigcap=022C2 +bigcirc=025EF +bigcup=022C3 +bigodot=02A00 +bigoplus=02A01 +bigotimes=02A02 +bigsqcup=02A06 +bigstar=02605 +bigtriangledown=025BD +bigtriangleup=025B3 +biguplus=02A04 +bigvee=022C1 +bigwedge=022C0 +bkarow=0290D +blacklozenge=029EB +blacksquare=025AA +blacktriangle=025B4 +blacktriangledown=025BE +blacktriangleleft=025C2 +blacktriangleright=025B8 +blank=02423 +blk12=02592 +blk14=02591 +blk34=02593 +block=02588 +bnot=02310 +bopf=1D553 +bot=022A5 +bottom=022A5 +bowtie=022C8 +boxDL=02557 +boxDR=02554 +boxDl=02556 +boxDr=02553 +boxH=02550 +boxHD=02566 +boxHU=02569 +boxHd=02564 +boxHu=02567 +boxUL=0255D +boxUR=0255A +boxUl=0255C +boxUr=02559 +boxV=02551 +boxVH=0256C +boxVL=02563 +boxVR=02560 +boxVh=0256B +boxVl=02562 +boxVr=0255F +boxbox=029C9 +boxdL=02555 +boxdR=02552 +boxdl=02510 +boxdr=0250C +boxh=02500 +boxhD=02565 +boxhU=02568 +boxhd=0252C +boxhu=02534 +boxminus=0229F +boxplus=0229E +boxtimes=022A0 +boxuL=0255B +boxuR=02558 +boxul=02518 +boxur=02514 +boxv=02502 +boxvH=0256A +boxvL=02561 +boxvR=0255E +boxvh=0253C 
+boxvl=02524 +boxvr=0251C +bprime=02035 +breve=002D8 +brvbar=000A6 +bscr=1D4B7 +bsemi=0204F +bsim=0223D +bsime=022CD +bsol=0005C +bsolb=029C5 +bsolhsub=027C8 +bull=02022 +bullet=02022 +bump=0224E +bumpE=02AAE +bumpe=0224F +bumpeq=0224F +cacute=00107 +cap=02229 +capand=02A44 +capbrcup=02A49 +capcap=02A4B +capcup=02A47 +capdot=02A40 +caret=02041 +caron=002C7 +ccaps=02A4D +ccaron=0010D +ccedil=000E7 +ccirc=00109 +ccups=02A4C +ccupssm=02A50 +cdot=0010B +cedil=000B8 +cemptyv=029B2 +cent=000A2 +centerdot=000B7 +cfr=1D520 +chcy=00447 +check=02713 +checkmark=02713 +chi=003C7 +cir=025CB +cirE=029C3 +circ=002C6 +circeq=02257 +circlearrowleft=021BA +circlearrowright=021BB +circledR=000AE +circledS=024C8 +circledast=0229B +circledcirc=0229A +circleddash=0229D +cire=02257 +cirfnint=02A10 +cirmid=02AEF +cirscir=029C2 +clubs=02663 +clubsuit=02663 +colon=0003A +colone=02254 +coloneq=02254 +comma=0002C +commat=00040 +comp=02201 +compfn=02218 +complement=02201 +complexes=02102 +cong=02245 +congdot=02A6D +conint=0222E +copf=1D554 +coprod=02210 +copy=000A9 +copysr=02117 +crarr=021B5 +cross=02717 +cscr=1D4B8 +csub=02ACF +csube=02AD1 +csup=02AD0 +csupe=02AD2 +ctdot=022EF +cudarrl=02938 +cudarrr=02935 +cuepr=022DE +cuesc=022DF +cularr=021B6 +cularrp=0293D +cup=0222A +cupbrcap=02A48 +cupcap=02A46 +cupcup=02A4A +cupdot=0228D +cupor=02A45 +curarr=021B7 +curarrm=0293C +curlyeqprec=022DE +curlyeqsucc=022DF +curlyvee=022CE +curlywedge=022CF +curren=000A4 +curvearrowleft=021B6 +curvearrowright=021B7 +cuvee=022CE +cuwed=022CF +cwconint=02232 +cwint=02231 +cylcty=0232D +dArr=021D3 +dHar=02965 +dagger=02020 +daleth=02138 +darr=02193 +dash=02010 +dashv=022A3 +dbkarow=0290F +dblac=002DD +dcaron=0010F +dcy=00434 +dd=02146 +ddagger=02021 +ddarr=021CA +ddotseq=02A77 +deg=000B0 +delta=003B4 +demptyv=029B1 +dfisht=0297F +dfr=1D521 +dharl=021C3 +dharr=021C2 +diam=022C4 +diamond=022C4 +diamondsuit=02666 +diams=02666 +die=000A8 +digamma=003DD +disin=022F2 +div=000F7 +divide=000F7 +divideontimes=022C7 
+divonx=022C7 +djcy=00452 +dlcorn=0231E +dlcrop=0230D +dollar=00024 +dopf=1D555 +dot=002D9 +doteq=02250 +doteqdot=02251 +dotminus=02238 +dotplus=02214 +dotsquare=022A1 +doublebarwedge=02306 +downarrow=02193 +downdownarrows=021CA +downharpoonleft=021C3 +downharpoonright=021C2 +drbkarow=02910 +drcorn=0231F +drcrop=0230C +dscr=1D4B9 +dscy=00455 +dsol=029F6 +dstrok=00111 +dtdot=022F1 +dtri=025BF +dtrif=025BE +duarr=021F5 +duhar=0296F +dwangle=029A6 +dzcy=0045F +dzigrarr=027FF +eDDot=02A77 +eDot=02251 +eacute=000E9 +easter=02A6E +ecaron=0011B +ecir=02256 +ecirc=000EA +ecolon=02255 +ecy=0044D +edot=00117 +ee=02147 +efDot=02252 +efr=1D522 +eg=02A9A +egrave=000E8 +egs=02A96 +egsdot=02A98 +el=02A99 +elinters=023E7 +ell=02113 +els=02A95 +elsdot=02A97 +emacr=00113 +empty=02205 +emptyset=02205 +emptyv=02205 +emsp13=02004 +emsp14=02005 +emsp=02003 +eng=0014B +ensp=02002 +eogon=00119 +eopf=1D556 +epar=022D5 +eparsl=029E3 +eplus=02A71 +epsi=003B5 +epsilon=003B5 +epsiv=003F5 +eqcirc=02256 +eqcolon=02255 +eqsim=02242 +eqslantgtr=02A96 +eqslantless=02A95 +equals=0003D +equest=0225F +equiv=02261 +equivDD=02A78 +eqvparsl=029E5 +erDot=02253 +erarr=02971 +escr=0212F +esdot=02250 +esim=02242 +eta=003B7 +eth=000F0 +euml=000EB +euro=020AC +excl=00021 +exist=02203 +expectation=02130 +exponentiale=02147 +fallingdotseq=02252 +fcy=00444 +female=02640 +ffilig=0FB03 +fflig=0FB00 +ffllig=0FB04 +ffr=1D523 +filig=0FB01 +flat=0266D +fllig=0FB02 +fltns=025B1 +fnof=00192 +fopf=1D557 +forall=02200 +fork=022D4 +forkv=02AD9 +fpartint=02A0D +frac12=000BD +frac13=02153 +frac14=000BC +frac15=02155 +frac16=02159 +frac18=0215B +frac23=02154 +frac25=02156 +frac34=000BE +frac35=02157 +frac38=0215C +frac45=02158 +frac56=0215A +frac58=0215D +frac78=0215E +frasl=02044 +frown=02322 +fscr=1D4BB +gE=02267 +gEl=02A8C +gacute=001F5 +gamma=003B3 +gammad=003DD +gap=02A86 +gbreve=0011F +gcirc=0011D +gcy=00433 +gdot=00121 +ge=02265 +gel=022DB +geq=02265 +geqq=02267 +geqslant=02A7E +ges=02A7E +gescc=02AA9 +gesdot=02A80 
+gesdoto=02A82 +gesdotol=02A84 +gesles=02A94 +gfr=1D524 +gg=0226B +ggg=022D9 +gimel=02137 +gjcy=00453 +gl=02277 +glE=02A92 +gla=02AA5 +glj=02AA4 +gnE=02269 +gnap=02A8A +gnapprox=02A8A +gne=02A88 +gneq=02A88 +gneqq=02269 +gnsim=022E7 +gopf=1D558 +grave=00060 +gscr=0210A +gsim=02273 +gsime=02A8E +gsiml=02A90 +gt=0003E +gtcc=02AA7 +gtcir=02A7A +gtdot=022D7 +gtlPar=02995 +gtquest=02A7C +gtrapprox=02A86 +gtrarr=02978 +gtrdot=022D7 +gtreqless=022DB +gtreqqless=02A8C +gtrless=02277 +gtrsim=02273 +hArr=021D4 +hairsp=0200A +half=000BD +hamilt=0210B +hardcy=0044A +harr=02194 +harrcir=02948 +harrw=021AD +hbar=0210F +hcirc=00125 +hearts=02665 +heartsuit=02665 +hellip=02026 +hercon=022B9 +hfr=1D525 +hksearow=02925 +hkswarow=02926 +hoarr=021FF +homtht=0223B +hookleftarrow=021A9 +hookrightarrow=021AA +hopf=1D559 +horbar=02015 +hscr=1D4BD +hslash=0210F +hstrok=00127 +hybull=02043 +hyphen=02010 +iacute=000ED +ic=02063 +icirc=000EE +icy=00438 +iecy=00435 +iexcl=000A1 +iff=021D4 +ifr=1D526 +igrave=000EC +ii=02148 +iiiint=02A0C +iiint=0222D +iinfin=029DC +iiota=02129 +ijlig=00133 +imacr=0012B +image=02111 +imagline=02110 +imagpart=02111 +imath=00131 +imof=022B7 +imped=001B5 +in=02208 +incare=02105 +infin=0221E +infintie=029DD +inodot=00131 +int=0222B +intcal=022BA +integers=02124 +intercal=022BA +intlarhk=02A17 +intprod=02A3C +iocy=00451 +iogon=0012F +iopf=1D55A +iota=003B9 +iprod=02A3C +iquest=000BF +iscr=1D4BE +isin=02208 +isinE=022F9 +isindot=022F5 +isins=022F4 +isinsv=022F3 +isinv=02208 +it=02062 +itilde=00129 +iukcy=00456 +iuml=000EF +jcirc=00135 +jcy=00439 +jfr=1D527 +jmath=00237 +jopf=1D55B +jscr=1D4BF +jsercy=00458 +jukcy=00454 +kappa=003BA +kappav=003F0 +kcedil=00137 +kcy=0043A +kfr=1D528 +kgreen=00138 +khcy=00445 +kjcy=0045C +kopf=1D55C +kscr=1D4C0 +lAarr=021DA +lArr=021D0 +lAtail=0291B +lBarr=0290E +lE=02266 +lEg=02A8B +lHar=02962 +lacute=0013A +laemptyv=029B4 +lagran=02112 +lambda=003BB +lang=027E8 +langd=02991 +langle=027E8 +lap=02A85 +laquo=000AB +larr=02190 +larrb=021E4 
+larrbfs=0291F +larrfs=0291D +larrhk=021A9 +larrlp=021AB +larrpl=02939 +larrsim=02973 +larrtl=021A2 +lat=02AAB +latail=02919 +late=02AAD +lbarr=0290C +lbbrk=02772 +lbrace=0007B +lbrack=0005B +lbrke=0298B +lbrksld=0298F +lbrkslu=0298D +lcaron=0013E +lcedil=0013C +lceil=02308 +lcub=0007B +lcy=0043B +ldca=02936 +ldquo=0201C +ldquor=0201E +ldrdhar=02967 +ldrushar=0294B +ldsh=021B2 +le=02264 +leftarrow=02190 +leftarrowtail=021A2 +leftharpoondown=021BD +leftharpoonup=021BC +leftleftarrows=021C7 +leftrightarrow=02194 +leftrightarrows=021C6 +leftrightharpoons=021CB +leftrightsquigarrow=021AD +leftthreetimes=022CB +leg=022DA +leq=02264 +leqq=02266 +leqslant=02A7D +les=02A7D +lescc=02AA8 +lesdot=02A7F +lesdoto=02A81 +lesdotor=02A83 +lesges=02A93 +lessapprox=02A85 +lessdot=022D6 +lesseqgtr=022DA +lesseqqgtr=02A8B +lessgtr=02276 +lesssim=02272 +lfisht=0297C +lfloor=0230A +lfr=1D529 +lg=02276 +lgE=02A91 +lhard=021BD +lharu=021BC +lharul=0296A +lhblk=02584 +ljcy=00459 +ll=0226A +llarr=021C7 +llcorner=0231E +llhard=0296B +lltri=025FA +lmidot=00140 +lmoust=023B0 +lmoustache=023B0 +lnE=02268 +lnap=02A89 +lnapprox=02A89 +lne=02A87 +lneq=02A87 +lneqq=02268 +lnsim=022E6 +loang=027EC +loarr=021FD +lobrk=027E6 +longleftarrow=027F5 +longleftrightarrow=027F7 +longmapsto=027FC +longrightarrow=027F6 +looparrowleft=021AB +looparrowright=021AC +lopar=02985 +lopf=1D55D +loplus=02A2D +lotimes=02A34 +lowast=02217 +lowbar=0005F +loz=025CA +lozenge=025CA +lozf=029EB +lpar=00028 +lparlt=02993 +lrarr=021C6 +lrcorner=0231F +lrhar=021CB +lrhard=0296D +lrm=0200E +lrtri=022BF +lsaquo=02039 +lscr=1D4C1 +lsh=021B0 +lsim=02272 +lsime=02A8D +lsimg=02A8F +lsqb=0005B +lsquo=02018 +lsquor=0201A +lstrok=00142 +lt=0003C +ltcc=02AA6 +ltcir=02A79 +ltdot=022D6 +lthree=022CB +ltimes=022C9 +ltlarr=02976 +ltquest=02A7B +ltrPar=02996 +ltri=025C3 +ltrie=022B4 +ltrif=025C2 +lurdshar=0294A +luruhar=02966 +mDDot=0223A +macr=000AF +male=02642 +malt=02720 +maltese=02720 +map=021A6 +mapsto=021A6 +mapstodown=021A7 
+mapstoleft=021A4 +mapstoup=021A5 +marker=025AE +mcomma=02A29 +mcy=0043C +mdash=02014 +measuredangle=02221 +mfr=1D52A +mho=02127 +micro=000B5 +mid=02223 +midast=0002A +midcir=02AF0 +middot=000B7 +minus=02212 +minusb=0229F +minusd=02238 +minusdu=02A2A +mlcp=02ADB +mldr=02026 +mnplus=02213 +models=022A7 +mopf=1D55E +mp=02213 +mscr=1D4C2 +mstpos=0223E +mu=003BC +multimap=022B8 +mumap=022B8 +nLeftarrow=021CD +nLeftrightarrow=021CE +nRightarrow=021CF +nVDash=022AF +nVdash=022AE +nabla=02207 +nacute=00144 +nap=02249 +napos=00149 +napprox=02249 +natur=0266E +natural=0266E +naturals=02115 +nbsp=000A0 +ncap=02A43 +ncaron=00148 +ncedil=00146 +ncong=02247 +ncup=02A42 +ncy=0043D +ndash=02013 +ne=02260 +neArr=021D7 +nearhk=02924 +nearr=02197 +nearrow=02197 +nequiv=02262 +nesear=02928 +nexist=02204 +nexists=02204 +nfr=1D52B +nge=02271 +ngeq=02271 +ngsim=02275 +ngt=0226F +ngtr=0226F +nhArr=021CE +nharr=021AE +nhpar=02AF2 +ni=0220B +nis=022FC +nisd=022FA +niv=0220B +njcy=0045A +nlArr=021CD +nlarr=0219A +nldr=02025 +nle=02270 +nleftarrow=0219A +nleftrightarrow=021AE +nleq=02270 +nless=0226E +nlsim=02274 +nlt=0226E +nltri=022EA +nltrie=022EC +nmid=02224 +nopf=1D55F +not=000AC +notin=02209 +notinva=02209 +notinvb=022F7 +notinvc=022F6 +notni=0220C +notniva=0220C +notnivb=022FE +notnivc=022FD +npar=02226 +nparallel=02226 +npolint=02A14 +npr=02280 +nprcue=022E0 +nprec=02280 +nrArr=021CF +nrarr=0219B +nrightarrow=0219B +nrtri=022EB +nrtrie=022ED +nsc=02281 +nsccue=022E1 +nscr=1D4C3 +nshortmid=02224 +nshortparallel=02226 +nsim=02241 +nsime=02244 +nsimeq=02244 +nsmid=02224 +nspar=02226 +nsqsube=022E2 +nsqsupe=022E3 +nsub=02284 +nsube=02288 +nsubseteq=02288 +nsucc=02281 +nsup=02285 +nsupe=02289 +nsupseteq=02289 +ntgl=02279 +ntilde=000F1 +ntlg=02278 +ntriangleleft=022EA +ntrianglelefteq=022EC +ntriangleright=022EB +ntrianglerighteq=022ED +nu=003BD +num=00023 +numero=02116 +numsp=02007 +nvDash=022AD +nvHarr=02904 +nvdash=022AC +nvinfin=029DE +nvlArr=02902 +nvrArr=02903 +nwArr=021D6 
+nwarhk=02923 +nwarr=02196 +nwarrow=02196 +nwnear=02927 +oS=024C8 +oacute=000F3 +oast=0229B +ocir=0229A +ocirc=000F4 +ocy=0043E +odash=0229D +odblac=00151 +odiv=02A38 +odot=02299 +odsold=029BC +oelig=00153 +ofcir=029BF +ofr=1D52C +ogon=002DB +ograve=000F2 +ogt=029C1 +ohbar=029B5 +ohm=003A9 +oint=0222E +olarr=021BA +olcir=029BE +olcross=029BB +oline=0203E +olt=029C0 +omacr=0014D +omega=003C9 +omicron=003BF +omid=029B6 +ominus=02296 +oopf=1D560 +opar=029B7 +operp=029B9 +oplus=02295 +or=02228 +orarr=021BB +ord=02A5D +order=02134 +orderof=02134 +ordf=000AA +ordm=000BA +origof=022B6 +oror=02A56 +orslope=02A57 +orv=02A5B +oscr=02134 +oslash=000F8 +osol=02298 +otilde=000F5 +otimes=02297 +otimesas=02A36 +ouml=000F6 +ovbar=0233D +par=02225 +para=000B6 +parallel=02225 +parsim=02AF3 +parsl=02AFD +part=02202 +pcy=0043F +percnt=00025 +period=0002E +permil=02030 +perp=022A5 +pertenk=02031 +pfr=1D52D +phi=003C6 +phiv=003D5 +phmmat=02133 +phone=0260E +pi=003C0 +pitchfork=022D4 +piv=003D6 +planck=0210F +planckh=0210E +plankv=0210F +plus=0002B +plusacir=02A23 +plusb=0229E +pluscir=02A22 +plusdo=02214 +plusdu=02A25 +pluse=02A72 +plusmn=000B1 +plussim=02A26 +plustwo=02A27 +pm=000B1 +pointint=02A15 +popf=1D561 +pound=000A3 +pr=0227A +prE=02AB3 +prap=02AB7 +prcue=0227C +pre=02AAF +prec=0227A +precapprox=02AB7 +preccurlyeq=0227C +preceq=02AAF +precnapprox=02AB9 +precneqq=02AB5 +precnsim=022E8 +precsim=0227E +prime=02032 +primes=02119 +prnE=02AB5 +prnap=02AB9 +prnsim=022E8 +prod=0220F +profalar=0232E +profline=02312 +profsurf=02313 +prop=0221D +propto=0221D +prsim=0227E +prurel=022B0 +pscr=1D4C5 +psi=003C8 +puncsp=02008 +qfr=1D52E +qint=02A0C +qopf=1D562 +qprime=02057 +qscr=1D4C6 +quaternions=0210D +quatint=02A16 +quest=0003F +questeq=0225F +quot=00022 +rAarr=021DB +rArr=021D2 +rAtail=0291C +rBarr=0290F +rHar=02964 +racute=00155 +radic=0221A +raemptyv=029B3 +rang=027E9 +rangd=02992 +range=029A5 +rangle=027E9 +raquo=000BB +rarr=02192 +rarrap=02975 +rarrb=021E5 +rarrbfs=02920 +rarrc=02933 
+rarrfs=0291E +rarrhk=021AA +rarrlp=021AC +rarrpl=02945 +rarrsim=02974 +rarrtl=021A3 +rarrw=0219D +ratail=0291A +ratio=02236 +rationals=0211A +rbarr=0290D +rbbrk=02773 +rbrace=0007D +rbrack=0005D +rbrke=0298C +rbrksld=0298E +rbrkslu=02990 +rcaron=00159 +rcedil=00157 +rceil=02309 +rcub=0007D +rcy=00440 +rdca=02937 +rdldhar=02969 +rdquo=0201D +rdquor=0201D +rdsh=021B3 +real=0211C +realine=0211B +realpart=0211C +reals=0211D +rect=025AD +reg=000AE +rfisht=0297D +rfloor=0230B +rfr=1D52F +rhard=021C1 +rharu=021C0 +rharul=0296C +rho=003C1 +rhov=003F1 +rightarrow=02192 +rightarrowtail=021A3 +rightharpoondown=021C1 +rightharpoonup=021C0 +rightleftarrows=021C4 +rightleftharpoons=021CC +rightrightarrows=021C9 +rightsquigarrow=0219D +rightthreetimes=022CC +ring=002DA +risingdotseq=02253 +rlarr=021C4 +rlhar=021CC +rlm=0200F +rmoust=023B1 +rmoustache=023B1 +rnmid=02AEE +roang=027ED +roarr=021FE +robrk=027E7 +ropar=02986 +ropf=1D563 +roplus=02A2E +rotimes=02A35 +rpar=00029 +rpargt=02994 +rppolint=02A12 +rrarr=021C9 +rsaquo=0203A +rscr=1D4C7 +rsh=021B1 +rsqb=0005D +rsquo=02019 +rsquor=02019 +rthree=022CC +rtimes=022CA +rtri=025B9 +rtrie=022B5 +rtrif=025B8 +rtriltri=029CE +ruluhar=02968 +rx=0211E +sacute=0015B +sbquo=0201A +sc=0227B +scE=02AB4 +scap=02AB8 +scaron=00161 +sccue=0227D +sce=02AB0 +scedil=0015F +scirc=0015D +scnE=02AB6 +scnap=02ABA +scnsim=022E9 +scpolint=02A13 +scsim=0227F +scy=00441 +sdot=022C5 +sdotb=022A1 +sdote=02A66 +seArr=021D8 +searhk=02925 +searr=02198 +searrow=02198 +sect=000A7 +semi=0003B +seswar=02929 +setminus=02216 +setmn=02216 +sext=02736 +sfr=1D530 +sfrown=02322 +sharp=0266F +shchcy=00449 +shcy=00448 +shortmid=02223 +shortparallel=02225 +shy=000AD +sigma=003C3 +sigmaf=003C2 +sigmav=003C2 +sim=0223C +simdot=02A6A +sime=02243 +simeq=02243 +simg=02A9E +simgE=02AA0 +siml=02A9D +simlE=02A9F +simne=02246 +simplus=02A24 +simrarr=02972 +slarr=02190 +smallsetminus=02216 +smashp=02A33 +smeparsl=029E4 +smid=02223 +smile=02323 +smt=02AAA +smte=02AAC +softcy=0044C 
+sol=0002F +solb=029C4 +solbar=0233F +sopf=1D564 +spades=02660 +spadesuit=02660 +spar=02225 +sqcap=02293 +sqcup=02294 +sqsub=0228F +sqsube=02291 +sqsubset=0228F +sqsubseteq=02291 +sqsup=02290 +sqsupe=02292 +sqsupset=02290 +sqsupseteq=02292 +squ=025A1 +square=025A1 +squarf=025AA +squf=025AA +srarr=02192 +sscr=1D4C8 +ssetmn=02216 +ssmile=02323 +sstarf=022C6 +star=02606 +starf=02605 +straightepsilon=003F5 +straightphi=003D5 +strns=000AF +sub=02282 +subE=02AC5 +subdot=02ABD +sube=02286 +subedot=02AC3 +submult=02AC1 +subnE=02ACB +subne=0228A +subplus=02ABF +subrarr=02979 +subset=02282 +subseteq=02286 +subseteqq=02AC5 +subsetneq=0228A +subsetneqq=02ACB +subsim=02AC7 +subsub=02AD5 +subsup=02AD3 +succ=0227B +succapprox=02AB8 +succcurlyeq=0227D +succeq=02AB0 +succnapprox=02ABA +succneqq=02AB6 +succnsim=022E9 +succsim=0227F +sum=02211 +sung=0266A +sup1=000B9 +sup2=000B2 +sup3=000B3 +sup=02283 +supE=02AC6 +supdot=02ABE +supdsub=02AD8 +supe=02287 +supedot=02AC4 +suphsol=027C9 +suphsub=02AD7 +suplarr=0297B +supmult=02AC2 +supnE=02ACC +supne=0228B +supplus=02AC0 +supset=02283 +supseteq=02287 +supseteqq=02AC6 +supsetneq=0228B +supsetneqq=02ACC +supsim=02AC8 +supsub=02AD4 +supsup=02AD6 +swArr=021D9 +swarhk=02926 +swarr=02199 +swarrow=02199 +swnwar=0292A +szlig=000DF +target=02316 +tau=003C4 +tbrk=023B4 +tcaron=00165 +tcedil=00163 +tcy=00442 +tdot=020DB +telrec=02315 +tfr=1D531 +there4=02234 +therefore=02234 +theta=003B8 +thetasym=003D1 +thetav=003D1 +thickapprox=02248 +thicksim=0223C +thinsp=02009 +thkap=02248 +thksim=0223C +thorn=000FE +tilde=002DC +times=000D7 +timesb=022A0 +timesbar=02A31 +timesd=02A30 +tint=0222D +toea=02928 +top=022A4 +topbot=02336 +topcir=02AF1 +topf=1D565 +topfork=02ADA +tosa=02929 +tprime=02034 +trade=02122 +triangle=025B5 +triangledown=025BF +triangleleft=025C3 +trianglelefteq=022B4 +triangleq=0225C +triangleright=025B9 +trianglerighteq=022B5 +tridot=025EC +trie=0225C +triminus=02A3A +triplus=02A39 +trisb=029CD +tritime=02A3B +trpezium=023E2 +tscr=1D4C9 
+tscy=00446 +tshcy=0045B +tstrok=00167 +twixt=0226C +twoheadleftarrow=0219E +twoheadrightarrow=021A0 +uArr=021D1 +uHar=02963 +uacute=000FA +uarr=02191 +ubrcy=0045E +ubreve=0016D +ucirc=000FB +ucy=00443 +udarr=021C5 +udblac=00171 +udhar=0296E +ufisht=0297E +ufr=1D532 +ugrave=000F9 +uharl=021BF +uharr=021BE +uhblk=02580 +ulcorn=0231C +ulcorner=0231C +ulcrop=0230F +ultri=025F8 +umacr=0016B +uml=000A8 +uogon=00173 +uopf=1D566 +uparrow=02191 +updownarrow=02195 +upharpoonleft=021BF +upharpoonright=021BE +uplus=0228E +upsi=003C5 +upsih=003D2 +upsilon=003C5 +upuparrows=021C8 +urcorn=0231D +urcorner=0231D +urcrop=0230E +uring=0016F +urtri=025F9 +uscr=1D4CA +utdot=022F0 +utilde=00169 +utri=025B5 +utrif=025B4 +uuarr=021C8 +uuml=000FC +uwangle=029A7 +vArr=021D5 +vBar=02AE8 +vBarv=02AE9 +vDash=022A8 +vangrt=0299C +varepsilon=003F5 +varkappa=003F0 +varnothing=02205 +varphi=003D5 +varpi=003D6 +varpropto=0221D +varr=02195 +varrho=003F1 +varsigma=003C2 +vartheta=003D1 +vartriangleleft=022B2 +vartriangleright=022B3 +vcy=00432 +vdash=022A2 +vee=02228 +veebar=022BB +veeeq=0225A +vellip=022EE +verbar=0007C +vert=0007C +vfr=1D533 +vltri=022B2 +vopf=1D567 +vprop=0221D +vrtri=022B3 +vscr=1D4CB +vzigzag=0299A +wcirc=00175 +wedbar=02A5F +wedge=02227 +wedgeq=02259 +weierp=02118 +wfr=1D534 +wopf=1D568 +wp=02118 +wr=02240 +wreath=02240 +wscr=1D4CC +xcap=022C2 +xcirc=025EF +xcup=022C3 +xdtri=025BD +xfr=1D535 +xhArr=027FA +xharr=027F7 +xi=003BE +xlArr=027F8 +xlarr=027F5 +xmap=027FC +xnis=022FB +xodot=02A00 +xopf=1D569 +xoplus=02A01 +xotime=02A02 +xrArr=027F9 +xrarr=027F6 +xscr=1D4CD +xsqcup=02A06 +xuplus=02A04 +xutri=025B3 +xvee=022C1 +xwedge=022C0 +yacute=000FD +yacy=0044F +ycirc=00177 +ycy=0044B +yen=000A5 +yfr=1D536 +yicy=00457 +yopf=1D56A +yscr=1D4CE +yucy=0044E +yuml=000FF +zacute=0017A +zcaron=0017E +zcy=00437 +zdot=0017C +zeetrf=02128 +zeta=003B6 +zfr=1D537 +zhcy=00436 +zigrarr=021DD +zopf=1D56B +zscr=1D4CF +zwj=0200D +zwnj=0200C diff --git a/src/org/jsoup/nodes/package-info.java 
b/src/org/jsoup/nodes/package-info.java new file mode 100644 index 0000000000..24b12803ff --- /dev/null +++ b/src/org/jsoup/nodes/package-info.java @@ -0,0 +1,4 @@ +/** + HTML document structure nodes. + */ +package org.jsoup.nodes;
\ No newline at end of file |