about | summary | refs | log | tree | commit | diff | stats
path: root/server/src/org/jsoup/nodes/Document.java
diff options
context:
space:
mode:
Diffstat (limited to 'server/src/org/jsoup/nodes/Document.java')
-rw-r--r-- server/src/org/jsoup/nodes/Document.java 192
1 files changed, 122 insertions, 70 deletions
diff --git a/server/src/org/jsoup/nodes/Document.java b/server/src/org/jsoup/nodes/Document.java
index adb371ce14..f1c4595faa 100644
--- a/server/src/org/jsoup/nodes/Document.java
+++ b/server/src/org/jsoup/nodes/Document.java
@@ -1,36 +1,42 @@
package org.jsoup.nodes;
-import org.jsoup.helper.Validate;
-import org.jsoup.parser.Tag;
-import org.jsoup.select.Elements;
-
import java.nio.charset.Charset;
import java.nio.charset.CharsetEncoder;
import java.util.ArrayList;
import java.util.List;
-/**
- A HTML Document.
+import org.jsoup.helper.Validate;
+import org.jsoup.parser.Tag;
+import org.jsoup.select.Elements;
- @author Jonathan Hedley, jonathan@hedley.net */
+/**
+ * An HTML Document.
+ *
+ * @author Jonathan Hedley, jonathan@hedley.net
+ */
public class Document extends Element {
private OutputSettings outputSettings = new OutputSettings();
private QuirksMode quirksMode = QuirksMode.noQuirks;
/**
- Create a new, empty Document.
- @param baseUri base URI of document
- @see org.jsoup.Jsoup#parse
- @see #createShell
+ * Create a new, empty Document.
+ *
+ * @param baseUri
+ * base URI of document
+ * @see org.jsoup.Jsoup#parse
+ * @see #createShell
*/
public Document(String baseUri) {
super(Tag.valueOf("#root"), baseUri);
}
/**
- Create a valid, empty shell of a document, suitable for adding more elements to.
- @param baseUri baseUri of document
- @return document with html, head, and body elements.
+ * Create a valid, empty shell of a document, suitable for adding more
+ * elements to.
+ *
+ * @param baseUri
+ * baseUri of document
+ * @return document with html, head, and body elements.
*/
static public Document createShell(String baseUri) {
Validate.notNull(baseUri);
@@ -44,24 +50,27 @@ public class Document extends Element {
}
/**
- Accessor to the document's {@code head} element.
- @return {@code head}
+ * Accessor to the document's {@code head} element.
+ *
+ * @return {@code head}
*/
public Element head() {
return findFirstElementByTagName("head", this);
}
/**
- Accessor to the document's {@code body} element.
- @return {@code body}
+ * Accessor to the document's {@code body} element.
+ *
+ * @return {@code body}
*/
public Element body() {
return findFirstElementByTagName("body", this);
}
/**
- Get the string contents of the document's {@code title} element.
- @return Trimmed title, or empty string if none set.
+ * Get the string contents of the document's {@code title} element.
+ *
+ * @return Trimmed title, or empty string if none set.
*/
public String title() {
Element titleEl = getElementsByTag("title").first();
@@ -69,9 +78,11 @@ public class Document extends Element {
}
/**
- Set the document's {@code title} element. Updates the existing element, or adds {@code title} to {@code head} if
- not present
- @param title string to set as title
+ * Set the document's {@code title} element. Updates the existing element,
+ * or adds {@code title} to {@code head} if not present.
+ *
+ * @param title
+ * string to set as title
*/
public void title(String title) {
Validate.notNull(title);
@@ -84,29 +95,38 @@ public class Document extends Element {
}
/**
- Create a new Element, with this document's base uri. Does not make the new element a child of this document.
- @param tagName element tag name (e.g. {@code a})
- @return new element
+ * Create a new Element, with this document's base uri. Does not make the
+ * new element a child of this document.
+ *
+ * @param tagName
+ * element tag name (e.g. {@code a})
+ * @return new element
*/
public Element createElement(String tagName) {
- return new Element(Tag.valueOf(tagName), this.baseUri());
+ return new Element(Tag.valueOf(tagName), baseUri());
}
/**
- Normalise the document. This happens after the parse phase so generally does not need to be called.
- Moves any text content that is not in the body element into the body.
- @return this document after normalisation
+ * Normalise the document. This happens after the parse phase so generally
+ * does not need to be called. Moves any text content that is not in the
+ * body element into the body.
+ *
+ * @return this document after normalisation
*/
public Document normalise() {
Element htmlEl = findFirstElementByTagName("html", this);
- if (htmlEl == null)
+ if (htmlEl == null) {
htmlEl = appendElement("html");
- if (head() == null)
+ }
+ if (head() == null) {
htmlEl.prependElement("head");
- if (body() == null)
+ }
+ if (body() == null) {
htmlEl.appendElement("body");
+ }
- // pull text nodes out of root, html, and head els, and push into body. non-text nodes are already taken care
+ // pull text nodes out of root, html, and head els, and push into body.
+ // non-text nodes are already taken care
// of. do in inverse order to maintain text order.
normaliseTextNodes(head());
normaliseTextNodes(htmlEl);
@@ -114,22 +134,23 @@ public class Document extends Element {
normaliseStructure("head", htmlEl);
normaliseStructure("body", htmlEl);
-
+
return this;
}
// does not recurse.
private void normaliseTextNodes(Element element) {
List<Node> toMove = new ArrayList<Node>();
- for (Node node: element.childNodes) {
+ for (Node node : element.childNodes) {
if (node instanceof TextNode) {
TextNode tn = (TextNode) node;
- if (!tn.isBlank())
+ if (!tn.isBlank()) {
toMove.add(tn);
+ }
}
}
- for (int i = toMove.size()-1; i >= 0; i--) {
+ for (int i = toMove.size() - 1; i >= 0; i--) {
Node node = toMove.get(i);
element.removeChild(node);
body().prependChild(new TextNode(" ", ""));
@@ -137,37 +158,42 @@ public class Document extends Element {
}
}
- // merge multiple <head> or <body> contents into one, delete the remainder, and ensure they are owned by <html>
+ // merge multiple <head> or <body> contents into one, delete the remainder,
+ // and ensure they are owned by <html>
private void normaliseStructure(String tag, Element htmlEl) {
- Elements elements = this.getElementsByTag(tag);
- Element master = elements.first(); // will always be available as created above if not existent
+ Elements elements = getElementsByTag(tag);
+ Element master = elements.first(); // will always be available as
+ // created above if not existent
if (elements.size() > 1) { // dupes, move contents to master
List<Node> toMove = new ArrayList<Node>();
for (int i = 1; i < elements.size(); i++) {
Node dupe = elements.get(i);
- for (Node node : dupe.childNodes)
+ for (Node node : dupe.childNodes) {
toMove.add(node);
+ }
dupe.remove();
}
- for (Node dupe : toMove)
+ for (Node dupe : toMove) {
master.appendChild(dupe);
+ }
}
// ensure parented by <html>
if (!master.parent().equals(htmlEl)) {
- htmlEl.appendChild(master); // includes remove()
+ htmlEl.appendChild(master); // includes remove()
}
}
// fast method to get first by tag name, used for html, head, body finders
private Element findFirstElementByTagName(String tag, Node node) {
- if (node.nodeName().equals(tag))
+ if (node.nodeName().equals(tag)) {
return (Element) node;
- else {
- for (Node child: node.childNodes) {
+ } else {
+ for (Node child : node.childNodes) {
Element found = findFirstElementByTagName(tag, child);
- if (found != null)
+ if (found != null) {
return found;
+ }
}
}
return null;
@@ -179,9 +205,12 @@ public class Document extends Element {
}
/**
- Set the text of the {@code body} of this document. Any existing nodes within the body will be cleared.
- @param text unencoded text
- @return this document
+ * Set the text of the {@code body} of this document. Any existing nodes
+ * within the body will be cleared.
+ *
+ * @param text
+ * unencoded text
+ * @return this document
*/
@Override
public Element text(String text) {
@@ -197,12 +226,13 @@ public class Document extends Element {
@Override
public Document clone() {
Document clone = (Document) super.clone();
- clone.outputSettings = this.outputSettings.clone();
+ clone.outputSettings = outputSettings.clone();
return clone;
}
/**
- * A Document's output settings control the form of the text() and html() methods.
+ * A Document's output settings control the form of the text() and html()
+ * methods.
*/
public static class OutputSettings implements Cloneable {
private Entities.EscapeMode escapeMode = Entities.EscapeMode.base;
@@ -211,14 +241,18 @@ public class Document extends Element {
private boolean prettyPrint = true;
private int indentAmount = 1;
- public OutputSettings() {}
+ public OutputSettings() {
+ }
/**
- * Get the document's current HTML escape mode: <code>base</code>, which provides a limited set of named HTML
- * entities and escapes other characters as numbered entities for maximum compatibility; or <code>extended</code>,
- * which uses the complete set of HTML named entities.
+ * Get the document's current HTML escape mode: <code>base</code>, which
+ * provides a limited set of named HTML entities and escapes other
+ * characters as numbered entities for maximum compatibility; or
+ * <code>extended</code>, which uses the complete set of HTML named
+ * entities.
* <p>
* The default escape mode is <code>base</code>.
+ *
* @return the document's current escape mode
*/
public Entities.EscapeMode escapeMode() {
@@ -227,7 +261,9 @@ public class Document extends Element {
/**
* Set the document's escape mode
- * @param escapeMode the new escape mode to use
+ *
+ * @param escapeMode
+ * the new escape mode to use
* @return the document's output settings, for chaining
*/
public OutputSettings escapeMode(Entities.EscapeMode escapeMode) {
@@ -236,11 +272,14 @@ public class Document extends Element {
}
/**
- * Get the document's current output charset, which is used to control which characters are escaped when
- * generating HTML (via the <code>html()</code> methods), and which are kept intact.
+ * Get the document's current output charset, which is used to control
+ * which characters are escaped when generating HTML (via the
+ * <code>html()</code> methods), and which are kept intact.
* <p>
- * Where possible (when parsing from a URL or File), the document's output charset is automatically set to the
- * input charset. Otherwise, it defaults to UTF-8.
+ * Where possible (when parsing from a URL or File), the document's
+ * output charset is automatically set to the input charset. Otherwise,
+ * it defaults to UTF-8.
+ *
* @return the document's current charset.
*/
public Charset charset() {
@@ -249,7 +288,9 @@ public class Document extends Element {
/**
* Update the document's output charset.
- * @param charset the new charset to use.
+ *
+ * @param charset
+ * the new charset to use.
* @return the document's output settings, for chaining
*/
public OutputSettings charset(Charset charset) {
@@ -261,7 +302,9 @@ public class Document extends Element {
/**
* Update the document's output charset.
- * @param charset the new charset (by name) to use.
+ *
+ * @param charset
+ * the new charset (by name) to use.
* @return the document's output settings, for chaining
*/
public OutputSettings charset(String charset) {
@@ -274,8 +317,10 @@ public class Document extends Element {
}
/**
- * Get if pretty printing is enabled. Default is true. If disabled, the HTML output methods will not re-format
- * the output, and the output will generally look like the input.
+ * Get whether pretty printing is enabled. Default is true. If disabled, the
+ * HTML output methods will not re-format the output, and the output
+ * will generally look like the input.
+ *
* @return if pretty printing is enabled.
*/
public boolean prettyPrint() {
@@ -284,7 +329,9 @@ public class Document extends Element {
/**
* Enable or disable pretty printing.
- * @param pretty new pretty print setting
+ *
+ * @param pretty
+ * new pretty print setting
* @return this, for chaining
*/
public OutputSettings prettyPrint(boolean pretty) {
@@ -294,6 +341,7 @@ public class Document extends Element {
/**
* Get the current tag indent amount, used when pretty printing.
+ *
* @return the current indent amount
*/
public int indentAmount() {
@@ -302,7 +350,10 @@ public class Document extends Element {
/**
* Set the indent amount for pretty printing
- * @param indentAmount number of spaces to use for indenting each level. Must be >= 0.
+ *
+ * @param indentAmount
+ * number of spaces to use for indenting each level. Must be
+ * >= 0.
* @return this, for chaining
*/
public OutputSettings indentAmount(int indentAmount) {
@@ -321,13 +372,15 @@ public class Document extends Element {
}
clone.charset(charset.name()); // new charset and charset encoder
clone.escapeMode = Entities.EscapeMode.valueOf(escapeMode.name());
- // indentAmount, prettyPrint are primitives so object.clone() will handle
+ // indentAmount and prettyPrint are primitives, so Object.clone()
+ // already copies them
return clone;
}
}
/**
* Get the document's current output settings.
+ *
* @return the document's current output settings.
*/
public OutputSettings outputSettings() {
@@ -347,4 +400,3 @@ public class Document extends Element {
return this;
}
}
-