diff options
Diffstat (limited to 'server/src/org/jsoup/nodes/Document.java')
-rw-r--r-- | server/src/org/jsoup/nodes/Document.java | 192 |
1 files changed, 122 insertions, 70 deletions
diff --git a/server/src/org/jsoup/nodes/Document.java b/server/src/org/jsoup/nodes/Document.java index adb371ce14..f1c4595faa 100644 --- a/server/src/org/jsoup/nodes/Document.java +++ b/server/src/org/jsoup/nodes/Document.java @@ -1,36 +1,42 @@ package org.jsoup.nodes; -import org.jsoup.helper.Validate; -import org.jsoup.parser.Tag; -import org.jsoup.select.Elements; - import java.nio.charset.Charset; import java.nio.charset.CharsetEncoder; import java.util.ArrayList; import java.util.List; -/** - A HTML Document. +import org.jsoup.helper.Validate; +import org.jsoup.parser.Tag; +import org.jsoup.select.Elements; - @author Jonathan Hedley, jonathan@hedley.net */ +/** + * A HTML Document. + * + * @author Jonathan Hedley, jonathan@hedley.net + */ public class Document extends Element { private OutputSettings outputSettings = new OutputSettings(); private QuirksMode quirksMode = QuirksMode.noQuirks; /** - Create a new, empty Document. - @param baseUri base URI of document - @see org.jsoup.Jsoup#parse - @see #createShell + * Create a new, empty Document. + * + * @param baseUri + * base URI of document + * @see org.jsoup.Jsoup#parse + * @see #createShell */ public Document(String baseUri) { super(Tag.valueOf("#root"), baseUri); } /** - Create a valid, empty shell of a document, suitable for adding more elements to. - @param baseUri baseUri of document - @return document with html, head, and body elements. + * Create a valid, empty shell of a document, suitable for adding more + * elements to. + * + * @param baseUri + * baseUri of document + * @return document with html, head, and body elements. */ static public Document createShell(String baseUri) { Validate.notNull(baseUri); @@ -44,24 +50,27 @@ public class Document extends Element { } /** - Accessor to the document's {@code head} element. - @return {@code head} + * Accessor to the document's {@code head} element. + * + * @return {@code head} */ public Element head() { return findFirstElementByTagName("head", this); } /** - Accessor to the document's {@code body} element. - @return {@code body} + * Accessor to the document's {@code body} element. + * + * @return {@code body} */ public Element body() { return findFirstElementByTagName("body", this); } /** - Get the string contents of the document's {@code title} element. - @return Trimmed title, or empty string if none set. + * Get the string contents of the document's {@code title} element. + * + * @return Trimmed title, or empty string if none set. */ public String title() { Element titleEl = getElementsByTag("title").first(); @@ -69,9 +78,11 @@ public class Document extends Element { } /** - Set the document's {@code title} element. Updates the existing element, or adds {@code title} to {@code head} if - not present - @param title string to set as title + * Set the document's {@code title} element. Updates the existing element, + * or adds {@code title} to {@code head} if not present + * + * @param title + * string to set as title */ public void title(String title) { Validate.notNull(title); @@ -84,29 +95,38 @@ public class Document extends Element { } /** - Create a new Element, with this document's base uri. Does not make the new element a child of this document. - @param tagName element tag name (e.g. {@code a}) - @return new element + * Create a new Element, with this document's base uri. Does not make the + * new element a child of this document. + * + * @param tagName + * element tag name (e.g. {@code a}) + * @return new element */ public Element createElement(String tagName) { - return new Element(Tag.valueOf(tagName), this.baseUri()); + return new Element(Tag.valueOf(tagName), baseUri()); } /** - Normalise the document. This happens after the parse phase so generally does not need to be called. - Moves any text content that is not in the body element into the body. - @return this document after normalisation + * Normalise the document. This happens after the parse phase so generally + * does not need to be called. Moves any text content that is not in the + * body element into the body. + * + * @return this document after normalisation */ public Document normalise() { Element htmlEl = findFirstElementByTagName("html", this); - if (htmlEl == null) + if (htmlEl == null) { htmlEl = appendElement("html"); - if (head() == null) + } + if (head() == null) { htmlEl.prependElement("head"); - if (body() == null) + } + if (body() == null) { htmlEl.appendElement("body"); + } - // pull text nodes out of root, html, and head els, and push into body. non-text nodes are already taken care + // pull text nodes out of root, html, and head els, and push into body. + // non-text nodes are already taken care // of. do in inverse order to maintain text order. normaliseTextNodes(head()); normaliseTextNodes(htmlEl); @@ -114,22 +134,23 @@ public class Document extends Element { normaliseStructure("head", htmlEl); normaliseStructure("body", htmlEl); - + return this; } // does not recurse. private void normaliseTextNodes(Element element) { List<Node> toMove = new ArrayList<Node>(); - for (Node node: element.childNodes) { + for (Node node : element.childNodes) { if (node instanceof TextNode) { TextNode tn = (TextNode) node; - if (!tn.isBlank()) + if (!tn.isBlank()) { toMove.add(tn); + } } } - for (int i = toMove.size()-1; i >= 0; i--) { + for (int i = toMove.size() - 1; i >= 0; i--) { Node node = toMove.get(i); element.removeChild(node); body().prependChild(new TextNode(" ", "")); @@ -137,37 +158,42 @@ public class Document extends Element { } } - // merge multiple <head> or <body> contents into one, delete the remainder, and ensure they are owned by <html> + // merge multiple <head> or <body> contents into one, delete the remainder, + // and ensure they are owned by <html> private void normaliseStructure(String tag, Element htmlEl) { - Elements elements = this.getElementsByTag(tag); - Element master = elements.first(); // will always be available as created above if not existent + Elements elements = getElementsByTag(tag); + Element master = elements.first(); // will always be available as + // created above if not existent if (elements.size() > 1) { // dupes, move contents to master List<Node> toMove = new ArrayList<Node>(); for (int i = 1; i < elements.size(); i++) { Node dupe = elements.get(i); - for (Node node : dupe.childNodes) + for (Node node : dupe.childNodes) { toMove.add(node); + } dupe.remove(); } - for (Node dupe : toMove) + for (Node dupe : toMove) { master.appendChild(dupe); + } } // ensure parented by <html> if (!master.parent().equals(htmlEl)) { - htmlEl.appendChild(master); // includes remove() + htmlEl.appendChild(master); // includes remove() } } // fast method to get first by tag name, used for html, head, body finders private Element findFirstElementByTagName(String tag, Node node) { - if (node.nodeName().equals(tag)) + if (node.nodeName().equals(tag)) { return (Element) node; - else { - for (Node child: node.childNodes) { + } else { + for (Node child : node.childNodes) { Element found = findFirstElementByTagName(tag, child); - if (found != null) + if (found != null) { return found; + } } } return null; @@ -179,9 +205,12 @@ public class Document extends Element { } /** - Set the text of the {@code body} of this document. Any existing nodes within the body will be cleared. - @param text unencoded text - @return this document + * Set the text of the {@code body} of this document. Any existing nodes + * within the body will be cleared. + * + * @param text + * unencoded text + * @return this document */ @Override public Element text(String text) { @@ -197,12 +226,13 @@ public class Document extends Element { @Override public Document clone() { Document clone = (Document) super.clone(); - clone.outputSettings = this.outputSettings.clone(); + clone.outputSettings = outputSettings.clone(); return clone; } /** - * A Document's output settings control the form of the text() and html() methods. + * A Document's output settings control the form of the text() and html() + * methods. */ public static class OutputSettings implements Cloneable { private Entities.EscapeMode escapeMode = Entities.EscapeMode.base; @@ -211,14 +241,18 @@ public class Document extends Element { private boolean prettyPrint = true; private int indentAmount = 1; - public OutputSettings() {} + public OutputSettings() { + } /** - * Get the document's current HTML escape mode: <code>base</code>, which provides a limited set of named HTML - * entities and escapes other characters as numbered entities for maximum compatibility; or <code>extended</code>, - * which uses the complete set of HTML named entities. + * Get the document's current HTML escape mode: <code>base</code>, which + * provides a limited set of named HTML entities and escapes other + * characters as numbered entities for maximum compatibility; or + * <code>extended</code>, which uses the complete set of HTML named + * entities. * <p> * The default escape mode is <code>base</code>. + * * @return the document's current escape mode */ public Entities.EscapeMode escapeMode() { @@ -227,7 +261,9 @@ public class Document extends Element { /** * Set the document's escape mode - * @param escapeMode the new escape mode to use + * + * @param escapeMode + * the new escape mode to use * @return the document's output settings, for chaining */ public OutputSettings escapeMode(Entities.EscapeMode escapeMode) { @@ -236,11 +272,14 @@ public class Document extends Element { } /** - * Get the document's current output charset, which is used to control which characters are escaped when - * generating HTML (via the <code>html()</code> methods), and which are kept intact. + * Get the document's current output charset, which is used to control + * which characters are escaped when generating HTML (via the + * <code>html()</code> methods), and which are kept intact. * <p> - * Where possible (when parsing from a URL or File), the document's output charset is automatically set to the - * input charset. Otherwise, it defaults to UTF-8. + * Where possible (when parsing from a URL or File), the document's + * output charset is automatically set to the input charset. Otherwise, + * it defaults to UTF-8. + * * @return the document's current charset. */ public Charset charset() { @@ -249,7 +288,9 @@ public class Document extends Element { /** * Update the document's output charset. - * @param charset the new charset to use. + * + * @param charset + * the new charset to use. * @return the document's output settings, for chaining */ public OutputSettings charset(Charset charset) { @@ -261,7 +302,9 @@ public class Document extends Element { /** * Update the document's output charset. - * @param charset the new charset (by name) to use. + * + * @param charset + * the new charset (by name) to use. * @return the document's output settings, for chaining */ public OutputSettings charset(String charset) { @@ -274,8 +317,10 @@ public class Document extends Element { } /** - * Get if pretty printing is enabled. Default is true. If disabled, the HTML output methods will not re-format - * the output, and the output will generally look like the input. + * Get if pretty printing is enabled. Default is true. If disabled, the + * HTML output methods will not re-format the output, and the output + * will generally look like the input. + * * @return if pretty printing is enabled. */ public boolean prettyPrint() { @@ -284,7 +329,9 @@ public class Document extends Element { /** * Enable or disable pretty printing. - * @param pretty new pretty print setting + * + * @param pretty + * new pretty print setting * @return this, for chaining */ public OutputSettings prettyPrint(boolean pretty) { @@ -294,6 +341,7 @@ public class Document extends Element { /** * Get the current tag indent amount, used when pretty printing. + * * @return the current indent amount */ public int indentAmount() { @@ -302,7 +350,10 @@ public class Document extends Element { /** * Set the indent amount for pretty printing - * @param indentAmount number of spaces to use for indenting each level. Must be >= 0. + * + * @param indentAmount + * number of spaces to use for indenting each level. Must be + * >= 0. * @return this, for chaining */ public OutputSettings indentAmount(int indentAmount) { @@ -321,13 +372,15 @@ public class Document extends Element { } clone.charset(charset.name()); // new charset and charset encoder clone.escapeMode = Entities.EscapeMode.valueOf(escapeMode.name()); - // indentAmount, prettyPrint are primitives so object.clone() will handle + // indentAmount, prettyPrint are primitives so object.clone() will + // handle return clone; } } /** * Get the document's current output settings. + * * @return the document's current output settings. */ public OutputSettings outputSettings() { @@ -347,4 +400,3 @@ public class Document extends Element { return this; } } - |