summaryrefslogtreecommitdiffstats
path: root/server/src/org/jsoup/nodes/Document.java
diff options
context:
space:
mode:
Diffstat (limited to 'server/src/org/jsoup/nodes/Document.java')
-rw-r--r--server/src/org/jsoup/nodes/Document.java402
1 files changed, 0 insertions, 402 deletions
diff --git a/server/src/org/jsoup/nodes/Document.java b/server/src/org/jsoup/nodes/Document.java
deleted file mode 100644
index f1c4595faa..0000000000
--- a/server/src/org/jsoup/nodes/Document.java
+++ /dev/null
@@ -1,402 +0,0 @@
-package org.jsoup.nodes;
-
-import java.nio.charset.Charset;
-import java.nio.charset.CharsetEncoder;
-import java.util.ArrayList;
-import java.util.List;
-
-import org.jsoup.helper.Validate;
-import org.jsoup.parser.Tag;
-import org.jsoup.select.Elements;
-
-/**
- * A HTML Document.
- *
- * @author Jonathan Hedley, jonathan@hedley.net
- */
-public class Document extends Element {
- private OutputSettings outputSettings = new OutputSettings();
- private QuirksMode quirksMode = QuirksMode.noQuirks;
-
- /**
- * Create a new, empty Document.
- *
- * @param baseUri
- * base URI of document
- * @see org.jsoup.Jsoup#parse
- * @see #createShell
- */
- public Document(String baseUri) {
- super(Tag.valueOf("#root"), baseUri);
- }
-
- /**
- * Create a valid, empty shell of a document, suitable for adding more
- * elements to.
- *
- * @param baseUri
- * baseUri of document
- * @return document with html, head, and body elements.
- */
- static public Document createShell(String baseUri) {
- Validate.notNull(baseUri);
-
- Document doc = new Document(baseUri);
- Element html = doc.appendElement("html");
- html.appendElement("head");
- html.appendElement("body");
-
- return doc;
- }
-
- /**
- * Accessor to the document's {@code head} element.
- *
- * @return {@code head}
- */
- public Element head() {
- return findFirstElementByTagName("head", this);
- }
-
- /**
- * Accessor to the document's {@code body} element.
- *
- * @return {@code body}
- */
- public Element body() {
- return findFirstElementByTagName("body", this);
- }
-
- /**
- * Get the string contents of the document's {@code title} element.
- *
- * @return Trimmed title, or empty string if none set.
- */
- public String title() {
- Element titleEl = getElementsByTag("title").first();
- return titleEl != null ? titleEl.text().trim() : "";
- }
-
- /**
- * Set the document's {@code title} element. Updates the existing element,
- * or adds {@code title} to {@code head} if not present
- *
- * @param title
- * string to set as title
- */
- public void title(String title) {
- Validate.notNull(title);
- Element titleEl = getElementsByTag("title").first();
- if (titleEl == null) { // add to head
- head().appendElement("title").text(title);
- } else {
- titleEl.text(title);
- }
- }
-
- /**
- * Create a new Element, with this document's base uri. Does not make the
- * new element a child of this document.
- *
- * @param tagName
- * element tag name (e.g. {@code a})
- * @return new element
- */
- public Element createElement(String tagName) {
- return new Element(Tag.valueOf(tagName), baseUri());
- }
-
- /**
- * Normalise the document. This happens after the parse phase so generally
- * does not need to be called. Moves any text content that is not in the
- * body element into the body.
- *
- * @return this document after normalisation
- */
- public Document normalise() {
- Element htmlEl = findFirstElementByTagName("html", this);
- if (htmlEl == null) {
- htmlEl = appendElement("html");
- }
- if (head() == null) {
- htmlEl.prependElement("head");
- }
- if (body() == null) {
- htmlEl.appendElement("body");
- }
-
- // pull text nodes out of root, html, and head els, and push into body.
- // non-text nodes are already taken care
- // of. do in inverse order to maintain text order.
- normaliseTextNodes(head());
- normaliseTextNodes(htmlEl);
- normaliseTextNodes(this);
-
- normaliseStructure("head", htmlEl);
- normaliseStructure("body", htmlEl);
-
- return this;
- }
-
- // does not recurse.
- private void normaliseTextNodes(Element element) {
- List<Node> toMove = new ArrayList<Node>();
- for (Node node : element.childNodes) {
- if (node instanceof TextNode) {
- TextNode tn = (TextNode) node;
- if (!tn.isBlank()) {
- toMove.add(tn);
- }
- }
- }
-
- for (int i = toMove.size() - 1; i >= 0; i--) {
- Node node = toMove.get(i);
- element.removeChild(node);
- body().prependChild(new TextNode(" ", ""));
- body().prependChild(node);
- }
- }
-
- // merge multiple <head> or <body> contents into one, delete the remainder,
- // and ensure they are owned by <html>
- private void normaliseStructure(String tag, Element htmlEl) {
- Elements elements = getElementsByTag(tag);
- Element master = elements.first(); // will always be available as
- // created above if not existent
- if (elements.size() > 1) { // dupes, move contents to master
- List<Node> toMove = new ArrayList<Node>();
- for (int i = 1; i < elements.size(); i++) {
- Node dupe = elements.get(i);
- for (Node node : dupe.childNodes) {
- toMove.add(node);
- }
- dupe.remove();
- }
-
- for (Node dupe : toMove) {
- master.appendChild(dupe);
- }
- }
- // ensure parented by <html>
- if (!master.parent().equals(htmlEl)) {
- htmlEl.appendChild(master); // includes remove()
- }
- }
-
- // fast method to get first by tag name, used for html, head, body finders
- private Element findFirstElementByTagName(String tag, Node node) {
- if (node.nodeName().equals(tag)) {
- return (Element) node;
- } else {
- for (Node child : node.childNodes) {
- Element found = findFirstElementByTagName(tag, child);
- if (found != null) {
- return found;
- }
- }
- }
- return null;
- }
-
- @Override
- public String outerHtml() {
- return super.html(); // no outer wrapper tag
- }
-
- /**
- * Set the text of the {@code body} of this document. Any existing nodes
- * within the body will be cleared.
- *
- * @param text
- * unencoded text
- * @return this document
- */
- @Override
- public Element text(String text) {
- body().text(text); // overridden to not nuke doc structure
- return this;
- }
-
- @Override
- public String nodeName() {
- return "#document";
- }
-
- @Override
- public Document clone() {
- Document clone = (Document) super.clone();
- clone.outputSettings = outputSettings.clone();
- return clone;
- }
-
- /**
- * A Document's output settings control the form of the text() and html()
- * methods.
- */
- public static class OutputSettings implements Cloneable {
- private Entities.EscapeMode escapeMode = Entities.EscapeMode.base;
- private Charset charset = Charset.forName("UTF-8");
- private CharsetEncoder charsetEncoder = charset.newEncoder();
- private boolean prettyPrint = true;
- private int indentAmount = 1;
-
- public OutputSettings() {
- }
-
- /**
- * Get the document's current HTML escape mode: <code>base</code>, which
- * provides a limited set of named HTML entities and escapes other
- * characters as numbered entities for maximum compatibility; or
- * <code>extended</code>, which uses the complete set of HTML named
- * entities.
- * <p>
- * The default escape mode is <code>base</code>.
- *
- * @return the document's current escape mode
- */
- public Entities.EscapeMode escapeMode() {
- return escapeMode;
- }
-
- /**
- * Set the document's escape mode
- *
- * @param escapeMode
- * the new escape mode to use
- * @return the document's output settings, for chaining
- */
- public OutputSettings escapeMode(Entities.EscapeMode escapeMode) {
- this.escapeMode = escapeMode;
- return this;
- }
-
- /**
- * Get the document's current output charset, which is used to control
- * which characters are escaped when generating HTML (via the
- * <code>html()</code> methods), and which are kept intact.
- * <p>
- * Where possible (when parsing from a URL or File), the document's
- * output charset is automatically set to the input charset. Otherwise,
- * it defaults to UTF-8.
- *
- * @return the document's current charset.
- */
- public Charset charset() {
- return charset;
- }
-
- /**
- * Update the document's output charset.
- *
- * @param charset
- * the new charset to use.
- * @return the document's output settings, for chaining
- */
- public OutputSettings charset(Charset charset) {
- // todo: this should probably update the doc's meta charset
- this.charset = charset;
- charsetEncoder = charset.newEncoder();
- return this;
- }
-
- /**
- * Update the document's output charset.
- *
- * @param charset
- * the new charset (by name) to use.
- * @return the document's output settings, for chaining
- */
- public OutputSettings charset(String charset) {
- charset(Charset.forName(charset));
- return this;
- }
-
- CharsetEncoder encoder() {
- return charsetEncoder;
- }
-
- /**
- * Get if pretty printing is enabled. Default is true. If disabled, the
- * HTML output methods will not re-format the output, and the output
- * will generally look like the input.
- *
- * @return if pretty printing is enabled.
- */
- public boolean prettyPrint() {
- return prettyPrint;
- }
-
- /**
- * Enable or disable pretty printing.
- *
- * @param pretty
- * new pretty print setting
- * @return this, for chaining
- */
- public OutputSettings prettyPrint(boolean pretty) {
- prettyPrint = pretty;
- return this;
- }
-
- /**
- * Get the current tag indent amount, used when pretty printing.
- *
- * @return the current indent amount
- */
- public int indentAmount() {
- return indentAmount;
- }
-
- /**
- * Set the indent amount for pretty printing
- *
- * @param indentAmount
- * number of spaces to use for indenting each level. Must be
- * >= 0.
- * @return this, for chaining
- */
- public OutputSettings indentAmount(int indentAmount) {
- Validate.isTrue(indentAmount >= 0);
- this.indentAmount = indentAmount;
- return this;
- }
-
- @Override
- public OutputSettings clone() {
- OutputSettings clone;
- try {
- clone = (OutputSettings) super.clone();
- } catch (CloneNotSupportedException e) {
- throw new RuntimeException(e);
- }
- clone.charset(charset.name()); // new charset and charset encoder
- clone.escapeMode = Entities.EscapeMode.valueOf(escapeMode.name());
- // indentAmount, prettyPrint are primitives so object.clone() will
- // handle
- return clone;
- }
- }
-
- /**
- * Get the document's current output settings.
- *
- * @return the document's current output settings.
- */
- public OutputSettings outputSettings() {
- return outputSettings;
- }
-
- public enum QuirksMode {
- noQuirks, quirks, limitedQuirks;
- }
-
- public QuirksMode quirksMode() {
- return quirksMode;
- }
-
- public Document quirksMode(QuirksMode quirksMode) {
- this.quirksMode = quirksMode;
- return this;
- }
-}