diff options
Diffstat (limited to 'server/src/org/jsoup/nodes/Document.java')
-rw-r--r-- | server/src/org/jsoup/nodes/Document.java | 402 |
1 files changed, 0 insertions, 402 deletions
diff --git a/server/src/org/jsoup/nodes/Document.java b/server/src/org/jsoup/nodes/Document.java deleted file mode 100644 index f1c4595faa..0000000000 --- a/server/src/org/jsoup/nodes/Document.java +++ /dev/null @@ -1,402 +0,0 @@ -package org.jsoup.nodes; - -import java.nio.charset.Charset; -import java.nio.charset.CharsetEncoder; -import java.util.ArrayList; -import java.util.List; - -import org.jsoup.helper.Validate; -import org.jsoup.parser.Tag; -import org.jsoup.select.Elements; - -/** - * A HTML Document. - * - * @author Jonathan Hedley, jonathan@hedley.net - */ -public class Document extends Element { - private OutputSettings outputSettings = new OutputSettings(); - private QuirksMode quirksMode = QuirksMode.noQuirks; - - /** - * Create a new, empty Document. - * - * @param baseUri - * base URI of document - * @see org.jsoup.Jsoup#parse - * @see #createShell - */ - public Document(String baseUri) { - super(Tag.valueOf("#root"), baseUri); - } - - /** - * Create a valid, empty shell of a document, suitable for adding more - * elements to. - * - * @param baseUri - * baseUri of document - * @return document with html, head, and body elements. - */ - static public Document createShell(String baseUri) { - Validate.notNull(baseUri); - - Document doc = new Document(baseUri); - Element html = doc.appendElement("html"); - html.appendElement("head"); - html.appendElement("body"); - - return doc; - } - - /** - * Accessor to the document's {@code head} element. - * - * @return {@code head} - */ - public Element head() { - return findFirstElementByTagName("head", this); - } - - /** - * Accessor to the document's {@code body} element. - * - * @return {@code body} - */ - public Element body() { - return findFirstElementByTagName("body", this); - } - - /** - * Get the string contents of the document's {@code title} element. - * - * @return Trimmed title, or empty string if none set. - */ - public String title() { - Element titleEl = getElementsByTag("title").first(); - return titleEl != null ? titleEl.text().trim() : ""; - } - - /** - * Set the document's {@code title} element. Updates the existing element, - * or adds {@code title} to {@code head} if not present - * - * @param title - * string to set as title - */ - public void title(String title) { - Validate.notNull(title); - Element titleEl = getElementsByTag("title").first(); - if (titleEl == null) { // add to head - head().appendElement("title").text(title); - } else { - titleEl.text(title); - } - } - - /** - * Create a new Element, with this document's base uri. Does not make the - * new element a child of this document. - * - * @param tagName - * element tag name (e.g. {@code a}) - * @return new element - */ - public Element createElement(String tagName) { - return new Element(Tag.valueOf(tagName), baseUri()); - } - - /** - * Normalise the document. This happens after the parse phase so generally - * does not need to be called. Moves any text content that is not in the - * body element into the body. - * - * @return this document after normalisation - */ - public Document normalise() { - Element htmlEl = findFirstElementByTagName("html", this); - if (htmlEl == null) { - htmlEl = appendElement("html"); - } - if (head() == null) { - htmlEl.prependElement("head"); - } - if (body() == null) { - htmlEl.appendElement("body"); - } - - // pull text nodes out of root, html, and head els, and push into body. - // non-text nodes are already taken care - // of. do in inverse order to maintain text order. - normaliseTextNodes(head()); - normaliseTextNodes(htmlEl); - normaliseTextNodes(this); - - normaliseStructure("head", htmlEl); - normaliseStructure("body", htmlEl); - - return this; - } - - // does not recurse. - private void normaliseTextNodes(Element element) { - List<Node> toMove = new ArrayList<Node>(); - for (Node node : element.childNodes) { - if (node instanceof TextNode) { - TextNode tn = (TextNode) node; - if (!tn.isBlank()) { - toMove.add(tn); - } - } - } - - for (int i = toMove.size() - 1; i >= 0; i--) { - Node node = toMove.get(i); - element.removeChild(node); - body().prependChild(new TextNode(" ", "")); - body().prependChild(node); - } - } - - // merge multiple <head> or <body> contents into one, delete the remainder, - // and ensure they are owned by <html> - private void normaliseStructure(String tag, Element htmlEl) { - Elements elements = getElementsByTag(tag); - Element master = elements.first(); // will always be available as - // created above if not existent - if (elements.size() > 1) { // dupes, move contents to master - List<Node> toMove = new ArrayList<Node>(); - for (int i = 1; i < elements.size(); i++) { - Node dupe = elements.get(i); - for (Node node : dupe.childNodes) { - toMove.add(node); - } - dupe.remove(); - } - - for (Node dupe : toMove) { - master.appendChild(dupe); - } - } - // ensure parented by <html> - if (!master.parent().equals(htmlEl)) { - htmlEl.appendChild(master); // includes remove() - } - } - - // fast method to get first by tag name, used for html, head, body finders - private Element findFirstElementByTagName(String tag, Node node) { - if (node.nodeName().equals(tag)) { - return (Element) node; - } else { - for (Node child : node.childNodes) { - Element found = findFirstElementByTagName(tag, child); - if (found != null) { - return found; - } - } - } - return null; - } - - @Override - public String outerHtml() { - return super.html(); // no outer wrapper tag - } - - /** - * Set the text of the {@code body} of this document. Any existing nodes - * within the body will be cleared. - * - * @param text - * unencoded text - * @return this document - */ - @Override - public Element text(String text) { - body().text(text); // overridden to not nuke doc structure - return this; - } - - @Override - public String nodeName() { - return "#document"; - } - - @Override - public Document clone() { - Document clone = (Document) super.clone(); - clone.outputSettings = outputSettings.clone(); - return clone; - } - - /** - * A Document's output settings control the form of the text() and html() - * methods. - */ - public static class OutputSettings implements Cloneable { - private Entities.EscapeMode escapeMode = Entities.EscapeMode.base; - private Charset charset = Charset.forName("UTF-8"); - private CharsetEncoder charsetEncoder = charset.newEncoder(); - private boolean prettyPrint = true; - private int indentAmount = 1; - - public OutputSettings() { - } - - /** - * Get the document's current HTML escape mode: <code>base</code>, which - * provides a limited set of named HTML entities and escapes other - * characters as numbered entities for maximum compatibility; or - * <code>extended</code>, which uses the complete set of HTML named - * entities. - * <p> - * The default escape mode is <code>base</code>. - * - * @return the document's current escape mode - */ - public Entities.EscapeMode escapeMode() { - return escapeMode; - } - - /** - * Set the document's escape mode - * - * @param escapeMode - * the new escape mode to use - * @return the document's output settings, for chaining - */ - public OutputSettings escapeMode(Entities.EscapeMode escapeMode) { - this.escapeMode = escapeMode; - return this; - } - - /** - * Get the document's current output charset, which is used to control - * which characters are escaped when generating HTML (via the - * <code>html()</code> methods), and which are kept intact. - * <p> - * Where possible (when parsing from a URL or File), the document's - * output charset is automatically set to the input charset. Otherwise, - * it defaults to UTF-8. - * - * @return the document's current charset. - */ - public Charset charset() { - return charset; - } - - /** - * Update the document's output charset. - * - * @param charset - * the new charset to use. - * @return the document's output settings, for chaining - */ - public OutputSettings charset(Charset charset) { - // todo: this should probably update the doc's meta charset - this.charset = charset; - charsetEncoder = charset.newEncoder(); - return this; - } - - /** - * Update the document's output charset. - * - * @param charset - * the new charset (by name) to use. - * @return the document's output settings, for chaining - */ - public OutputSettings charset(String charset) { - charset(Charset.forName(charset)); - return this; - } - - CharsetEncoder encoder() { - return charsetEncoder; - } - - /** - * Get if pretty printing is enabled. Default is true. If disabled, the - * HTML output methods will not re-format the output, and the output - * will generally look like the input. - * - * @return if pretty printing is enabled. - */ - public boolean prettyPrint() { - return prettyPrint; - } - - /** - * Enable or disable pretty printing. - * - * @param pretty - * new pretty print setting - * @return this, for chaining - */ - public OutputSettings prettyPrint(boolean pretty) { - prettyPrint = pretty; - return this; - } - - /** - * Get the current tag indent amount, used when pretty printing. - * - * @return the current indent amount - */ - public int indentAmount() { - return indentAmount; - } - - /** - * Set the indent amount for pretty printing - * - * @param indentAmount - * number of spaces to use for indenting each level. Must be - * >= 0. - * @return this, for chaining - */ - public OutputSettings indentAmount(int indentAmount) { - Validate.isTrue(indentAmount >= 0); - this.indentAmount = indentAmount; - return this; - } - - @Override - public OutputSettings clone() { - OutputSettings clone; - try { - clone = (OutputSettings) super.clone(); - } catch (CloneNotSupportedException e) { - throw new RuntimeException(e); - } - clone.charset(charset.name()); // new charset and charset encoder - clone.escapeMode = Entities.EscapeMode.valueOf(escapeMode.name()); - // indentAmount, prettyPrint are primitives so object.clone() will - // handle - return clone; - } - } - - /** - * Get the document's current output settings. - * - * @return the document's current output settings. - */ - public OutputSettings outputSettings() { - return outputSettings; - } - - public enum QuirksMode { - noQuirks, quirks, limitedQuirks; - } - - public QuirksMode quirksMode() { - return quirksMode; - } - - public Document quirksMode(QuirksMode quirksMode) { - this.quirksMode = quirksMode; - return this; - } -} |