summary refs log tree commit diff stats
path: root/server/src/org/jsoup/Jsoup.java
diff options
context:
space:
mode:
Diffstat (limited to 'server/src/org/jsoup/Jsoup.java')
-rw-r--r-- server/src/org/jsoup/Jsoup.java 293
1 files changed, 0 insertions, 293 deletions
diff --git a/server/src/org/jsoup/Jsoup.java b/server/src/org/jsoup/Jsoup.java
deleted file mode 100644
index b5429d9410..0000000000
--- a/server/src/org/jsoup/Jsoup.java
+++ /dev/null
@@ -1,293 +0,0 @@
-package org.jsoup;
-
-import java.io.File;
-import java.io.IOException;
-import java.io.InputStream;
-import java.net.URL;
-
-import org.jsoup.helper.DataUtil;
-import org.jsoup.helper.HttpConnection;
-import org.jsoup.nodes.Document;
-import org.jsoup.parser.Parser;
-import org.jsoup.safety.Cleaner;
-import org.jsoup.safety.Whitelist;
-
-/**
- * The core public access point to the jsoup functionality.
- *
- * @author Jonathan Hedley
- */
-public class Jsoup {
- private Jsoup() {
- }
-
- /**
- * Parse HTML into a Document. The parser will make a sensible, balanced
- * document tree out of any HTML.
- *
- * @param html
- * HTML to parse
- * @param baseUri
- * The URL where the HTML was retrieved from. Used to resolve
- * relative URLs to absolute URLs, that occur before the HTML
- * declares a {@code <base href>} tag.
- * @return sane HTML
- */
- public static Document parse(String html, String baseUri) {
- return Parser.parse(html, baseUri);
- }
-
- /**
- * Parse HTML into a Document, using the provided Parser. You can provide an
- * alternate parser, such as a simple XML (non-HTML) parser.
- *
- * @param html
- * HTML to parse
- * @param baseUri
- * The URL where the HTML was retrieved from. Used to resolve
- * relative URLs to absolute URLs, that occur before the HTML
- * declares a {@code <base href>} tag.
- * @param parser
- * alternate {@link Parser#xmlParser() parser} to use.
- * @return sane HTML
- */
- public static Document parse(String html, String baseUri, Parser parser) {
- return parser.parseInput(html, baseUri);
- }
-
- /**
- * Parse HTML into a Document. As no base URI is specified, absolute URL
- * detection relies on the HTML including a {@code <base href>} tag.
- *
- * @param html
- * HTML to parse
- * @return sane HTML
- * @see #parse(String, String)
- */
- public static Document parse(String html) {
- return Parser.parse(html, "");
- }
-
- /**
- * Creates a new {@link Connection} to a URL. Use to fetch and parse a HTML
- * page.
- * <p>
- * Use examples:
- * <ul>
- * <li>
- * <code>Document doc = Jsoup.connect("http://example.com").userAgent("Mozilla").data("name", "jsoup").get();</code>
- * </li>
- * <li>
- * <code>Document doc = Jsoup.connect("http://example.com").cookie("auth", "token").post();</code></li>
- * </ul>
- *
- * @param url
- * URL to connect to. The protocol must be {@code http} or
- * {@code https}.
- * @return the connection. You can add data, cookies, and headers; set the
- * user-agent, referrer, method; and then execute.
- */
- public static Connection connect(String url) {
- return HttpConnection.connect(url);
- }
-
- /**
- * Parse the contents of a file as HTML.
- *
- * @param in
- * file to load HTML from
- * @param charsetName
- * (optional) character set of file contents. Set to {@code null}
- * to determine from {@code http-equiv} meta tag, if present, or
- * fall back to {@code UTF-8} (which is often safe to do).
- * @param baseUri
- * The URL where the HTML was retrieved from, to resolve relative
- * links against.
- * @return sane HTML
- * @throws IOException
- * if the file could not be found, or read, or if the
- * charsetName is invalid.
- */
- public static Document parse(File in, String charsetName, String baseUri)
- throws IOException {
- return DataUtil.load(in, charsetName, baseUri);
- }
-
- /**
- * Parse the contents of a file as HTML. The location of the file is used as
- * the base URI to qualify relative URLs.
- *
- * @param in
- * file to load HTML from
- * @param charsetName
- * (optional) character set of file contents. Set to {@code null}
- * to determine from {@code http-equiv} meta tag, if present, or
- * fall back to {@code UTF-8} (which is often safe to do).
- * @return sane HTML
- * @throws IOException
- * if the file could not be found, or read, or if the
- * charsetName is invalid.
- * @see #parse(File, String, String)
- */
- public static Document parse(File in, String charsetName)
- throws IOException {
- return DataUtil.load(in, charsetName, in.getAbsolutePath());
- }
-
- /**
- * Read an input stream, and parse it to a Document.
- *
- * @param in
- * input stream to read. Make sure to close it after parsing.
- * @param charsetName
- * (optional) character set of file contents. Set to {@code null}
- * to determine from {@code http-equiv} meta tag, if present, or
- * fall back to {@code UTF-8} (which is often safe to do).
- * @param baseUri
- * The URL where the HTML was retrieved from, to resolve relative
- * links against.
- * @return sane HTML
- * @throws IOException
- * if the stream could not be read, or if the
- * charsetName is invalid.
- */
- public static Document parse(InputStream in, String charsetName,
- String baseUri) throws IOException {
- return DataUtil.load(in, charsetName, baseUri);
- }
-
- /**
- * Read an input stream, and parse it to a Document. You can provide an
- * alternate parser, such as a simple XML (non-HTML) parser.
- *
- * @param in
- * input stream to read. Make sure to close it after parsing.
- * @param charsetName
- * (optional) character set of file contents. Set to {@code null}
- * to determine from {@code http-equiv} meta tag, if present, or
- * fall back to {@code UTF-8} (which is often safe to do).
- * @param baseUri
- * The URL where the HTML was retrieved from, to resolve relative
- * links against.
- * @param parser
- * alternate {@link Parser#xmlParser() parser} to use.
- * @return sane HTML
- * @throws IOException
- * if the stream could not be read, or if the
- * charsetName is invalid.
- */
- public static Document parse(InputStream in, String charsetName,
- String baseUri, Parser parser) throws IOException {
- return DataUtil.load(in, charsetName, baseUri, parser);
- }
-
- /**
- * Parse a fragment of HTML, with the assumption that it forms the
- * {@code body} of the HTML.
- *
- * @param bodyHtml
- * body HTML fragment
- * @param baseUri
- * URL to resolve relative URLs against.
- * @return sane HTML document
- * @see Document#body()
- */
- public static Document parseBodyFragment(String bodyHtml, String baseUri) {
- return Parser.parseBodyFragment(bodyHtml, baseUri);
- }
-
- /**
- * Parse a fragment of HTML, with the assumption that it forms the
- * {@code body} of the HTML.
- *
- * @param bodyHtml
- * body HTML fragment
- * @return sane HTML document
- * @see Document#body()
- */
- public static Document parseBodyFragment(String bodyHtml) {
- return Parser.parseBodyFragment(bodyHtml, "");
- }
-
- /**
- * Fetch a URL, and parse it as HTML. Provided for compatibility; in most
- * cases use {@link #connect(String)} instead.
- * <p>
- * The encoding character set is determined by the content-type header or
- * http-equiv meta tag, or falls back to {@code UTF-8}.
- *
- * @param url
- * URL to fetch (with a GET). The protocol must be {@code http}
- * or {@code https}.
- * @param timeoutMillis
- * Connection and read timeout, in milliseconds. If exceeded,
- * IOException is thrown.
- * @return The parsed HTML.
- * @throws IOException
- * If the final server response != 200 OK (redirects are
- * followed), or if there's an error reading the response
- * stream.
- * @see #connect(String)
- */
- public static Document parse(URL url, int timeoutMillis) throws IOException {
- Connection con = HttpConnection.connect(url);
- con.timeout(timeoutMillis);
- return con.get();
- }
-
- /**
- * Get safe HTML from untrusted input HTML, by parsing input HTML and
- * filtering it through a white-list of permitted tags and attributes.
- *
- * @param bodyHtml
- * input untrusted HTML
- * @param baseUri
- * URL to resolve relative URLs against
- * @param whitelist
- * white-list of permitted HTML elements
- * @return safe HTML
- * @see Cleaner#clean(Document)
- */
- public static String clean(String bodyHtml, String baseUri,
- Whitelist whitelist) {
- Document dirty = parseBodyFragment(bodyHtml, baseUri);
- Cleaner cleaner = new Cleaner(whitelist);
- Document clean = cleaner.clean(dirty);
- return clean.body().html();
- }
-
- /**
- * Get safe HTML from untrusted input HTML, by parsing input HTML and
- * filtering it through a white-list of permitted tags and attributes.
- *
- * @param bodyHtml
- * input untrusted HTML
- * @param whitelist
- * white-list of permitted HTML elements
- * @return safe HTML
- * @see Cleaner#clean(Document)
- */
- public static String clean(String bodyHtml, Whitelist whitelist) {
- return clean(bodyHtml, "", whitelist);
- }
-
- /**
- * Test if the input HTML has only tags and attributes allowed by the
- * Whitelist. Useful for form validation. The input HTML should still be run
- * through the cleaner to set up enforced attributes, and to tidy the
- * output.
- *
- * @param bodyHtml
- * HTML to test
- * @param whitelist
- * whitelist to test against
- * @return true if no tags or attributes were removed; false otherwise
- * @see #clean(String, org.jsoup.safety.Whitelist)
- */
- public static boolean isValid(String bodyHtml, Whitelist whitelist) {
- Document dirty = parseBodyFragment(bodyHtml, "");
- Cleaner cleaner = new Cleaner(whitelist);
- return cleaner.isValid(dirty);
- }
-
-}