summaryrefslogtreecommitdiffstats
path: root/server/src/org/jsoup
diff options
context:
space:
mode:
Diffstat (limited to 'server/src/org/jsoup')
-rw-r--r--server/src/org/jsoup/Connection.java306
-rw-r--r--server/src/org/jsoup/Jsoup.java324
-rw-r--r--server/src/org/jsoup/examples/HtmlToPlainText.java63
-rw-r--r--server/src/org/jsoup/examples/ListLinks.java28
-rw-r--r--server/src/org/jsoup/helper/DataUtil.java149
-rw-r--r--server/src/org/jsoup/helper/DescendableLinkedList.java25
-rw-r--r--server/src/org/jsoup/helper/HttpConnection.java360
-rw-r--r--server/src/org/jsoup/helper/StringUtil.java79
-rw-r--r--server/src/org/jsoup/helper/Validate.java96
-rw-r--r--server/src/org/jsoup/nodes/Attribute.java110
-rw-r--r--server/src/org/jsoup/nodes/Attributes.java194
-rw-r--r--server/src/org/jsoup/nodes/Comment.java40
-rw-r--r--server/src/org/jsoup/nodes/DataNode.java54
-rw-r--r--server/src/org/jsoup/nodes/Document.java192
-rw-r--r--server/src/org/jsoup/nodes/DocumentType.java28
-rw-r--r--server/src/org/jsoup/nodes/Element.java790
-rw-r--r--server/src/org/jsoup/nodes/Entities.java111
-rw-r--r--server/src/org/jsoup/nodes/Node.java402
-rw-r--r--server/src/org/jsoup/nodes/TextNode.java93
-rw-r--r--server/src/org/jsoup/nodes/XmlDeclaration.java49
-rw-r--r--server/src/org/jsoup/parser/CharacterReader.java54
-rw-r--r--server/src/org/jsoup/parser/HtmlTreeBuilder.java286
-rw-r--r--server/src/org/jsoup/parser/HtmlTreeBuilderState.java1645
-rw-r--r--server/src/org/jsoup/parser/ParseError.java7
-rw-r--r--server/src/org/jsoup/parser/ParseErrorList.java8
-rw-r--r--server/src/org/jsoup/parser/Parser.java123
-rw-r--r--server/src/org/jsoup/parser/Tag.java148
-rw-r--r--server/src/org/jsoup/parser/Token.java37
-rw-r--r--server/src/org/jsoup/parser/TokenQueue.java274
-rw-r--r--server/src/org/jsoup/parser/Tokeniser.java104
-rw-r--r--server/src/org/jsoup/parser/TokeniserState.java2260
-rw-r--r--server/src/org/jsoup/parser/TreeBuilder.java15
-rw-r--r--server/src/org/jsoup/parser/XmlTreeBuilder.java72
-rw-r--r--server/src/org/jsoup/parser/package-info.java1
-rw-r--r--server/src/org/jsoup/safety/Cleaner.java106
-rw-r--r--server/src/org/jsoup/safety/Whitelist.java380
-rw-r--r--server/src/org/jsoup/safety/package-info.java1
-rw-r--r--server/src/org/jsoup/select/Collector.java21
-rw-r--r--server/src/org/jsoup/select/CombiningEvaluator.java30
-rw-r--r--server/src/org/jsoup/select/Elements.java442
-rw-r--r--server/src/org/jsoup/select/Evaluator.java52
-rw-r--r--server/src/org/jsoup/select/NodeTraversor.java20
-rw-r--r--server/src/org/jsoup/select/NodeVisitor.java33
-rw-r--r--server/src/org/jsoup/select/QueryParser.java193
-rw-r--r--server/src/org/jsoup/select/Selector.java248
-rw-r--r--server/src/org/jsoup/select/StructuralEvaluator.java34
46 files changed, 6158 insertions, 3929 deletions
diff --git a/server/src/org/jsoup/Connection.java b/server/src/org/jsoup/Connection.java
index 564eeb89b7..1d9879bfb3 100644
--- a/server/src/org/jsoup/Connection.java
+++ b/server/src/org/jsoup/Connection.java
@@ -1,24 +1,29 @@
package org.jsoup;
-import org.jsoup.nodes.Document;
-import org.jsoup.parser.Parser;
-
+import java.io.IOException;
import java.net.URL;
-import java.util.Map;
import java.util.Collection;
-import java.io.IOException;
+import java.util.Map;
+
+import org.jsoup.nodes.Document;
+import org.jsoup.parser.Parser;
/**
- * A Connection provides a convenient interface to fetch content from the web, and parse them into Documents.
+ * A Connection provides a convenient interface to fetch content from the web,
+ * and parse it into Documents.
* <p>
- * To get a new Connection, use {@link org.jsoup.Jsoup#connect(String)}. Connections contain {@link Connection.Request}
- * and {@link Connection.Response} objects. The request objects are reusable as prototype requests.
+ * To get a new Connection, use {@link org.jsoup.Jsoup#connect(String)}.
+ * Connections contain {@link Connection.Request} and
+ * {@link Connection.Response} objects. The request objects are reusable as
+ * prototype requests.
* <p>
- * Request configuration can be made using either the shortcut methods in Connection (e.g. {@link #userAgent(String)}),
- * or by methods in the Connection.Request object directly. All request configuration must be made before the request
- * is executed.
+ * Request configuration can be made using either the shortcut methods in
+ * Connection (e.g. {@link #userAgent(String)}), or by methods in the
+ * Connection.Request object directly. All request configuration must be made
+ * before the request is executed.
* <p>
- * The Connection interface is <b>currently in beta</b> and subject to change. Comments, suggestions, and bug reports are welcome.
+ * The Connection interface is <b>currently in beta</b> and subject to change.
+ * Comments, suggestions, and bug reports are welcome.
*/
public interface Connection {
@@ -31,102 +36,140 @@ public interface Connection {
/**
* Set the request URL to fetch. The protocol must be HTTP or HTTPS.
- * @param url URL to connect to
+ *
+ * @param url
+ * URL to connect to
* @return this Connection, for chaining
*/
public Connection url(URL url);
/**
* Set the request URL to fetch. The protocol must be HTTP or HTTPS.
- * @param url URL to connect to
+ *
+ * @param url
+ * URL to connect to
* @return this Connection, for chaining
*/
public Connection url(String url);
/**
* Set the request user-agent header.
- * @param userAgent user-agent to use
+ *
+ * @param userAgent
+ * user-agent to use
* @return this Connection, for chaining
*/
public Connection userAgent(String userAgent);
/**
- * Set the request timeouts (connect and read). If a timeout occurs, an IOException will be thrown. The default
- * timeout is 3 seconds (3000 millis). A timeout of zero is treated as an infinite timeout.
- * @param millis number of milliseconds (thousandths of a second) before timing out connects or reads.
+ * Set the request timeouts (connect and read). If a timeout occurs, an
+ * IOException will be thrown. The default timeout is 3 seconds (3000
+ * millis). A timeout of zero is treated as an infinite timeout.
+ *
+ * @param millis
+ * number of milliseconds (thousandths of a second) before timing
+ * out connects or reads.
* @return this Connection, for chaining
*/
public Connection timeout(int millis);
/**
* Set the request referrer (aka "referer") header.
- * @param referrer referrer to use
+ *
+ * @param referrer
+ * referrer to use
* @return this Connection, for chaining
*/
public Connection referrer(String referrer);
/**
- * Configures the connection to (not) follow server redirects. By default this is <b>true</b>.
- * @param followRedirects true if server redirects should be followed.
+ * Configures the connection to (not) follow server redirects. By default
+ * this is <b>true</b>.
+ *
+ * @param followRedirects
+ * true if server redirects should be followed.
* @return this Connection, for chaining
*/
public Connection followRedirects(boolean followRedirects);
/**
* Set the request method to use, GET or POST. Default is GET.
- * @param method HTTP request method
+ *
+ * @param method
+ * HTTP request method
* @return this Connection, for chaining
*/
public Connection method(Method method);
/**
- * Configures the connection to not throw exceptions when a HTTP error occurs. (4xx - 5xx, e.g. 404 or 500). By
- * default this is <b>false</b>; an IOException is thrown if an error is encountered. If set to <b>true</b>, the
- * response is populated with the error body, and the status message will reflect the error.
- * @param ignoreHttpErrors - false (default) if HTTP errors should be ignored.
+ * Configures the connection to not throw exceptions when an HTTP error
+ * occurs (4xx - 5xx, e.g. 404 or 500). By default this is <b>false</b>; an
+ * IOException is thrown if an error is encountered. If set to <b>true</b>,
+ * the response is populated with the error body, and the status message
+ * will reflect the error.
+ *
+ * @param ignoreHttpErrors
+ * - true if HTTP errors should be ignored; false (default) to throw an IOException.
* @return this Connection, for chaining
*/
public Connection ignoreHttpErrors(boolean ignoreHttpErrors);
/**
- * Ignore the document's Content-Type when parsing the response. By default this is <b>false</b>, an unrecognised
- * content-type will cause an IOException to be thrown. (This is to prevent producing garbage by attempting to parse
- * a JPEG binary image, for example.) Set to true to force a parse attempt regardless of content type.
- * @param ignoreContentType set to true if you would like the content type ignored on parsing the response into a
- * Document.
+ * Ignore the document's Content-Type when parsing the response. By default
+ * this is <b>false</b>, an unrecognised content-type will cause an
+ * IOException to be thrown. (This is to prevent producing garbage by
+ * attempting to parse a JPEG binary image, for example.) Set to true to
+ * force a parse attempt regardless of content type.
+ *
+ * @param ignoreContentType
+ * set to true if you would like the content type ignored on
+ * parsing the response into a Document.
* @return this Connection, for chaining
*/
public Connection ignoreContentType(boolean ignoreContentType);
/**
- * Add a request data parameter. Request parameters are sent in the request query string for GETs, and in the request
- * body for POSTs. A request may have multiple values of the same name.
- * @param key data key
- * @param value data value
+ * Add a request data parameter. Request parameters are sent in the request
+ * query string for GETs, and in the request body for POSTs. A request may
+ * have multiple values of the same name.
+ *
+ * @param key
+ * data key
+ * @param value
+ * data value
* @return this Connection, for chaining
*/
public Connection data(String key, String value);
/**
* Adds all of the supplied data to the request data parameters
- * @param data map of data parameters
+ *
+ * @param data
+ * map of data parameters
* @return this Connection, for chaining
*/
public Connection data(Map<String, String> data);
/**
- * Add a number of request data parameters. Multiple parameters may be set at once, e.g.:
- * <code>.data("name", "jsoup", "language", "Java", "language", "English");</code> creates a query string like:
+ * Add a number of request data parameters. Multiple parameters may be set
+ * at once, e.g.:
+ * <code>.data("name", "jsoup", "language", "Java", "language", "English");</code>
+ * creates a query string like:
* <code>?name=jsoup&language=Java&language=English</code>
- * @param keyvals a set of key value pairs.
+ *
+ * @param keyvals
+ * a set of key value pairs.
* @return this Connection, for chaining
*/
public Connection data(String... keyvals);
/**
* Set a request header.
- * @param name header name
- * @param value header value
+ *
+ * @param name
+ * header name
+ * @param value
+ * header value
* @return this Connection, for chaining
* @see org.jsoup.Connection.Request#headers()
*/
@@ -134,111 +177,141 @@ public interface Connection {
/**
* Set a cookie to be sent in the request.
- * @param name name of cookie
- * @param value value of cookie
+ *
+ * @param name
+ * name of cookie
+ * @param value
+ * value of cookie
* @return this Connection, for chaining
*/
public Connection cookie(String name, String value);
/**
* Adds each of the supplied cookies to the request.
- * @param cookies map of cookie name -> value pairs
+ *
+ * @param cookies
+ * map of cookie name {@literal ->} value pairs
* @return this Connection, for chaining
*/
public Connection cookies(Map<String, String> cookies);
/**
- * Provide an alternate parser to use when parsing the response to a Document.
- * @param parser alternate parser
+ * Provide an alternate parser to use when parsing the response to a
+ * Document.
+ *
+ * @param parser
+ * alternate parser
* @return this Connection, for chaining
*/
public Connection parser(Parser parser);
/**
* Execute the request as a GET, and parse the result.
+ *
* @return parsed Document
- * @throws IOException on error
+ * @throws IOException
+ * on error
*/
public Document get() throws IOException;
/**
* Execute the request as a POST, and parse the result.
+ *
* @return parsed Document
- * @throws IOException on error
+ * @throws IOException
+ * on error
*/
public Document post() throws IOException;
/**
* Execute the request.
+ *
* @return a response object
- * @throws IOException on error
+ * @throws IOException
+ * on error
*/
public Response execute() throws IOException;
/**
* Get the request object associated with this connection
+ *
* @return request
*/
public Request request();
/**
* Set the connection's request
- * @param request new request object
+ *
+ * @param request
+ * new request object
* @return this Connection, for chaining
*/
public Connection request(Request request);
/**
* Get the response, once the request has been executed
+ *
* @return response
*/
public Response response();
/**
* Set the connection's response
- * @param response new response
+ *
+ * @param response
+ * new response
* @return this Connection, for chaining
*/
public Connection response(Response response);
-
/**
* Common methods for Requests and Responses
- * @param <T> Type of Base, either Request or Response
+ *
+ * @param <T>
+ * Type of Base, either Request or Response
*/
interface Base<T extends Base> {
/**
* Get the URL
+ *
* @return URL
*/
public URL url();
/**
* Set the URL
- * @param url new URL
+ *
+ * @param url
+ * new URL
* @return this, for chaining
*/
public T url(URL url);
/**
* Get the request method
+ *
* @return method
*/
public Method method();
/**
* Set the request method
- * @param method new method
+ *
+ * @param method
+ * new method
* @return this, for chaining
*/
public T method(Method method);
/**
- * Get the value of a header. This is a simplified header model, where a header may only have one value.
+ * Get the value of a header. This is a simplified header model, where a
+ * header may only have one value.
* <p>
* Header names are case insensitive.
- * @param name name of header (case insensitive)
+ *
+ * @param name
+ * name of header (case insensitive)
* @return value of header, or null if not set.
* @see #hasHeader(String)
* @see #cookie(String)
@@ -246,29 +319,38 @@ public interface Connection {
public String header(String name);
/**
- * Set a header. This method will overwrite any existing header with the same case insensitive name.
- * @param name Name of header
- * @param value Value of header
+ * Set a header. This method will overwrite any existing header with the
+ * same case insensitive name.
+ *
+ * @param name
+ * Name of header
+ * @param value
+ * Value of header
* @return this, for chaining
*/
public T header(String name, String value);
/**
* Check if a header is present
- * @param name name of header (case insensitive)
+ *
+ * @param name
+ * name of header (case insensitive)
* @return if the header is present in this request/response
*/
public boolean hasHeader(String name);
/**
* Remove a header by name
- * @param name name of header to remove (case insensitive)
+ *
+ * @param name
+ * name of header to remove (case insensitive)
* @return this, for chaining
*/
public T removeHeader(String name);
/**
* Retrieve all of the request/response headers as a map
+ *
* @return headers
*/
public Map<String, String> headers();
@@ -276,37 +358,48 @@ public interface Connection {
/**
* Get a cookie value by name from this request/response.
* <p>
- * Response objects have a simplified cookie model. Each cookie set in the response is added to the response
- * object's cookie key=value map. The cookie's path, domain, and expiry date are ignored.
- * @param name name of cookie to retrieve.
+ * Response objects have a simplified cookie model. Each cookie set in
+ * the response is added to the response object's cookie key=value map.
+ * The cookie's path, domain, and expiry date are ignored.
+ *
+ * @param name
+ * name of cookie to retrieve.
* @return value of cookie, or null if not set
*/
public String cookie(String name);
/**
* Set a cookie in this request/response.
- * @param name name of cookie
- * @param value value of cookie
+ *
+ * @param name
+ * name of cookie
+ * @param value
+ * value of cookie
* @return this, for chaining
*/
public T cookie(String name, String value);
/**
* Check if a cookie is present
- * @param name name of cookie
+ *
+ * @param name
+ * name of cookie
* @return if the cookie is present in this request/response
*/
public boolean hasCookie(String name);
/**
* Remove a cookie by name
- * @param name name of cookie to remove
+ *
+ * @param name
+ * name of cookie to remove
* @return this, for chaining
*/
public T removeCookie(String name);
/**
* Retrieve all of the request/response cookies as a map
+ *
* @return cookies
*/
public Map<String, String> cookies();
@@ -319,79 +412,99 @@ public interface Connection {
public interface Request extends Base<Request> {
/**
* Get the request timeout, in milliseconds.
+ *
* @return the timeout in milliseconds.
*/
public int timeout();
/**
* Update the request timeout.
- * @param millis timeout, in milliseconds
+ *
+ * @param millis
+ * timeout, in milliseconds
* @return this Request, for chaining
*/
public Request timeout(int millis);
/**
* Get the current followRedirects configuration.
+ *
* @return true if followRedirects is enabled.
*/
public boolean followRedirects();
/**
- * Configures the request to (not) follow server redirects. By default this is <b>true</b>.
- *
- * @param followRedirects true if server redirects should be followed.
+ * Configures the request to (not) follow server redirects. By default
+ * this is <b>true</b>.
+ *
+ * @param followRedirects
+ * true if server redirects should be followed.
* @return this Request, for chaining
*/
public Request followRedirects(boolean followRedirects);
/**
* Get the current ignoreHttpErrors configuration.
- * @return true if errors will be ignored; false (default) if HTTP errors will cause an IOException to be thrown.
+ *
+ * @return true if errors will be ignored; false (default) if HTTP
+ * errors will cause an IOException to be thrown.
*/
public boolean ignoreHttpErrors();
- /**
- * Configures the request to ignore HTTP errors in the response.
- * @param ignoreHttpErrors set to true to ignore HTTP errors.
+ /**
+ * Configures the request to ignore HTTP errors in the response.
+ *
+ * @param ignoreHttpErrors
+ * set to true to ignore HTTP errors.
* @return this Request, for chaining
- */
+ */
public Request ignoreHttpErrors(boolean ignoreHttpErrors);
/**
* Get the current ignoreContentType configuration.
- * @return true if invalid content-types will be ignored; false (default) if they will cause an IOException to be thrown.
+ *
+ * @return true if invalid content-types will be ignored; false
+ * (default) if they will cause an IOException to be thrown.
*/
public boolean ignoreContentType();
/**
- * Configures the request to ignore the Content-Type of the response.
- * @param ignoreContentType set to true to ignore the content type.
+ * Configures the request to ignore the Content-Type of the response.
+ *
+ * @param ignoreContentType
+ * set to true to ignore the content type.
* @return this Request, for chaining
- */
+ */
public Request ignoreContentType(boolean ignoreContentType);
/**
* Add a data parameter to the request
- * @param keyval data to add.
+ *
+ * @param keyval
+ * data to add.
* @return this Request, for chaining
*/
public Request data(KeyVal keyval);
/**
* Get all of the request's data parameters
+ *
* @return collection of keyvals
*/
public Collection<KeyVal> data();
/**
* Specify the parser to use when parsing the document.
- * @param parser parser to use.
+ *
+ * @param parser
+ * parser to use.
* @return this Request, for chaining
*/
public Request parser(Parser parser);
/**
* Get the current parser to use when parsing the document.
+ *
* @return current Parser
*/
public Parser parser();
@@ -401,46 +514,54 @@ public interface Connection {
* Represents a HTTP response.
*/
public interface Response extends Base<Response> {
-
- /**
+
+ /**
* Get the status code of the response.
+ *
* @return status code
*/
public int statusCode();
/**
* Get the status message of the response.
+ *
* @return status message
*/
public String statusMessage();
/**
* Get the character set name of the response.
+ *
* @return character set name
*/
public String charset();
/**
* Get the response content type (e.g. "text/html");
+ *
* @return the response content type
*/
public String contentType();
/**
* Parse the body of the response as a Document.
+ *
* @return a parsed Document
- * @throws IOException on error
+ * @throws IOException
+ * on error
*/
public Document parse() throws IOException;
/**
* Get the body of the response as a plain string.
+ *
* @return body
*/
public String body();
/**
* Get the body of the response as an array of bytes.
+ *
* @return body bytes
*/
public byte[] bodyAsBytes();
@@ -453,29 +574,34 @@ public interface Connection {
/**
* Update the key of a keyval
- * @param key new key
+ *
+ * @param key
+ * new key
* @return this KeyVal, for chaining
*/
public KeyVal key(String key);
/**
* Get the key of a keyval
+ *
* @return the key
*/
public String key();
/**
* Update the value of a keyval
- * @param value the new value
+ *
+ * @param value
+ * the new value
* @return this KeyVal, for chaining
*/
public KeyVal value(String value);
/**
* Get the value of a keyval
+ *
* @return the value
*/
public String value();
}
}
-
diff --git a/server/src/org/jsoup/Jsoup.java b/server/src/org/jsoup/Jsoup.java
index 8c6afcee36..b5429d9410 100644
--- a/server/src/org/jsoup/Jsoup.java
+++ b/server/src/org/jsoup/Jsoup.java
@@ -1,178 +1,233 @@
package org.jsoup;
-import org.jsoup.nodes.Document;
-import org.jsoup.parser.Parser;
-import org.jsoup.safety.Cleaner;
-import org.jsoup.safety.Whitelist;
-import org.jsoup.helper.DataUtil;
-import org.jsoup.helper.HttpConnection;
-
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
-/**
- The core public access point to the jsoup functionality.
+import org.jsoup.helper.DataUtil;
+import org.jsoup.helper.HttpConnection;
+import org.jsoup.nodes.Document;
+import org.jsoup.parser.Parser;
+import org.jsoup.safety.Cleaner;
+import org.jsoup.safety.Whitelist;
- @author Jonathan Hedley */
+/**
+ * The core public access point to the jsoup functionality.
+ *
+ * @author Jonathan Hedley
+ */
public class Jsoup {
- private Jsoup() {}
+ private Jsoup() {
+ }
/**
- Parse HTML into a Document. The parser will make a sensible, balanced document tree out of any HTML.
-
- @param html HTML to parse
- @param baseUri The URL where the HTML was retrieved from. Used to resolve relative URLs to absolute URLs, that occur
- before the HTML declares a {@code <base href>} tag.
- @return sane HTML
+ * Parse HTML into a Document. The parser will make a sensible, balanced
+ * document tree out of any HTML.
+ *
+ * @param html
+ * HTML to parse
+ * @param baseUri
+ * The URL where the HTML was retrieved from. Used to resolve
+ * relative URLs to absolute URLs, that occur before the HTML
+ * declares a {@code <base href>} tag.
+ * @return sane HTML
*/
public static Document parse(String html, String baseUri) {
return Parser.parse(html, baseUri);
}
/**
- Parse HTML into a Document, using the provided Parser. You can provide an alternate parser, such as a simple XML
- (non-HTML) parser.
-
- @param html HTML to parse
- @param baseUri The URL where the HTML was retrieved from. Used to resolve relative URLs to absolute URLs, that occur
- before the HTML declares a {@code <base href>} tag.
- @param parser alternate {@link Parser#xmlParser() parser} to use.
- @return sane HTML
+ * Parse HTML into a Document, using the provided Parser. You can provide an
+ * alternate parser, such as a simple XML (non-HTML) parser.
+ *
+ * @param html
+ * HTML to parse
+ * @param baseUri
+ * The URL where the HTML was retrieved from. Used to resolve
+ * relative URLs to absolute URLs, that occur before the HTML
+ * declares a {@code <base href>} tag.
+ * @param parser
+ * alternate {@link Parser#xmlParser() parser} to use.
+ * @return sane HTML
*/
public static Document parse(String html, String baseUri, Parser parser) {
return parser.parseInput(html, baseUri);
}
/**
- Parse HTML into a Document. As no base URI is specified, absolute URL detection relies on the HTML including a
- {@code <base href>} tag.
-
- @param html HTML to parse
- @return sane HTML
-
- @see #parse(String, String)
+ * Parse HTML into a Document. As no base URI is specified, absolute URL
+ * detection relies on the HTML including a {@code <base href>} tag.
+ *
+ * @param html
+ * HTML to parse
+ * @return sane HTML
+ * @see #parse(String, String)
*/
public static Document parse(String html) {
return Parser.parse(html, "");
}
/**
- * Creates a new {@link Connection} to a URL. Use to fetch and parse a HTML page.
+ * Creates a new {@link Connection} to a URL. Use to fetch and parse a HTML
+ * page.
* <p>
* Use examples:
* <ul>
- * <li><code>Document doc = Jsoup.connect("http://example.com").userAgent("Mozilla").data("name", "jsoup").get();</code></li>
- * <li><code>Document doc = Jsoup.connect("http://example.com").cookie("auth", "token").post();
+ * <li>
+ * <code>Document doc = Jsoup.connect("http://example.com").userAgent("Mozilla").data("name", "jsoup").get();</code>
+ * </li>
+ * <li>
+ * <code>Document doc = Jsoup.connect("http://example.com").cookie("auth", "token").post();</code></li>
* </ul>
- * @param url URL to connect to. The protocol must be {@code http} or {@code https}.
- * @return the connection. You can add data, cookies, and headers; set the user-agent, referrer, method; and then execute.
+ *
+ * @param url
+ * URL to connect to. The protocol must be {@code http} or
+ * {@code https}.
+ * @return the connection. You can add data, cookies, and headers; set the
+ * user-agent, referrer, method; and then execute.
*/
public static Connection connect(String url) {
return HttpConnection.connect(url);
}
/**
- Parse the contents of a file as HTML.
-
- @param in file to load HTML from
- @param charsetName (optional) character set of file contents. Set to {@code null} to determine from {@code http-equiv} meta tag, if
- present, or fall back to {@code UTF-8} (which is often safe to do).
- @param baseUri The URL where the HTML was retrieved from, to resolve relative links against.
- @return sane HTML
-
- @throws IOException if the file could not be found, or read, or if the charsetName is invalid.
+ * Parse the contents of a file as HTML.
+ *
+ * @param in
+ * file to load HTML from
+ * @param charsetName
+ * (optional) character set of file contents. Set to {@code null}
+ * to determine from {@code http-equiv} meta tag, if present, or
+ * fall back to {@code UTF-8} (which is often safe to do).
+ * @param baseUri
+ * The URL where the HTML was retrieved from, to resolve relative
+ * links against.
+ * @return sane HTML
+ * @throws IOException
+ * if the file could not be found, or read, or if the
+ * charsetName is invalid.
*/
- public static Document parse(File in, String charsetName, String baseUri) throws IOException {
+ public static Document parse(File in, String charsetName, String baseUri)
+ throws IOException {
return DataUtil.load(in, charsetName, baseUri);
}
/**
- Parse the contents of a file as HTML. The location of the file is used as the base URI to qualify relative URLs.
-
- @param in file to load HTML from
- @param charsetName (optional) character set of file contents. Set to {@code null} to determine from {@code http-equiv} meta tag, if
- present, or fall back to {@code UTF-8} (which is often safe to do).
- @return sane HTML
-
- @throws IOException if the file could not be found, or read, or if the charsetName is invalid.
- @see #parse(File, String, String)
+ * Parse the contents of a file as HTML. The location of the file is used as
+ * the base URI to qualify relative URLs.
+ *
+ * @param in
+ * file to load HTML from
+ * @param charsetName
+ * (optional) character set of file contents. Set to {@code null}
+ * to determine from {@code http-equiv} meta tag, if present, or
+ * fall back to {@code UTF-8} (which is often safe to do).
+ * @return sane HTML
+ * @throws IOException
+ * if the file could not be found, or read, or if the
+ * charsetName is invalid.
+ * @see #parse(File, String, String)
*/
- public static Document parse(File in, String charsetName) throws IOException {
+ public static Document parse(File in, String charsetName)
+ throws IOException {
return DataUtil.load(in, charsetName, in.getAbsolutePath());
}
- /**
- Read an input stream, and parse it to a Document.
-
- @param in input stream to read. Make sure to close it after parsing.
- @param charsetName (optional) character set of file contents. Set to {@code null} to determine from {@code http-equiv} meta tag, if
- present, or fall back to {@code UTF-8} (which is often safe to do).
- @param baseUri The URL where the HTML was retrieved from, to resolve relative links against.
- @return sane HTML
-
- @throws IOException if the file could not be found, or read, or if the charsetName is invalid.
+ /**
+ * Read an input stream, and parse it to a Document.
+ *
+ * @param in
+ * input stream to read. Make sure to close it after parsing.
+ * @param charsetName
+ * (optional) character set of file contents. Set to {@code null}
+ * to determine from {@code http-equiv} meta tag, if present, or
+ * fall back to {@code UTF-8} (which is often safe to do).
+ * @param baseUri
+ * The URL where the HTML was retrieved from, to resolve relative
+ * links against.
+ * @return sane HTML
+ * @throws IOException
+ * if the stream could not be read, or if the
+ * charsetName is invalid.
*/
- public static Document parse(InputStream in, String charsetName, String baseUri) throws IOException {
+ public static Document parse(InputStream in, String charsetName,
+ String baseUri) throws IOException {
return DataUtil.load(in, charsetName, baseUri);
}
/**
- Read an input stream, and parse it to a Document. You can provide an alternate parser, such as a simple XML
- (non-HTML) parser.
-
- @param in input stream to read. Make sure to close it after parsing.
- @param charsetName (optional) character set of file contents. Set to {@code null} to determine from {@code http-equiv} meta tag, if
- present, or fall back to {@code UTF-8} (which is often safe to do).
- @param baseUri The URL where the HTML was retrieved from, to resolve relative links against.
- @param parser alternate {@link Parser#xmlParser() parser} to use.
- @return sane HTML
-
- @throws IOException if the file could not be found, or read, or if the charsetName is invalid.
+ * Read an input stream, and parse it to a Document. You can provide an
+ * alternate parser, such as a simple XML (non-HTML) parser.
+ *
+ * @param in
+ * input stream to read. Make sure to close it after parsing.
+ * @param charsetName
+ * (optional) character set of file contents. Set to {@code null}
+ * to determine from {@code http-equiv} meta tag, if present, or
+ * fall back to {@code UTF-8} (which is often safe to do).
+ * @param baseUri
+ * The URL where the HTML was retrieved from, to resolve relative
+ * links against.
+ * @param parser
+ * alternate {@link Parser#xmlParser() parser} to use.
+ * @return sane HTML
+ * @throws IOException
+ * if the stream could not be read, or if the
+ * charsetName is invalid.
*/
- public static Document parse(InputStream in, String charsetName, String baseUri, Parser parser) throws IOException {
+ public static Document parse(InputStream in, String charsetName,
+ String baseUri, Parser parser) throws IOException {
return DataUtil.load(in, charsetName, baseUri, parser);
}
/**
- Parse a fragment of HTML, with the assumption that it forms the {@code body} of the HTML.
-
- @param bodyHtml body HTML fragment
- @param baseUri URL to resolve relative URLs against.
- @return sane HTML document
-
- @see Document#body()
+ * Parse a fragment of HTML, with the assumption that it forms the
+ * {@code body} of the HTML.
+ *
+ * @param bodyHtml
+ * body HTML fragment
+ * @param baseUri
+ * URL to resolve relative URLs against.
+ * @return sane HTML document
+ * @see Document#body()
*/
public static Document parseBodyFragment(String bodyHtml, String baseUri) {
return Parser.parseBodyFragment(bodyHtml, baseUri);
}
/**
- Parse a fragment of HTML, with the assumption that it forms the {@code body} of the HTML.
-
- @param bodyHtml body HTML fragment
- @return sane HTML document
-
- @see Document#body()
+ * Parse a fragment of HTML, with the assumption that it forms the
+ * {@code body} of the HTML.
+ *
+ * @param bodyHtml
+ * body HTML fragment
+ * @return sane HTML document
+ * @see Document#body()
*/
public static Document parseBodyFragment(String bodyHtml) {
return Parser.parseBodyFragment(bodyHtml, "");
}
/**
- Fetch a URL, and parse it as HTML. Provided for compatibility; in most cases use {@link #connect(String)} instead.
- <p>
- The encoding character set is determined by the content-type header or http-equiv meta tag, or falls back to {@code UTF-8}.
-
- @param url URL to fetch (with a GET). The protocol must be {@code http} or {@code https}.
- @param timeoutMillis Connection and read timeout, in milliseconds. If exceeded, IOException is thrown.
- @return The parsed HTML.
-
- @throws IOException If the final server response != 200 OK (redirects are followed), or if there's an error reading
- the response stream.
-
- @see #connect(String)
+ * Fetch a URL, and parse it as HTML. Provided for compatibility; in most
+ * cases use {@link #connect(String)} instead.
+ * <p>
+ * The encoding character set is determined by the content-type header or
+ * http-equiv meta tag, or falls back to {@code UTF-8}.
+ *
+ * @param url
+ * URL to fetch (with a GET). The protocol must be {@code http}
+ * or {@code https}.
+ * @param timeoutMillis
+ * Connection and read timeout, in milliseconds. If exceeded,
+ * IOException is thrown.
+ * @return The parsed HTML.
+ * @throws IOException
+ * If the final server response != 200 OK (redirects are
+ * followed), or if there's an error reading the response
+ * stream.
+ * @see #connect(String)
*/
public static Document parse(URL url, int timeoutMillis) throws IOException {
Connection con = HttpConnection.connect(url);
@@ -181,17 +236,20 @@ public class Jsoup {
}
/**
- Get safe HTML from untrusted input HTML, by parsing input HTML and filtering it through a white-list of permitted
- tags and attributes.
-
- @param bodyHtml input untrusted HTML
- @param baseUri URL to resolve relative URLs against
- @param whitelist white-list of permitted HTML elements
- @return safe HTML
-
- @see Cleaner#clean(Document)
+ * Get safe HTML from untrusted input HTML, by parsing input HTML and
+ * filtering it through a white-list of permitted tags and attributes.
+ *
+ * @param bodyHtml
+ * input untrusted HTML
+ * @param baseUri
+ * URL to resolve relative URLs against
+ * @param whitelist
+ * white-list of permitted HTML elements
+ * @return safe HTML
+ * @see Cleaner#clean(Document)
*/
- public static String clean(String bodyHtml, String baseUri, Whitelist whitelist) {
+ public static String clean(String bodyHtml, String baseUri,
+ Whitelist whitelist) {
Document dirty = parseBodyFragment(bodyHtml, baseUri);
Cleaner cleaner = new Cleaner(whitelist);
Document clean = cleaner.clean(dirty);
@@ -199,31 +257,37 @@ public class Jsoup {
}
/**
- Get safe HTML from untrusted input HTML, by parsing input HTML and filtering it through a white-list of permitted
- tags and attributes.
-
- @param bodyHtml input untrusted HTML
- @param whitelist white-list of permitted HTML elements
- @return safe HTML
-
- @see Cleaner#clean(Document)
+ * Get safe HTML from untrusted input HTML, by parsing input HTML and
+ * filtering it through a white-list of permitted tags and attributes.
+ *
+ * @param bodyHtml
+ * input untrusted HTML
+ * @param whitelist
+ * white-list of permitted HTML elements
+ * @return safe HTML
+ * @see Cleaner#clean(Document)
*/
public static String clean(String bodyHtml, Whitelist whitelist) {
return clean(bodyHtml, "", whitelist);
}
/**
- Test if the input HTML has only tags and attributes allowed by the Whitelist. Useful for form validation. The input HTML should
- still be run through the cleaner to set up enforced attributes, and to tidy the output.
- @param bodyHtml HTML to test
- @param whitelist whitelist to test against
- @return true if no tags or attributes were removed; false otherwise
- @see #clean(String, org.jsoup.safety.Whitelist)
+ * Test if the input HTML has only tags and attributes allowed by the
+ * Whitelist. Useful for form validation. The input HTML should still be run
+ * through the cleaner to set up enforced attributes, and to tidy the
+ * output.
+ *
+ * @param bodyHtml
+ * HTML to test
+ * @param whitelist
+ * whitelist to test against
+ * @return true if no tags or attributes were removed; false otherwise
+ * @see #clean(String, org.jsoup.safety.Whitelist)
*/
public static boolean isValid(String bodyHtml, Whitelist whitelist) {
Document dirty = parseBodyFragment(bodyHtml, "");
Cleaner cleaner = new Cleaner(whitelist);
return cleaner.isValid(dirty);
}
-
+
}
diff --git a/server/src/org/jsoup/examples/HtmlToPlainText.java b/server/src/org/jsoup/examples/HtmlToPlainText.java
index 8f563e9608..53e485be34 100644
--- a/server/src/org/jsoup/examples/HtmlToPlainText.java
+++ b/server/src/org/jsoup/examples/HtmlToPlainText.java
@@ -1,5 +1,7 @@
package org.jsoup.examples;
+import java.io.IOException;
+
import org.jsoup.Jsoup;
import org.jsoup.helper.StringUtil;
import org.jsoup.helper.Validate;
@@ -10,15 +12,15 @@ import org.jsoup.nodes.TextNode;
import org.jsoup.select.NodeTraversor;
import org.jsoup.select.NodeVisitor;
-import java.io.IOException;
-
/**
- * HTML to plain-text. This example program demonstrates the use of jsoup to convert HTML input to lightly-formatted
- * plain-text. That is divergent from the general goal of jsoup's .text() methods, which is to get clean data from a
- * scrape.
+ * HTML to plain-text. This example program demonstrates the use of jsoup to
+ * convert HTML input to lightly-formatted plain-text. That is divergent from
+ * the general goal of jsoup's .text() methods, which is to get clean data from
+ * a scrape.
* <p/>
- * Note that this is a fairly simplistic formatter -- for real world use you'll want to embrace and extend.
- *
+ * Note that this is a fairly simplistic formatter -- for real world use you'll
+ * want to embrace and extend.
+ *
* @author Jonathan Hedley, jonathan@hedley.net
*/
public class HtmlToPlainText {
@@ -36,13 +38,16 @@ public class HtmlToPlainText {
/**
* Format an Element to plain-text
- * @param element the root element to format
+ *
+ * @param element
+ * the root element to format
* @return formatted text
*/
public String getPlainText(Element element) {
FormattingVisitor formatter = new FormattingVisitor();
NodeTraversor traversor = new NodeTraversor(formatter);
- traversor.traverse(element); // walk the DOM, and call .head() and .tail() for each node
+ traversor.traverse(element); // walk the DOM, and call .head() and
+ // .tail() for each node
return formatter.toString();
}
@@ -51,44 +56,57 @@ public class HtmlToPlainText {
private class FormattingVisitor implements NodeVisitor {
private static final int maxWidth = 80;
private int width = 0;
- private StringBuilder accum = new StringBuilder(); // holds the accumulated text
+ private StringBuilder accum = new StringBuilder(); // holds the
+ // accumulated text
// hit when the node is first seen
+ @Override
public void head(Node node, int depth) {
String name = node.nodeName();
- if (node instanceof TextNode)
- append(((TextNode) node).text()); // TextNodes carry all user-readable text in the DOM.
- else if (name.equals("li"))
+ if (node instanceof TextNode) {
+ append(((TextNode) node).text()); // TextNodes carry all
+ // user-readable text in the
+ // DOM.
+ } else if (name.equals("li")) {
append("\n * ");
+ }
}
// hit when all of the node's children (if any) have been visited
+ @Override
public void tail(Node node, int depth) {
String name = node.nodeName();
- if (name.equals("br"))
+ if (name.equals("br")) {
append("\n");
- else if (StringUtil.in(name, "p", "h1", "h2", "h3", "h4", "h5"))
+ } else if (StringUtil.in(name, "p", "h1", "h2", "h3", "h4", "h5")) {
append("\n\n");
- else if (name.equals("a"))
+ } else if (name.equals("a")) {
append(String.format(" <%s>", node.absUrl("href")));
+ }
}
// appends text to the string builder with a simple word wrap method
private void append(String text) {
- if (text.startsWith("\n"))
- width = 0; // reset counter if starts with a newline. only from formats above, not in natural text
- if (text.equals(" ") &&
- (accum.length() == 0 || StringUtil.in(accum.substring(accum.length() - 1), " ", "\n")))
+ if (text.startsWith("\n")) {
+ width = 0; // reset counter if starts with a newline. only from
+ // formats above, not in natural text
+ }
+ if (text.equals(" ")
+ && (accum.length() == 0 || StringUtil.in(
+ accum.substring(accum.length() - 1), " ", "\n"))) {
return; // don't accumulate long runs of empty spaces
+ }
if (text.length() + width > maxWidth) { // won't fit, needs to wrap
String words[] = text.split("\\s+");
for (int i = 0; i < words.length; i++) {
String word = words[i];
boolean last = i == words.length - 1;
- if (!last) // insert a space if not the last word
+ if (!last) {
word = word + " ";
- if (word.length() + width > maxWidth) { // wrap and reset counter
+ }
+ if (word.length() + width > maxWidth) { // wrap and reset
+ // counter
accum.append("\n").append(word);
width = word.length();
} else {
@@ -102,6 +120,7 @@ public class HtmlToPlainText {
}
}
+ @Override
public String toString() {
return accum.toString();
}
diff --git a/server/src/org/jsoup/examples/ListLinks.java b/server/src/org/jsoup/examples/ListLinks.java
index 64b29ba107..d57a488435 100644
--- a/server/src/org/jsoup/examples/ListLinks.java
+++ b/server/src/org/jsoup/examples/ListLinks.java
@@ -1,13 +1,13 @@
package org.jsoup.examples;
+import java.io.IOException;
+
import org.jsoup.Jsoup;
import org.jsoup.helper.Validate;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
-import java.io.IOException;
-
/**
* Example program to list links from a URL.
*/
@@ -24,22 +24,25 @@ public class ListLinks {
print("\nMedia: (%d)", media.size());
for (Element src : media) {
- if (src.tagName().equals("img"))
- print(" * %s: <%s> %sx%s (%s)",
- src.tagName(), src.attr("abs:src"), src.attr("width"), src.attr("height"),
- trim(src.attr("alt"), 20));
- else
+ if (src.tagName().equals("img")) {
+ print(" * %s: <%s> %sx%s (%s)", src.tagName(),
+ src.attr("abs:src"), src.attr("width"),
+ src.attr("height"), trim(src.attr("alt"), 20));
+ } else {
print(" * %s: <%s>", src.tagName(), src.attr("abs:src"));
+ }
}
print("\nImports: (%d)", imports.size());
for (Element link : imports) {
- print(" * %s <%s> (%s)", link.tagName(),link.attr("abs:href"), link.attr("rel"));
+ print(" * %s <%s> (%s)", link.tagName(), link.attr("abs:href"),
+ link.attr("rel"));
}
print("\nLinks: (%d)", links.size());
for (Element link : links) {
- print(" * a: <%s> (%s)", link.attr("abs:href"), trim(link.text(), 35));
+ print(" * a: <%s> (%s)", link.attr("abs:href"),
+ trim(link.text(), 35));
}
}
@@ -48,9 +51,10 @@ public class ListLinks {
}
private static String trim(String s, int width) {
- if (s.length() > width)
- return s.substring(0, width-1) + ".";
- else
+ if (s.length() > width) {
+ return s.substring(0, width - 1) + ".";
+ } else {
return s;
+ }
}
}
diff --git a/server/src/org/jsoup/helper/DataUtil.java b/server/src/org/jsoup/helper/DataUtil.java
index 9adfe42153..26b85ea7dc 100644
--- a/server/src/org/jsoup/helper/DataUtil.java
+++ b/server/src/org/jsoup/helper/DataUtil.java
@@ -1,102 +1,147 @@
package org.jsoup.helper;
-import org.jsoup.nodes.Document;
-import org.jsoup.nodes.Element;
-import org.jsoup.parser.Parser;
-
-import java.io.*;
+import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
import java.nio.ByteBuffer;
import java.nio.charset.Charset;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.parser.Parser;
+
/**
* Internal static utilities for handling data.
- *
+ *
*/
public class DataUtil {
- private static final Pattern charsetPattern = Pattern.compile("(?i)\\bcharset=\\s*\"?([^\\s;\"]*)");
- static final String defaultCharset = "UTF-8"; // used if not found in header or meta charset
+ private static final Pattern charsetPattern = Pattern
+ .compile("(?i)\\bcharset=\\s*\"?([^\\s;\"]*)");
+ static final String defaultCharset = "UTF-8"; // used if not found in header
+ // or meta charset
private static final int bufferSize = 0x20000; // ~130K.
- private DataUtil() {}
+ private DataUtil() {
+ }
/**
* Loads a file to a Document.
- * @param in file to load
- * @param charsetName character set of input
- * @param baseUri base URI of document, to resolve relative links against
+ *
+ * @param in
+ * file to load
+ * @param charsetName
+ * character set of input
+ * @param baseUri
+ * base URI of document, to resolve relative links against
* @return Document
- * @throws IOException on IO error
+ * @throws IOException
+ * on IO error
*/
- public static Document load(File in, String charsetName, String baseUri) throws IOException {
+ public static Document load(File in, String charsetName, String baseUri)
+ throws IOException {
FileInputStream inStream = null;
try {
inStream = new FileInputStream(in);
ByteBuffer byteData = readToByteBuffer(inStream);
- return parseByteData(byteData, charsetName, baseUri, Parser.htmlParser());
+ return parseByteData(byteData, charsetName, baseUri,
+ Parser.htmlParser());
} finally {
- if (inStream != null)
+ if (inStream != null) {
inStream.close();
+ }
}
}
/**
* Parses a Document from an input steam.
- * @param in input stream to parse. You will need to close it.
- * @param charsetName character set of input
- * @param baseUri base URI of document, to resolve relative links against
+ *
+ * @param in
+ * input stream to parse. You will need to close it.
+ * @param charsetName
+ * character set of input
+ * @param baseUri
+ * base URI of document, to resolve relative links against
* @return Document
- * @throws IOException on IO error
+ * @throws IOException
+ * on IO error
*/
- public static Document load(InputStream in, String charsetName, String baseUri) throws IOException {
+ public static Document load(InputStream in, String charsetName,
+ String baseUri) throws IOException {
ByteBuffer byteData = readToByteBuffer(in);
- return parseByteData(byteData, charsetName, baseUri, Parser.htmlParser());
+ return parseByteData(byteData, charsetName, baseUri,
+ Parser.htmlParser());
}
/**
* Parses a Document from an input steam, using the provided Parser.
- * @param in input stream to parse. You will need to close it.
- * @param charsetName character set of input
- * @param baseUri base URI of document, to resolve relative links against
- * @param parser alternate {@link Parser#xmlParser() parser} to use.
+ *
+ * @param in
+ * input stream to parse. You will need to close it.
+ * @param charsetName
+ * character set of input
+ * @param baseUri
+ * base URI of document, to resolve relative links against
+ * @param parser
+ * alternate {@link Parser#xmlParser() parser} to use.
* @return Document
- * @throws IOException on IO error
+ * @throws IOException
+ * on IO error
*/
- public static Document load(InputStream in, String charsetName, String baseUri, Parser parser) throws IOException {
+ public static Document load(InputStream in, String charsetName,
+ String baseUri, Parser parser) throws IOException {
ByteBuffer byteData = readToByteBuffer(in);
return parseByteData(byteData, charsetName, baseUri, parser);
}
- // reads bytes first into a buffer, then decodes with the appropriate charset. done this way to support
- // switching the chartset midstream when a meta http-equiv tag defines the charset.
- static Document parseByteData(ByteBuffer byteData, String charsetName, String baseUri, Parser parser) {
+ // reads bytes first into a buffer, then decodes with the appropriate
+ // charset. done this way to support
+ // switching the chartset midstream when a meta http-equiv tag defines the
+ // charset.
+ static Document parseByteData(ByteBuffer byteData, String charsetName,
+ String baseUri, Parser parser) {
String docData;
Document doc = null;
if (charsetName == null) { // determine from meta. safe parse as UTF-8
- // look for <meta http-equiv="Content-Type" content="text/html;charset=gb2312"> or HTML5 <meta charset="gb2312">
- docData = Charset.forName(defaultCharset).decode(byteData).toString();
+ // look for <meta http-equiv="Content-Type"
+ // content="text/html;charset=gb2312"> or HTML5 <meta
+ // charset="gb2312">
+ docData = Charset.forName(defaultCharset).decode(byteData)
+ .toString();
doc = parser.parseInput(docData, baseUri);
- Element meta = doc.select("meta[http-equiv=content-type], meta[charset]").first();
+ Element meta = doc.select(
+ "meta[http-equiv=content-type], meta[charset]").first();
if (meta != null) { // if not found, will keep utf-8 as best attempt
- String foundCharset = meta.hasAttr("http-equiv") ? getCharsetFromContentType(meta.attr("content")) : meta.attr("charset");
- if (foundCharset != null && foundCharset.length() != 0 && !foundCharset.equals(defaultCharset)) { // need to re-decode
+ String foundCharset = meta.hasAttr("http-equiv") ? getCharsetFromContentType(meta
+ .attr("content")) : meta.attr("charset");
+ if (foundCharset != null && foundCharset.length() != 0
+ && !foundCharset.equals(defaultCharset)) { // need to
+ // re-decode
charsetName = foundCharset;
byteData.rewind();
- docData = Charset.forName(foundCharset).decode(byteData).toString();
+ docData = Charset.forName(foundCharset).decode(byteData)
+ .toString();
doc = null;
}
}
} else { // specified by content type header (or by user on file load)
- Validate.notEmpty(charsetName, "Must set charset arg to character set of file to parse. Set to null to attempt to detect from HTML");
+ Validate.notEmpty(
+ charsetName,
+ "Must set charset arg to character set of file to parse. Set to null to attempt to detect from HTML");
docData = Charset.forName(charsetName).decode(byteData).toString();
}
if (doc == null) {
- // there are times where there is a spurious byte-order-mark at the start of the text. Shouldn't be present
- // in utf-8. If after decoding, there is a BOM, strip it; otherwise will cause the parser to go straight
+ // there are times where there is a spurious byte-order-mark at the
+ // start of the text. Shouldn't be present
+ // in utf-8. If after decoding, there is a BOM, strip it; otherwise
+ // will cause the parser to go straight
// into head mode
- if (docData.charAt(0) == 65279)
+ if (docData.charAt(0) == 65279) {
docData = docData.substring(1);
+ }
doc = parser.parseInput(docData, baseUri);
doc.outputSettings().charset(charsetName);
@@ -108,9 +153,11 @@ public class DataUtil {
byte[] buffer = new byte[bufferSize];
ByteArrayOutputStream outStream = new ByteArrayOutputStream(bufferSize);
int read;
- while(true) {
- read = inStream.read(buffer);
- if (read == -1) break;
+ while (true) {
+ read = inStream.read(buffer);
+ if (read == -1) {
+ break;
+ }
outStream.write(buffer, 0, read);
}
ByteBuffer byteData = ByteBuffer.wrap(outStream.toByteArray());
@@ -119,17 +166,21 @@ public class DataUtil {
/**
* Parse out a charset from a content type header.
- * @param contentType e.g. "text/html; charset=EUC-JP"
- * @return "EUC-JP", or null if not found. Charset is trimmed and uppercased.
+ *
+ * @param contentType
+ * e.g. "text/html; charset=EUC-JP"
+ * @return "EUC-JP", or null if not found. Charset is trimmed and
+ * uppercased.
*/
static String getCharsetFromContentType(String contentType) {
- if (contentType == null) return null;
+ if (contentType == null) {
+ return null;
+ }
Matcher m = charsetPattern.matcher(contentType);
if (m.find()) {
return m.group(1).trim().toUpperCase();
}
return null;
}
-
-
+
}
diff --git a/server/src/org/jsoup/helper/DescendableLinkedList.java b/server/src/org/jsoup/helper/DescendableLinkedList.java
index 28ca1971eb..97595c34e6 100644
--- a/server/src/org/jsoup/helper/DescendableLinkedList.java
+++ b/server/src/org/jsoup/helper/DescendableLinkedList.java
@@ -5,7 +5,8 @@ import java.util.LinkedList;
import java.util.ListIterator;
/**
- * Provides a descending iterator and other 1.6 methods to allow support on the 1.5 JRE.
+ * Provides a descending iterator and other 1.6 methods to allow support on the
+ * 1.5 JRE.
*/
public class DescendableLinkedList<E> extends LinkedList<E> {
@@ -18,32 +19,43 @@ public class DescendableLinkedList<E> extends LinkedList<E> {
/**
* Add a new element to the start of the list.
- * @param e element to add
+ *
+ * @param e
+ * element to add
*/
+ @Override
public void push(E e) {
addFirst(e);
}
/**
* Look at the last element, if there is one.
+ *
* @return the last element, or null
*/
+ @Override
public E peekLast() {
return size() == 0 ? null : getLast();
}
/**
* Remove and return the last element, if there is one
+ *
* @return the last element, or null
*/
+ @Override
public E pollLast() {
return size() == 0 ? null : removeLast();
}
/**
- * Get an iterator that starts and the end of the list and works towards the start.
- * @return an iterator that starts and the end of the list and works towards the start.
+ * Get an iterator that starts and the end of the list and works towards the
+ * start.
+ *
+ * @return an iterator that starts and the end of the list and works towards
+ * the start.
*/
+ @Override
public Iterator<E> descendingIterator() {
return new DescendingIterator<E>(size());
}
@@ -58,16 +70,20 @@ public class DescendableLinkedList<E> extends LinkedList<E> {
/**
* Check if there is another element on the list.
+ *
* @return if another element
*/
+ @Override
public boolean hasNext() {
return iter.hasPrevious();
}
/**
* Get the next element.
+ *
* @return the next element.
*/
+ @Override
public E next() {
return iter.previous();
}
@@ -75,6 +91,7 @@ public class DescendableLinkedList<E> extends LinkedList<E> {
/**
* Remove the current element.
*/
+ @Override
public void remove() {
iter.remove();
}
diff --git a/server/src/org/jsoup/helper/HttpConnection.java b/server/src/org/jsoup/helper/HttpConnection.java
index 06200a2547..a48f8972c2 100644
--- a/server/src/org/jsoup/helper/HttpConnection.java
+++ b/server/src/org/jsoup/helper/HttpConnection.java
@@ -1,23 +1,32 @@
package org.jsoup.helper;
-import org.jsoup.Connection;
-import org.jsoup.nodes.Document;
-import org.jsoup.parser.Parser;
-import org.jsoup.parser.TokenQueue;
-
-import java.io.*;
+import java.io.BufferedInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.io.OutputStreamWriter;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLEncoder;
import java.nio.ByteBuffer;
import java.nio.charset.Charset;
-import java.util.*;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
import java.util.zip.GZIPInputStream;
+import org.jsoup.Connection;
+import org.jsoup.nodes.Document;
+import org.jsoup.parser.Parser;
+import org.jsoup.parser.TokenQueue;
+
/**
* Implementation of {@link Connection}.
- * @see org.jsoup.Jsoup#connect(String)
+ *
+ * @see org.jsoup.Jsoup#connect(String)
*/
public class HttpConnection implements Connection {
public static Connection connect(String url) {
@@ -35,16 +44,18 @@ public class HttpConnection implements Connection {
private Connection.Request req;
private Connection.Response res;
- private HttpConnection() {
+ private HttpConnection() {
req = new Request();
res = new Response();
}
+ @Override
public Connection url(URL url) {
req.url(url);
return this;
}
+ @Override
public Connection url(String url) {
Validate.notEmpty(url, "Must supply a valid URL");
try {
@@ -55,48 +66,57 @@ public class HttpConnection implements Connection {
return this;
}
+ @Override
public Connection userAgent(String userAgent) {
Validate.notNull(userAgent, "User agent must not be null");
req.header("User-Agent", userAgent);
return this;
}
+ @Override
public Connection timeout(int millis) {
req.timeout(millis);
return this;
}
+ @Override
public Connection followRedirects(boolean followRedirects) {
req.followRedirects(followRedirects);
return this;
}
+ @Override
public Connection referrer(String referrer) {
Validate.notNull(referrer, "Referrer must not be null");
req.header("Referer", referrer);
return this;
}
+ @Override
public Connection method(Method method) {
req.method(method);
return this;
}
+ @Override
public Connection ignoreHttpErrors(boolean ignoreHttpErrors) {
- req.ignoreHttpErrors(ignoreHttpErrors);
- return this;
- }
+ req.ignoreHttpErrors(ignoreHttpErrors);
+ return this;
+ }
+ @Override
public Connection ignoreContentType(boolean ignoreContentType) {
req.ignoreContentType(ignoreContentType);
return this;
}
+ @Override
public Connection data(String key, String value) {
req.data(KeyVal.create(key, value));
return this;
}
+ @Override
public Connection data(Map<String, String> data) {
Validate.notNull(data, "Data map must not be null");
for (Map.Entry<String, String> entry : data.entrySet()) {
@@ -105,12 +125,14 @@ public class HttpConnection implements Connection {
return this;
}
+ @Override
public Connection data(String... keyvals) {
Validate.notNull(keyvals, "Data key value pairs must not be null");
- Validate.isTrue(keyvals.length %2 == 0, "Must supply an even number of key value pairs");
+ Validate.isTrue(keyvals.length % 2 == 0,
+ "Must supply an even number of key value pairs");
for (int i = 0; i < keyvals.length; i += 2) {
String key = keyvals[i];
- String value = keyvals[i+1];
+ String value = keyvals[i + 1];
Validate.notEmpty(key, "Data key must not be empty");
Validate.notNull(value, "Data value must not be null");
req.data(KeyVal.create(key, value));
@@ -118,16 +140,19 @@ public class HttpConnection implements Connection {
return this;
}
+ @Override
public Connection header(String name, String value) {
req.header(name, value);
return this;
}
+ @Override
public Connection cookie(String name, String value) {
req.cookie(name, value);
return this;
}
+ @Override
public Connection cookies(Map<String, String> cookies) {
Validate.notNull(cookies, "Cookie map must not be null");
for (Map.Entry<String, String> entry : cookies.entrySet()) {
@@ -136,48 +161,57 @@ public class HttpConnection implements Connection {
return this;
}
+ @Override
public Connection parser(Parser parser) {
req.parser(parser);
return this;
}
+ @Override
public Document get() throws IOException {
req.method(Method.GET);
execute();
return res.parse();
}
+ @Override
public Document post() throws IOException {
req.method(Method.POST);
execute();
return res.parse();
}
+ @Override
public Connection.Response execute() throws IOException {
res = Response.execute(req);
return res;
}
+ @Override
public Connection.Request request() {
return req;
}
+ @Override
public Connection request(Connection.Request request) {
req = request;
return this;
}
+ @Override
public Connection.Response response() {
return res;
}
+ @Override
public Connection response(Connection.Response response) {
res = response;
return this;
}
- @SuppressWarnings({"unchecked"})
- private static abstract class Base<T extends Connection.Base> implements Connection.Base<T> {
+ @SuppressWarnings({ "unchecked" })
+ private static abstract class Base<T extends Connection.Base> implements
+ Connection.Base<T> {
URL url;
Method method;
Map<String, String> headers;
@@ -188,66 +222,83 @@ public class HttpConnection implements Connection {
cookies = new LinkedHashMap<String, String>();
}
+ @Override
public URL url() {
return url;
}
+ @Override
public T url(URL url) {
Validate.notNull(url, "URL must not be null");
this.url = url;
return (T) this;
}
+ @Override
public Method method() {
return method;
}
+ @Override
public T method(Method method) {
Validate.notNull(method, "Method must not be null");
this.method = method;
return (T) this;
}
+ @Override
public String header(String name) {
Validate.notNull(name, "Header name must not be null");
return getHeaderCaseInsensitive(name);
}
+ @Override
public T header(String name, String value) {
Validate.notEmpty(name, "Header name must not be empty");
Validate.notNull(value, "Header value must not be null");
- removeHeader(name); // ensures we don't get an "accept-encoding" and a "Accept-Encoding"
+ removeHeader(name); // ensures we don't get an "accept-encoding" and
+ // a "Accept-Encoding"
headers.put(name, value);
return (T) this;
}
+ @Override
public boolean hasHeader(String name) {
Validate.notEmpty(name, "Header name must not be empty");
return getHeaderCaseInsensitive(name) != null;
}
+ @Override
public T removeHeader(String name) {
Validate.notEmpty(name, "Header name must not be empty");
- Map.Entry<String, String> entry = scanHeaders(name); // remove is case insensitive too
- if (entry != null)
+ Map.Entry<String, String> entry = scanHeaders(name); // remove is
+ // case
+ // insensitive
+ // too
+ if (entry != null) {
headers.remove(entry.getKey()); // ensures correct case
+ }
return (T) this;
}
+ @Override
public Map<String, String> headers() {
return headers;
}
private String getHeaderCaseInsensitive(String name) {
Validate.notNull(name, "Header name must not be null");
- // quick evals for common case of title case, lower case, then scan for mixed
+ // quick evals for common case of title case, lower case, then scan
+ // for mixed
String value = headers.get(name);
- if (value == null)
+ if (value == null) {
value = headers.get(name.toLowerCase());
+ }
if (value == null) {
Map.Entry<String, String> entry = scanHeaders(name);
- if (entry != null)
+ if (entry != null) {
value = entry.getValue();
+ }
}
return value;
}
@@ -255,17 +306,20 @@ public class HttpConnection implements Connection {
private Map.Entry<String, String> scanHeaders(String name) {
String lc = name.toLowerCase();
for (Map.Entry<String, String> entry : headers.entrySet()) {
- if (entry.getKey().toLowerCase().equals(lc))
+ if (entry.getKey().toLowerCase().equals(lc)) {
return entry;
+ }
}
return null;
}
+ @Override
public String cookie(String name) {
Validate.notNull(name, "Cookie name must not be null");
return cookies.get(name);
}
+ @Override
public T cookie(String name, String value) {
Validate.notEmpty(name, "Cookie name must not be empty");
Validate.notNull(value, "Cookie value must not be null");
@@ -273,23 +327,27 @@ public class HttpConnection implements Connection {
return (T) this;
}
+ @Override
public boolean hasCookie(String name) {
Validate.notEmpty("Cookie name must not be empty");
return cookies.containsKey(name);
}
+ @Override
public T removeCookie(String name) {
Validate.notEmpty("Cookie name must not be empty");
cookies.remove(name);
return (T) this;
}
+ @Override
public Map<String, String> cookies() {
return cookies;
}
}
- public static class Request extends Base<Connection.Request> implements Connection.Request {
+ public static class Request extends Base<Connection.Request> implements
+ Connection.Request {
private int timeoutMilliseconds;
private boolean followRedirects;
private Collection<Connection.KeyVal> data;
@@ -297,7 +355,7 @@ public class HttpConnection implements Connection {
private boolean ignoreContentType = false;
private Parser parser;
- private Request() {
+ private Request() {
timeoutMilliseconds = 3000;
followRedirects = true;
data = new ArrayList<Connection.KeyVal>();
@@ -306,64 +364,78 @@ public class HttpConnection implements Connection {
parser = Parser.htmlParser();
}
+ @Override
public int timeout() {
return timeoutMilliseconds;
}
+ @Override
public Request timeout(int millis) {
- Validate.isTrue(millis >= 0, "Timeout milliseconds must be 0 (infinite) or greater");
+ Validate.isTrue(millis >= 0,
+ "Timeout milliseconds must be 0 (infinite) or greater");
timeoutMilliseconds = millis;
return this;
}
+ @Override
public boolean followRedirects() {
return followRedirects;
}
+ @Override
public Connection.Request followRedirects(boolean followRedirects) {
this.followRedirects = followRedirects;
return this;
}
+ @Override
public boolean ignoreHttpErrors() {
return ignoreHttpErrors;
}
+ @Override
public Connection.Request ignoreHttpErrors(boolean ignoreHttpErrors) {
this.ignoreHttpErrors = ignoreHttpErrors;
return this;
}
+ @Override
public boolean ignoreContentType() {
return ignoreContentType;
}
+ @Override
public Connection.Request ignoreContentType(boolean ignoreContentType) {
this.ignoreContentType = ignoreContentType;
return this;
}
+ @Override
public Request data(Connection.KeyVal keyval) {
Validate.notNull(keyval, "Key val must not be null");
data.add(keyval);
return this;
}
+ @Override
public Collection<Connection.KeyVal> data() {
return data;
}
-
+
+ @Override
public Request parser(Parser parser) {
this.parser = parser;
return this;
}
-
+
+ @Override
public Parser parser() {
return parser;
}
}
- public static class Response extends Base<Connection.Response> implements Connection.Response {
+ public static class Response extends Base<Connection.Response> implements
+ Connection.Response {
private static final int MAX_REDIRECTS = 20;
private int statusCode;
private String statusMessage;
@@ -382,44 +454,65 @@ public class HttpConnection implements Connection {
super();
if (previousResponse != null) {
numRedirects = previousResponse.numRedirects + 1;
- if (numRedirects >= MAX_REDIRECTS)
- throw new IOException(String.format("Too many redirects occurred trying to load URL %s", previousResponse.url()));
+ if (numRedirects >= MAX_REDIRECTS) {
+ throw new IOException(
+ String.format(
+ "Too many redirects occurred trying to load URL %s",
+ previousResponse.url()));
+ }
}
}
-
+
static Response execute(Connection.Request req) throws IOException {
return execute(req, null);
}
- static Response execute(Connection.Request req, Response previousResponse) throws IOException {
+ static Response execute(Connection.Request req,
+ Response previousResponse) throws IOException {
Validate.notNull(req, "Request must not be null");
String protocol = req.url().getProtocol();
- Validate
- .isTrue(protocol.equals("http") || protocol.equals("https"), "Only http & https protocols supported");
+ Validate.isTrue(
+ protocol.equals("http") || protocol.equals("https"),
+ "Only http & https protocols supported");
// set up the request for execution
- if (req.method() == Connection.Method.GET && req.data().size() > 0)
+ if (req.method() == Connection.Method.GET && req.data().size() > 0) {
serialiseRequestUrl(req); // appends query string
+ }
HttpURLConnection conn = createConnection(req);
conn.connect();
- if (req.method() == Connection.Method.POST)
- writePost(req.data(), conn.getOutputStream());
+ if (req.method() == Connection.Method.POST) {
+ writePost(req.data(), conn.getOutputStream());
+ }
int status = conn.getResponseCode();
boolean needsRedirect = false;
if (status != HttpURLConnection.HTTP_OK) {
- if (status == HttpURLConnection.HTTP_MOVED_TEMP || status == HttpURLConnection.HTTP_MOVED_PERM || status == HttpURLConnection.HTTP_SEE_OTHER)
+ if (status == HttpURLConnection.HTTP_MOVED_TEMP
+ || status == HttpURLConnection.HTTP_MOVED_PERM
+ || status == HttpURLConnection.HTTP_SEE_OTHER) {
needsRedirect = true;
- else if (!req.ignoreHttpErrors())
- throw new IOException(status + " error loading URL " + req.url().toString());
+ } else if (!req.ignoreHttpErrors()) {
+ throw new IOException(status + " error loading URL "
+ + req.url().toString());
+ }
}
Response res = new Response(previousResponse);
res.setupFromConnection(conn, previousResponse);
if (needsRedirect && req.followRedirects()) {
- req.method(Method.GET); // always redirect with a get. any data param from original req are dropped.
+ req.method(Method.GET); // always redirect with a get. any data
+ // param from original req are dropped.
req.data().clear();
req.url(new URL(req.url(), res.header("Location")));
- for (Map.Entry<String, String> cookie : res.cookies.entrySet()) { // add response cookies to request (for e.g. login posts)
+ for (Map.Entry<String, String> cookie : res.cookies.entrySet()) { // add
+ // response
+ // cookies
+ // to
+ // request
+ // (for
+ // e.g.
+ // login
+ // posts)
req.cookie(cookie.getKey(), cookie.getValue());
}
return execute(req, res);
@@ -429,77 +522,120 @@ public class HttpConnection implements Connection {
InputStream bodyStream = null;
InputStream dataStream = null;
try {
- dataStream = conn.getErrorStream() != null ? conn.getErrorStream() : conn.getInputStream();
- bodyStream = res.hasHeader("Content-Encoding") && res.header("Content-Encoding").equalsIgnoreCase("gzip") ?
- new BufferedInputStream(new GZIPInputStream(dataStream)) :
- new BufferedInputStream(dataStream);
-
+ dataStream = conn.getErrorStream() != null ? conn
+ .getErrorStream() : conn.getInputStream();
+ bodyStream = res.hasHeader("Content-Encoding")
+ && res.header("Content-Encoding").equalsIgnoreCase(
+ "gzip") ? new BufferedInputStream(
+ new GZIPInputStream(dataStream))
+ : new BufferedInputStream(dataStream);
+
res.byteData = DataUtil.readToByteBuffer(bodyStream);
- res.charset = DataUtil.getCharsetFromContentType(res.contentType); // may be null, readInputStream deals with it
+ res.charset = DataUtil
+ .getCharsetFromContentType(res.contentType); // may be
+ // null,
+ // readInputStream
+ // deals
+ // with it
} finally {
- if (bodyStream != null) bodyStream.close();
- if (dataStream != null) dataStream.close();
+ if (bodyStream != null) {
+ bodyStream.close();
+ }
+ if (dataStream != null) {
+ dataStream.close();
+ }
}
res.executed = true;
return res;
}
+ @Override
public int statusCode() {
return statusCode;
}
+ @Override
public String statusMessage() {
return statusMessage;
}
+ @Override
public String charset() {
return charset;
}
+ @Override
public String contentType() {
return contentType;
}
+ @Override
public Document parse() throws IOException {
- Validate.isTrue(executed, "Request must be executed (with .execute(), .get(), or .post() before parsing response");
- if (!req.ignoreContentType() && (contentType == null || !(contentType.startsWith("text/") || contentType.startsWith("application/xml") || contentType.startsWith("application/xhtml+xml"))))
- throw new IOException(String.format("Unhandled content type \"%s\" on URL %s. Must be text/*, application/xml, or application/xhtml+xml",
- contentType, url.toString()));
- Document doc = DataUtil.parseByteData(byteData, charset, url.toExternalForm(), req.parser());
+ Validate.isTrue(
+ executed,
+ "Request must be executed (with .execute(), .get(), or .post() before parsing response");
+ if (!req.ignoreContentType()
+ && (contentType == null || !(contentType
+ .startsWith("text/")
+ || contentType.startsWith("application/xml") || contentType
+ .startsWith("application/xhtml+xml")))) {
+ throw new IOException(
+ String.format(
+ "Unhandled content type \"%s\" on URL %s. Must be text/*, application/xml, or application/xhtml+xml",
+ contentType, url.toString()));
+ }
+ Document doc = DataUtil.parseByteData(byteData, charset,
+ url.toExternalForm(), req.parser());
byteData.rewind();
- charset = doc.outputSettings().charset().name(); // update charset from meta-equiv, possibly
+ charset = doc.outputSettings().charset().name(); // update charset
+ // from meta-equiv,
+ // possibly
return doc;
}
+ @Override
public String body() {
- Validate.isTrue(executed, "Request must be executed (with .execute(), .get(), or .post() before getting response body");
- // charset gets set from header on execute, and from meta-equiv on parse. parse may not have happened yet
+ Validate.isTrue(
+ executed,
+ "Request must be executed (with .execute(), .get(), or .post() before getting response body");
+ // charset gets set from header on execute, and from meta-equiv on
+ // parse. parse may not have happened yet
String body;
- if (charset == null)
- body = Charset.forName(DataUtil.defaultCharset).decode(byteData).toString();
- else
+ if (charset == null) {
+ body = Charset.forName(DataUtil.defaultCharset)
+ .decode(byteData).toString();
+ } else {
body = Charset.forName(charset).decode(byteData).toString();
+ }
byteData.rewind();
return body;
}
+ @Override
public byte[] bodyAsBytes() {
- Validate.isTrue(executed, "Request must be executed (with .execute(), .get(), or .post() before getting response body");
+ Validate.isTrue(
+ executed,
+ "Request must be executed (with .execute(), .get(), or .post() before getting response body");
return byteData.array();
}
// set up connection defaults, and details from request
- private static HttpURLConnection createConnection(Connection.Request req) throws IOException {
- HttpURLConnection conn = (HttpURLConnection) req.url().openConnection();
+ private static HttpURLConnection createConnection(Connection.Request req)
+ throws IOException {
+ HttpURLConnection conn = (HttpURLConnection) req.url()
+ .openConnection();
conn.setRequestMethod(req.method().name());
- conn.setInstanceFollowRedirects(false); // don't rely on native redirection support
+ conn.setInstanceFollowRedirects(false); // don't rely on native
+ // redirection support
conn.setConnectTimeout(req.timeout());
conn.setReadTimeout(req.timeout());
- if (req.method() == Method.POST)
+ if (req.method() == Method.POST) {
conn.setDoOutput(true);
- if (req.cookies().size() > 0)
+ }
+ if (req.cookies().size() > 0) {
conn.addRequestProperty("Cookie", getRequestCookieString(req));
+ }
for (Map.Entry<String, String> header : req.headers().entrySet()) {
conn.addRequestProperty(header.getKey(), header.getValue());
}
@@ -507,7 +643,8 @@ public class HttpConnection implements Connection {
}
// set up url, method, header, cookies
- private void setupFromConnection(HttpURLConnection conn, Connection.Response previousResponse) throws IOException {
+ private void setupFromConnection(HttpURLConnection conn,
+ Connection.Response previousResponse) throws IOException {
method = Connection.Method.valueOf(conn.getRequestMethod());
url = conn.getURL();
statusCode = conn.getResponseCode();
@@ -517,11 +654,14 @@ public class HttpConnection implements Connection {
Map<String, List<String>> resHeaders = conn.getHeaderFields();
processResponseHeaders(resHeaders);
- // if from a redirect, map previous response cookies into this response
+ // if from a redirect, map previous response cookies into this
+ // response
if (previousResponse != null) {
- for (Map.Entry<String, String> prevCookie : previousResponse.cookies().entrySet()) {
- if (!hasCookie(prevCookie.getKey()))
+ for (Map.Entry<String, String> prevCookie : previousResponse
+ .cookies().entrySet()) {
+ if (!hasCookie(prevCookie.getKey())) {
cookie(prevCookie.getKey(), prevCookie.getValue());
+ }
}
}
}
@@ -529,86 +669,98 @@ public class HttpConnection implements Connection {
void processResponseHeaders(Map<String, List<String>> resHeaders) {
for (Map.Entry<String, List<String>> entry : resHeaders.entrySet()) {
String name = entry.getKey();
- if (name == null)
+ if (name == null) {
continue; // http/1.1 line
+ }
List<String> values = entry.getValue();
if (name.equalsIgnoreCase("Set-Cookie")) {
for (String value : values) {
- if (value == null)
+ if (value == null) {
continue;
+ }
TokenQueue cd = new TokenQueue(value);
String cookieName = cd.chompTo("=").trim();
String cookieVal = cd.consumeTo(";").trim();
- if (cookieVal == null)
+ if (cookieVal == null) {
cookieVal = "";
+ }
// ignores path, date, domain, secure et al. req'd?
// name not blank, value not null
- if (cookieName != null && cookieName.length() > 0)
+ if (cookieName != null && cookieName.length() > 0) {
cookie(cookieName, cookieVal);
+ }
}
} else { // only take the first instance of each header
- if (!values.isEmpty())
+ if (!values.isEmpty()) {
header(name, values.get(0));
+ }
}
}
}
- private static void writePost(Collection<Connection.KeyVal> data, OutputStream outputStream) throws IOException {
- OutputStreamWriter w = new OutputStreamWriter(outputStream, DataUtil.defaultCharset);
+ private static void writePost(Collection<Connection.KeyVal> data,
+ OutputStream outputStream) throws IOException {
+ OutputStreamWriter w = new OutputStreamWriter(outputStream,
+ DataUtil.defaultCharset);
boolean first = true;
for (Connection.KeyVal keyVal : data) {
- if (!first)
+ if (!first) {
w.append('&');
- else
+ } else {
first = false;
-
+ }
+
w.write(URLEncoder.encode(keyVal.key(), DataUtil.defaultCharset));
w.write('=');
- w.write(URLEncoder.encode(keyVal.value(), DataUtil.defaultCharset));
+ w.write(URLEncoder.encode(keyVal.value(),
+ DataUtil.defaultCharset));
}
w.close();
}
-
+
private static String getRequestCookieString(Connection.Request req) {
StringBuilder sb = new StringBuilder();
boolean first = true;
for (Map.Entry<String, String> cookie : req.cookies().entrySet()) {
- if (!first)
+ if (!first) {
sb.append("; ");
- else
+ } else {
first = false;
- sb.append(cookie.getKey()).append('=').append(cookie.getValue());
- // todo: spec says only ascii, no escaping / encoding defined. validate on set? or escape somehow here?
+ }
+ sb.append(cookie.getKey()).append('=')
+ .append(cookie.getValue());
+ // todo: spec says only ascii, no escaping / encoding defined.
+ // validate on set? or escape somehow here?
}
return sb.toString();
}
// for get url reqs, serialise the data map into the url
- private static void serialiseRequestUrl(Connection.Request req) throws IOException {
+ private static void serialiseRequestUrl(Connection.Request req)
+ throws IOException {
URL in = req.url();
StringBuilder url = new StringBuilder();
boolean first = true;
// reconstitute the query, ready for appends
- url
- .append(in.getProtocol())
- .append("://")
- .append(in.getAuthority()) // includes host, port
- .append(in.getPath())
- .append("?");
+ url.append(in.getProtocol()).append("://")
+ .append(in.getAuthority()) // includes host, port
+ .append(in.getPath()).append("?");
if (in.getQuery() != null) {
url.append(in.getQuery());
first = false;
}
for (Connection.KeyVal keyVal : req.data()) {
- if (!first)
+ if (!first) {
url.append('&');
- else
+ } else {
first = false;
- url
- .append(URLEncoder.encode(keyVal.key(), DataUtil.defaultCharset))
- .append('=')
- .append(URLEncoder.encode(keyVal.value(), DataUtil.defaultCharset));
+ }
+ url.append(
+ URLEncoder.encode(keyVal.key(), DataUtil.defaultCharset))
+ .append('=')
+ .append(URLEncoder.encode(keyVal.value(),
+ DataUtil.defaultCharset));
}
req.url(new URL(url.toString()));
req.data().clear(); // moved into url as get params
@@ -630,22 +782,26 @@ public class HttpConnection implements Connection {
this.value = value;
}
+ @Override
public KeyVal key(String key) {
Validate.notEmpty(key, "Data key must not be empty");
this.key = key;
return this;
}
+ @Override
public String key() {
return key;
}
+ @Override
public KeyVal value(String value) {
Validate.notNull(value, "Data value must not be null");
this.value = value;
return this;
}
+ @Override
public String value() {
return value;
}
@@ -653,6 +809,6 @@ public class HttpConnection implements Connection {
@Override
public String toString() {
return key + "=" + value;
- }
+ }
}
}
diff --git a/server/src/org/jsoup/helper/StringUtil.java b/server/src/org/jsoup/helper/StringUtil.java
index 071a92c7a5..5a3d19b0aa 100644
--- a/server/src/org/jsoup/helper/StringUtil.java
+++ b/server/src/org/jsoup/helper/StringUtil.java
@@ -8,12 +8,16 @@ import java.util.Iterator;
*/
public final class StringUtil {
// memoised padding up to 10
- private static final String[] padding = {"", " ", " ", " ", " ", " ", " ", " ", " ", " ", " "};
+ private static final String[] padding = { "", " ", " ", " ", " ",
+ " ", " ", " ", " ", " ", " " };
/**
* Join a collection of strings by a seperator
- * @param strings collection of string objects
- * @param sep string to place between strings
+ *
+ * @param strings
+ * collection of string objects
+ * @param sep
+ * string to place between strings
* @return joined string
*/
public static String join(Collection strings, String sep) {
@@ -22,17 +26,22 @@ public final class StringUtil {
/**
* Join a collection of strings by a seperator
- * @param strings iterator of string objects
- * @param sep string to place between strings
+ *
+ * @param strings
+ * iterator of string objects
+ * @param sep
+ * string to place between strings
* @return joined string
*/
public static String join(Iterator strings, String sep) {
- if (!strings.hasNext())
+ if (!strings.hasNext()) {
return "";
+ }
String start = strings.next().toString();
- if (!strings.hasNext()) // only one, avoid builder
+ if (!strings.hasNext()) {
return start;
+ }
StringBuilder sb = new StringBuilder(64).append(start);
while (strings.hasNext()) {
@@ -44,62 +53,79 @@ public final class StringUtil {
/**
* Returns space padding
- * @param width amount of padding desired
+ *
+ * @param width
+ * amount of padding desired
* @return string of spaces * width
*/
public static String padding(int width) {
- if (width < 0)
+ if (width < 0) {
throw new IllegalArgumentException("width must be > 0");
+ }
- if (width < padding.length)
+ if (width < padding.length) {
return padding[width];
+ }
char[] out = new char[width];
- for (int i = 0; i < width; i++)
+ for (int i = 0; i < width; i++) {
out[i] = ' ';
+ }
return String.valueOf(out);
}
/**
- * Tests if a string is blank: null, emtpy, or only whitespace (" ", \r\n, \t, etc)
- * @param string string to test
+ * Tests if a string is blank: null, emtpy, or only whitespace (" ", \r\n,
+ * \t, etc)
+ *
+ * @param string
+ * string to test
* @return if string is blank
*/
public static boolean isBlank(String string) {
- if (string == null || string.length() == 0)
+ if (string == null || string.length() == 0) {
return true;
+ }
int l = string.length();
for (int i = 0; i < l; i++) {
- if (!StringUtil.isWhitespace(string.codePointAt(i)))
+ if (!StringUtil.isWhitespace(string.codePointAt(i))) {
return false;
+ }
}
return true;
}
/**
* Tests if a string is numeric, i.e. contains only digit characters
- * @param string string to test
- * @return true if only digit chars, false if empty or null or contains non-digit chrs
+ *
+ * @param string
+ * string to test
+ * @return true if only digit chars, false if empty or null or contains
+ * non-digit chrs
*/
public static boolean isNumeric(String string) {
- if (string == null || string.length() == 0)
+ if (string == null || string.length() == 0) {
return false;
+ }
int l = string.length();
for (int i = 0; i < l; i++) {
- if (!Character.isDigit(string.codePointAt(i)))
+ if (!Character.isDigit(string.codePointAt(i))) {
return false;
+ }
}
return true;
}
/**
* Tests if a code point is "whitespace" as defined in the HTML spec.
- * @param c code point to test
+ *
+ * @param c
+ * code point to test
* @return true if code point is whitespace, false otherwise
*/
- public static boolean isWhitespace(int c){
+ public static boolean isWhitespace(int c) {
return c == ' ' || c == '\t' || c == '\n' || c == '\f' || c == '\r';
}
@@ -117,12 +143,12 @@ public final class StringUtil {
modified = true;
continue;
}
- if (c != ' ')
+ if (c != ' ') {
modified = true;
+ }
sb.append(' ');
lastWasWhite = true;
- }
- else {
+ } else {
sb.appendCodePoint(c);
lastWasWhite = false;
}
@@ -132,8 +158,9 @@ public final class StringUtil {
public static boolean in(String needle, String... haystack) {
for (String hay : haystack) {
- if (hay.equals(needle))
- return true;
+ if (hay.equals(needle)) {
+ return true;
+ }
}
return false;
}
diff --git a/server/src/org/jsoup/helper/Validate.java b/server/src/org/jsoup/helper/Validate.java
index 814bcc3a40..e9fe04f87b 100644
--- a/server/src/org/jsoup/helper/Validate.java
+++ b/server/src/org/jsoup/helper/Validate.java
@@ -4,69 +4,93 @@ package org.jsoup.helper;
* Simple validation methods. Designed for jsoup internal use
*/
public final class Validate {
-
- private Validate() {}
+
+ private Validate() {
+ }
/**
* Validates that the object is not null
- * @param obj object to test
+ *
+ * @param obj
+ * object to test
*/
public static void notNull(Object obj) {
- if (obj == null)
+ if (obj == null) {
throw new IllegalArgumentException("Object must not be null");
+ }
}
/**
* Validates that the object is not null
- * @param obj object to test
- * @param msg message to output if validation fails
+ *
+ * @param obj
+ * object to test
+ * @param msg
+ * message to output if validation fails
*/
public static void notNull(Object obj, String msg) {
- if (obj == null)
+ if (obj == null) {
throw new IllegalArgumentException(msg);
+ }
}
/**
* Validates that the value is true
- * @param val object to test
+ *
+ * @param val
+ * object to test
*/
public static void isTrue(boolean val) {
- if (!val)
+ if (!val) {
throw new IllegalArgumentException("Must be true");
+ }
}
/**
* Validates that the value is true
- * @param val object to test
- * @param msg message to output if validation fails
+ *
+ * @param val
+ * object to test
+ * @param msg
+ * message to output if validation fails
*/
public static void isTrue(boolean val, String msg) {
- if (!val)
+ if (!val) {
throw new IllegalArgumentException(msg);
+ }
}
/**
* Validates that the value is false
- * @param val object to test
+ *
+ * @param val
+ * object to test
*/
public static void isFalse(boolean val) {
- if (val)
+ if (val) {
throw new IllegalArgumentException("Must be false");
+ }
}
/**
* Validates that the value is false
- * @param val object to test
- * @param msg message to output if validation fails
+ *
+ * @param val
+ * object to test
+ * @param msg
+ * message to output if validation fails
*/
public static void isFalse(boolean val, String msg) {
- if (val)
+ if (val) {
throw new IllegalArgumentException(msg);
+ }
}
/**
* Validates that the array contains no null elements
- * @param objects the array to test
+ *
+ * @param objects
+ * the array to test
*/
public static void noNullElements(Object[] objects) {
noNullElements(objects, "Array must not contain any null objects");
@@ -74,37 +98,51 @@ public final class Validate {
/**
* Validates that the array contains no null elements
- * @param objects the array to test
- * @param msg message to output if validation fails
+ *
+ * @param objects
+ * the array to test
+ * @param msg
+ * message to output if validation fails
*/
public static void noNullElements(Object[] objects, String msg) {
- for (Object obj : objects)
- if (obj == null)
+ for (Object obj : objects) {
+ if (obj == null) {
throw new IllegalArgumentException(msg);
+ }
+ }
}
/**
* Validates that the string is not empty
- * @param string the string to test
+ *
+ * @param string
+ * the string to test
*/
public static void notEmpty(String string) {
- if (string == null || string.length() == 0)
+ if (string == null || string.length() == 0) {
throw new IllegalArgumentException("String must not be empty");
+ }
}
/**
* Validates that the string is not empty
- * @param string the string to test
- * @param msg message to output if validation fails
+ *
+ * @param string
+ * the string to test
+ * @param msg
+ * message to output if validation fails
*/
public static void notEmpty(String string, String msg) {
- if (string == null || string.length() == 0)
+ if (string == null || string.length() == 0) {
throw new IllegalArgumentException(msg);
+ }
}
/**
- Cause a failure.
- @param msg message to output.
+ * Cause a failure.
+ *
+ * @param msg
+ * message to output.
*/
public static void fail(String msg) {
throw new IllegalArgumentException(msg);
diff --git a/server/src/org/jsoup/nodes/Attribute.java b/server/src/org/jsoup/nodes/Attribute.java
index 02eb29db83..5f27b4fcc4 100644
--- a/server/src/org/jsoup/nodes/Attribute.java
+++ b/server/src/org/jsoup/nodes/Attribute.java
@@ -1,21 +1,26 @@
package org.jsoup.nodes;
-import org.jsoup.helper.Validate;
-
import java.util.Map;
-/**
- A single key + value attribute. Keys are trimmed and normalised to lower-case.
+import org.jsoup.helper.Validate;
- @author Jonathan Hedley, jonathan@hedley.net */
-public class Attribute implements Map.Entry<String, String>, Cloneable {
+/**
+ * A single key + value attribute. Keys are trimmed and normalised to
+ * lower-case.
+ *
+ * @author Jonathan Hedley, jonathan@hedley.net
+ */
+public class Attribute implements Map.Entry<String, String>, Cloneable {
private String key;
private String value;
/**
* Create a new attribute from unencoded (raw) key and value.
- * @param key attribute key
- * @param value attribute value
+ *
+ * @param key
+ * attribute key
+ * @param value
+ * attribute value
* @see #createFromEncoded
*/
public Attribute(String key, String value) {
@@ -26,16 +31,20 @@ public class Attribute implements Map.Entry<String, String>, Cloneable {
}
/**
- Get the attribute key.
- @return the attribute key
+ * Get the attribute key.
+ *
+ * @return the attribute key
*/
+ @Override
public String getKey() {
return key;
}
/**
- Set the attribute key. Gets normalised as per the constructor method.
- @param key the new key; must not be null
+ * Set the attribute key. Gets normalised as per the constructor method.
+ *
+ * @param key
+ * the new key; must not be null
*/
public void setKey(String key) {
Validate.notEmpty(key);
@@ -43,17 +52,22 @@ public class Attribute implements Map.Entry<String, String>, Cloneable {
}
/**
- Get the attribute value.
- @return the attribute value
+ * Get the attribute value.
+ *
+ * @return the attribute value
*/
+ @Override
public String getValue() {
return value;
}
/**
- Set the attribute value.
- @param value the new attribute value; must not be null
+ * Set the attribute value.
+ *
+ * @param value
+ * the new attribute value; must not be null
*/
+ @Override
public String setValue(String value) {
Validate.notNull(value);
String old = this.value;
@@ -62,53 +76,73 @@ public class Attribute implements Map.Entry<String, String>, Cloneable {
}
/**
- Get the HTML representation of this attribute; e.g. {@code href="index.html"}.
- @return HTML
+ * Get the HTML representation of this attribute; e.g.
+ * {@code href="index.html"}.
+ *
+ * @return HTML
*/
public String html() {
- return key + "=\"" + Entities.escape(value, (new Document("")).outputSettings()) + "\"";
+ return key + "=\""
+ + Entities.escape(value, (new Document("")).outputSettings())
+ + "\"";
}
-
+
protected void html(StringBuilder accum, Document.OutputSettings out) {
- accum
- .append(key)
- .append("=\"")
- .append(Entities.escape(value, out))
- .append("\"");
+ accum.append(key).append("=\"").append(Entities.escape(value, out))
+ .append("\"");
}
/**
- Get the string representation of this attribute, implemented as {@link #html()}.
- @return string
+ * Get the string representation of this attribute, implemented as
+ * {@link #html()}.
+ *
+ * @return string
*/
+ @Override
public String toString() {
return html();
}
/**
- * Create a new Attribute from an unencoded key and a HTML attribute encoded value.
- * @param unencodedKey assumes the key is not encoded, as can be only run of simple \w chars.
- * @param encodedValue HTML attribute encoded value
+ * Create a new Attribute from an unencoded key and a HTML attribute encoded
+ * value.
+ *
+ * @param unencodedKey
+ * assumes the key is not encoded, as can be only run of simple
+ * \w chars.
+ * @param encodedValue
+ * HTML attribute encoded value
* @return attribute
*/
- public static Attribute createFromEncoded(String unencodedKey, String encodedValue) {
+ public static Attribute createFromEncoded(String unencodedKey,
+ String encodedValue) {
String value = Entities.unescape(encodedValue, true);
return new Attribute(unencodedKey, value);
}
protected boolean isDataAttribute() {
- return key.startsWith(Attributes.dataPrefix) && key.length() > Attributes.dataPrefix.length();
+ return key.startsWith(Attributes.dataPrefix)
+ && key.length() > Attributes.dataPrefix.length();
}
@Override
public boolean equals(Object o) {
- if (this == o) return true;
- if (!(o instanceof Attribute)) return false;
+ if (this == o) {
+ return true;
+ }
+ if (!(o instanceof Attribute)) {
+ return false;
+ }
Attribute attribute = (Attribute) o;
- if (key != null ? !key.equals(attribute.key) : attribute.key != null) return false;
- if (value != null ? !value.equals(attribute.value) : attribute.value != null) return false;
+ if (key != null ? !key.equals(attribute.key) : attribute.key != null) {
+ return false;
+ }
+ if (value != null ? !value.equals(attribute.value)
+ : attribute.value != null) {
+ return false;
+ }
return true;
}
@@ -123,7 +157,9 @@ public class Attribute implements Map.Entry<String, String>, Cloneable {
@Override
public Attribute clone() {
try {
- return (Attribute) super.clone(); // only fields are immutable strings key and value, so no more deep copy required
+ return (Attribute) super.clone(); // only fields are immutable
+ // strings key and value, so no
+ // more deep copy required
} catch (CloneNotSupportedException e) {
throw new RuntimeException(e);
}
diff --git a/server/src/org/jsoup/nodes/Attributes.java b/server/src/org/jsoup/nodes/Attributes.java
index 9436750fc9..8757d1bf97 100644
--- a/server/src/org/jsoup/nodes/Attributes.java
+++ b/server/src/org/jsoup/nodes/Attributes.java
@@ -1,46 +1,63 @@
package org.jsoup.nodes;
-import org.jsoup.helper.Validate;
+import java.util.AbstractMap;
+import java.util.AbstractSet;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Iterator;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
-import java.util.*;
+import org.jsoup.helper.Validate;
/**
* The attributes of an Element.
* <p/>
- * Attributes are treated as a map: there can be only one value associated with an attribute key.
+ * Attributes are treated as a map: there can be only one value associated with
+ * an attribute key.
* <p/>
- * Attribute key and value comparisons are done case insensitively, and keys are normalised to
- * lower-case.
+ * Attribute key and value comparisons are done case insensitively, and keys are
+ * normalised to lower-case.
*
* @author Jonathan Hedley, jonathan@hedley.net
*/
public class Attributes implements Iterable<Attribute>, Cloneable {
protected static final String dataPrefix = "data-";
-
+
private LinkedHashMap<String, Attribute> attributes = null;
+
// linked hash map to preserve insertion order.
- // null be default as so many elements have no attributes -- saves a good chunk of memory
+ // null be default as so many elements have no attributes -- saves a good
+ // chunk of memory
/**
- Get an attribute value by key.
- @param key the attribute key
- @return the attribute value if set; or empty string if not set.
- @see #hasKey(String)
+ * Get an attribute value by key.
+ *
+ * @param key
+ * the attribute key
+ * @return the attribute value if set; or empty string if not set.
+ * @see #hasKey(String)
*/
public String get(String key) {
Validate.notEmpty(key);
- if (attributes == null)
+ if (attributes == null) {
return "";
+ }
Attribute attr = attributes.get(key.toLowerCase());
return attr != null ? attr.getValue() : "";
}
/**
- Set a new attribute, or replace an existing one by key.
- @param key attribute key
- @param value attribute value
+ * Set a new attribute, or replace an existing one by key.
+ *
+ * @param key
+ * attribute key
+ * @param value
+ * attribute value
*/
public void put(String key, String value) {
Attribute attr = new Attribute(key, value);
@@ -48,70 +65,88 @@ public class Attributes implements Iterable<Attribute>, Cloneable {
}
/**
- Set a new attribute, or replace an existing one by key.
- @param attribute attribute
+ * Set a new attribute, or replace an existing one by key.
+ *
+ * @param attribute
+ * attribute
*/
public void put(Attribute attribute) {
Validate.notNull(attribute);
- if (attributes == null)
- attributes = new LinkedHashMap<String, Attribute>(2);
+ if (attributes == null) {
+ attributes = new LinkedHashMap<String, Attribute>(2);
+ }
attributes.put(attribute.getKey(), attribute);
}
/**
- Remove an attribute by key.
- @param key attribute key to remove
+ * Remove an attribute by key.
+ *
+ * @param key
+ * attribute key to remove
*/
public void remove(String key) {
Validate.notEmpty(key);
- if (attributes == null)
+ if (attributes == null) {
return;
+ }
attributes.remove(key.toLowerCase());
}
/**
- Tests if these attributes contain an attribute with this key.
- @param key key to check for
- @return true if key exists, false otherwise
+ * Tests if these attributes contain an attribute with this key.
+ *
+ * @param key
+ * key to check for
+ * @return true if key exists, false otherwise
*/
public boolean hasKey(String key) {
return attributes != null && attributes.containsKey(key.toLowerCase());
}
/**
- Get the number of attributes in this set.
- @return size
+ * Get the number of attributes in this set.
+ *
+ * @return size
*/
public int size() {
- if (attributes == null)
+ if (attributes == null) {
return 0;
+ }
return attributes.size();
}
/**
- Add all the attributes from the incoming set to this set.
- @param incoming attributes to add to these attributes.
+ * Add all the attributes from the incoming set to this set.
+ *
+ * @param incoming
+ * attributes to add to these attributes.
*/
public void addAll(Attributes incoming) {
- if (incoming.size() == 0)
+ if (incoming.size() == 0) {
return;
- if (attributes == null)
+ }
+ if (attributes == null) {
attributes = new LinkedHashMap<String, Attribute>(incoming.size());
+ }
attributes.putAll(incoming.attributes);
}
-
+
+ @Override
public Iterator<Attribute> iterator() {
return asList().iterator();
}
/**
- Get the attributes as a List, for iteration. Do not modify the keys of the attributes via this view, as changes
- to keys will not be recognised in the containing set.
- @return an view of the attributes as a List.
+ * Get the attributes as a List, for iteration. Do not modify the keys of
+ * the attributes via this view, as changes to keys will not be recognised
+ * in the containing set.
+ *
+ * @return an view of the attributes as a List.
*/
public List<Attribute> asList() {
- if (attributes == null)
+ if (attributes == null) {
return Collections.emptyList();
+ }
List<Attribute> list = new ArrayList<Attribute>(attributes.size());
for (Map.Entry<String, Attribute> entry : attributes.entrySet()) {
@@ -121,8 +156,9 @@ public class Attributes implements Iterable<Attribute>, Cloneable {
}
/**
- * Retrieves a filtered view of attributes that are HTML5 custom data attributes; that is, attributes with keys
- * starting with {@code data-}.
+ * Retrieves a filtered view of attributes that are HTML5 custom data
+ * attributes; that is, attributes with keys starting with {@code data-}.
+ *
* @return map of custom data attributes.
*/
public Map<String, String> dataset() {
@@ -130,42 +166,54 @@ public class Attributes implements Iterable<Attribute>, Cloneable {
}
/**
- Get the HTML representation of these attributes.
- @return HTML
+ * Get the HTML representation of these attributes.
+ *
+ * @return HTML
*/
public String html() {
StringBuilder accum = new StringBuilder();
- html(accum, (new Document("")).outputSettings()); // output settings a bit funky, but this html() seldom used
+ html(accum, (new Document("")).outputSettings()); // output settings a
+ // bit funky, but this
+ // html() seldom used
return accum.toString();
}
-
+
void html(StringBuilder accum, Document.OutputSettings out) {
- if (attributes == null)
+ if (attributes == null) {
return;
-
+ }
+
for (Map.Entry<String, Attribute> entry : attributes.entrySet()) {
Attribute attribute = entry.getValue();
accum.append(" ");
attribute.html(accum, out);
}
}
-
+
+ @Override
public String toString() {
return html();
}
-
+
@Override
public boolean equals(Object o) {
- if (this == o) return true;
- if (!(o instanceof Attributes)) return false;
-
+ if (this == o) {
+ return true;
+ }
+ if (!(o instanceof Attributes)) {
+ return false;
+ }
+
Attributes that = (Attributes) o;
-
- if (attributes != null ? !attributes.equals(that.attributes) : that.attributes != null) return false;
-
+
+ if (attributes != null ? !attributes.equals(that.attributes)
+ : that.attributes != null) {
+ return false;
+ }
+
return true;
}
-
+
@Override
public int hashCode() {
return attributes != null ? attributes.hashCode() : 0;
@@ -173,8 +221,9 @@ public class Attributes implements Iterable<Attribute>, Cloneable {
@Override
public Attributes clone() {
- if (attributes == null)
+ if (attributes == null) {
return new Attributes();
+ }
Attributes clone;
try {
@@ -182,19 +231,23 @@ public class Attributes implements Iterable<Attribute>, Cloneable {
} catch (CloneNotSupportedException e) {
throw new RuntimeException(e);
}
- clone.attributes = new LinkedHashMap<String, Attribute>(attributes.size());
- for (Attribute attribute: this)
+ clone.attributes = new LinkedHashMap<String, Attribute>(
+ attributes.size());
+ for (Attribute attribute : this) {
clone.attributes.put(attribute.getKey(), attribute.clone());
+ }
return clone;
}
private class Dataset extends AbstractMap<String, String> {
private Dataset() {
- if (attributes == null)
+ if (attributes == null) {
attributes = new LinkedHashMap<String, Attribute>(2);
+ }
}
+ @Override
public Set<Entry<String, String>> entrySet() {
return new EntrySet();
}
@@ -202,41 +255,54 @@ public class Attributes implements Iterable<Attribute>, Cloneable {
@Override
public String put(String key, String value) {
String dataKey = dataKey(key);
- String oldValue = hasKey(dataKey) ? attributes.get(dataKey).getValue() : null;
+ String oldValue = hasKey(dataKey) ? attributes.get(dataKey)
+ .getValue() : null;
Attribute attr = new Attribute(dataKey, value);
attributes.put(dataKey, attr);
return oldValue;
}
private class EntrySet extends AbstractSet<Map.Entry<String, String>> {
+ @Override
public Iterator<Map.Entry<String, String>> iterator() {
return new DatasetIterator();
}
+ @Override
public int size() {
int count = 0;
Iterator iter = new DatasetIterator();
- while (iter.hasNext())
+ while (iter.hasNext()) {
count++;
+ }
return count;
}
}
- private class DatasetIterator implements Iterator<Map.Entry<String, String>> {
- private Iterator<Attribute> attrIter = attributes.values().iterator();
+ private class DatasetIterator implements
+ Iterator<Map.Entry<String, String>> {
+ private Iterator<Attribute> attrIter = attributes.values()
+ .iterator();
private Attribute attr;
+
+ @Override
public boolean hasNext() {
while (attrIter.hasNext()) {
attr = attrIter.next();
- if (attr.isDataAttribute()) return true;
+ if (attr.isDataAttribute()) {
+ return true;
+ }
}
return false;
}
+ @Override
public Entry<String, String> next() {
- return new Attribute(attr.getKey().substring(dataPrefix.length()), attr.getValue());
+ return new Attribute(attr.getKey().substring(
+ dataPrefix.length()), attr.getValue());
}
+ @Override
public void remove() {
attributes.remove(attr.getKey());
}
diff --git a/server/src/org/jsoup/nodes/Comment.java b/server/src/org/jsoup/nodes/Comment.java
index 37fd4368fa..6abe0e3066 100644
--- a/server/src/org/jsoup/nodes/Comment.java
+++ b/server/src/org/jsoup/nodes/Comment.java
@@ -1,45 +1,55 @@
package org.jsoup.nodes;
/**
- A comment node.
-
- @author Jonathan Hedley, jonathan@hedley.net */
+ * A comment node.
+ *
+ * @author Jonathan Hedley, jonathan@hedley.net
+ */
public class Comment extends Node {
private static final String COMMENT_KEY = "comment";
/**
- Create a new comment node.
- @param data The contents of the comment
- @param baseUri base URI
+ * Create a new comment node.
+ *
+ * @param data
+ * The contents of the comment
+ * @param baseUri
+ * base URI
*/
public Comment(String data, String baseUri) {
super(baseUri);
attributes.put(COMMENT_KEY, data);
}
+ @Override
public String nodeName() {
return "#comment";
}
/**
- Get the contents of the comment.
- @return comment content
+ * Get the contents of the comment.
+ *
+ * @return comment content
*/
public String getData() {
return attributes.get(COMMENT_KEY);
}
- void outerHtmlHead(StringBuilder accum, int depth, Document.OutputSettings out) {
- if (out.prettyPrint())
+ @Override
+ void outerHtmlHead(StringBuilder accum, int depth,
+ Document.OutputSettings out) {
+ if (out.prettyPrint()) {
indent(accum, depth, out);
- accum
- .append("<!--")
- .append(getData())
- .append("-->");
+ }
+ accum.append("<!--").append(getData()).append("-->");
}
- void outerHtmlTail(StringBuilder accum, int depth, Document.OutputSettings out) {}
+ @Override
+ void outerHtmlTail(StringBuilder accum, int depth,
+ Document.OutputSettings out) {
+ }
+ @Override
public String toString() {
return outerHtml();
}
diff --git a/server/src/org/jsoup/nodes/DataNode.java b/server/src/org/jsoup/nodes/DataNode.java
index a64f56f0a4..cc377a4cc8 100644
--- a/server/src/org/jsoup/nodes/DataNode.java
+++ b/server/src/org/jsoup/nodes/DataNode.java
@@ -1,29 +1,37 @@
package org.jsoup.nodes;
/**
- A data node, for contents of style, script tags etc, where contents should not show in text().
-
- @author Jonathan Hedley, jonathan@hedley.net */
-public class DataNode extends Node{
+ * A data node, for contents of style, script tags etc, where contents should
+ * not show in text().
+ *
+ * @author Jonathan Hedley, jonathan@hedley.net
+ */
+public class DataNode extends Node {
private static final String DATA_KEY = "data";
/**
- Create a new DataNode.
- @param data data contents
- @param baseUri base URI
+ * Create a new DataNode.
+ *
+ * @param data
+ * data contents
+ * @param baseUri
+ * base URI
*/
public DataNode(String data, String baseUri) {
super(baseUri);
attributes.put(DATA_KEY, data);
}
+ @Override
public String nodeName() {
return "#data";
}
/**
- Get the data contents of this node. Will be unescaped and with original new lines, space etc.
- @return data
+ * Get the data contents of this node. Will be unescaped and with original
+ * new lines, space etc.
+ *
+ * @return data
*/
public String getWholeData() {
return attributes.get(DATA_KEY);
@@ -31,7 +39,9 @@ public class DataNode extends Node{
/**
* Set the data contents of this node.
- * @param data unencoded data
+ *
+ * @param data
+ * unencoded data
* @return this node, for chaining
*/
public DataNode setWholeData(String data) {
@@ -39,21 +49,31 @@ public class DataNode extends Node{
return this;
}
- void outerHtmlHead(StringBuilder accum, int depth, Document.OutputSettings out) {
- accum.append(getWholeData()); // data is not escaped in return from data nodes, so " in script, style is plain
+ @Override
+ void outerHtmlHead(StringBuilder accum, int depth,
+ Document.OutputSettings out) {
+ accum.append(getWholeData()); // data is not escaped in return from data
+ // nodes, so " in script, style is plain
}
- void outerHtmlTail(StringBuilder accum, int depth, Document.OutputSettings out) {}
+ @Override
+ void outerHtmlTail(StringBuilder accum, int depth,
+ Document.OutputSettings out) {
+ }
+ @Override
public String toString() {
return outerHtml();
}
/**
- Create a new DataNode from HTML encoded data.
- @param encodedData encoded data
- @param baseUri bass URI
- @return new DataNode
+ * Create a new DataNode from HTML encoded data.
+ *
+ * @param encodedData
+ * encoded data
+ * @param baseUri
+ * bass URI
+ * @return new DataNode
*/
public static DataNode createFromEncoded(String encodedData, String baseUri) {
String data = Entities.unescape(encodedData);
diff --git a/server/src/org/jsoup/nodes/Document.java b/server/src/org/jsoup/nodes/Document.java
index adb371ce14..f1c4595faa 100644
--- a/server/src/org/jsoup/nodes/Document.java
+++ b/server/src/org/jsoup/nodes/Document.java
@@ -1,36 +1,42 @@
package org.jsoup.nodes;
-import org.jsoup.helper.Validate;
-import org.jsoup.parser.Tag;
-import org.jsoup.select.Elements;
-
import java.nio.charset.Charset;
import java.nio.charset.CharsetEncoder;
import java.util.ArrayList;
import java.util.List;
-/**
- A HTML Document.
+import org.jsoup.helper.Validate;
+import org.jsoup.parser.Tag;
+import org.jsoup.select.Elements;
- @author Jonathan Hedley, jonathan@hedley.net */
+/**
+ * A HTML Document.
+ *
+ * @author Jonathan Hedley, jonathan@hedley.net
+ */
public class Document extends Element {
private OutputSettings outputSettings = new OutputSettings();
private QuirksMode quirksMode = QuirksMode.noQuirks;
/**
- Create a new, empty Document.
- @param baseUri base URI of document
- @see org.jsoup.Jsoup#parse
- @see #createShell
+ * Create a new, empty Document.
+ *
+ * @param baseUri
+ * base URI of document
+ * @see org.jsoup.Jsoup#parse
+ * @see #createShell
*/
public Document(String baseUri) {
super(Tag.valueOf("#root"), baseUri);
}
/**
- Create a valid, empty shell of a document, suitable for adding more elements to.
- @param baseUri baseUri of document
- @return document with html, head, and body elements.
+ * Create a valid, empty shell of a document, suitable for adding more
+ * elements to.
+ *
+ * @param baseUri
+ * baseUri of document
+ * @return document with html, head, and body elements.
*/
static public Document createShell(String baseUri) {
Validate.notNull(baseUri);
@@ -44,24 +50,27 @@ public class Document extends Element {
}
/**
- Accessor to the document's {@code head} element.
- @return {@code head}
+ * Accessor to the document's {@code head} element.
+ *
+ * @return {@code head}
*/
public Element head() {
return findFirstElementByTagName("head", this);
}
/**
- Accessor to the document's {@code body} element.
- @return {@code body}
+ * Accessor to the document's {@code body} element.
+ *
+ * @return {@code body}
*/
public Element body() {
return findFirstElementByTagName("body", this);
}
/**
- Get the string contents of the document's {@code title} element.
- @return Trimmed title, or empty string if none set.
+ * Get the string contents of the document's {@code title} element.
+ *
+ * @return Trimmed title, or empty string if none set.
*/
public String title() {
Element titleEl = getElementsByTag("title").first();
@@ -69,9 +78,11 @@ public class Document extends Element {
}
/**
- Set the document's {@code title} element. Updates the existing element, or adds {@code title} to {@code head} if
- not present
- @param title string to set as title
+ * Set the document's {@code title} element. Updates the existing element,
+ * or adds {@code title} to {@code head} if not present
+ *
+ * @param title
+ * string to set as title
*/
public void title(String title) {
Validate.notNull(title);
@@ -84,29 +95,38 @@ public class Document extends Element {
}
/**
- Create a new Element, with this document's base uri. Does not make the new element a child of this document.
- @param tagName element tag name (e.g. {@code a})
- @return new element
+ * Create a new Element, with this document's base uri. Does not make the
+ * new element a child of this document.
+ *
+ * @param tagName
+ * element tag name (e.g. {@code a})
+ * @return new element
*/
public Element createElement(String tagName) {
- return new Element(Tag.valueOf(tagName), this.baseUri());
+ return new Element(Tag.valueOf(tagName), baseUri());
}
/**
- Normalise the document. This happens after the parse phase so generally does not need to be called.
- Moves any text content that is not in the body element into the body.
- @return this document after normalisation
+ * Normalise the document. This happens after the parse phase so generally
+ * does not need to be called. Moves any text content that is not in the
+ * body element into the body.
+ *
+ * @return this document after normalisation
*/
public Document normalise() {
Element htmlEl = findFirstElementByTagName("html", this);
- if (htmlEl == null)
+ if (htmlEl == null) {
htmlEl = appendElement("html");
- if (head() == null)
+ }
+ if (head() == null) {
htmlEl.prependElement("head");
- if (body() == null)
+ }
+ if (body() == null) {
htmlEl.appendElement("body");
+ }
- // pull text nodes out of root, html, and head els, and push into body. non-text nodes are already taken care
+ // pull text nodes out of root, html, and head els, and push into body.
+ // non-text nodes are already taken care
// of. do in inverse order to maintain text order.
normaliseTextNodes(head());
normaliseTextNodes(htmlEl);
@@ -114,22 +134,23 @@ public class Document extends Element {
normaliseStructure("head", htmlEl);
normaliseStructure("body", htmlEl);
-
+
return this;
}
// does not recurse.
private void normaliseTextNodes(Element element) {
List<Node> toMove = new ArrayList<Node>();
- for (Node node: element.childNodes) {
+ for (Node node : element.childNodes) {
if (node instanceof TextNode) {
TextNode tn = (TextNode) node;
- if (!tn.isBlank())
+ if (!tn.isBlank()) {
toMove.add(tn);
+ }
}
}
- for (int i = toMove.size()-1; i >= 0; i--) {
+ for (int i = toMove.size() - 1; i >= 0; i--) {
Node node = toMove.get(i);
element.removeChild(node);
body().prependChild(new TextNode(" ", ""));
@@ -137,37 +158,42 @@ public class Document extends Element {
}
}
- // merge multiple <head> or <body> contents into one, delete the remainder, and ensure they are owned by <html>
+ // merge multiple <head> or <body> contents into one, delete the remainder,
+ // and ensure they are owned by <html>
private void normaliseStructure(String tag, Element htmlEl) {
- Elements elements = this.getElementsByTag(tag);
- Element master = elements.first(); // will always be available as created above if not existent
+ Elements elements = getElementsByTag(tag);
+ Element master = elements.first(); // will always be available as
+ // created above if not existent
if (elements.size() > 1) { // dupes, move contents to master
List<Node> toMove = new ArrayList<Node>();
for (int i = 1; i < elements.size(); i++) {
Node dupe = elements.get(i);
- for (Node node : dupe.childNodes)
+ for (Node node : dupe.childNodes) {
toMove.add(node);
+ }
dupe.remove();
}
- for (Node dupe : toMove)
+ for (Node dupe : toMove) {
master.appendChild(dupe);
+ }
}
// ensure parented by <html>
if (!master.parent().equals(htmlEl)) {
- htmlEl.appendChild(master); // includes remove()
+ htmlEl.appendChild(master); // includes remove()
}
}
// fast method to get first by tag name, used for html, head, body finders
private Element findFirstElementByTagName(String tag, Node node) {
- if (node.nodeName().equals(tag))
+ if (node.nodeName().equals(tag)) {
return (Element) node;
- else {
- for (Node child: node.childNodes) {
+ } else {
+ for (Node child : node.childNodes) {
Element found = findFirstElementByTagName(tag, child);
- if (found != null)
+ if (found != null) {
return found;
+ }
}
}
return null;
@@ -179,9 +205,12 @@ public class Document extends Element {
}
/**
- Set the text of the {@code body} of this document. Any existing nodes within the body will be cleared.
- @param text unencoded text
- @return this document
+ * Set the text of the {@code body} of this document. Any existing nodes
+ * within the body will be cleared.
+ *
+ * @param text
+ * unencoded text
+ * @return this document
*/
@Override
public Element text(String text) {
@@ -197,12 +226,13 @@ public class Document extends Element {
@Override
public Document clone() {
Document clone = (Document) super.clone();
- clone.outputSettings = this.outputSettings.clone();
+ clone.outputSettings = outputSettings.clone();
return clone;
}
/**
- * A Document's output settings control the form of the text() and html() methods.
+ * A Document's output settings control the form of the text() and html()
+ * methods.
*/
public static class OutputSettings implements Cloneable {
private Entities.EscapeMode escapeMode = Entities.EscapeMode.base;
@@ -211,14 +241,18 @@ public class Document extends Element {
private boolean prettyPrint = true;
private int indentAmount = 1;
- public OutputSettings() {}
+ public OutputSettings() {
+ }
/**
- * Get the document's current HTML escape mode: <code>base</code>, which provides a limited set of named HTML
- * entities and escapes other characters as numbered entities for maximum compatibility; or <code>extended</code>,
- * which uses the complete set of HTML named entities.
+ * Get the document's current HTML escape mode: <code>base</code>, which
+ * provides a limited set of named HTML entities and escapes other
+ * characters as numbered entities for maximum compatibility; or
+ * <code>extended</code>, which uses the complete set of HTML named
+ * entities.
* <p>
* The default escape mode is <code>base</code>.
+ *
* @return the document's current escape mode
*/
public Entities.EscapeMode escapeMode() {
@@ -227,7 +261,9 @@ public class Document extends Element {
/**
* Set the document's escape mode
- * @param escapeMode the new escape mode to use
+ *
+ * @param escapeMode
+ * the new escape mode to use
* @return the document's output settings, for chaining
*/
public OutputSettings escapeMode(Entities.EscapeMode escapeMode) {
@@ -236,11 +272,14 @@ public class Document extends Element {
}
/**
- * Get the document's current output charset, which is used to control which characters are escaped when
- * generating HTML (via the <code>html()</code> methods), and which are kept intact.
+ * Get the document's current output charset, which is used to control
+ * which characters are escaped when generating HTML (via the
+ * <code>html()</code> methods), and which are kept intact.
* <p>
- * Where possible (when parsing from a URL or File), the document's output charset is automatically set to the
- * input charset. Otherwise, it defaults to UTF-8.
+ * Where possible (when parsing from a URL or File), the document's
+ * output charset is automatically set to the input charset. Otherwise,
+ * it defaults to UTF-8.
+ *
* @return the document's current charset.
*/
public Charset charset() {
@@ -249,7 +288,9 @@ public class Document extends Element {
/**
* Update the document's output charset.
- * @param charset the new charset to use.
+ *
+ * @param charset
+ * the new charset to use.
* @return the document's output settings, for chaining
*/
public OutputSettings charset(Charset charset) {
@@ -261,7 +302,9 @@ public class Document extends Element {
/**
* Update the document's output charset.
- * @param charset the new charset (by name) to use.
+ *
+ * @param charset
+ * the new charset (by name) to use.
* @return the document's output settings, for chaining
*/
public OutputSettings charset(String charset) {
@@ -274,8 +317,10 @@ public class Document extends Element {
}
/**
- * Get if pretty printing is enabled. Default is true. If disabled, the HTML output methods will not re-format
- * the output, and the output will generally look like the input.
+ * Get if pretty printing is enabled. Default is true. If disabled, the
+ * HTML output methods will not re-format the output, and the output
+ * will generally look like the input.
+ *
* @return if pretty printing is enabled.
*/
public boolean prettyPrint() {
@@ -284,7 +329,9 @@ public class Document extends Element {
/**
* Enable or disable pretty printing.
- * @param pretty new pretty print setting
+ *
+ * @param pretty
+ * new pretty print setting
* @return this, for chaining
*/
public OutputSettings prettyPrint(boolean pretty) {
@@ -294,6 +341,7 @@ public class Document extends Element {
/**
* Get the current tag indent amount, used when pretty printing.
+ *
* @return the current indent amount
*/
public int indentAmount() {
@@ -302,7 +350,10 @@ public class Document extends Element {
/**
* Set the indent amount for pretty printing
- * @param indentAmount number of spaces to use for indenting each level. Must be >= 0.
+ *
+ * @param indentAmount
+ * number of spaces to use for indenting each level. Must be
+ * >= 0.
* @return this, for chaining
*/
public OutputSettings indentAmount(int indentAmount) {
@@ -321,13 +372,15 @@ public class Document extends Element {
}
clone.charset(charset.name()); // new charset and charset encoder
clone.escapeMode = Entities.EscapeMode.valueOf(escapeMode.name());
- // indentAmount, prettyPrint are primitives so object.clone() will handle
+ // indentAmount, prettyPrint are primitives so object.clone() will
+ // handle
return clone;
}
}
/**
* Get the document's current output settings.
+ *
* @return the document's current output settings.
*/
public OutputSettings outputSettings() {
@@ -347,4 +400,3 @@ public class Document extends Element {
return this;
}
}
-
diff --git a/server/src/org/jsoup/nodes/DocumentType.java b/server/src/org/jsoup/nodes/DocumentType.java
index f8c79f0d18..13ff78dc8b 100644
--- a/server/src/org/jsoup/nodes/DocumentType.java
+++ b/server/src/org/jsoup/nodes/DocumentType.java
@@ -11,12 +11,18 @@ public class DocumentType extends Node {
/**
* Create a new doctype element.
- * @param name the doctype's name
- * @param publicId the doctype's public ID
- * @param systemId the doctype's system ID
- * @param baseUri the doctype's base URI
+ *
+ * @param name
+ * the doctype's name
+ * @param publicId
+ * the doctype's public ID
+ * @param systemId
+ * the doctype's system ID
+ * @param baseUri
+ * the doctype's base URI
*/
- public DocumentType(String name, String publicId, String systemId, String baseUri) {
+ public DocumentType(String name, String publicId, String systemId,
+ String baseUri) {
super(baseUri);
Validate.notEmpty(name);
@@ -31,16 +37,20 @@ public class DocumentType extends Node {
}
@Override
- void outerHtmlHead(StringBuilder accum, int depth, Document.OutputSettings out) {
+ void outerHtmlHead(StringBuilder accum, int depth,
+ Document.OutputSettings out) {
accum.append("<!DOCTYPE ").append(attr("name"));
- if (!StringUtil.isBlank(attr("publicId")))
+ if (!StringUtil.isBlank(attr("publicId"))) {
accum.append(" PUBLIC \"").append(attr("publicId")).append("\"");
- if (!StringUtil.isBlank(attr("systemId")))
+ }
+ if (!StringUtil.isBlank(attr("systemId"))) {
accum.append(" \"").append(attr("systemId")).append("\"");
+ }
accum.append('>');
}
@Override
- void outerHtmlTail(StringBuilder accum, int depth, Document.OutputSettings out) {
+ void outerHtmlTail(StringBuilder accum, int depth,
+ Document.OutputSettings out) {
}
}
diff --git a/server/src/org/jsoup/nodes/Element.java b/server/src/org/jsoup/nodes/Element.java
index 5c1894c934..ff9e68b962 100644
--- a/server/src/org/jsoup/nodes/Element.java
+++ b/server/src/org/jsoup/nodes/Element.java
@@ -1,5 +1,15 @@
package org.jsoup.nodes;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.LinkedHashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.regex.Pattern;
+import java.util.regex.PatternSyntaxException;
+
import org.jsoup.helper.StringUtil;
import org.jsoup.helper.Validate;
import org.jsoup.parser.Parser;
@@ -9,44 +19,46 @@ import org.jsoup.select.Elements;
import org.jsoup.select.Evaluator;
import org.jsoup.select.Selector;
-import java.util.*;
-import java.util.regex.Pattern;
-import java.util.regex.PatternSyntaxException;
-
/**
- * A HTML element consists of a tag name, attributes, and child nodes (including text nodes and
- * other elements).
+ * A HTML element consists of a tag name, attributes, and child nodes (including
+ * text nodes and other elements).
*
- * From an Element, you can extract data, traverse the node graph, and manipulate the HTML.
+ * From an Element, you can extract data, traverse the node graph, and
+ * manipulate the HTML.
*
* @author Jonathan Hedley, jonathan@hedley.net
*/
public class Element extends Node {
private Tag tag;
private Set<String> classNames;
-
+
/**
* Create a new, standalone Element. (Standalone in that is has no parent.)
*
- * @param tag tag of this element
- * @param baseUri the base URI
- * @param attributes initial attributes
+ * @param tag
+ * tag of this element
+ * @param baseUri
+ * the base URI
+ * @param attributes
+ * initial attributes
* @see #appendChild(Node)
* @see #appendElement(String)
*/
public Element(Tag tag, String baseUri, Attributes attributes) {
super(baseUri, attributes);
-
- Validate.notNull(tag);
+
+ Validate.notNull(tag);
this.tag = tag;
}
-
+
/**
* Create a new Element from a tag and a base URI.
*
- * @param tag element tag
- * @param baseUri the base URI of this element. It is acceptable for the base URI to be an empty
- * string, but not null.
+ * @param tag
+ * element tag
+ * @param baseUri
+ * the base URI of this element. It is acceptable for the base
+ * URI to be an empty string, but not null.
* @see Tag#valueOf(String)
*/
public Element(Tag tag, String baseUri) {
@@ -68,10 +80,11 @@ public class Element extends Node {
}
/**
- * Change the tag of this element. For example, convert a {@code <span>} to a {@code <div>} with
- * {@code el.tagName("div");}.
- *
- * @param tagName new tag name for this element
+ * Change the tag of this element. For example, convert a {@code <span>} to
+ * a {@code <div>} with {@code el.tagName("div");}.
+ *
+ * @param tagName
+ * new tag name for this element
* @return this element, for chaining
*/
public Element tagName(String tagName) {
@@ -88,10 +101,10 @@ public class Element extends Node {
public Tag tag() {
return tag;
}
-
+
/**
- * Test if this element is a block-level element. (E.g. {@code <div> == true} or an inline element
- * {@code <p> == false}).
+ * Test if this element is a block-level element. (E.g.
+ * {@code <div> == true} or an inline element {@code <p> == false}).
*
* @return true if block, false if not (and thus inline)
*/
@@ -110,27 +123,32 @@ public class Element extends Node {
}
/**
- * Set an attribute value on this element. If this element already has an attribute with the
- * key, its value is updated; otherwise, a new attribute is added.
+ * Set an attribute value on this element. If this element already has an
+ * attribute with the key, its value is updated; otherwise, a new attribute
+ * is added.
*
* @return this element
*/
+ @Override
public Element attr(String attributeKey, String attributeValue) {
super.attr(attributeKey, attributeValue);
return this;
}
/**
- * Get this element's HTML5 custom data attributes. Each attribute in the element that has a key
- * starting with "data-" is included the dataset.
+ * Get this element's HTML5 custom data attributes. Each attribute in the
+ * element that has a key starting with "data-" is included the dataset.
* <p>
- * E.g., the element {@code <div data-package="jsoup" data-language="Java" class="group">...} has the dataset
- * {@code package=jsoup, language=java}.
+ * E.g., the element
+ * {@code <div data-package="jsoup" data-language="Java" class="group">...}
+ * has the dataset {@code package=jsoup, language=java}.
* <p>
- * This map is a filtered view of the element's attribute map. Changes to one map (add, remove, update) are reflected
- * in the other map.
+ * This map is a filtered view of the element's attribute map. Changes to
+ * one map (add, remove, update) are reflected in the other map.
* <p>
- * You can find elements that have data attributes using the {@code [^data-]} attribute key prefix selector.
+ * You can find elements that have data attributes using the
+ * {@code [^data-]} attribute key prefix selector.
+ *
* @return a map of {@code key=value} custom data attributes.
*/
public Map<String, String> dataset() {
@@ -144,6 +162,7 @@ public class Element extends Node {
/**
* Get this element's parent and ancestors, up to the document root.
+ *
* @return this element's stack of parents, closest first.
*/
public Elements parents() {
@@ -163,10 +182,12 @@ public class Element extends Node {
/**
* Get a child element of this element, by its 0-based index number.
* <p/>
- * Note that an element can have both mixed Nodes and Elements as children. This method inspects
- * a filtered list of children that are elements, and the index is based on that filtered list.
+ * Note that an element can have both mixed Nodes and Elements as children.
+ * This method inspects a filtered list of children that are elements, and
+ * the index is based on that filtered list.
*
- * @param index the index number of the element to retrieve
+ * @param index
+ * the index number of the element to retrieve
* @return the child element, if it exists, or {@code null} if absent.
* @see #childNode(int)
*/
@@ -177,140 +198,165 @@ public class Element extends Node {
/**
* Get this element's child elements.
* <p/>
- * This is effectively a filter on {@link #childNodes()} to get Element nodes.
- * @return child elements. If this element has no children, returns an
- * empty list.
+ * This is effectively a filter on {@link #childNodes()} to get Element
+ * nodes.
+ *
+ * @return child elements. If this element has no children, returns an empty
+ * list.
* @see #childNodes()
*/
public Elements children() {
- // create on the fly rather than maintaining two lists. if gets slow, memoize, and mark dirty on change
+ // create on the fly rather than maintaining two lists. if gets slow,
+ // memoize, and mark dirty on change
List<Element> elements = new ArrayList<Element>();
for (Node node : childNodes) {
- if (node instanceof Element)
+ if (node instanceof Element) {
elements.add((Element) node);
+ }
}
return new Elements(elements);
}
/**
- * Get this element's child text nodes. The list is unmodifiable but the text nodes may be manipulated.
+ * Get this element's child text nodes. The list is unmodifiable but the
+ * text nodes may be manipulated.
* <p/>
* This is effectively a filter on {@link #childNodes()} to get Text nodes.
+ *
* @return child text nodes. If this element has no text nodes, returns an
- * empty list.
- * <p/>
- * For example, with the input HTML: {@code <p>One <span>Two</span> Three <br> Four</p>} with the {@code p} element selected:
- * <ul>
- * <li>{@code p.text()} = {@code "One Two Three Four"}</li>
- * <li>{@code p.ownText()} = {@code "One Three Four"}</li>
- * <li>{@code p.children()} = {@code Elements[<span>, <br>]}</li>
- * <li>{@code p.childNodes()} = {@code List<Node>["One ", <span>, " Three ", <br>, " Four"]}</li>
- * <li>{@code p.textNodes()} = {@code List<TextNode>["One ", " Three ", " Four"]}</li>
- * </ul>
+ * empty list.
+ * <p/>
+ * For example, with the input HTML:
+ * {@code <p>One <span>Two</span> Three <br> Four</p>} with the
+ * {@code p} element selected: <ul>
+ * <li>{@code p.text()} = {@code "One Two Three Four"}</li>
+ * <li>{@code p.ownText()} = {@code "One Three Four"}</li>
+ * <li>{@code p.children()} =
+ * {@code Elements[<span>, <br>]}</li>
+ * <li>{@code p.childNodes()} =
+ * {@code List<Node>["One ", <span>, " Three ", <br>,
+ * " Four"]}</li>
+ * <li>{@code p.textNodes()} =
+ * {@code List<TextNode>["One ", " Three ", " Four"]}</li>
+ * </ul>
*/
public List<TextNode> textNodes() {
List<TextNode> textNodes = new ArrayList<TextNode>();
for (Node node : childNodes) {
- if (node instanceof TextNode)
+ if (node instanceof TextNode) {
textNodes.add((TextNode) node);
+ }
}
return Collections.unmodifiableList(textNodes);
}
/**
- * Get this element's child data nodes. The list is unmodifiable but the data nodes may be manipulated.
+ * Get this element's child data nodes. The list is unmodifiable but the
+ * data nodes may be manipulated.
* <p/>
* This is effectively a filter on {@link #childNodes()} to get Data nodes.
+ *
* @return child data nodes. If this element has no data nodes, returns an
- * empty list.
+ * empty list.
* @see #data()
*/
public List<DataNode> dataNodes() {
List<DataNode> dataNodes = new ArrayList<DataNode>();
for (Node node : childNodes) {
- if (node instanceof DataNode)
+ if (node instanceof DataNode) {
dataNodes.add((DataNode) node);
+ }
}
return Collections.unmodifiableList(dataNodes);
}
/**
- * Find elements that match the {@link Selector} CSS query, with this element as the starting context. Matched elements
- * may include this element, or any of its children.
+ * Find elements that match the {@link Selector} CSS query, with this
+ * element as the starting context. Matched elements may include this
+ * element, or any of its children.
* <p/>
- * This method is generally more powerful to use than the DOM-type {@code getElementBy*} methods, because
- * multiple filters can be combined, e.g.:
+ * This method is generally more powerful to use than the DOM-type
+ * {@code getElementBy*} methods, because multiple filters can be combined,
+ * e.g.:
* <ul>
- * <li>{@code el.select("a[href]")} - finds links ({@code a} tags with {@code href} attributes)
- * <li>{@code el.select("a[href*=example.com]")} - finds links pointing to example.com (loosely)
+ * <li>{@code el.select("a[href]")} - finds links ({@code a} tags with
+ * {@code href} attributes)
+ * <li>{@code el.select("a[href*=example.com]")} - finds links pointing to
+ * example.com (loosely)
* </ul>
* <p/>
* See the query syntax documentation in {@link org.jsoup.select.Selector}.
- *
- * @param cssQuery a {@link Selector} CSS-like query
+ *
+ * @param cssQuery
+ * a {@link Selector} CSS-like query
* @return elements that match the query (empty if none match)
* @see org.jsoup.select.Selector
*/
public Elements select(String cssQuery) {
return Selector.select(cssQuery, this);
}
-
+
/**
* Add a node child node to this element.
*
- * @param child node to add. Must not already have a parent.
+ * @param child
+ * node to add. Must not already have a parent.
* @return this element, so that you can add more child nodes or elements.
*/
public Element appendChild(Node child) {
Validate.notNull(child);
-
+
addChildren(child);
return this;
}
-
+
/**
* Add a node to the start of this element's children.
*
- * @param child node to add. Must not already have a parent.
+ * @param child
+ * node to add. Must not already have a parent.
* @return this element, so that you can add more child nodes or elements.
*/
public Element prependChild(Node child) {
Validate.notNull(child);
-
+
addChildren(0, child);
return this;
}
-
+
/**
* Create a new element by tag name, and add it as the last child.
*
- * @param tagName the name of the tag (e.g. {@code div}).
+ * @param tagName
+ * the name of the tag (e.g. {@code div}).
* @return the new element, to allow you to add content to it, e.g.:
- * {@code parent.appendElement("h1").attr("id", "header").text("Welcome");}
+ * {@code parent.appendElement("h1").attr("id", "header").text("Welcome");}
*/
public Element appendElement(String tagName) {
Element child = new Element(Tag.valueOf(tagName), baseUri());
appendChild(child);
return child;
}
-
+
/**
* Create a new element by tag name, and add it as the first child.
*
- * @param tagName the name of the tag (e.g. {@code div}).
+ * @param tagName
+ * the name of the tag (e.g. {@code div}).
* @return the new element, to allow you to add content to it, e.g.:
- * {@code parent.prependElement("h1").attr("id", "header").text("Welcome");}
+ * {@code parent.prependElement("h1").attr("id", "header").text("Welcome");}
*/
public Element prependElement(String tagName) {
Element child = new Element(Tag.valueOf(tagName), baseUri());
prependChild(child);
return child;
}
-
+
/**
* Create and append a new TextNode to this element.
*
- * @param text the unencoded text to add
+ * @param text
+ * the unencoded text to add
* @return this element
*/
public Element appendText(String text) {
@@ -318,11 +364,12 @@ public class Element extends Node {
appendChild(node);
return this;
}
-
+
/**
* Create and prepend a new TextNode to this element.
*
- * @param text the unencoded text to add
+ * @param text
+ * the unencoded text to add
* @return this element
*/
public Element prependText(String text) {
@@ -330,10 +377,13 @@ public class Element extends Node {
prependChild(node);
return this;
}
-
+
/**
- * Add inner HTML to this element. The supplied HTML will be parsed, and each node appended to the end of the children.
- * @param html HTML to add inside this element, after the existing HTML
+ * Add inner HTML to this element. The supplied HTML will be parsed, and
+ * each node appended to the end of the children.
+ *
+ * @param html
+ * HTML to add inside this element, after the existing HTML
* @return this element
* @see #html(String)
*/
@@ -344,25 +394,30 @@ public class Element extends Node {
addChildren(nodes.toArray(new Node[nodes.size()]));
return this;
}
-
+
/**
- * Add inner HTML into this element. The supplied HTML will be parsed, and each node prepended to the start of the element's children.
- * @param html HTML to add inside this element, before the existing HTML
+ * Add inner HTML into this element. The supplied HTML will be parsed, and
+ * each node prepended to the start of the element's children.
+ *
+ * @param html
+ * HTML to add inside this element, before the existing HTML
* @return this element
* @see #html(String)
*/
public Element prepend(String html) {
Validate.notNull(html);
-
+
List<Node> nodes = Parser.parseFragment(html, this, baseUri());
addChildren(0, nodes.toArray(new Node[nodes.size()]));
return this;
}
/**
- * Insert the specified HTML into the DOM before this element (i.e. as a preceding sibling).
- *
- * @param html HTML to add before this element
+ * Insert the specified HTML into the DOM before this element (i.e. as a
+ * preceding sibling).
+ *
+ * @param html
+ * HTML to add before this element
* @return this element, for chaining
* @see #after(String)
*/
@@ -372,8 +427,11 @@ public class Element extends Node {
}
/**
- * Insert the specified node into the DOM before this node (i.e. as a preceding sibling).
- * @param node to add before this element
+ * Insert the specified node into the DOM before this node (i.e. as a
+ * preceding sibling).
+ *
+ * @param node
+ * to add before this element
* @return this Element, for chaining
* @see #after(Node)
*/
@@ -383,9 +441,11 @@ public class Element extends Node {
}
/**
- * Insert the specified HTML into the DOM after this element (i.e. as a following sibling).
- *
- * @param html HTML to add after this element
+ * Insert the specified HTML into the DOM after this element (i.e. as a
+ * following sibling).
+ *
+ * @param html
+ * HTML to add after this element
* @return this element, for chaining
* @see #before(String)
*/
@@ -395,8 +455,11 @@ public class Element extends Node {
}
/**
- * Insert the specified node into the DOM after this node (i.e. as a following sibling).
- * @param node to add after this element
+ * Insert the specified node into the DOM after this node (i.e. as a
+ * following sibling).
+ *
+ * @param node
+ * to add after this element
* @return this element, for chaining
* @see #before(Node)
*/
@@ -407,6 +470,7 @@ public class Element extends Node {
/**
* Remove all of the element's child nodes. Any attributes are left as-is.
+ *
* @return this element
*/
public Element empty() {
@@ -416,8 +480,10 @@ public class Element extends Node {
/**
* Wrap the supplied HTML around this element.
- *
- * @param html HTML to wrap around this element, e.g. {@code <div class="head"></div>}. Can be arbitrarily deep.
+ *
+ * @param html
+ * HTML to wrap around this element, e.g.
+ * {@code <div class="head"></div>}. Can be arbitrarily deep.
* @return this element, for chaining.
*/
@Override
@@ -426,94 +492,118 @@ public class Element extends Node {
}
/**
- * Get sibling elements. If the element has no sibling elements, returns an empty list. An element is not a sibling
- * of itself, so will not be included in the returned list.
+ * Get sibling elements. If the element has no sibling elements, returns an
+ * empty list. An element is not a sibling of itself, so will not be
+ * included in the returned list.
+ *
* @return sibling elements
*/
public Elements siblingElements() {
- if (parentNode == null)
+ if (parentNode == null) {
return new Elements(0);
+ }
List<Element> elements = parent().children();
Elements siblings = new Elements(elements.size() - 1);
- for (Element el: elements)
- if (el != this)
+ for (Element el : elements) {
+ if (el != this) {
siblings.add(el);
+ }
+ }
return siblings;
}
/**
- * Gets the next sibling element of this element. E.g., if a {@code div} contains two {@code p}s,
- * the {@code nextElementSibling} of the first {@code p} is the second {@code p}.
+ * Gets the next sibling element of this element. E.g., if a {@code div}
+ * contains two {@code p}s, the {@code nextElementSibling} of the first
+ * {@code p} is the second {@code p}.
* <p/>
- * This is similar to {@link #nextSibling()}, but specifically finds only Elements
+ * This is similar to {@link #nextSibling()}, but specifically finds only
+ * Elements.
+ *
* @return the next element, or null if there is no next element
* @see #previousElementSibling()
*/
public Element nextElementSibling() {
- if (parentNode == null) return null;
+ if (parentNode == null) {
+ return null;
+ }
List<Element> siblings = parent().children();
Integer index = indexInList(this, siblings);
Validate.notNull(index);
- if (siblings.size() > index+1)
- return siblings.get(index+1);
- else
+ if (siblings.size() > index + 1) {
+ return siblings.get(index + 1);
+ } else {
return null;
+ }
}
/**
* Gets the previous element sibling of this element.
+ *
* @return the previous element, or null if there is no previous element
* @see #nextElementSibling()
*/
public Element previousElementSibling() {
- if (parentNode == null) return null;
+ if (parentNode == null) {
+ return null;
+ }
List<Element> siblings = parent().children();
Integer index = indexInList(this, siblings);
Validate.notNull(index);
- if (index > 0)
- return siblings.get(index-1);
- else
+ if (index > 0) {
+ return siblings.get(index - 1);
+ } else {
return null;
+ }
}
/**
* Gets the first element sibling of this element.
- * @return the first sibling that is an element (aka the parent's first element child)
+ *
+ * @return the first sibling that is an element (aka the parent's first
+ * element child)
*/
public Element firstElementSibling() {
// todo: should firstSibling() exclude this?
List<Element> siblings = parent().children();
return siblings.size() > 1 ? siblings.get(0) : null;
}
-
+
/**
- * Get the list index of this element in its element sibling list. I.e. if this is the first element
- * sibling, returns 0.
+ * Get the list index of this element in its element sibling list. I.e. if
+ * this is the first element sibling, returns 0.
+ *
* @return position in element sibling list
*/
public Integer elementSiblingIndex() {
- if (parent() == null) return 0;
- return indexInList(this, parent().children());
+ if (parent() == null) {
+ return 0;
+ }
+ return indexInList(this, parent().children());
}
/**
* Gets the last element sibling of this element
- * @return the last sibling that is an element (aka the parent's last element child)
+ *
+ * @return the last sibling that is an element (aka the parent's last
+ * element child)
*/
public Element lastElementSibling() {
List<Element> siblings = parent().children();
return siblings.size() > 1 ? siblings.get(siblings.size() - 1) : null;
}
-
- private static <E extends Element> Integer indexInList(Element search, List<E> elements) {
+
+ private static <E extends Element> Integer indexInList(Element search,
+ List<E> elements) {
Validate.notNull(search);
Validate.notNull(elements);
for (int i = 0; i < elements.size(); i++) {
E element = elements.get(i);
- if (element.equals(search))
+ if (element.equals(search)) {
return i;
+ }
}
return null;
}
@@ -521,9 +611,13 @@ public class Element extends Node {
// DOM type methods
/**
- * Finds elements, including and recursively under this element, with the specified tag name.
- * @param tagName The tag name to search for (case insensitively).
- * @return a matching unmodifiable list of elements. Will be empty if this element and none of its children match.
+ * Finds elements, including and recursively under this element, with the
+ * specified tag name.
+ *
+ * @param tagName
+ * The tag name to search for (case insensitively).
+ * @return a matching unmodifiable list of elements. Will be empty if this
+ * element and none of its children match.
*/
public Elements getElementsByTag(String tagName) {
Validate.notEmpty(tagName);
@@ -535,29 +629,37 @@ public class Element extends Node {
/**
* Find an element by ID, including or under this element.
* <p>
- * Note that this finds the first matching ID, starting with this element. If you search down from a different
- * starting point, it is possible to find a different element by ID. For unique element by ID within a Document,
- * use {@link Document#getElementById(String)}
- * @param id The ID to search for.
- * @return The first matching element by ID, starting with this element, or null if none found.
+ * Note that this finds the first matching ID, starting with this element.
+ * If you search down from a different starting point, it is possible to
+ * find a different element by ID. For unique element by ID within a
+ * Document, use {@link Document#getElementById(String)}
+ *
+ * @param id
+ * The ID to search for.
+ * @return The first matching element by ID, starting with this element, or
+ * null if none found.
*/
public Element getElementById(String id) {
Validate.notEmpty(id);
-
+
Elements elements = Collector.collect(new Evaluator.Id(id), this);
- if (elements.size() > 0)
+ if (elements.size() > 0) {
return elements.get(0);
- else
+ } else {
return null;
+ }
}
/**
- * Find elements that have this class, including or under this element. Case insensitive.
+ * Find elements that have this class, including or under this element. Case
+ * insensitive.
* <p>
- * Elements can have multiple classes (e.g. {@code <div class="header round first">}. This method
- * checks each class, so you can find the above with {@code el.getElementsByClass("header");}.
+ * Elements can have multiple classes (e.g.
+ * {@code <div class="header round first">}). This method checks each class,
+ * so you can find the above with {@code el.getElementsByClass("header");}.
*
- * @param className the name of the class to search for.
+ * @param className
+ * the name of the class to search for.
* @return elements with the supplied class name, empty if none
* @see #hasClass(String)
* @see #classNames()
@@ -570,8 +672,9 @@ public class Element extends Node {
/**
* Find elements that have a named attribute set. Case insensitive.
- *
- * @param key name of the attribute, e.g. {@code href}
+ *
+ * @param key
+ * name of the attribute, e.g. {@code href}
* @return elements that have this attribute, empty if none
*/
public Elements getElementsByAttribute(String key) {
@@ -582,88 +685,129 @@ public class Element extends Node {
}
/**
- * Find elements that have an attribute name starting with the supplied prefix. Use {@code data-} to find elements
- * that have HTML5 datasets.
- * @param keyPrefix name prefix of the attribute e.g. {@code data-}
- * @return elements that have attribute names that start with with the prefix, empty if none.
+ * Find elements that have an attribute name starting with the supplied
+ * prefix. Use {@code data-} to find elements that have HTML5 datasets.
+ *
+ * @param keyPrefix
+ * name prefix of the attribute e.g. {@code data-}
+ * @return elements that have attribute names that start with the
+ * prefix, empty if none.
*/
public Elements getElementsByAttributeStarting(String keyPrefix) {
Validate.notEmpty(keyPrefix);
keyPrefix = keyPrefix.trim().toLowerCase();
- return Collector.collect(new Evaluator.AttributeStarting(keyPrefix), this);
+ return Collector.collect(new Evaluator.AttributeStarting(keyPrefix),
+ this);
}
/**
- * Find elements that have an attribute with the specific value. Case insensitive.
+ * Find elements that have an attribute with the specific value. Case
+ * insensitive.
*
- * @param key name of the attribute
- * @param value value of the attribute
+ * @param key
+ * name of the attribute
+ * @param value
+ * value of the attribute
* @return elements that have this attribute with this value, empty if none
*/
public Elements getElementsByAttributeValue(String key, String value) {
- return Collector.collect(new Evaluator.AttributeWithValue(key, value), this);
+ return Collector.collect(new Evaluator.AttributeWithValue(key, value),
+ this);
}
/**
- * Find elements that either do not have this attribute, or have it with a different value. Case insensitive.
+ * Find elements that either do not have this attribute, or have it with a
+ * different value. Case insensitive.
*
- * @param key name of the attribute
- * @param value value of the attribute
+ * @param key
+ * name of the attribute
+ * @param value
+ * value of the attribute
* @return elements that do not have a matching attribute
*/
public Elements getElementsByAttributeValueNot(String key, String value) {
- return Collector.collect(new Evaluator.AttributeWithValueNot(key, value), this);
+ return Collector.collect(
+ new Evaluator.AttributeWithValueNot(key, value), this);
}
/**
- * Find elements that have attributes that start with the value prefix. Case insensitive.
+ * Find elements that have attributes that start with the value prefix. Case
+ * insensitive.
*
- * @param key name of the attribute
- * @param valuePrefix start of attribute value
+ * @param key
+ * name of the attribute
+ * @param valuePrefix
+ * start of attribute value
* @return elements that have attributes that start with the value prefix
*/
- public Elements getElementsByAttributeValueStarting(String key, String valuePrefix) {
- return Collector.collect(new Evaluator.AttributeWithValueStarting(key, valuePrefix), this);
+ public Elements getElementsByAttributeValueStarting(String key,
+ String valuePrefix) {
+ return Collector.collect(new Evaluator.AttributeWithValueStarting(key,
+ valuePrefix), this);
}
/**
- * Find elements that have attributes that end with the value suffix. Case insensitive.
+ * Find elements that have attributes that end with the value suffix. Case
+ * insensitive.
*
- * @param key name of the attribute
- * @param valueSuffix end of the attribute value
+ * @param key
+ * name of the attribute
+ * @param valueSuffix
+ * end of the attribute value
* @return elements that have attributes that end with the value suffix
*/
- public Elements getElementsByAttributeValueEnding(String key, String valueSuffix) {
- return Collector.collect(new Evaluator.AttributeWithValueEnding(key, valueSuffix), this);
+ public Elements getElementsByAttributeValueEnding(String key,
+ String valueSuffix) {
+ return Collector.collect(new Evaluator.AttributeWithValueEnding(key,
+ valueSuffix), this);
}
/**
- * Find elements that have attributes whose value contains the match string. Case insensitive.
+ * Find elements that have attributes whose value contains the match string.
+ * Case insensitive.
*
- * @param key name of the attribute
- * @param match substring of value to search for
+ * @param key
+ * name of the attribute
+ * @param match
+ * substring of value to search for
* @return elements that have attributes containing this text
*/
- public Elements getElementsByAttributeValueContaining(String key, String match) {
- return Collector.collect(new Evaluator.AttributeWithValueContaining(key, match), this);
+ public Elements getElementsByAttributeValueContaining(String key,
+ String match) {
+ return Collector.collect(new Evaluator.AttributeWithValueContaining(
+ key, match), this);
}
-
+
/**
- * Find elements that have attributes whose values match the supplied regular expression.
- * @param key name of the attribute
- * @param pattern compiled regular expression to match against attribute values
+ * Find elements that have attributes whose values match the supplied
+ * regular expression.
+ *
+ * @param key
+ * name of the attribute
+ * @param pattern
+ * compiled regular expression to match against attribute values
* @return elements that have attributes matching this regular expression
*/
- public Elements getElementsByAttributeValueMatching(String key, Pattern pattern) {
- return Collector.collect(new Evaluator.AttributeWithValueMatching(key, pattern), this);
-
+ public Elements getElementsByAttributeValueMatching(String key,
+ Pattern pattern) {
+ return Collector.collect(new Evaluator.AttributeWithValueMatching(key,
+ pattern), this);
+
}
-
+
/**
- * Find elements that have attributes whose values match the supplied regular expression.
- * @param key name of the attribute
- * @param regex regular expression to match against attribute values. You can use <a href="http://java.sun.com/docs/books/tutorial/essential/regex/pattern.html#embedded">embedded flags</a> (such as (?i) and (?m) to control regex options.
+ * Find elements that have attributes whose values match the supplied
+ * regular expression.
+ *
+ * @param key
+ * name of the attribute
+ * @param regex
+ * regular expression to match against attribute values. You can
+ * use <a href=
+ * "http://java.sun.com/docs/books/tutorial/essential/regex/pattern.html#embedded"
+ * >embedded flags</a> (such as (?i) and (?m)) to control regex
+ * options.
* @return elements that have attributes matching this regular expression
*/
public Elements getElementsByAttributeValueMatching(String key, String regex) {
@@ -671,73 +815,94 @@ public class Element extends Node {
try {
pattern = Pattern.compile(regex);
} catch (PatternSyntaxException e) {
- throw new IllegalArgumentException("Pattern syntax error: " + regex, e);
+ throw new IllegalArgumentException(
+ "Pattern syntax error: " + regex, e);
}
return getElementsByAttributeValueMatching(key, pattern);
}
-
+
/**
* Find elements whose sibling index is less than the supplied index.
- * @param index 0-based index
+ *
+ * @param index
+ * 0-based index
* @return elements less than index
*/
public Elements getElementsByIndexLessThan(int index) {
return Collector.collect(new Evaluator.IndexLessThan(index), this);
}
-
+
/**
* Find elements whose sibling index is greater than the supplied index.
- * @param index 0-based index
+ *
+ * @param index
+ * 0-based index
* @return elements greater than index
*/
public Elements getElementsByIndexGreaterThan(int index) {
return Collector.collect(new Evaluator.IndexGreaterThan(index), this);
}
-
+
/**
* Find elements whose sibling index is equal to the supplied index.
- * @param index 0-based index
+ *
+ * @param index
+ * 0-based index
* @return elements equal to index
*/
public Elements getElementsByIndexEquals(int index) {
return Collector.collect(new Evaluator.IndexEquals(index), this);
}
-
+
/**
- * Find elements that contain the specified string. The search is case insensitive. The text may appear directly
- * in the element, or in any of its descendants.
- * @param searchText to look for in the element's text
+ * Find elements that contain the specified string. The search is case
+ * insensitive. The text may appear directly in the element, or in any of
+ * its descendants.
+ *
+ * @param searchText
+ * to look for in the element's text
* @return elements that contain the string, case insensitive.
* @see Element#text()
*/
public Elements getElementsContainingText(String searchText) {
return Collector.collect(new Evaluator.ContainsText(searchText), this);
}
-
+
/**
- * Find elements that directly contain the specified string. The search is case insensitive. The text must appear directly
- * in the element, not in any of its descendants.
- * @param searchText to look for in the element's own text
+ * Find elements that directly contain the specified string. The search is
+ * case insensitive. The text must appear directly in the element, not in
+ * any of its descendants.
+ *
+ * @param searchText
+ * to look for in the element's own text
* @return elements that contain the string, case insensitive.
* @see Element#ownText()
*/
public Elements getElementsContainingOwnText(String searchText) {
- return Collector.collect(new Evaluator.ContainsOwnText(searchText), this);
+ return Collector.collect(new Evaluator.ContainsOwnText(searchText),
+ this);
}
-
+
/**
* Find elements whose text matches the supplied regular expression.
- * @param pattern regular expression to match text against
+ *
+ * @param pattern
+ * regular expression to match text against
* @return elements matching the supplied regular expression.
* @see Element#text()
*/
public Elements getElementsMatchingText(Pattern pattern) {
return Collector.collect(new Evaluator.Matches(pattern), this);
}
-
+
/**
* Find elements whose text matches the supplied regular expression.
- * @param regex regular expression to match text against. You can use <a href="http://java.sun.com/docs/books/tutorial/essential/regex/pattern.html#embedded">embedded flags</a> (such as (?i) and (?m) to control regex options.
+ *
+ * @param regex
+ * regular expression to match text against. You can use <a href=
+ * "http://java.sun.com/docs/books/tutorial/essential/regex/pattern.html#embedded"
+ * >embedded flags</a> (such as (?i) and (?m)) to control regex
+ * options.
* @return elements matching the supplied regular expression.
* @see Element#text()
*/
@@ -746,24 +911,32 @@ public class Element extends Node {
try {
pattern = Pattern.compile(regex);
} catch (PatternSyntaxException e) {
- throw new IllegalArgumentException("Pattern syntax error: " + regex, e);
+ throw new IllegalArgumentException(
+ "Pattern syntax error: " + regex, e);
}
return getElementsMatchingText(pattern);
}
-
+
/**
* Find elements whose own text matches the supplied regular expression.
- * @param pattern regular expression to match text against
+ *
+ * @param pattern
+ * regular expression to match text against
* @return elements matching the supplied regular expression.
* @see Element#ownText()
*/
public Elements getElementsMatchingOwnText(Pattern pattern) {
return Collector.collect(new Evaluator.MatchesOwn(pattern), this);
}
-
+
/**
* Find elements whose text matches the supplied regular expression.
- * @param regex regular expression to match text against. You can use <a href="http://java.sun.com/docs/books/tutorial/essential/regex/pattern.html#embedded">embedded flags</a> (such as (?i) and (?m) to control regex options.
+ *
+ * @param regex
+ * regular expression to match text against. You can use <a href=
+ * "http://java.sun.com/docs/books/tutorial/essential/regex/pattern.html#embedded"
+ * >embedded flags</a> (such as (?i) and (?m)) to control regex
+ * options.
* @return elements matching the supplied regular expression.
* @see Element#ownText()
*/
@@ -772,13 +945,15 @@ public class Element extends Node {
try {
pattern = Pattern.compile(regex);
} catch (PatternSyntaxException e) {
- throw new IllegalArgumentException("Pattern syntax error: " + regex, e);
+ throw new IllegalArgumentException(
+ "Pattern syntax error: " + regex, e);
}
return getElementsMatchingOwnText(pattern);
}
-
+
/**
- * Find all elements under this element (including self, and children of children).
+ * Find all elements under this element (including self, and children of
+ * children).
*
* @return all elements
*/
@@ -789,8 +964,9 @@ public class Element extends Node {
/**
* Gets the combined text of this element and all its children.
* <p>
- * For example, given HTML {@code <p>Hello <b>there</b> now!</p>}, {@code p.text()} returns {@code "Hello there now!"}
- *
+ * For example, given HTML {@code <p>Hello <b>there</b> now!</p>},
+ * {@code p.text()} returns {@code "Hello there now!"}
+ *
* @return unencoded text, or empty string if none.
* @see #ownText()
* @see #textNodes()
@@ -803,27 +979,32 @@ public class Element extends Node {
private void text(StringBuilder accum) {
appendWhitespaceIfBr(this, accum);
-
+
for (Node child : childNodes) {
if (child instanceof TextNode) {
TextNode textNode = (TextNode) child;
appendNormalisedText(accum, textNode);
} else if (child instanceof Element) {
Element element = (Element) child;
- if (accum.length() > 0 && element.isBlock() && !TextNode.lastCharIsWhitespace(accum))
+ if (accum.length() > 0 && element.isBlock()
+ && !TextNode.lastCharIsWhitespace(accum)) {
accum.append(" ");
+ }
element.text(accum);
}
}
}
/**
- * Gets the text owned by this element only; does not get the combined text of all children.
+ * Gets the text owned by this element only; does not get the combined text
+ * of all children.
* <p>
- * For example, given HTML {@code <p>Hello <b>there</b> now!</p>}, {@code p.ownText()} returns {@code "Hello now!"},
- * whereas {@code p.text()} returns {@code "Hello there now!"}.
- * Note that the text within the {@code b} element is not returned, as it is not a direct child of the {@code p} element.
- *
+ * For example, given HTML {@code <p>Hello <b>there</b> now!</p>},
+ * {@code p.ownText()} returns {@code "Hello now!"}, whereas
+ * {@code p.text()} returns {@code "Hello there now!"}. Note that the text
+ * within the {@code b} element is not returned, as it is not a direct child
+ * of the {@code p} element.
+ *
* @return unencoded text, or empty string if none.
* @see #text()
* @see #textNodes()
@@ -850,24 +1031,32 @@ public class Element extends Node {
if (!preserveWhitespace()) {
text = TextNode.normaliseWhitespace(text);
- if (TextNode.lastCharIsWhitespace(accum))
+ if (TextNode.lastCharIsWhitespace(accum)) {
text = TextNode.stripLeadingWhitespace(text);
+ }
}
accum.append(text);
}
- private static void appendWhitespaceIfBr(Element element, StringBuilder accum) {
- if (element.tag.getName().equals("br") && !TextNode.lastCharIsWhitespace(accum))
+ private static void appendWhitespaceIfBr(Element element,
+ StringBuilder accum) {
+ if (element.tag.getName().equals("br")
+ && !TextNode.lastCharIsWhitespace(accum)) {
accum.append(" ");
+ }
}
boolean preserveWhitespace() {
- return tag.preserveWhitespace() || parent() != null && parent().preserveWhitespace();
+ return tag.preserveWhitespace() || parent() != null
+ && parent().preserveWhitespace();
}
/**
- * Set the text of this element. Any existing contents (text or elements) will be cleared
- * @param text unencoded text
+ * Set the text of this element. Any existing contents (text or elements)
+ * will be cleared
+ *
+ * @param text
+ * unencoded text
* @return this element
*/
public Element text(String text) {
@@ -881,28 +1070,33 @@ public class Element extends Node {
}
/**
- Test if this element has any text content (that is not just whitespace).
- @return true if element has non-blank text content.
+ * Test if this element has any text content (that is not just whitespace).
+ *
+ * @return true if element has non-blank text content.
*/
public boolean hasText() {
- for (Node child: childNodes) {
+ for (Node child : childNodes) {
if (child instanceof TextNode) {
TextNode textNode = (TextNode) child;
- if (!textNode.isBlank())
+ if (!textNode.isBlank()) {
return true;
+ }
} else if (child instanceof Element) {
Element el = (Element) child;
- if (el.hasText())
+ if (el.hasText()) {
return true;
+ }
}
}
return false;
}
/**
- * Get the combined data of this element. Data is e.g. the inside of a {@code script} tag.
+ * Get the combined data of this element. Data is e.g. the inside of a
+ * {@code script} tag.
+ *
* @return the data, or empty string if none
- *
+ *
* @see #dataNodes()
*/
public String data() {
@@ -919,21 +1113,28 @@ public class Element extends Node {
}
}
return sb.toString();
- }
+ }
/**
- * Gets the literal value of this element's "class" attribute, which may include multiple class names, space
- * separated. (E.g. on <code>&lt;div class="header gray"></code> returns, "<code>header gray</code>")
- * @return The literal class attribute, or <b>empty string</b> if no class attribute set.
+ * Gets the literal value of this element's "class" attribute, which may
+ * include multiple class names, space separated. (E.g. on
+ * <code>&lt;div class="header gray"></code> returns, "
+ * <code>header gray</code>")
+ *
+ * @return The literal class attribute, or <b>empty string</b> if no class
+ * attribute set.
*/
public String className() {
return attr("class");
}
/**
- * Get all of the element's class names. E.g. on element {@code <div class="header gray"}>},
- * returns a set of two elements {@code "header", "gray"}. Note that modifications to this set are not pushed to
- * the backing {@code class} attribute; use the {@link #classNames(java.util.Set)} method to persist them.
+ * Get all of the element's class names. E.g. on element
+ * {@code <div class="header gray">}, returns a set of two elements
+ * {@code "header", "gray"}. Note that modifications to this set are not
+ * pushed to the backing {@code class} attribute; use the
+ * {@link #classNames(java.util.Set)} method to persist them.
+ *
* @return set of classnames, empty if no class attribute
*/
public Set<String> classNames() {
@@ -945,9 +1146,11 @@ public class Element extends Node {
}
/**
- Set the element's {@code class} attribute to the supplied class names.
- @param classNames set of classes
- @return this element, for chaining
+ * Set the element's {@code class} attribute to the supplied class names.
+ *
+ * @param classNames
+ * set of classes
+ * @return this element, for chaining
*/
public Element classNames(Set<String> classNames) {
Validate.notNull(classNames);
@@ -957,22 +1160,27 @@ public class Element extends Node {
/**
* Tests if this element has a class. Case insensitive.
- * @param className name of class to check for
+ *
+ * @param className
+ * name of class to check for
* @return true if it does, false if not
*/
public boolean hasClass(String className) {
Set<String> classNames = classNames();
for (String name : classNames) {
- if (className.equalsIgnoreCase(name))
+ if (className.equalsIgnoreCase(name)) {
return true;
+ }
}
return false;
}
/**
- Add a class name to this element's {@code class} attribute.
- @param className class name to add
- @return this element
+ * Add a class name to this element's {@code class} attribute.
+ *
+ * @param className
+ * class name to add
+ * @return this element
*/
public Element addClass(String className) {
Validate.notNull(className);
@@ -985,9 +1193,11 @@ public class Element extends Node {
}
/**
- Remove a class name from this element's {@code class} attribute.
- @param className class name to remove
- @return this element
+ * Remove a class name from this element's {@code class} attribute.
+ *
+ * @param className
+ * class name to remove
+ * @return this element
*/
public Element removeClass(String className) {
Validate.notNull(className);
@@ -1000,90 +1210,114 @@ public class Element extends Node {
}
/**
- Toggle a class name on this element's {@code class} attribute: if present, remove it; otherwise add it.
- @param className class name to toggle
- @return this element
+ * Toggle a class name on this element's {@code class} attribute: if
+ * present, remove it; otherwise add it.
+ *
+ * @param className
+ * class name to toggle
+ * @return this element
*/
public Element toggleClass(String className) {
Validate.notNull(className);
Set<String> classes = classNames();
- if (classes.contains(className))
+ if (classes.contains(className)) {
classes.remove(className);
- else
+ } else {
classes.add(className);
+ }
classNames(classes);
return this;
}
-
+
/**
* Get the value of a form element (input, textarea, etc).
+ *
* @return the value of the form element, or empty string if not set.
*/
public String val() {
- if (tagName().equals("textarea"))
+ if (tagName().equals("textarea")) {
return text();
- else
+ } else {
return attr("value");
+ }
}
-
+
/**
* Set the value of a form element (input, textarea, etc).
- * @param value value to set
+ *
+ * @param value
+ * value to set
* @return this element (for chaining)
*/
public Element val(String value) {
- if (tagName().equals("textarea"))
+ if (tagName().equals("textarea")) {
text(value);
- else
+ } else {
attr("value", value);
+ }
return this;
}
- void outerHtmlHead(StringBuilder accum, int depth, Document.OutputSettings out) {
- if (accum.length() > 0 && out.prettyPrint() && (tag.formatAsBlock() || (parent() != null && parent().tag().formatAsBlock())))
+ @Override
+ void outerHtmlHead(StringBuilder accum, int depth,
+ Document.OutputSettings out) {
+ if (accum.length() > 0
+ && out.prettyPrint()
+ && (tag.formatAsBlock() || (parent() != null && parent().tag()
+ .formatAsBlock()))) {
indent(accum, depth, out);
- accum
- .append("<")
- .append(tagName());
+ }
+ accum.append("<").append(tagName());
attributes.html(accum, out);
- if (childNodes.isEmpty() && tag.isSelfClosing())
+ if (childNodes.isEmpty() && tag.isSelfClosing()) {
accum.append(" />");
- else
+ } else {
accum.append(">");
+ }
}
- void outerHtmlTail(StringBuilder accum, int depth, Document.OutputSettings out) {
+ @Override
+ void outerHtmlTail(StringBuilder accum, int depth,
+ Document.OutputSettings out) {
if (!(childNodes.isEmpty() && tag.isSelfClosing())) {
- if (out.prettyPrint() && !childNodes.isEmpty() && tag.formatAsBlock())
+ if (out.prettyPrint() && !childNodes.isEmpty()
+ && tag.formatAsBlock()) {
indent(accum, depth, out);
+ }
accum.append("</").append(tagName()).append(">");
}
}
/**
- * Retrieves the element's inner HTML. E.g. on a {@code <div>} with one empty {@code <p>}, would return
- * {@code <p></p>}. (Whereas {@link #outerHtml()} would return {@code <div><p></p></div>}.)
+ * Retrieves the element's inner HTML.
+ * E.g. on a {@code <div>} with one empty {@code <p>}, this would return
+ * {@code <p></p>}.
+ * (Whereas {@link #outerHtml()} would return
+ * {@code <div><p></p></div>}.)
*
* @return String of HTML.
* @see #outerHtml()
*/
public String html() {
StringBuilder accum = new StringBuilder();
- html(accum);
+ html(accum);
return accum.toString().trim();
}
private void html(StringBuilder accum) {
- for (Node node : childNodes)
+ for (Node node : childNodes) {
node.outerHtml(accum);
+ }
}
-
+
/**
* Set this element's inner HTML. Clears the existing HTML first.
- * @param html HTML to parse and set into this element
+ *
+ * @param html
+ * HTML to parse and set into this element
* @return this element
* @see #append(String)
*/
@@ -1093,6 +1327,7 @@ public class Element extends Node {
return this;
}
+ @Override
public String toString() {
return outerHtml();
}
@@ -1113,7 +1348,8 @@ public class Element extends Node {
@Override
public Element clone() {
Element clone = (Element) super.clone();
- clone.classNames(); // creates linked set of class names from class attribute
+ clone.classNames(); // creates linked set of class names from class
+ // attribute
return clone;
}
}
diff --git a/server/src/org/jsoup/nodes/Entities.java b/server/src/org/jsoup/nodes/Entities.java
index 0ae83e1fc0..24b50d7344 100644
--- a/server/src/org/jsoup/nodes/Entities.java
+++ b/server/src/org/jsoup/nodes/Entities.java
@@ -3,18 +3,24 @@ package org.jsoup.nodes;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.CharsetEncoder;
-import java.util.*;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.MissingResourceException;
+import java.util.Properties;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
- * HTML entities, and escape routines.
- * Source: <a href="http://www.w3.org/TR/html5/named-character-references.html#named-character-references">W3C HTML
- * named character references</a>.
+ * HTML entities, and escape routines. Source: <a href=
+ * "http://www.w3.org/TR/html5/named-character-references.html#named-character-references"
+ * >W3C HTML named character references</a>.
*/
public class Entities {
public enum EscapeMode {
- /** Restricted entities suitable for XHTML output: lt, gt, amp, apos, and quot only. */
+ /**
+ * Restricted entities suitable for XHTML output: lt, gt, amp, apos, and
+ * quot only.
+ */
xhtml(xhtmlByVal),
/** Default HTML output entities. */
base(baseByVal),
@@ -36,21 +42,26 @@ public class Entities {
private static final Map<Character, String> xhtmlByVal;
private static final Map<Character, String> baseByVal;
private static final Map<Character, String> fullByVal;
- private static final Pattern unescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);?");
- private static final Pattern strictUnescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);");
+ private static final Pattern unescapePattern = Pattern
+ .compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);?");
+ private static final Pattern strictUnescapePattern = Pattern
+ .compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);");
- private Entities() {}
+ private Entities() {
+ }
/**
* Check if the input is a known named entity
- * @param name the possible entity name (e.g. "lt" or "amp"
+ *
+ * @param name
+ * the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity
*/
public static boolean isNamedEntity(String name) {
return full.containsKey(name);
}
- /**
+/**
* Get the Character value of the named entity
* @param name named entity (e.g. "lt" or "amp")
* @return the Character value of the named entity (e.g. '<' or '&')
@@ -58,23 +69,25 @@ public class Entities {
public static Character getCharacterByName(String name) {
return full.get(name);
}
-
+
static String escape(String string, Document.OutputSettings out) {
return escape(string, out.encoder(), out.escapeMode());
}
- static String escape(String string, CharsetEncoder encoder, EscapeMode escapeMode) {
+ static String escape(String string, CharsetEncoder encoder,
+ EscapeMode escapeMode) {
StringBuilder accum = new StringBuilder(string.length() * 2);
Map<Character, String> map = escapeMode.getMap();
for (int pos = 0; pos < string.length(); pos++) {
Character c = string.charAt(pos);
- if (map.containsKey(c))
+ if (map.containsKey(c)) {
accum.append('&').append(map.get(c)).append(';');
- else if (encoder.canEncode(c))
+ } else if (encoder.canEncode(c)) {
accum.append(c.charValue());
- else
+ } else {
accum.append("&#").append((int) c).append(';');
+ }
}
return accum.toString();
@@ -86,39 +99,53 @@ public class Entities {
/**
* Unescape the input string.
+ *
* @param string
- * @param strict if "strict" (that is, requires trailing ';' char, otherwise that's optional)
+ * @param strict
+ * if "strict" (that is, requires trailing ';' char, otherwise
+ * that's optional)
* @return
*/
static String unescape(String string, boolean strict) {
// todo: change this method to use Tokeniser.consumeCharacterReference
- if (!string.contains("&"))
+ if (!string.contains("&")) {
return string;
+ }
- Matcher m = strict? strictUnescapePattern.matcher(string) : unescapePattern.matcher(string); // &(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]\\d*);?
- StringBuffer accum = new StringBuffer(string.length()); // pity matcher can't use stringbuilder, avoid syncs
- // todo: replace m.appendReplacement with own impl, so StringBuilder and quoteReplacement not required
+ Matcher m = strict ? strictUnescapePattern.matcher(string)
+ : unescapePattern.matcher(string); // &(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]\\d*);?
+ StringBuffer accum = new StringBuffer(string.length()); // pity matcher
+ // can't use
+ // stringbuilder,
+ // avoid syncs
+ // todo: replace m.appendReplacement with own impl, so StringBuilder and
+ // quoteReplacement not required
while (m.find()) {
int charval = -1;
String num = m.group(3);
if (num != null) {
try {
- int base = m.group(2) != null ? 16 : 10; // 2 is hex indicator
+ int base = m.group(2) != null ? 16 : 10; // 2 is hex
+ // indicator
charval = Integer.valueOf(num, base);
} catch (NumberFormatException e) {
} // skip
} else {
String name = m.group(1);
- if (full.containsKey(name))
+ if (full.containsKey(name)) {
charval = full.get(name);
+ }
}
if (charval != -1 || charval > 0xFFFF) { // out of range
String c = Character.toString((char) charval);
m.appendReplacement(accum, Matcher.quoteReplacement(c));
} else {
- m.appendReplacement(accum, Matcher.quoteReplacement(m.group(0))); // replace with original string
+ m.appendReplacement(accum, Matcher.quoteReplacement(m.group(0))); // replace
+ // with
+ // original
+ // string
}
}
m.appendTail(accum);
@@ -126,22 +153,23 @@ public class Entities {
}
// xhtml has restricted entities
- private static final Object[][] xhtmlArray = {
- {"quot", 0x00022},
- {"amp", 0x00026},
- {"apos", 0x00027},
- {"lt", 0x0003C},
- {"gt", 0x0003E}
- };
+ private static final Object[][] xhtmlArray = { { "quot", 0x00022 },
+ { "amp", 0x00026 }, { "apos", 0x00027 }, { "lt", 0x0003C },
+ { "gt", 0x0003E } };
static {
xhtmlByVal = new HashMap<Character, String>();
- baseByVal = toCharacterKey(loadEntities("entities-base.properties")); // most common / default
- full = loadEntities("entities-full.properties"); // extended and overblown.
+ baseByVal = toCharacterKey(loadEntities("entities-base.properties")); // most
+ // common
+ // /
+ // default
+ full = loadEntities("entities-full.properties"); // extended and
+ // overblown.
fullByVal = toCharacterKey(full);
for (Object[] entity : xhtmlArray) {
- Character c = Character.valueOf((char) ((Integer) entity[1]).intValue());
+ Character c = Character.valueOf((char) ((Integer) entity[1])
+ .intValue());
xhtmlByVal.put(c, ((String) entity[0]));
}
}
@@ -154,27 +182,32 @@ public class Entities {
properties.load(in);
in.close();
} catch (IOException e) {
- throw new MissingResourceException("Error loading entities resource: " + e.getMessage(), "Entities", filename);
+ throw new MissingResourceException(
+ "Error loading entities resource: " + e.getMessage(),
+ "Entities", filename);
}
- for (Map.Entry entry: properties.entrySet()) {
- Character val = Character.valueOf((char) Integer.parseInt((String) entry.getValue(), 16));
+ for (Map.Entry entry : properties.entrySet()) {
+ Character val = Character.valueOf((char) Integer.parseInt(
+ (String) entry.getValue(), 16));
String name = (String) entry.getKey();
entities.put(name, val);
}
return entities;
}
- private static Map<Character, String> toCharacterKey(Map<String, Character> inMap) {
+ private static Map<Character, String> toCharacterKey(
+ Map<String, Character> inMap) {
Map<Character, String> outMap = new HashMap<Character, String>();
- for (Map.Entry<String, Character> entry: inMap.entrySet()) {
+ for (Map.Entry<String, Character> entry : inMap.entrySet()) {
Character character = entry.getValue();
String name = entry.getKey();
if (outMap.containsKey(character)) {
// dupe, prefer the lower case version
- if (name.toLowerCase().equals(name))
+ if (name.toLowerCase().equals(name)) {
outMap.put(character, name);
+ }
} else {
outMap.put(character, name);
}
diff --git a/server/src/org/jsoup/nodes/Node.java b/server/src/org/jsoup/nodes/Node.java
index eb2b40ee73..72b8dcbd47 100644
--- a/server/src/org/jsoup/nodes/Node.java
+++ b/server/src/org/jsoup/nodes/Node.java
@@ -1,21 +1,23 @@
package org.jsoup.nodes;
-import org.jsoup.helper.StringUtil;
-import org.jsoup.helper.Validate;
-import org.jsoup.parser.Parser;
-import org.jsoup.select.NodeTraversor;
-import org.jsoup.select.NodeVisitor;
-
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
-/**
- The base, abstract Node model. Elements, Documents, Comments etc are all Node instances.
+import org.jsoup.helper.StringUtil;
+import org.jsoup.helper.Validate;
+import org.jsoup.parser.Parser;
+import org.jsoup.select.NodeTraversor;
+import org.jsoup.select.NodeVisitor;
- @author Jonathan Hedley, jonathan@hedley.net */
+/**
+ * The base, abstract Node model. Elements, Documents, Comments etc are all Node
+ * instances.
+ *
+ * @author Jonathan Hedley, jonathan@hedley.net
+ */
public abstract class Node implements Cloneable {
Node parentNode;
List<Node> childNodes;
@@ -24,14 +26,17 @@ public abstract class Node implements Cloneable {
int siblingIndex;
/**
- Create a new Node.
- @param baseUri base URI
- @param attributes attributes (not null, but may be empty)
+ * Create a new Node.
+ *
+ * @param baseUri
+ * base URI
+ * @param attributes
+ * attributes (not null, but may be empty)
*/
protected Node(String baseUri, Attributes attributes) {
Validate.notNull(baseUri);
Validate.notNull(attributes);
-
+
childNodes = new ArrayList<Node>(4);
this.baseUri = baseUri.trim();
this.attributes = attributes;
@@ -42,7 +47,8 @@ public abstract class Node implements Cloneable {
}
/**
- * Default constructor. Doesn't setup base uri, children, or attributes; use with caution.
+ * Default constructor. Doesn't setup base uri, children, or attributes; use
+ * with caution.
*/
protected Node() {
childNodes = Collections.emptyList();
@@ -50,18 +56,23 @@ public abstract class Node implements Cloneable {
}
/**
- Get the node name of this node. Use for debugging purposes and not logic switching (for that, use instanceof).
- @return node name
+ * Get the node name of this node. Use for debugging purposes and not logic
+ * switching (for that, use instanceof).
+ *
+ * @return node name
*/
public abstract String nodeName();
/**
* Get an attribute's value by its key.
* <p/>
- * To get an absolute URL from an attribute that may be a relative URL, prefix the key with <code><b>abs</b></code>,
- * which is a shortcut to the {@link #absUrl} method.
- * E.g.: <blockquote><code>String url = a.attr("abs:href");</code></blockquote>
- * @param attributeKey The attribute key.
+ * To get an absolute URL from an attribute that may be a relative URL,
+ * prefix the key with <code><b>abs</b></code>, which is a shortcut to the
+ * {@link #absUrl} method. E.g.: <blockquote>
+ * <code>String url = a.attr("abs:href");</code></blockquote>
+ *
+ * @param attributeKey
+ * The attribute key.
* @return The attribute, or empty string if not present (to avoid nulls).
* @see #attributes()
* @see #hasAttr(String)
@@ -70,25 +81,33 @@ public abstract class Node implements Cloneable {
public String attr(String attributeKey) {
Validate.notNull(attributeKey);
- if (attributes.hasKey(attributeKey))
+ if (attributes.hasKey(attributeKey)) {
return attributes.get(attributeKey);
- else if (attributeKey.toLowerCase().startsWith("abs:"))
+ } else if (attributeKey.toLowerCase().startsWith("abs:")) {
return absUrl(attributeKey.substring("abs:".length()));
- else return "";
+ } else {
+ return "";
+ }
}
/**
* Get all of the element's attributes.
- * @return attributes (which implements iterable, in same order as presented in original HTML).
+ *
+ * @return attributes (which implements iterable, in same order as presented
+ * in original HTML).
*/
public Attributes attributes() {
return attributes;
}
/**
- * Set an attribute (key=value). If the attribute already exists, it is replaced.
- * @param attributeKey The attribute key.
- * @param attributeValue The attribute value.
+ * Set an attribute (key=value). If the attribute already exists, it is
+ * replaced.
+ *
+ * @param attributeKey
+ * The attribute key.
+ * @param attributeValue
+ * The attribute value.
* @return this (for chaining)
*/
public Node attr(String attributeKey, String attributeValue) {
@@ -98,7 +117,9 @@ public abstract class Node implements Cloneable {
/**
* Test if this element has an attribute.
- * @param attributeKey The attribute key to check.
+ *
+ * @param attributeKey
+ * The attribute key to check.
* @return true if the attribute exists, false if not.
*/
public boolean hasAttr(String attributeKey) {
@@ -106,15 +127,18 @@ public abstract class Node implements Cloneable {
if (attributeKey.toLowerCase().startsWith("abs:")) {
String key = attributeKey.substring("abs:".length());
- if (attributes.hasKey(key) && !absUrl(key).equals(""))
+ if (attributes.hasKey(key) && !absUrl(key).equals("")) {
return true;
+ }
}
return attributes.hasKey(attributeKey);
}
/**
* Remove an attribute from this element.
- * @param attributeKey The attribute to remove.
+ *
+ * @param attributeKey
+ * The attribute to remove.
* @return this (for chaining)
*/
public Node removeAttr(String attributeKey) {
@@ -124,47 +148,56 @@ public abstract class Node implements Cloneable {
}
/**
- Get the base URI of this node.
- @return base URI
+ * Get the base URI of this node.
+ *
+ * @return base URI
*/
public String baseUri() {
return baseUri;
}
/**
- Update the base URI of this node and all of its descendants.
- @param baseUri base URI to set
+ * Update the base URI of this node and all of its descendants.
+ *
+ * @param baseUri
+ * base URI to set
*/
public void setBaseUri(final String baseUri) {
Validate.notNull(baseUri);
traverse(new NodeVisitor() {
+ @Override
public void head(Node node, int depth) {
node.baseUri = baseUri;
}
+ @Override
public void tail(Node node, int depth) {
}
});
}
/**
- * Get an absolute URL from a URL attribute that may be relative (i.e. an <code>&lt;a href></code> or
- * <code>&lt;img src></code>).
+ * Get an absolute URL from a URL attribute that may be relative (i.e. an
+ * <code>&lt;a href></code> or <code>&lt;img src></code>).
* <p/>
* E.g.: <code>String absUrl = linkEl.absUrl("href");</code>
* <p/>
- * If the attribute value is already absolute (i.e. it starts with a protocol, like
- * <code>http://</code> or <code>https://</code> etc), and it successfully parses as a URL, the attribute is
- * returned directly. Otherwise, it is treated as a URL relative to the element's {@link #baseUri}, and made
- * absolute using that.
+ * If the attribute value is already absolute (i.e. it starts with a
+ * protocol, like <code>http://</code> or <code>https://</code> etc), and it
+ * successfully parses as a URL, the attribute is returned directly.
+ * Otherwise, it is treated as a URL relative to the element's
+ * {@link #baseUri}, and made absolute using that.
* <p/>
- * As an alternate, you can use the {@link #attr} method with the <code>abs:</code> prefix, e.g.:
+ * As an alternate, you can use the {@link #attr} method with the
+ * <code>abs:</code> prefix, e.g.:
* <code>String absUrl = linkEl.attr("abs:href");</code>
- *
- * @param attributeKey The attribute key
- * @return An absolute URL if one could be made, or an empty string (not null) if the attribute was missing or
- * could not be made successfully into a URL.
+ *
+ * @param attributeKey
+ * The attribute key
+ * @return An absolute URL if one could be made, or an empty string (not
+ * null) if the attribute was missing or could not be made
+ * successfully into a URL.
* @see #attr
* @see java.net.URL#URL(java.net.URL, String)
*/
@@ -180,13 +213,16 @@ public abstract class Node implements Cloneable {
try {
base = new URL(baseUri);
} catch (MalformedURLException e) {
- // the base is unsuitable, but the attribute may be abs on its own, so try that
+ // the base is unsuitable, but the attribute may be abs on
+ // its own, so try that
URL abs = new URL(relUrl);
return abs.toExternalForm();
}
- // workaround: java resolves '//path/file + ?foo' to '//path/?foo', not '//path/file?foo' as desired
- if (relUrl.startsWith("?"))
+ // workaround: java resolves '//path/file + ?foo' to
+ // '//path/?foo', not '//path/file?foo' as desired
+ if (relUrl.startsWith("?")) {
relUrl = base.getPath() + relUrl;
+ }
URL abs = new URL(base, relUrl);
return abs.toExternalForm();
} catch (MalformedURLException e) {
@@ -196,50 +232,58 @@ public abstract class Node implements Cloneable {
}
/**
- Get a child node by index
- @param index index of child node
- @return the child node at this index.
+ * Get a child node by index
+ *
+ * @param index
+ * index of child node
+ * @return the child node at this index.
*/
public Node childNode(int index) {
return childNodes.get(index);
}
/**
- Get this node's children. Presented as an unmodifiable list: new children can not be added, but the child nodes
- themselves can be manipulated.
- @return list of children. If no children, returns an empty list.
+ * Get this node's children. Presented as an unmodifiable list: new children
+ * can not be added, but the child nodes themselves can be manipulated.
+ *
+ * @return list of children. If no children, returns an empty list.
*/
public List<Node> childNodes() {
return Collections.unmodifiableList(childNodes);
}
-
+
protected Node[] childNodesAsArray() {
return childNodes.toArray(new Node[childNodes().size()]);
}
/**
- Gets this node's parent node.
- @return parent node; or null if no parent.
+ * Gets this node's parent node.
+ *
+ * @return parent node; or null if no parent.
*/
public Node parent() {
return parentNode;
}
-
+
/**
- * Gets the Document associated with this Node.
- * @return the Document associated with this Node, or null if there is no such Document.
+ * Gets the Document associated with this Node.
+ *
+ * @return the Document associated with this Node, or null if there is no
+ * such Document.
*/
public Document ownerDocument() {
- if (this instanceof Document)
+ if (this instanceof Document) {
return (Document) this;
- else if (parentNode == null)
+ } else if (parentNode == null) {
return null;
- else
+ } else {
return parentNode.ownerDocument();
+ }
}
-
+
/**
- * Remove (delete) this node from the DOM tree. If this node has children, they are also removed.
+ * Remove (delete) this node from the DOM tree. If this node has children,
+ * they are also removed.
*/
public void remove() {
Validate.notNull(parentNode);
@@ -247,8 +291,11 @@ public abstract class Node implements Cloneable {
}
/**
- * Insert the specified HTML into the DOM before this node (i.e. as a preceding sibling).
- * @param html HTML to add before this node
+ * Insert the specified HTML into the DOM before this node (i.e. as a
+ * preceding sibling).
+ *
+ * @param html
+ * HTML to add before this node
* @return this node, for chaining
* @see #after(String)
*/
@@ -258,8 +305,11 @@ public abstract class Node implements Cloneable {
}
/**
- * Insert the specified node into the DOM before this node (i.e. as a preceding sibling).
- * @param node to add before this node
+ * Insert the specified node into the DOM before this node (i.e. as a
+ * preceding sibling).
+ *
+ * @param node
+ * to add before this node
* @return this node, for chaining
* @see #after(Node)
*/
@@ -272,19 +322,25 @@ public abstract class Node implements Cloneable {
}
/**
- * Insert the specified HTML into the DOM after this node (i.e. as a following sibling).
- * @param html HTML to add after this node
+ * Insert the specified HTML into the DOM after this node (i.e. as a
+ * following sibling).
+ *
+ * @param html
+ * HTML to add after this node
* @return this node, for chaining
* @see #before(String)
*/
public Node after(String html) {
- addSiblingHtml(siblingIndex()+1, html);
+ addSiblingHtml(siblingIndex() + 1, html);
return this;
}
/**
- * Insert the specified node into the DOM after this node (i.e. as a following sibling).
- * @param node to add after this node
+ * Insert the specified node into the DOM after this node (i.e. as a
+ * following sibling).
+ *
+ * @param node
+ * to add after this node
* @return this node, for chaining
* @see #before(Node)
*/
@@ -292,7 +348,7 @@ public abstract class Node implements Cloneable {
Validate.notNull(node);
Validate.notNull(parentNode);
- parentNode.addChildren(siblingIndex()+1, node);
+ parentNode.addChildren(siblingIndex() + 1, node);
return this;
}
@@ -300,31 +356,39 @@ public abstract class Node implements Cloneable {
Validate.notNull(html);
Validate.notNull(parentNode);
- Element context = parent() instanceof Element ? (Element) parent() : null;
+ Element context = parent() instanceof Element ? (Element) parent()
+ : null;
List<Node> nodes = Parser.parseFragment(html, context, baseUri());
parentNode.addChildren(index, nodes.toArray(new Node[nodes.size()]));
}
/**
- Wrap the supplied HTML around this node.
- @param html HTML to wrap around this element, e.g. {@code <div class="head"></div>}. Can be arbitrarily deep.
- @return this node, for chaining.
+ * Wrap the supplied HTML around this node.
+ *
+ * @param html
+ * HTML to wrap around this element, e.g.
+ * {@code <div class="head"></div>}. Can be arbitrarily deep.
+ * @return this node, for chaining.
*/
public Node wrap(String html) {
Validate.notEmpty(html);
- Element context = parent() instanceof Element ? (Element) parent() : null;
- List<Node> wrapChildren = Parser.parseFragment(html, context, baseUri());
+ Element context = parent() instanceof Element ? (Element) parent()
+ : null;
+ List<Node> wrapChildren = Parser
+ .parseFragment(html, context, baseUri());
Node wrapNode = wrapChildren.get(0);
- if (wrapNode == null || !(wrapNode instanceof Element)) // nothing to wrap with; noop
+ if (wrapNode == null || !(wrapNode instanceof Element)) {
return null;
+ }
Element wrap = (Element) wrapNode;
Element deepest = getDeepChild(wrap);
parentNode.replaceChild(this, wrap);
deepest.addChildren(this);
- // remainder (unbalanced wrap, like <div></div><p></p> -- The <p> is remainder
+ // remainder (unbalanced wrap, like <div></div><p></p> -- The <p> is
+ // remainder
if (wrapChildren.size() > 0) {
for (int i = 0; i < wrapChildren.size(); i++) {
Node remainder = wrapChildren.get(i);
@@ -336,15 +400,19 @@ public abstract class Node implements Cloneable {
}
/**
- * Removes this node from the DOM, and moves its children up into the node's parent. This has the effect of dropping
- * the node but keeping its children.
+ * Removes this node from the DOM, and moves its children up into the node's
+ * parent. This has the effect of dropping the node but keeping its
+ * children.
* <p/>
* For example, with the input html:<br/>
* {@code <div>One <span>Two <b>Three</b></span></div>}<br/>
- * Calling {@code element.unwrap()} on the {@code span} element will result in the html:<br/>
+ * Calling {@code element.unwrap()} on the {@code span} element will result
+ * in the html:<br/>
* {@code <div>One Two <b>Three</b></div>}<br/>
* and the {@code "Two "} {@link TextNode} being returned.
- * @return the first child of this node, after the node has been unwrapped. Null if the node had no children.
+ *
+ * @return the first child of this node, after the node has been unwrapped.
+ * Null if the node had no children.
* @see #remove()
* @see #wrap(String)
*/
@@ -353,23 +421,26 @@ public abstract class Node implements Cloneable {
int index = siblingIndex;
Node firstChild = childNodes.size() > 0 ? childNodes.get(0) : null;
- parentNode.addChildren(index, this.childNodesAsArray());
- this.remove();
+ parentNode.addChildren(index, childNodesAsArray());
+ remove();
return firstChild;
}
private Element getDeepChild(Element el) {
List<Element> children = el.children();
- if (children.size() > 0)
+ if (children.size() > 0) {
return getDeepChild(children.get(0));
- else
+ } else {
return el;
+ }
}
-
+
/**
* Replace this node in the DOM with the supplied node.
- * @param in the node that will will replace the existing node.
+ *
+ * @param in
+ * the node that will will replace the existing node.
*/
public void replaceWith(Node in) {
Validate.notNull(in);
@@ -378,17 +449,19 @@ public abstract class Node implements Cloneable {
}
protected void setParentNode(Node parentNode) {
- if (this.parentNode != null)
+ if (this.parentNode != null) {
this.parentNode.removeChild(this);
+ }
this.parentNode = parentNode;
}
protected void replaceChild(Node out, Node in) {
Validate.isTrue(out.parentNode == this);
Validate.notNull(in);
- if (in.parentNode != null)
+ if (in.parentNode != null) {
in.parentNode.removeChild(in);
-
+ }
+
Integer index = out.siblingIndex();
childNodes.set(index, in);
in.parentNode = this;
@@ -405,11 +478,12 @@ public abstract class Node implements Cloneable {
}
protected void addChildren(Node... children) {
- //most used. short circuit addChildren(int), which hits reindex children and array copy
- for (Node child: children) {
+ // most used. short circuit addChildren(int), which hits reindex
+ // children and array copy
+ for (Node child : children) {
reparentChild(child);
childNodes.add(child);
- child.setSiblingIndex(childNodes.size()-1);
+ child.setSiblingIndex(childNodes.size() - 1);
}
}
@@ -424,85 +498,100 @@ public abstract class Node implements Cloneable {
}
private void reparentChild(Node child) {
- if (child.parentNode != null)
+ if (child.parentNode != null) {
child.parentNode.removeChild(child);
+ }
child.setParentNode(this);
}
-
+
private void reindexChildren() {
for (int i = 0; i < childNodes.size(); i++) {
childNodes.get(i).setSiblingIndex(i);
}
}
-
+
/**
- Retrieves this node's sibling nodes. Similar to {@link #childNodes() node.parent.childNodes()}, but does not
- include this node (a node is not a sibling of itself).
- @return node siblings. If the node has no parent, returns an empty list.
+ * Retrieves this node's sibling nodes. Similar to {@link #childNodes()
+ * node.parent.childNodes()}, but does not include this node (a node is not
+ * a sibling of itself).
+ *
+ * @return node siblings. If the node has no parent, returns an empty list.
*/
public List<Node> siblingNodes() {
- if (parentNode == null)
+ if (parentNode == null) {
return Collections.emptyList();
+ }
List<Node> nodes = parentNode.childNodes;
List<Node> siblings = new ArrayList<Node>(nodes.size() - 1);
- for (Node node: nodes)
- if (node != this)
+ for (Node node : nodes) {
+ if (node != this) {
siblings.add(node);
+ }
+ }
return siblings;
}
/**
- Get this node's next sibling.
- @return next sibling, or null if this is the last sibling
+ * Get this node's next sibling.
+ *
+ * @return next sibling, or null if this is the last sibling
*/
public Node nextSibling() {
- if (parentNode == null)
+ if (parentNode == null) {
return null; // root
-
+ }
+
List<Node> siblings = parentNode.childNodes;
Integer index = siblingIndex();
Validate.notNull(index);
- if (siblings.size() > index+1)
- return siblings.get(index+1);
- else
+ if (siblings.size() > index + 1) {
+ return siblings.get(index + 1);
+ } else {
return null;
+ }
}
/**
- Get this node's previous sibling.
- @return the previous sibling, or null if this is the first sibling
+ * Get this node's previous sibling.
+ *
+ * @return the previous sibling, or null if this is the first sibling
*/
public Node previousSibling() {
- if (parentNode == null)
+ if (parentNode == null) {
return null; // root
+ }
List<Node> siblings = parentNode.childNodes;
Integer index = siblingIndex();
Validate.notNull(index);
- if (index > 0)
- return siblings.get(index-1);
- else
+ if (index > 0) {
+ return siblings.get(index - 1);
+ } else {
return null;
+ }
}
/**
- * Get the list index of this node in its node sibling list. I.e. if this is the first node
- * sibling, returns 0.
+ * Get the list index of this node in its node sibling list. I.e. if this is
+ * the first node sibling, returns 0.
+ *
* @return position in node sibling list
* @see org.jsoup.nodes.Element#elementSiblingIndex()
*/
public int siblingIndex() {
return siblingIndex;
}
-
+
protected void setSiblingIndex(int siblingIndex) {
this.siblingIndex = siblingIndex;
}
/**
* Perform a depth-first traversal through this node and its descendants.
- * @param nodeVisitor the visitor callbacks to perform on each node
+ *
+ * @param nodeVisitor
+ * the visitor callbacks to perform on each node
* @return this node, for chaining
*/
public Node traverse(NodeVisitor nodeVisitor) {
@@ -513,8 +602,9 @@ public abstract class Node implements Cloneable {
}
/**
- Get the outer HTML of this node.
- @return HTML
+ * Get the outer HTML of this node.
+ *
+ * @return HTML
*/
public String outerHtml() {
StringBuilder accum = new StringBuilder(128);
@@ -523,34 +613,47 @@ public abstract class Node implements Cloneable {
}
protected void outerHtml(StringBuilder accum) {
- new NodeTraversor(new OuterHtmlVisitor(accum, getOutputSettings())).traverse(this);
+ new NodeTraversor(new OuterHtmlVisitor(accum, getOutputSettings()))
+ .traverse(this);
}
- // if this node has no document (or parent), retrieve the default output settings
+ // if this node has no document (or parent), retrieve the default output
+ // settings
private Document.OutputSettings getOutputSettings() {
- return ownerDocument() != null ? ownerDocument().outputSettings() : (new Document("")).outputSettings();
+ return ownerDocument() != null ? ownerDocument().outputSettings()
+ : (new Document("")).outputSettings();
}
/**
- Get the outer HTML of this node.
- @param accum accumulator to place HTML into
+ * Get the outer HTML of this node.
+ *
+ * @param accum
+ * accumulator to place HTML into
*/
- abstract void outerHtmlHead(StringBuilder accum, int depth, Document.OutputSettings out);
+ abstract void outerHtmlHead(StringBuilder accum, int depth,
+ Document.OutputSettings out);
- abstract void outerHtmlTail(StringBuilder accum, int depth, Document.OutputSettings out);
+ abstract void outerHtmlTail(StringBuilder accum, int depth,
+ Document.OutputSettings out);
+ @Override
public String toString() {
return outerHtml();
}
- protected void indent(StringBuilder accum, int depth, Document.OutputSettings out) {
- accum.append("\n").append(StringUtil.padding(depth * out.indentAmount()));
+ protected void indent(StringBuilder accum, int depth,
+ Document.OutputSettings out) {
+ accum.append("\n").append(
+ StringUtil.padding(depth * out.indentAmount()));
}
@Override
public boolean equals(Object o) {
- if (this == o) return true;
- // todo: have nodes hold a child index, compare against that and parent (not children)
+ if (this == o) {
+ return true;
+ }
+ // todo: have nodes hold a child index, compare against that and parent
+ // (not children)
return false;
}
@@ -563,11 +666,14 @@ public abstract class Node implements Cloneable {
}
/**
- * Create a stand-alone, deep copy of this node, and all of its children. The cloned node will have no siblings or
- * parent node. As a stand-alone object, any changes made to the clone or any of its children will not impact the
- * original node.
+ * Create a stand-alone, deep copy of this node, and all of its children.
+ * The cloned node will have no siblings or parent node. As a stand-alone
+ * object, any changes made to the clone or any of its children will not
+ * impact the original node.
* <p>
- * The cloned node may be adopted into another Document or node structure using {@link Element#appendChild(Node)}.
+ * The cloned node may be adopted into another Document or node structure
+ * using {@link Element#appendChild(Node)}.
+ *
* @return stand-alone cloned node
*/
@Override
@@ -588,8 +694,11 @@ public abstract class Node implements Cloneable {
clone.attributes = attributes != null ? attributes.clone() : null;
clone.baseUri = baseUri;
clone.childNodes = new ArrayList<Node>(childNodes.size());
- for (Node child: childNodes)
- clone.childNodes.add(child.doClone(clone)); // clone() creates orphans, doClone() keeps parent
+ for (Node child : childNodes) {
+ clone.childNodes.add(child.doClone(clone)); // clone() creates
+ // orphans, doClone()
+ // keeps parent
+ }
return clone;
}
@@ -603,13 +712,16 @@ public abstract class Node implements Cloneable {
this.out = out;
}
+ @Override
public void head(Node node, int depth) {
node.outerHtmlHead(accum, depth, out);
}
+ @Override
public void tail(Node node, int depth) {
- if (!node.nodeName().equals("#text")) // saves a void hit.
+ if (!node.nodeName().equals("#text")) {
node.outerHtmlTail(accum, depth, out);
+ }
}
}
}
diff --git a/server/src/org/jsoup/nodes/TextNode.java b/server/src/org/jsoup/nodes/TextNode.java
index 9fd0feac8f..594e38593e 100644
--- a/server/src/org/jsoup/nodes/TextNode.java
+++ b/server/src/org/jsoup/nodes/TextNode.java
@@ -4,111 +4,142 @@ import org.jsoup.helper.StringUtil;
import org.jsoup.helper.Validate;
/**
- A text node.
-
- @author Jonathan Hedley, jonathan@hedley.net */
+ * A text node.
+ *
+ * @author Jonathan Hedley, jonathan@hedley.net
+ */
public class TextNode extends Node {
/*
- TextNode is a node, and so by default comes with attributes and children. The attributes are seldom used, but use
- memory, and the child nodes are never used. So we don't have them, and override accessors to attributes to create
- them as needed on the fly.
+ * TextNode is a node, and so by default comes with attributes and children.
+ * The attributes are seldom used, but use memory, and the child nodes are
+ * never used. So we don't have them, and override accessors to attributes
+ * to create them as needed on the fly.
*/
private static final String TEXT_KEY = "text";
String text;
/**
- Create a new TextNode representing the supplied (unencoded) text).
-
- @param text raw text
- @param baseUri base uri
- @see #createFromEncoded(String, String)
+ * Create a new TextNode representing the supplied (unencoded) text).
+ *
+ * @param text
+ * raw text
+ * @param baseUri
+ * base uri
+ * @see #createFromEncoded(String, String)
*/
public TextNode(String text, String baseUri) {
this.baseUri = baseUri;
this.text = text;
}
+ @Override
public String nodeName() {
return "#text";
}
-
+
/**
* Get the text content of this text node.
+ *
* @return Unencoded, normalised text.
* @see TextNode#getWholeText()
*/
public String text() {
return normaliseWhitespace(getWholeText());
}
-
+
/**
* Set the text content of this text node.
- * @param text unencoded text
+ *
+ * @param text
+ * unencoded text
* @return this, for chaining
*/
public TextNode text(String text) {
this.text = text;
- if (attributes != null)
+ if (attributes != null) {
attributes.put(TEXT_KEY, text);
+ }
return this;
}
/**
- Get the (unencoded) text of this text node, including any newlines and spaces present in the original.
- @return text
+ * Get the (unencoded) text of this text node, including any newlines and
+ * spaces present in the original.
+ *
+ * @return text
*/
public String getWholeText() {
return attributes == null ? text : attributes.get(TEXT_KEY);
}
/**
- Test if this text node is blank -- that is, empty or only whitespace (including newlines).
- @return true if this document is empty or only whitespace, false if it contains any text content.
+ * Test if this text node is blank -- that is, empty or only whitespace
+ * (including newlines).
+ *
+ * @return true if this document is empty or only whitespace, false if it
+ * contains any text content.
*/
public boolean isBlank() {
return StringUtil.isBlank(getWholeText());
}
/**
- * Split this text node into two nodes at the specified string offset. After splitting, this node will contain the
- * original text up to the offset, and will have a new text node sibling containing the text after the offset.
- * @param offset string offset point to split node at.
+ * Split this text node into two nodes at the specified string offset. After
+ * splitting, this node will contain the original text up to the offset, and
+ * will have a new text node sibling containing the text after the offset.
+ *
+ * @param offset
+ * string offset point to split node at.
* @return the newly created text node containing the text after the offset.
*/
public TextNode splitText(int offset) {
Validate.isTrue(offset >= 0, "Split offset must be not be negative");
- Validate.isTrue(offset < text.length(), "Split offset must not be greater than current text length");
+ Validate.isTrue(offset < text.length(),
+ "Split offset must not be greater than current text length");
String head = getWholeText().substring(0, offset);
String tail = getWholeText().substring(offset);
text(head);
- TextNode tailNode = new TextNode(tail, this.baseUri());
- if (parent() != null)
- parent().addChildren(siblingIndex()+1, tailNode);
+ TextNode tailNode = new TextNode(tail, baseUri());
+ if (parent() != null) {
+ parent().addChildren(siblingIndex() + 1, tailNode);
+ }
return tailNode;
}
- void outerHtmlHead(StringBuilder accum, int depth, Document.OutputSettings out) {
+ @Override
+ void outerHtmlHead(StringBuilder accum, int depth,
+ Document.OutputSettings out) {
String html = Entities.escape(getWholeText(), out);
- if (out.prettyPrint() && parent() instanceof Element && !((Element) parent()).preserveWhitespace()) {
+ if (out.prettyPrint() && parent() instanceof Element
+ && !((Element) parent()).preserveWhitespace()) {
html = normaliseWhitespace(html);
}
- if (out.prettyPrint() && siblingIndex() == 0 && parentNode instanceof Element && ((Element) parentNode).tag().formatAsBlock() && !isBlank())
+ if (out.prettyPrint() && siblingIndex() == 0
+ && parentNode instanceof Element
+ && ((Element) parentNode).tag().formatAsBlock() && !isBlank()) {
indent(accum, depth, out);
+ }
accum.append(html);
}
- void outerHtmlTail(StringBuilder accum, int depth, Document.OutputSettings out) {}
+ @Override
+ void outerHtmlTail(StringBuilder accum, int depth,
+ Document.OutputSettings out) {
+ }
+ @Override
public String toString() {
return outerHtml();
}
/**
* Create a new TextNode from HTML encoded (aka escaped) data.
- * @param encodedText Text containing encoded HTML (e.g. &amp;lt;)
+ *
+ * @param encodedText
+ * Text containing encoded HTML (e.g. &amp;lt;)
* @return TextNode containing unencoded data (e.g. &lt;)
*/
public static TextNode createFromEncoded(String encodedText, String baseUri) {
diff --git a/server/src/org/jsoup/nodes/XmlDeclaration.java b/server/src/org/jsoup/nodes/XmlDeclaration.java
index 80d4a0152f..ce6ac678a5 100644
--- a/server/src/org/jsoup/nodes/XmlDeclaration.java
+++ b/server/src/org/jsoup/nodes/XmlDeclaration.java
@@ -1,47 +1,60 @@
package org.jsoup.nodes;
/**
- An XML Declaration.
-
- @author Jonathan Hedley, jonathan@hedley.net */
+ * An XML Declaration.
+ *
+ * @author Jonathan Hedley, jonathan@hedley.net
+ */
public class XmlDeclaration extends Node {
private static final String DECL_KEY = "declaration";
- private final boolean isProcessingInstruction; // <! if true, <? if false, declaration (and last data char should be ?)
+ private final boolean isProcessingInstruction; // <! if true, <? if false,
+ // declaration (and last data
+ // char should be ?)
/**
- Create a new XML declaration
- @param data data
- @param baseUri base uri
- @param isProcessingInstruction is processing instruction
+ * Create a new XML declaration
+ *
+ * @param data
+ * data
+ * @param baseUri
+ * base uri
+ * @param isProcessingInstruction
+ * is processing instruction
*/
- public XmlDeclaration(String data, String baseUri, boolean isProcessingInstruction) {
+ public XmlDeclaration(String data, String baseUri,
+ boolean isProcessingInstruction) {
super(baseUri);
attributes.put(DECL_KEY, data);
this.isProcessingInstruction = isProcessingInstruction;
}
+ @Override
public String nodeName() {
return "#declaration";
}
/**
- Get the unencoded XML declaration.
- @return XML declaration
+ * Get the unencoded XML declaration.
+ *
+ * @return XML declaration
*/
public String getWholeDeclaration() {
return attributes.get(DECL_KEY);
}
- void outerHtmlHead(StringBuilder accum, int depth, Document.OutputSettings out) {
- accum
- .append("<")
- .append(isProcessingInstruction ? "!" : "?")
- .append(getWholeDeclaration())
- .append(">");
+ @Override
+ void outerHtmlHead(StringBuilder accum, int depth,
+ Document.OutputSettings out) {
+ accum.append("<").append(isProcessingInstruction ? "!" : "?")
+ .append(getWholeDeclaration()).append(">");
}
- void outerHtmlTail(StringBuilder accum, int depth, Document.OutputSettings out) {}
+ @Override
+ void outerHtmlTail(StringBuilder accum, int depth,
+ Document.OutputSettings out) {
+ }
+ @Override
public String toString() {
return outerHtml();
}
diff --git a/server/src/org/jsoup/parser/CharacterReader.java b/server/src/org/jsoup/parser/CharacterReader.java
index b549a571a0..30fbca07f1 100644
--- a/server/src/org/jsoup/parser/CharacterReader.java
+++ b/server/src/org/jsoup/parser/CharacterReader.java
@@ -3,7 +3,7 @@ package org.jsoup.parser;
import org.jsoup.helper.Validate;
/**
- CharacterReader consumes tokens off a string. To replace the old TokenQueue.
+ * CharacterReader consumes tokens off a string. To replace the old TokenQueue.
*/
class CharacterReader {
static final char EOF = (char) -1;
@@ -15,10 +15,11 @@ class CharacterReader {
CharacterReader(String input) {
Validate.notNull(input);
- input = input.replaceAll("\r\n?", "\n"); // normalise carriage returns to newlines
+ input = input.replaceAll("\r\n?", "\n"); // normalise carriage returns
+ // to newlines
this.input = input;
- this.length = input.length();
+ length = input.length();
}
int pos() {
@@ -87,8 +88,9 @@ class CharacterReader {
OUTER: while (!isEmpty()) {
char c = input.charAt(pos);
for (char seek : seq) {
- if (seek == c)
+ if (seek == c) {
break OUTER;
+ }
}
pos++;
}
@@ -106,10 +108,11 @@ class CharacterReader {
int start = pos;
while (!isEmpty()) {
char c = input.charAt(pos);
- if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'))
+ if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')) {
pos++;
- else
+ } else {
break;
+ }
}
return input.substring(start, pos);
@@ -119,17 +122,19 @@ class CharacterReader {
int start = pos;
while (!isEmpty()) {
char c = input.charAt(pos);
- if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'))
+ if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')) {
pos++;
- else
+ } else {
break;
+ }
}
while (!isEmpty()) {
char c = input.charAt(pos);
- if (c >= '0' && c <= '9')
+ if (c >= '0' && c <= '9') {
pos++;
- else
+ } else {
break;
+ }
}
return input.substring(start, pos);
@@ -139,10 +144,12 @@ class CharacterReader {
int start = pos;
while (!isEmpty()) {
char c = input.charAt(pos);
- if ((c >= '0' && c <= '9') || (c >= 'A' && c <= 'F') || (c >= 'a' && c <= 'f'))
+ if ((c >= '0' && c <= '9') || (c >= 'A' && c <= 'F')
+ || (c >= 'a' && c <= 'f')) {
pos++;
- else
+ } else {
break;
+ }
}
return input.substring(start, pos);
}
@@ -151,10 +158,11 @@ class CharacterReader {
int start = pos;
while (!isEmpty()) {
char c = input.charAt(pos);
- if (c >= '0' && c <= '9')
+ if (c >= '0' && c <= '9') {
pos++;
- else
+ } else {
break;
+ }
}
return input.substring(start, pos);
}
@@ -173,27 +181,31 @@ class CharacterReader {
}
boolean matchesAny(char... seq) {
- if (isEmpty())
+ if (isEmpty()) {
return false;
+ }
char c = input.charAt(pos);
for (char seek : seq) {
- if (seek == c)
+ if (seek == c) {
return true;
+ }
}
return false;
}
boolean matchesLetter() {
- if (isEmpty())
+ if (isEmpty()) {
return false;
+ }
char c = input.charAt(pos);
return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
}
boolean matchesDigit() {
- if (isEmpty())
+ if (isEmpty()) {
return false;
+ }
char c = input.charAt(pos);
return (c >= '0' && c <= '9');
}
@@ -217,10 +229,12 @@ class CharacterReader {
}
boolean containsIgnoreCase(String seq) {
- // used to check presence of </title>, </style>. only finds consistent case.
+ // used to check presence of </title>, </style>. only finds consistent
+ // case.
String loScan = seq.toLowerCase();
String hiScan = seq.toUpperCase();
- return (input.indexOf(loScan, pos) > -1) || (input.indexOf(hiScan, pos) > -1);
+ return (input.indexOf(loScan, pos) > -1)
+ || (input.indexOf(hiScan, pos) > -1);
}
@Override
diff --git a/server/src/org/jsoup/parser/HtmlTreeBuilder.java b/server/src/org/jsoup/parser/HtmlTreeBuilder.java
index 457a4c3249..f09ab8794c 100644
--- a/server/src/org/jsoup/parser/HtmlTreeBuilder.java
+++ b/server/src/org/jsoup/parser/HtmlTreeBuilder.java
@@ -1,15 +1,20 @@
package org.jsoup.parser;
-import org.jsoup.helper.DescendableLinkedList;
-import org.jsoup.helper.StringUtil;
-import org.jsoup.helper.Validate;
-import org.jsoup.nodes.*;
-
import java.util.ArrayList;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
+import org.jsoup.helper.DescendableLinkedList;
+import org.jsoup.helper.StringUtil;
+import org.jsoup.helper.Validate;
+import org.jsoup.nodes.Comment;
+import org.jsoup.nodes.DataNode;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.nodes.Node;
+import org.jsoup.nodes.TextNode;
+
/**
* HTML Tree Builder; creates a DOM from Tokens.
*/
@@ -21,15 +26,26 @@ class HtmlTreeBuilder extends TreeBuilder {
private boolean baseUriSetFromDoc = false;
private Element headElement; // the current head element
private Element formElement; // the current form element
- private Element contextElement; // fragment parse context -- could be null even if fragment parsing
- private DescendableLinkedList<Element> formattingElements = new DescendableLinkedList<Element>(); // active (open) formatting elements
- private List<Token.Character> pendingTableCharacters = new ArrayList<Token.Character>(); // chars in table to be shifted out
+ private Element contextElement; // fragment parse context -- could be null
+ // even if fragment parsing
+ private DescendableLinkedList<Element> formattingElements = new DescendableLinkedList<Element>(); // active
+ // (open)
+ // formatting
+ // elements
+ private List<Token.Character> pendingTableCharacters = new ArrayList<Token.Character>(); // chars
+ // in
+ // table
+ // to
+ // be
+ // shifted
+ // out
private boolean framesetOk = true; // if ok to go into frameset
private boolean fosterInserts = false; // if next inserts should be fostered
private boolean fragmentParsing = false; // if parsing a fragment of html
- HtmlTreeBuilder() {}
+ HtmlTreeBuilder() {
+ }
@Override
Document parse(String input, String baseUri, ParseErrorList errors) {
@@ -37,7 +53,8 @@ class HtmlTreeBuilder extends TreeBuilder {
return super.parse(input, baseUri, errors);
}
- List<Node> parseFragment(String inputFragment, Element context, String baseUri, ParseErrorList errors) {
+ List<Node> parseFragment(String inputFragment, Element context,
+ String baseUri, ParseErrorList errors) {
// context may be null
state = HtmlTreeBuilderState.Initial;
initialiseParse(inputFragment, baseUri, errors);
@@ -46,42 +63,48 @@ class HtmlTreeBuilder extends TreeBuilder {
Element root = null;
if (context != null) {
- if (context.ownerDocument() != null) // quirks setup:
+ if (context.ownerDocument() != null) {
doc.quirksMode(context.ownerDocument().quirksMode());
+ }
// initialise the tokeniser state:
String contextTag = context.tagName();
- if (StringUtil.in(contextTag, "title", "textarea"))
+ if (StringUtil.in(contextTag, "title", "textarea")) {
tokeniser.transition(TokeniserState.Rcdata);
- else if (StringUtil.in(contextTag, "iframe", "noembed", "noframes", "style", "xmp"))
+ } else if (StringUtil.in(contextTag, "iframe", "noembed",
+ "noframes", "style", "xmp")) {
tokeniser.transition(TokeniserState.Rawtext);
- else if (contextTag.equals("script"))
+ } else if (contextTag.equals("script")) {
tokeniser.transition(TokeniserState.ScriptData);
- else if (contextTag.equals(("noscript")))
- tokeniser.transition(TokeniserState.Data); // if scripting enabled, rawtext
- else if (contextTag.equals("plaintext"))
+ } else if (contextTag.equals(("noscript"))) {
+ tokeniser.transition(TokeniserState.Data); // if scripting
+ // enabled, rawtext
+ } else if (contextTag.equals("plaintext")) {
tokeniser.transition(TokeniserState.Data);
- else
+ } else {
tokeniser.transition(TokeniserState.Data); // default
+ }
root = new Element(Tag.valueOf("html"), baseUri);
doc.appendChild(root);
stack.push(root);
resetInsertionMode();
- // todo: setup form element to nearest form on context (up ancestor chain)
+ // todo: setup form element to nearest form on context (up ancestor
+ // chain)
}
runParser();
- if (context != null)
+ if (context != null) {
return root.childNodes();
- else
+ } else {
return doc.childNodes();
+ }
}
@Override
protected boolean process(Token token) {
currentToken = token;
- return this.state.process(token, this);
+ return state.process(token, this);
}
boolean process(Token token, HtmlTreeBuilderState state) {
@@ -122,14 +145,17 @@ class HtmlTreeBuilder extends TreeBuilder {
}
void maybeSetBaseUri(Element base) {
- if (baseUriSetFromDoc) // only listen to the first <base href> in parse
+ if (baseUriSetFromDoc) {
return;
+ }
String href = base.absUrl("href");
if (href.length() != 0) { // ignore <base target> etc
baseUri = href;
baseUriSetFromDoc = true;
- doc.setBaseUri(href); // set on the doc so doc.createElement(Tag) will get updated base, and to update all descendants
+ doc.setBaseUri(href); // set on the doc so doc.createElement(Tag)
+ // will get updated base, and to update all
+ // descendants
}
}
@@ -138,20 +164,26 @@ class HtmlTreeBuilder extends TreeBuilder {
}
void error(HtmlTreeBuilderState state) {
- if (errors.canAddError())
- errors.add(new ParseError(reader.pos(), "Unexpected token [%s] when in state [%s]", currentToken.tokenType(), state));
+ if (errors.canAddError()) {
+ errors.add(new ParseError(reader.pos(),
+ "Unexpected token [%s] when in state [%s]", currentToken
+ .tokenType(), state));
+ }
}
Element insert(Token.StartTag startTag) {
// handle empty unknown tags
- // when the spec expects an empty tag, will directly hit insertEmpty, so won't generate fake end tag.
+ // when the spec expects an empty tag, will directly hit insertEmpty, so
+ // won't generate fake end tag.
if (startTag.isSelfClosing() && !Tag.isKnownTag(startTag.name())) {
Element el = insertEmpty(startTag);
- process(new Token.EndTag(el.tagName())); // ensure we get out of whatever state we are in
+ process(new Token.EndTag(el.tagName())); // ensure we get out of
+ // whatever state we are in
return el;
}
-
- Element el = new Element(Tag.valueOf(startTag.name()), baseUri, startTag.attributes);
+
+ Element el = new Element(Tag.valueOf(startTag.name()), baseUri,
+ startTag.attributes);
insert(el);
return el;
}
@@ -173,8 +205,9 @@ class HtmlTreeBuilder extends TreeBuilder {
insertNode(el);
if (startTag.isSelfClosing()) {
tokeniser.acknowledgeSelfClosingFlag();
- if (!tag.isKnownTag()) // unknown tag, remember this is self closing for output
+ if (!tag.isKnownTag()) {
tag.setSelfClosing();
+ }
}
return el;
}
@@ -187,29 +220,37 @@ class HtmlTreeBuilder extends TreeBuilder {
void insert(Token.Character characterToken) {
Node node;
// characters in script and style go in as datanodes, not text nodes
- if (StringUtil.in(currentElement().tagName(), "script", "style"))
+ if (StringUtil.in(currentElement().tagName(), "script", "style")) {
node = new DataNode(characterToken.getData(), baseUri);
- else
+ } else {
node = new TextNode(characterToken.getData(), baseUri);
- currentElement().appendChild(node); // doesn't use insertNode, because we don't foster these; and will always have a stack.
+ }
+ currentElement().appendChild(node); // doesn't use insertNode, because
+ // we don't foster these; and will
+ // always have a stack.
}
private void insertNode(Node node) {
- // if the stack hasn't been set up yet, elements (doctype, comments) go into the doc
- if (stack.size() == 0)
+ // if the stack hasn't been set up yet, elements (doctype, comments) go
+ // into the doc
+ if (stack.size() == 0) {
doc.appendChild(node);
- else if (isFosterInserts())
+ } else if (isFosterInserts()) {
insertInFosterParent(node);
- else
+ } else {
currentElement().appendChild(node);
+ }
}
Element pop() {
// todo - dev, remove validation check
- if (stack.peekLast().nodeName().equals("td") && !state.name().equals("InCell"))
+ if (stack.peekLast().nodeName().equals("td")
+ && !state.name().equals("InCell")) {
Validate.isFalse(true, "pop td not in cell");
- if (stack.peekLast().nodeName().equals("html"))
+ }
+ if (stack.peekLast().nodeName().equals("html")) {
Validate.isFalse(true, "popping html!");
+ }
return stack.pollLast();
}
@@ -225,7 +266,8 @@ class HtmlTreeBuilder extends TreeBuilder {
return isElementInQueue(stack, el);
}
- private boolean isElementInQueue(DescendableLinkedList<Element> queue, Element element) {
+ private boolean isElementInQueue(DescendableLinkedList<Element> queue,
+ Element element) {
Iterator<Element> it = queue.descendingIterator();
while (it.hasNext()) {
Element next = it.next();
@@ -313,10 +355,12 @@ class HtmlTreeBuilder extends TreeBuilder {
Iterator<Element> it = stack.descendingIterator();
while (it.hasNext()) {
Element next = it.next();
- if (StringUtil.in(next.nodeName(), nodeNames) || next.nodeName().equals("html"))
+ if (StringUtil.in(next.nodeName(), nodeNames)
+ || next.nodeName().equals("html")) {
break;
- else
+ } else {
it.remove();
+ }
}
}
@@ -335,14 +379,15 @@ class HtmlTreeBuilder extends TreeBuilder {
void insertOnStackAfter(Element after, Element in) {
int i = stack.lastIndexOf(after);
Validate.isTrue(i != -1);
- stack.add(i+1, in);
+ stack.add(i + 1, in);
}
void replaceOnStack(Element out, Element in) {
replaceInQueue(stack, out, in);
}
- private void replaceInQueue(LinkedList<Element> queue, Element out, Element in) {
+ private void replaceInQueue(LinkedList<Element> queue, Element out,
+ Element in) {
int i = queue.lastIndexOf(out);
Validate.isTrue(i != -1);
queue.remove(i);
@@ -368,7 +413,8 @@ class HtmlTreeBuilder extends TreeBuilder {
} else if ("tr".equals(name)) {
transition(HtmlTreeBuilderState.InRow);
break;
- } else if ("tbody".equals(name) || "thead".equals(name) || "tfoot".equals(name)) {
+ } else if ("tbody".equals(name) || "thead".equals(name)
+ || "tfoot".equals(name)) {
transition(HtmlTreeBuilderState.InTableBody);
break;
} else if ("caption".equals(name)) {
@@ -400,28 +446,35 @@ class HtmlTreeBuilder extends TreeBuilder {
}
// todo: tidy up in specific scope methods
- private boolean inSpecificScope(String targetName, String[] baseTypes, String[] extraTypes) {
- return inSpecificScope(new String[]{targetName}, baseTypes, extraTypes);
+ private boolean inSpecificScope(String targetName, String[] baseTypes,
+ String[] extraTypes) {
+ return inSpecificScope(new String[] { targetName }, baseTypes,
+ extraTypes);
}
- private boolean inSpecificScope(String[] targetNames, String[] baseTypes, String[] extraTypes) {
+ private boolean inSpecificScope(String[] targetNames, String[] baseTypes,
+ String[] extraTypes) {
Iterator<Element> it = stack.descendingIterator();
while (it.hasNext()) {
Element el = it.next();
String elName = el.nodeName();
- if (StringUtil.in(elName, targetNames))
+ if (StringUtil.in(elName, targetNames)) {
return true;
- if (StringUtil.in(elName, baseTypes))
+ }
+ if (StringUtil.in(elName, baseTypes)) {
return false;
- if (extraTypes != null && StringUtil.in(elName, extraTypes))
+ }
+ if (extraTypes != null && StringUtil.in(elName, extraTypes)) {
return false;
+ }
}
Validate.fail("Should not be reachable");
return false;
}
boolean inScope(String[] targetNames) {
- return inSpecificScope(targetNames, new String[]{"applet", "caption", "html", "table", "td", "th", "marquee", "object"}, null);
+ return inSpecificScope(targetNames, new String[] { "applet", "caption",
+ "html", "table", "td", "th", "marquee", "object" }, null);
}
boolean inScope(String targetName) {
@@ -429,21 +482,23 @@ class HtmlTreeBuilder extends TreeBuilder {
}
boolean inScope(String targetName, String[] extras) {
- return inSpecificScope(targetName, new String[]{"applet", "caption", "html", "table", "td", "th", "marquee", "object"}, extras);
+ return inSpecificScope(targetName, new String[] { "applet", "caption",
+ "html", "table", "td", "th", "marquee", "object" }, extras);
// todo: in mathml namespace: mi, mo, mn, ms, mtext annotation-xml
// todo: in svg namespace: forignOjbect, desc, title
}
boolean inListItemScope(String targetName) {
- return inScope(targetName, new String[]{"ol", "ul"});
+ return inScope(targetName, new String[] { "ol", "ul" });
}
boolean inButtonScope(String targetName) {
- return inScope(targetName, new String[]{"button"});
+ return inScope(targetName, new String[] { "button" });
}
boolean inTableScope(String targetName) {
- return inSpecificScope(targetName, new String[]{"html", "table"}, null);
+ return inSpecificScope(targetName, new String[] { "html", "table" },
+ null);
}
boolean inSelectScope(String targetName) {
@@ -451,10 +506,12 @@ class HtmlTreeBuilder extends TreeBuilder {
while (it.hasNext()) {
Element el = it.next();
String elName = el.nodeName();
- if (elName.equals(targetName))
+ if (elName.equals(targetName)) {
return true;
- if (!StringUtil.in(elName, "optgroup", "option")) // all elements except
+ }
+ if (!StringUtil.in(elName, "optgroup", "option")) {
return false;
+ }
}
Validate.fail("Should not be reachable");
return false;
@@ -497,18 +554,26 @@ class HtmlTreeBuilder extends TreeBuilder {
}
/**
- 11.2.5.2 Closing elements that have implied end tags<p/>
- When the steps below require the UA to generate implied end tags, then, while the current node is a dd element, a
- dt element, an li element, an option element, an optgroup element, a p element, an rp element, or an rt element,
- the UA must pop the current node off the stack of open elements.
-
- @param excludeTag If a step requires the UA to generate implied end tags but lists an element to exclude from the
- process, then the UA must perform the above steps as if that element was not in the above list.
+ * 11.2.5.2 Closing elements that have implied end tags
+ * <p/>
+ * When the steps below require the UA to generate implied end tags, then,
+ * while the current node is a dd element, a dt element, an li element, an
+ * option element, an optgroup element, a p element, an rp element, or an rt
+ * element, the UA must pop the current node off the stack of open elements.
+ *
+ * @param excludeTag
+ * If a step requires the UA to generate implied end tags but
+ * lists an element to exclude from the process, then the UA must
+ * perform the above steps as if that element was not in the
+ * above list.
*/
void generateImpliedEndTags(String excludeTag) {
- while ((excludeTag != null && !currentElement().nodeName().equals(excludeTag)) &&
- StringUtil.in(currentElement().nodeName(), "dd", "dt", "li", "option", "optgroup", "p", "rp", "rt"))
+ while ((excludeTag != null && !currentElement().nodeName().equals(
+ excludeTag))
+ && StringUtil.in(currentElement().nodeName(), "dd", "dt", "li",
+ "option", "optgroup", "p", "rp", "rt")) {
pop();
+ }
}
void generateImpliedEndTags() {
@@ -519,14 +584,18 @@ class HtmlTreeBuilder extends TreeBuilder {
// todo: mathml's mi, mo, mn
// todo: svg's foreigObject, desc, title
String name = el.nodeName();
- return StringUtil.in(name, "address", "applet", "area", "article", "aside", "base", "basefont", "bgsound",
- "blockquote", "body", "br", "button", "caption", "center", "col", "colgroup", "command", "dd",
- "details", "dir", "div", "dl", "dt", "embed", "fieldset", "figcaption", "figure", "footer", "form",
- "frame", "frameset", "h1", "h2", "h3", "h4", "h5", "h6", "head", "header", "hgroup", "hr", "html",
- "iframe", "img", "input", "isindex", "li", "link", "listing", "marquee", "menu", "meta", "nav",
- "noembed", "noframes", "noscript", "object", "ol", "p", "param", "plaintext", "pre", "script",
- "section", "select", "style", "summary", "table", "tbody", "td", "textarea", "tfoot", "th", "thead",
- "title", "tr", "ul", "wbr", "xmp");
+ return StringUtil.in(name, "address", "applet", "area", "article",
+ "aside", "base", "basefont", "bgsound", "blockquote", "body",
+ "br", "button", "caption", "center", "col", "colgroup",
+ "command", "dd", "details", "dir", "div", "dl", "dt", "embed",
+ "fieldset", "figcaption", "figure", "footer", "form", "frame",
+ "frameset", "h1", "h2", "h3", "h4", "h5", "h6", "head",
+ "header", "hgroup", "hr", "html", "iframe", "img", "input",
+ "isindex", "li", "link", "listing", "marquee", "menu", "meta",
+ "nav", "noembed", "noframes", "noscript", "object", "ol", "p",
+ "param", "plaintext", "pre", "script", "section", "select",
+ "style", "summary", "table", "tbody", "td", "textarea",
+ "tfoot", "th", "thead", "title", "tr", "ul", "wbr", "xmp");
}
// active formatting elements
@@ -534,12 +603,14 @@ class HtmlTreeBuilder extends TreeBuilder {
int numSeen = 0;
Iterator<Element> iter = formattingElements.descendingIterator();
while (iter.hasNext()) {
- Element el = iter.next();
- if (el == null) // marker
+ Element el = iter.next();
+ if (el == null) {
break;
+ }
- if (isSameFormattingElement(in, el))
+ if (isSameFormattingElement(in, el)) {
numSeen++;
+ }
if (numSeen == 3) {
iter.remove();
@@ -550,17 +621,20 @@ class HtmlTreeBuilder extends TreeBuilder {
}
private boolean isSameFormattingElement(Element a, Element b) {
- // same if: same namespace, tag, and attributes. Element.equals only checks tag, might in future check children
+ // same if: same namespace, tag, and attributes. Element.equals only
+ // checks tag, might in future check children
return a.nodeName().equals(b.nodeName()) &&
- // a.namespace().equals(b.namespace()) &&
+ // a.namespace().equals(b.namespace()) &&
a.attributes().equals(b.attributes());
// todo: namespaces
}
void reconstructFormattingElements() {
int size = formattingElements.size();
- if (size == 0 || formattingElements.getLast() == null || onStack(formattingElements.getLast()))
+ if (size == 0 || formattingElements.getLast() == null
+ || onStack(formattingElements.getLast())) {
return;
+ }
Element entry = formattingElements.getLast();
int pos = size - 1;
@@ -570,18 +644,24 @@ class HtmlTreeBuilder extends TreeBuilder {
skip = true;
break;
}
- entry = formattingElements.get(--pos); // step 5. one earlier than entry
- if (entry == null || onStack(entry)) // step 6 - neither marker nor on stack
+ entry = formattingElements.get(--pos); // step 5. one earlier than
+ // entry
+ if (entry == null || onStack(entry)) {
break; // jump to 8, else continue back to 4
+ }
}
- while(true) {
- if (!skip) // step 7: on later than entry
+ while (true) {
+ if (!skip) {
entry = formattingElements.get(++pos);
- Validate.notNull(entry); // should not occur, as we break at last element
+ }
+ Validate.notNull(entry); // should not occur, as we break at last
+ // element
- // 8. create new element from element, 9 insert into current node, onto stack
+ // 8. create new element from element, 9 insert into current node,
+ // onto stack
skip = false; // can only skip increment from 4.
- Element newEl = insert(entry.nodeName()); // todo: avoid fostering here?
+ Element newEl = insert(entry.nodeName()); // todo: avoid fostering
+ // here?
// newEl.namespace(entry.namespace()); // todo: namespaces
newEl.attributes().addAll(entry.attributes());
@@ -590,8 +670,9 @@ class HtmlTreeBuilder extends TreeBuilder {
formattingElements.remove(pos + 1);
// 11
- if (pos == size-1) // if not last entry in list, jump to 7
+ if (pos == size - 1) {
break;
+ }
}
}
@@ -599,8 +680,9 @@ class HtmlTreeBuilder extends TreeBuilder {
while (!formattingElements.isEmpty()) {
Element el = formattingElements.peekLast();
formattingElements.removeLast();
- if (el == null)
+ if (el == null) {
break;
+ }
}
}
@@ -623,10 +705,11 @@ class HtmlTreeBuilder extends TreeBuilder {
Iterator<Element> it = formattingElements.descendingIterator();
while (it.hasNext()) {
Element next = it.next();
- if (next == null) // scope marker
+ if (next == null) {
break;
- else if (next.nodeName().equals(nodeName))
+ } else if (next.nodeName().equals(nodeName)) {
return next;
+ }
}
return null;
}
@@ -647,26 +730,25 @@ class HtmlTreeBuilder extends TreeBuilder {
if (lastTable.parent() != null) {
fosterParent = lastTable.parent();
isLastTableParent = true;
- } else
+ } else {
fosterParent = aboveOnStack(lastTable);
+ }
} else { // no table == frag
fosterParent = stack.get(0);
}
if (isLastTableParent) {
- Validate.notNull(lastTable); // last table cannot be null by this point.
+ Validate.notNull(lastTable); // last table cannot be null by this
+ // point.
lastTable.before(in);
- }
- else
+ } else {
fosterParent.appendChild(in);
+ }
}
@Override
public String toString() {
- return "TreeBuilder{" +
- "currentToken=" + currentToken +
- ", state=" + state +
- ", currentElement=" + currentElement() +
- '}';
+ return "TreeBuilder{" + "currentToken=" + currentToken + ", state="
+ + state + ", currentElement=" + currentElement() + '}';
}
}
diff --git a/server/src/org/jsoup/parser/HtmlTreeBuilderState.java b/server/src/org/jsoup/parser/HtmlTreeBuilderState.java
index ceab9faa5a..258d547a49 100644
--- a/server/src/org/jsoup/parser/HtmlTreeBuilderState.java
+++ b/server/src/org/jsoup/parser/HtmlTreeBuilderState.java
@@ -1,17 +1,24 @@
package org.jsoup.parser;
-import org.jsoup.helper.DescendableLinkedList;
-import org.jsoup.helper.StringUtil;
-import org.jsoup.nodes.*;
-
import java.util.Iterator;
import java.util.LinkedList;
+import org.jsoup.helper.DescendableLinkedList;
+import org.jsoup.helper.StringUtil;
+import org.jsoup.nodes.Attribute;
+import org.jsoup.nodes.Attributes;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.DocumentType;
+import org.jsoup.nodes.Element;
+import org.jsoup.nodes.Node;
+
/**
- * The Tree Builder's current state. Each state embodies the processing for the state, and transitions to other states.
+ * The Tree Builder's current state. Each state embodies the processing for the
+ * state, and transitions to other states.
*/
enum HtmlTreeBuilderState {
Initial {
+ @Override
boolean process(Token t, HtmlTreeBuilder tb) {
if (isWhitespace(t)) {
return true; // ignore whitespace
@@ -21,10 +28,13 @@ enum HtmlTreeBuilderState {
// todo: parse error check on expected doctypes
// todo: quirk state check on doctype ids
Token.Doctype d = t.asDoctype();
- DocumentType doctype = new DocumentType(d.getName(), d.getPublicIdentifier(), d.getSystemIdentifier(), tb.getBaseUri());
+ DocumentType doctype = new DocumentType(d.getName(),
+ d.getPublicIdentifier(), d.getSystemIdentifier(),
+ tb.getBaseUri());
tb.getDocument().appendChild(doctype);
- if (d.isForceQuirks())
+ if (d.isForceQuirks()) {
tb.getDocument().quirksMode(Document.QuirksMode.quirks);
+ }
tb.transition(BeforeHtml);
} else {
// todo: check not iframe srcdoc
@@ -35,6 +45,7 @@ enum HtmlTreeBuilderState {
}
},
BeforeHtml {
+ @Override
boolean process(Token t, HtmlTreeBuilder tb) {
if (t.isDoctype()) {
tb.error(this);
@@ -46,7 +57,9 @@ enum HtmlTreeBuilderState {
} else if (t.isStartTag() && t.asStartTag().name().equals("html")) {
tb.insert(t.asStartTag());
tb.transition(BeforeHead);
- } else if (t.isEndTag() && (StringUtil.in(t.asEndTag().name(), "head", "body", "html", "br"))) {
+ } else if (t.isEndTag()
+ && (StringUtil.in(t.asEndTag().name(), "head", "body",
+ "html", "br"))) {
return anythingElse(t, tb);
} else if (t.isEndTag()) {
tb.error(this);
@@ -64,6 +77,7 @@ enum HtmlTreeBuilderState {
}
},
BeforeHead {
+ @Override
boolean process(Token t, HtmlTreeBuilder tb) {
if (isWhitespace(t)) {
return true;
@@ -78,7 +92,9 @@ enum HtmlTreeBuilderState {
Element head = tb.insert(t.asStartTag());
tb.setHeadElement(head);
tb.transition(InHead);
- } else if (t.isEndTag() && (StringUtil.in(t.asEndTag().name(), "head", "body", "html", "br"))) {
+ } else if (t.isEndTag()
+ && (StringUtil.in(t.asEndTag().name(), "head", "body",
+ "html", "br"))) {
tb.process(new Token.StartTag("head"));
return tb.process(t);
} else if (t.isEndTag()) {
@@ -92,67 +108,71 @@ enum HtmlTreeBuilderState {
}
},
InHead {
+ @Override
boolean process(Token t, HtmlTreeBuilder tb) {
if (isWhitespace(t)) {
tb.insert(t.asCharacter());
return true;
}
switch (t.type) {
- case Comment:
- tb.insert(t.asComment());
- break;
- case Doctype:
+ case Comment:
+ tb.insert(t.asComment());
+ break;
+ case Doctype:
+ tb.error(this);
+ return false;
+ case StartTag:
+ Token.StartTag start = t.asStartTag();
+ String name = start.name();
+ if (name.equals("html")) {
+ return InBody.process(t, tb);
+ } else if (StringUtil.in(name, "base", "basefont", "bgsound",
+ "command", "link")) {
+ Element el = tb.insertEmpty(start);
+ // jsoup special: update base the frist time it is seen
+ if (name.equals("base") && el.hasAttr("href")) {
+ tb.maybeSetBaseUri(el);
+ }
+ } else if (name.equals("meta")) {
+ Element meta = tb.insertEmpty(start);
+ // todo: charset switches
+ } else if (name.equals("title")) {
+ handleRcData(start, tb);
+ } else if (StringUtil.in(name, "noframes", "style")) {
+ handleRawtext(start, tb);
+ } else if (name.equals("noscript")) {
+ // else if noscript && scripting flag = true: rawtext (jsoup
+ // doesn't run script, to handle as noscript)
+ tb.insert(start);
+ tb.transition(InHeadNoscript);
+ } else if (name.equals("script")) {
+ // skips some script rules as won't execute them
+ tb.insert(start);
+ tb.tokeniser.transition(TokeniserState.ScriptData);
+ tb.markInsertionMode();
+ tb.transition(Text);
+ } else if (name.equals("head")) {
tb.error(this);
return false;
- case StartTag:
- Token.StartTag start = t.asStartTag();
- String name = start.name();
- if (name.equals("html")) {
- return InBody.process(t, tb);
- } else if (StringUtil.in(name, "base", "basefont", "bgsound", "command", "link")) {
- Element el = tb.insertEmpty(start);
- // jsoup special: update base the frist time it is seen
- if (name.equals("base") && el.hasAttr("href"))
- tb.maybeSetBaseUri(el);
- } else if (name.equals("meta")) {
- Element meta = tb.insertEmpty(start);
- // todo: charset switches
- } else if (name.equals("title")) {
- handleRcData(start, tb);
- } else if (StringUtil.in(name, "noframes", "style")) {
- handleRawtext(start, tb);
- } else if (name.equals("noscript")) {
- // else if noscript && scripting flag = true: rawtext (jsoup doesn't run script, to handle as noscript)
- tb.insert(start);
- tb.transition(InHeadNoscript);
- } else if (name.equals("script")) {
- // skips some script rules as won't execute them
- tb.insert(start);
- tb.tokeniser.transition(TokeniserState.ScriptData);
- tb.markInsertionMode();
- tb.transition(Text);
- } else if (name.equals("head")) {
- tb.error(this);
- return false;
- } else {
- return anythingElse(t, tb);
- }
- break;
- case EndTag:
- Token.EndTag end = t.asEndTag();
- name = end.name();
- if (name.equals("head")) {
- tb.pop();
- tb.transition(AfterHead);
- } else if (StringUtil.in(name, "body", "html", "br")) {
- return anythingElse(t, tb);
- } else {
- tb.error(this);
- return false;
- }
- break;
- default:
+ } else {
return anythingElse(t, tb);
+ }
+ break;
+ case EndTag:
+ Token.EndTag end = t.asEndTag();
+ name = end.name();
+ if (name.equals("head")) {
+ tb.pop();
+ tb.transition(AfterHead);
+ } else if (StringUtil.in(name, "body", "html", "br")) {
+ return anythingElse(t, tb);
+ } else {
+ tb.error(this);
+ return false;
+ }
+ break;
+ default:
+ return anythingElse(t, tb);
}
return true;
}
@@ -163,6 +183,7 @@ enum HtmlTreeBuilderState {
}
},
InHeadNoscript {
+ @Override
boolean process(Token t, HtmlTreeBuilder tb) {
if (t.isDoctype()) {
tb.error(this);
@@ -171,12 +192,16 @@ enum HtmlTreeBuilderState {
} else if (t.isEndTag() && t.asEndTag().name().equals("noscript")) {
tb.pop();
tb.transition(InHead);
- } else if (isWhitespace(t) || t.isComment() || (t.isStartTag() && StringUtil.in(t.asStartTag().name(),
- "basefont", "bgsound", "link", "meta", "noframes", "style"))) {
+ } else if (isWhitespace(t)
+ || t.isComment()
+ || (t.isStartTag() && StringUtil.in(t.asStartTag().name(),
+ "basefont", "bgsound", "link", "meta", "noframes",
+ "style"))) {
return tb.process(t, InHead);
} else if (t.isEndTag() && t.asEndTag().name().equals("br")) {
return anythingElse(t, tb);
- } else if ((t.isStartTag() && StringUtil.in(t.asStartTag().name(), "head", "noscript")) || t.isEndTag()) {
+ } else if ((t.isStartTag() && StringUtil.in(t.asStartTag().name(),
+ "head", "noscript")) || t.isEndTag()) {
tb.error(this);
return false;
} else {
@@ -192,6 +217,7 @@ enum HtmlTreeBuilderState {
}
},
AfterHead {
+ @Override
boolean process(Token t, HtmlTreeBuilder tb) {
if (isWhitespace(t)) {
tb.insert(t.asCharacter());
@@ -211,7 +237,8 @@ enum HtmlTreeBuilderState {
} else if (name.equals("frameset")) {
tb.insert(startTag);
tb.transition(InFrameset);
- } else if (StringUtil.in(name, "base", "basefont", "bgsound", "link", "meta", "noframes", "script", "style", "title")) {
+ } else if (StringUtil.in(name, "base", "basefont", "bgsound",
+ "link", "meta", "noframes", "script", "style", "title")) {
tb.error(this);
Element head = tb.getHeadElement();
tb.push(head);
@@ -243,519 +270,604 @@ enum HtmlTreeBuilderState {
}
},
InBody {
+ @Override
boolean process(Token t, HtmlTreeBuilder tb) {
switch (t.type) {
- case Character: {
- Token.Character c = t.asCharacter();
- if (c.getData().equals(nullString)) {
- // todo confirm that check
- tb.error(this);
- return false;
- } else if (isWhitespace(c)) {
- tb.reconstructFormattingElements();
- tb.insert(c);
- } else {
- tb.reconstructFormattingElements();
- tb.insert(c);
- tb.framesetOk(false);
- }
- break;
- }
- case Comment: {
- tb.insert(t.asComment());
- break;
- }
- case Doctype: {
+ case Character: {
+ Token.Character c = t.asCharacter();
+ if (c.getData().equals(nullString)) {
+ // todo confirm that check
tb.error(this);
return false;
+ } else if (isWhitespace(c)) {
+ tb.reconstructFormattingElements();
+ tb.insert(c);
+ } else {
+ tb.reconstructFormattingElements();
+ tb.insert(c);
+ tb.framesetOk(false);
}
- case StartTag:
- Token.StartTag startTag = t.asStartTag();
- String name = startTag.name();
- if (name.equals("html")) {
- tb.error(this);
- // merge attributes onto real html
- Element html = tb.getStack().getFirst();
- for (Attribute attribute : startTag.getAttributes()) {
- if (!html.hasAttr(attribute.getKey()))
- html.attributes().put(attribute);
- }
- } else if (StringUtil.in(name, "base", "basefont", "bgsound", "command", "link", "meta", "noframes", "script", "style", "title")) {
- return tb.process(t, InHead);
- } else if (name.equals("body")) {
- tb.error(this);
- LinkedList<Element> stack = tb.getStack();
- if (stack.size() == 1 || (stack.size() > 2 && !stack.get(1).nodeName().equals("body"))) {
- // only in fragment case
- return false; // ignore
- } else {
- tb.framesetOk(false);
- Element body = stack.get(1);
- for (Attribute attribute : startTag.getAttributes()) {
- if (!body.hasAttr(attribute.getKey()))
- body.attributes().put(attribute);
- }
- }
- } else if (name.equals("frameset")) {
- tb.error(this);
- LinkedList<Element> stack = tb.getStack();
- if (stack.size() == 1 || (stack.size() > 2 && !stack.get(1).nodeName().equals("body"))) {
- // only in fragment case
- return false; // ignore
- } else if (!tb.framesetOk()) {
- return false; // ignore frameset
- } else {
- Element second = stack.get(1);
- if (second.parent() != null)
- second.remove();
- // pop up to html element
- while (stack.size() > 1)
- stack.removeLast();
- tb.insert(startTag);
- tb.transition(InFrameset);
- }
- } else if (StringUtil.in(name,
- "address", "article", "aside", "blockquote", "center", "details", "dir", "div", "dl",
- "fieldset", "figcaption", "figure", "footer", "header", "hgroup", "menu", "nav", "ol",
- "p", "section", "summary", "ul")) {
- if (tb.inButtonScope("p")) {
- tb.process(new Token.EndTag("p"));
- }
- tb.insert(startTag);
- } else if (StringUtil.in(name, "h1", "h2", "h3", "h4", "h5", "h6")) {
- if (tb.inButtonScope("p")) {
- tb.process(new Token.EndTag("p"));
- }
- if (StringUtil.in(tb.currentElement().nodeName(), "h1", "h2", "h3", "h4", "h5", "h6")) {
- tb.error(this);
- tb.pop();
- }
- tb.insert(startTag);
- } else if (StringUtil.in(name, "pre", "listing")) {
- if (tb.inButtonScope("p")) {
- tb.process(new Token.EndTag("p"));
- }
- tb.insert(startTag);
- // todo: ignore LF if next token
- tb.framesetOk(false);
- } else if (name.equals("form")) {
- if (tb.getFormElement() != null) {
- tb.error(this);
- return false;
- }
- if (tb.inButtonScope("p")) {
- tb.process(new Token.EndTag("p"));
+ break;
+ }
+ case Comment: {
+ tb.insert(t.asComment());
+ break;
+ }
+ case Doctype: {
+ tb.error(this);
+ return false;
+ }
+ case StartTag:
+ Token.StartTag startTag = t.asStartTag();
+ String name = startTag.name();
+ if (name.equals("html")) {
+ tb.error(this);
+ // merge attributes onto real html
+ Element html = tb.getStack().getFirst();
+ for (Attribute attribute : startTag.getAttributes()) {
+ if (!html.hasAttr(attribute.getKey())) {
+ html.attributes().put(attribute);
}
- Element form = tb.insert(startTag);
- tb.setFormElement(form);
- } else if (name.equals("li")) {
+ }
+ } else if (StringUtil.in(name, "base", "basefont", "bgsound",
+ "command", "link", "meta", "noframes", "script",
+ "style", "title")) {
+ return tb.process(t, InHead);
+ } else if (name.equals("body")) {
+ tb.error(this);
+ LinkedList<Element> stack = tb.getStack();
+ if (stack.size() == 1
+ || (stack.size() > 2 && !stack.get(1).nodeName()
+ .equals("body"))) {
+ // only in fragment case
+ return false; // ignore
+ } else {
tb.framesetOk(false);
- LinkedList<Element> stack = tb.getStack();
- for (int i = stack.size() - 1; i > 0; i--) {
- Element el = stack.get(i);
- if (el.nodeName().equals("li")) {
- tb.process(new Token.EndTag("li"));
- break;
+ Element body = stack.get(1);
+ for (Attribute attribute : startTag.getAttributes()) {
+ if (!body.hasAttr(attribute.getKey())) {
+ body.attributes().put(attribute);
}
- if (tb.isSpecial(el) && !StringUtil.in(el.nodeName(), "address", "div", "p"))
- break;
}
- if (tb.inButtonScope("p")) {
- tb.process(new Token.EndTag("p"));
- }
- tb.insert(startTag);
- } else if (StringUtil.in(name, "dd", "dt")) {
- tb.framesetOk(false);
- LinkedList<Element> stack = tb.getStack();
- for (int i = stack.size() - 1; i > 0; i--) {
- Element el = stack.get(i);
- if (StringUtil.in(el.nodeName(), "dd", "dt")) {
- tb.process(new Token.EndTag(el.nodeName()));
- break;
- }
- if (tb.isSpecial(el) && !StringUtil.in(el.nodeName(), "address", "div", "p"))
- break;
+ }
+ } else if (name.equals("frameset")) {
+ tb.error(this);
+ LinkedList<Element> stack = tb.getStack();
+ if (stack.size() == 1
+ || (stack.size() > 2 && !stack.get(1).nodeName()
+ .equals("body"))) {
+ // only in fragment case
+ return false; // ignore
+ } else if (!tb.framesetOk()) {
+ return false; // ignore frameset
+ } else {
+ Element second = stack.get(1);
+ if (second.parent() != null) {
+ second.remove();
}
- if (tb.inButtonScope("p")) {
- tb.process(new Token.EndTag("p"));
+ // pop up to html element
+ while (stack.size() > 1) {
+ stack.removeLast();
}
tb.insert(startTag);
- } else if (name.equals("plaintext")) {
- if (tb.inButtonScope("p")) {
- tb.process(new Token.EndTag("p"));
+ tb.transition(InFrameset);
+ }
+ } else if (StringUtil.in(name, "address", "article", "aside",
+ "blockquote", "center", "details", "dir", "div", "dl",
+ "fieldset", "figcaption", "figure", "footer", "header",
+ "hgroup", "menu", "nav", "ol", "p", "section",
+ "summary", "ul")) {
+ if (tb.inButtonScope("p")) {
+ tb.process(new Token.EndTag("p"));
+ }
+ tb.insert(startTag);
+ } else if (StringUtil.in(name, "h1", "h2", "h3", "h4", "h5",
+ "h6")) {
+ if (tb.inButtonScope("p")) {
+ tb.process(new Token.EndTag("p"));
+ }
+ if (StringUtil.in(tb.currentElement().nodeName(), "h1",
+ "h2", "h3", "h4", "h5", "h6")) {
+ tb.error(this);
+ tb.pop();
+ }
+ tb.insert(startTag);
+ } else if (StringUtil.in(name, "pre", "listing")) {
+ if (tb.inButtonScope("p")) {
+ tb.process(new Token.EndTag("p"));
+ }
+ tb.insert(startTag);
+ // todo: ignore LF if next token
+ tb.framesetOk(false);
+ } else if (name.equals("form")) {
+ if (tb.getFormElement() != null) {
+ tb.error(this);
+ return false;
+ }
+ if (tb.inButtonScope("p")) {
+ tb.process(new Token.EndTag("p"));
+ }
+ Element form = tb.insert(startTag);
+ tb.setFormElement(form);
+ } else if (name.equals("li")) {
+ tb.framesetOk(false);
+ LinkedList<Element> stack = tb.getStack();
+ for (int i = stack.size() - 1; i > 0; i--) {
+ Element el = stack.get(i);
+ if (el.nodeName().equals("li")) {
+ tb.process(new Token.EndTag("li"));
+ break;
}
- tb.insert(startTag);
- tb.tokeniser.transition(TokeniserState.PLAINTEXT); // once in, never gets out
- } else if (name.equals("button")) {
- if (tb.inButtonScope("button")) {
- // close and reprocess
- tb.error(this);
- tb.process(new Token.EndTag("button"));
- tb.process(startTag);
- } else {
- tb.reconstructFormattingElements();
- tb.insert(startTag);
- tb.framesetOk(false);
+ if (tb.isSpecial(el)
+ && !StringUtil.in(el.nodeName(), "address",
+ "div", "p")) {
+ break;
}
- } else if (name.equals("a")) {
- if (tb.getActiveFormattingElement("a") != null) {
- tb.error(this);
- tb.process(new Token.EndTag("a"));
-
- // still on stack?
- Element remainingA = tb.getFromStack("a");
- if (remainingA != null) {
- tb.removeFromActiveFormattingElements(remainingA);
- tb.removeFromStack(remainingA);
- }
+ }
+ if (tb.inButtonScope("p")) {
+ tb.process(new Token.EndTag("p"));
+ }
+ tb.insert(startTag);
+ } else if (StringUtil.in(name, "dd", "dt")) {
+ tb.framesetOk(false);
+ LinkedList<Element> stack = tb.getStack();
+ for (int i = stack.size() - 1; i > 0; i--) {
+ Element el = stack.get(i);
+ if (StringUtil.in(el.nodeName(), "dd", "dt")) {
+ tb.process(new Token.EndTag(el.nodeName()));
+ break;
}
- tb.reconstructFormattingElements();
- Element a = tb.insert(startTag);
- tb.pushActiveFormattingElements(a);
- } else if (StringUtil.in(name,
- "b", "big", "code", "em", "font", "i", "s", "small", "strike", "strong", "tt", "u")) {
- tb.reconstructFormattingElements();
- Element el = tb.insert(startTag);
- tb.pushActiveFormattingElements(el);
- } else if (name.equals("nobr")) {
- tb.reconstructFormattingElements();
- if (tb.inScope("nobr")) {
- tb.error(this);
- tb.process(new Token.EndTag("nobr"));
- tb.reconstructFormattingElements();
+ if (tb.isSpecial(el)
+ && !StringUtil.in(el.nodeName(), "address",
+ "div", "p")) {
+ break;
}
- Element el = tb.insert(startTag);
- tb.pushActiveFormattingElements(el);
- } else if (StringUtil.in(name, "applet", "marquee", "object")) {
+ }
+ if (tb.inButtonScope("p")) {
+ tb.process(new Token.EndTag("p"));
+ }
+ tb.insert(startTag);
+ } else if (name.equals("plaintext")) {
+ if (tb.inButtonScope("p")) {
+ tb.process(new Token.EndTag("p"));
+ }
+ tb.insert(startTag);
+ tb.tokeniser.transition(TokeniserState.PLAINTEXT); // once
+ // in,
+ // never
+ // gets
+ // out
+ } else if (name.equals("button")) {
+ if (tb.inButtonScope("button")) {
+ // close and reprocess
+ tb.error(this);
+ tb.process(new Token.EndTag("button"));
+ tb.process(startTag);
+ } else {
tb.reconstructFormattingElements();
tb.insert(startTag);
- tb.insertMarkerToFormattingElements();
tb.framesetOk(false);
- } else if (name.equals("table")) {
- if (tb.getDocument().quirksMode() != Document.QuirksMode.quirks && tb.inButtonScope("p")) {
- tb.process(new Token.EndTag("p"));
+ }
+ } else if (name.equals("a")) {
+ if (tb.getActiveFormattingElement("a") != null) {
+ tb.error(this);
+ tb.process(new Token.EndTag("a"));
+
+ // still on stack?
+ Element remainingA = tb.getFromStack("a");
+ if (remainingA != null) {
+ tb.removeFromActiveFormattingElements(remainingA);
+ tb.removeFromStack(remainingA);
}
- tb.insert(startTag);
- tb.framesetOk(false);
- tb.transition(InTable);
- } else if (StringUtil.in(name, "area", "br", "embed", "img", "keygen", "wbr")) {
- tb.reconstructFormattingElements();
- tb.insertEmpty(startTag);
- tb.framesetOk(false);
- } else if (name.equals("input")) {
+ }
+ tb.reconstructFormattingElements();
+ Element a = tb.insert(startTag);
+ tb.pushActiveFormattingElements(a);
+ } else if (StringUtil.in(name, "b", "big", "code", "em",
+ "font", "i", "s", "small", "strike", "strong", "tt",
+ "u")) {
+ tb.reconstructFormattingElements();
+ Element el = tb.insert(startTag);
+ tb.pushActiveFormattingElements(el);
+ } else if (name.equals("nobr")) {
+ tb.reconstructFormattingElements();
+ if (tb.inScope("nobr")) {
+ tb.error(this);
+ tb.process(new Token.EndTag("nobr"));
tb.reconstructFormattingElements();
- Element el = tb.insertEmpty(startTag);
- if (!el.attr("type").equalsIgnoreCase("hidden"))
- tb.framesetOk(false);
- } else if (StringUtil.in(name, "param", "source", "track")) {
- tb.insertEmpty(startTag);
- } else if (name.equals("hr")) {
- if (tb.inButtonScope("p")) {
- tb.process(new Token.EndTag("p"));
- }
- tb.insertEmpty(startTag);
+ }
+ Element el = tb.insert(startTag);
+ tb.pushActiveFormattingElements(el);
+ } else if (StringUtil.in(name, "applet", "marquee", "object")) {
+ tb.reconstructFormattingElements();
+ tb.insert(startTag);
+ tb.insertMarkerToFormattingElements();
+ tb.framesetOk(false);
+ } else if (name.equals("table")) {
+ if (tb.getDocument().quirksMode() != Document.QuirksMode.quirks
+ && tb.inButtonScope("p")) {
+ tb.process(new Token.EndTag("p"));
+ }
+ tb.insert(startTag);
+ tb.framesetOk(false);
+ tb.transition(InTable);
+ } else if (StringUtil.in(name, "area", "br", "embed", "img",
+ "keygen", "wbr")) {
+ tb.reconstructFormattingElements();
+ tb.insertEmpty(startTag);
+ tb.framesetOk(false);
+ } else if (name.equals("input")) {
+ tb.reconstructFormattingElements();
+ Element el = tb.insertEmpty(startTag);
+ if (!el.attr("type").equalsIgnoreCase("hidden")) {
tb.framesetOk(false);
- } else if (name.equals("image")) {
- // we're not supposed to ask.
- startTag.name("img");
- return tb.process(startTag);
- } else if (name.equals("isindex")) {
- // how much do we care about the early 90s?
- tb.error(this);
- if (tb.getFormElement() != null)
- return false;
+ }
+ } else if (StringUtil.in(name, "param", "source", "track")) {
+ tb.insertEmpty(startTag);
+ } else if (name.equals("hr")) {
+ if (tb.inButtonScope("p")) {
+ tb.process(new Token.EndTag("p"));
+ }
+ tb.insertEmpty(startTag);
+ tb.framesetOk(false);
+ } else if (name.equals("image")) {
+ // we're not supposed to ask.
+ startTag.name("img");
+ return tb.process(startTag);
+ } else if (name.equals("isindex")) {
+ // how much do we care about the early 90s?
+ tb.error(this);
+ if (tb.getFormElement() != null) {
+ return false;
+ }
- tb.tokeniser.acknowledgeSelfClosingFlag();
- tb.process(new Token.StartTag("form"));
- if (startTag.attributes.hasKey("action")) {
- Element form = tb.getFormElement();
- form.attr("action", startTag.attributes.get("action"));
- }
- tb.process(new Token.StartTag("hr"));
- tb.process(new Token.StartTag("label"));
- // hope you like english.
- String prompt = startTag.attributes.hasKey("prompt") ?
- startTag.attributes.get("prompt") :
- "This is a searchable index. Enter search keywords: ";
+ tb.tokeniser.acknowledgeSelfClosingFlag();
+ tb.process(new Token.StartTag("form"));
+ if (startTag.attributes.hasKey("action")) {
+ Element form = tb.getFormElement();
+ form.attr("action", startTag.attributes.get("action"));
+ }
+ tb.process(new Token.StartTag("hr"));
+ tb.process(new Token.StartTag("label"));
+ // hope you like english.
+ String prompt = startTag.attributes.hasKey("prompt") ? startTag.attributes
+ .get("prompt")
+ : "This is a searchable index. Enter search keywords: ";
- tb.process(new Token.Character(prompt));
+ tb.process(new Token.Character(prompt));
- // input
- Attributes inputAttribs = new Attributes();
- for (Attribute attr : startTag.attributes) {
- if (!StringUtil.in(attr.getKey(), "name", "action", "prompt"))
- inputAttribs.put(attr);
+ // input
+ Attributes inputAttribs = new Attributes();
+ for (Attribute attr : startTag.attributes) {
+ if (!StringUtil.in(attr.getKey(), "name", "action",
+ "prompt")) {
+ inputAttribs.put(attr);
}
- inputAttribs.put("name", "isindex");
- tb.process(new Token.StartTag("input", inputAttribs));
- tb.process(new Token.EndTag("label"));
- tb.process(new Token.StartTag("hr"));
- tb.process(new Token.EndTag("form"));
- } else if (name.equals("textarea")) {
- tb.insert(startTag);
- // todo: If the next token is a U+000A LINE FEED (LF) character token, then ignore that token and move on to the next one. (Newlines at the start of textarea elements are ignored as an authoring convenience.)
- tb.tokeniser.transition(TokeniserState.Rcdata);
- tb.markInsertionMode();
- tb.framesetOk(false);
- tb.transition(Text);
- } else if (name.equals("xmp")) {
- if (tb.inButtonScope("p")) {
- tb.process(new Token.EndTag("p"));
- }
- tb.reconstructFormattingElements();
- tb.framesetOk(false);
- handleRawtext(startTag, tb);
- } else if (name.equals("iframe")) {
- tb.framesetOk(false);
- handleRawtext(startTag, tb);
- } else if (name.equals("noembed")) {
- // also handle noscript if script enabled
- handleRawtext(startTag, tb);
- } else if (name.equals("select")) {
- tb.reconstructFormattingElements();
- tb.insert(startTag);
- tb.framesetOk(false);
+ }
+ inputAttribs.put("name", "isindex");
+ tb.process(new Token.StartTag("input", inputAttribs));
+ tb.process(new Token.EndTag("label"));
+ tb.process(new Token.StartTag("hr"));
+ tb.process(new Token.EndTag("form"));
+ } else if (name.equals("textarea")) {
+ tb.insert(startTag);
+ // todo: If the next token is a U+000A LINE FEED (LF)
+ // character token, then ignore that token and move on to
+ // the next one. (Newlines at the start of textarea elements
+ // are ignored as an authoring convenience.)
+ tb.tokeniser.transition(TokeniserState.Rcdata);
+ tb.markInsertionMode();
+ tb.framesetOk(false);
+ tb.transition(Text);
+ } else if (name.equals("xmp")) {
+ if (tb.inButtonScope("p")) {
+ tb.process(new Token.EndTag("p"));
+ }
+ tb.reconstructFormattingElements();
+ tb.framesetOk(false);
+ handleRawtext(startTag, tb);
+ } else if (name.equals("iframe")) {
+ tb.framesetOk(false);
+ handleRawtext(startTag, tb);
+ } else if (name.equals("noembed")) {
+ // also handle noscript if script enabled
+ handleRawtext(startTag, tb);
+ } else if (name.equals("select")) {
+ tb.reconstructFormattingElements();
+ tb.insert(startTag);
+ tb.framesetOk(false);
- HtmlTreeBuilderState state = tb.state();
- if (state.equals(InTable) || state.equals(InCaption) || state.equals(InTableBody) || state.equals(InRow) || state.equals(InCell))
- tb.transition(InSelectInTable);
- else
- tb.transition(InSelect);
- } else if (StringUtil.in("optgroup", "option")) {
- if (tb.currentElement().nodeName().equals("option"))
- tb.process(new Token.EndTag("option"));
- tb.reconstructFormattingElements();
- tb.insert(startTag);
- } else if (StringUtil.in("rp", "rt")) {
- if (tb.inScope("ruby")) {
- tb.generateImpliedEndTags();
- if (!tb.currentElement().nodeName().equals("ruby")) {
- tb.error(this);
- tb.popStackToBefore("ruby"); // i.e. close up to but not include name
- }
- tb.insert(startTag);
+ HtmlTreeBuilderState state = tb.state();
+ if (state.equals(InTable) || state.equals(InCaption)
+ || state.equals(InTableBody) || state.equals(InRow)
+ || state.equals(InCell)) {
+ tb.transition(InSelectInTable);
+ } else {
+ tb.transition(InSelect);
+ }
+ } else if (StringUtil.in("optgroup", "option")) {
+ if (tb.currentElement().nodeName().equals("option")) {
+ tb.process(new Token.EndTag("option"));
+ }
+ tb.reconstructFormattingElements();
+ tb.insert(startTag);
+ } else if (StringUtil.in("rp", "rt")) {
+ if (tb.inScope("ruby")) {
+ tb.generateImpliedEndTags();
+ if (!tb.currentElement().nodeName().equals("ruby")) {
+ tb.error(this);
+ tb.popStackToBefore("ruby"); // i.e. close up to but
+ // not include name
}
- } else if (name.equals("math")) {
- tb.reconstructFormattingElements();
- // todo: handle A start tag whose tag name is "math" (i.e. foreign, mathml)
- tb.insert(startTag);
- tb.tokeniser.acknowledgeSelfClosingFlag();
- } else if (name.equals("svg")) {
- tb.reconstructFormattingElements();
- // todo: handle A start tag whose tag name is "svg" (xlink, svg)
tb.insert(startTag);
- tb.tokeniser.acknowledgeSelfClosingFlag();
- } else if (StringUtil.in(name,
- "caption", "col", "colgroup", "frame", "head", "tbody", "td", "tfoot", "th", "thead", "tr")) {
+ }
+ } else if (name.equals("math")) {
+ tb.reconstructFormattingElements();
+ // todo: handle A start tag whose tag name is "math" (i.e.
+ // foreign, mathml)
+ tb.insert(startTag);
+ tb.tokeniser.acknowledgeSelfClosingFlag();
+ } else if (name.equals("svg")) {
+ tb.reconstructFormattingElements();
+ // todo: handle A start tag whose tag name is "svg" (xlink,
+ // svg)
+ tb.insert(startTag);
+ tb.tokeniser.acknowledgeSelfClosingFlag();
+ } else if (StringUtil.in(name, "caption", "col", "colgroup",
+ "frame", "head", "tbody", "td", "tfoot", "th", "thead",
+ "tr")) {
+ tb.error(this);
+ return false;
+ } else {
+ tb.reconstructFormattingElements();
+ tb.insert(startTag);
+ }
+ break;
+
+ case EndTag:
+ Token.EndTag endTag = t.asEndTag();
+ name = endTag.name();
+ if (name.equals("body")) {
+ if (!tb.inScope("body")) {
tb.error(this);
return false;
} else {
- tb.reconstructFormattingElements();
- tb.insert(startTag);
+ // todo: error if stack contains something not dd, dt,
+ // li, optgroup, option, p, rp, rt, tbody, td, tfoot,
+ // th, thead, tr, body, html
+ tb.transition(AfterBody);
}
- break;
-
- case EndTag:
- Token.EndTag endTag = t.asEndTag();
- name = endTag.name();
- if (name.equals("body")) {
- if (!tb.inScope("body")) {
+ } else if (name.equals("html")) {
+ boolean notIgnored = tb.process(new Token.EndTag("body"));
+ if (notIgnored) {
+ return tb.process(endTag);
+ }
+ } else if (StringUtil.in(name, "address", "article", "aside",
+ "blockquote", "button", "center", "details", "dir",
+ "div", "dl", "fieldset", "figcaption", "figure",
+ "footer", "header", "hgroup", "listing", "menu", "nav",
+ "ol", "pre", "section", "summary", "ul")) {
+ // todo: refactor these lookups
+ if (!tb.inScope(name)) {
+ // nothing to close
+ tb.error(this);
+ return false;
+ } else {
+ tb.generateImpliedEndTags();
+ if (!tb.currentElement().nodeName().equals(name)) {
tb.error(this);
- return false;
- } else {
- // todo: error if stack contains something not dd, dt, li, optgroup, option, p, rp, rt, tbody, td, tfoot, th, thead, tr, body, html
- tb.transition(AfterBody);
}
- } else if (name.equals("html")) {
- boolean notIgnored = tb.process(new Token.EndTag("body"));
- if (notIgnored)
- return tb.process(endTag);
- } else if (StringUtil.in(name,
- "address", "article", "aside", "blockquote", "button", "center", "details", "dir", "div",
- "dl", "fieldset", "figcaption", "figure", "footer", "header", "hgroup", "listing", "menu",
- "nav", "ol", "pre", "section", "summary", "ul")) {
- // todo: refactor these lookups
- if (!tb.inScope(name)) {
- // nothing to close
+ tb.popStackToClose(name);
+ }
+ } else if (name.equals("form")) {
+ Element currentForm = tb.getFormElement();
+ tb.setFormElement(null);
+ if (currentForm == null || !tb.inScope(name)) {
+ tb.error(this);
+ return false;
+ } else {
+ tb.generateImpliedEndTags();
+ if (!tb.currentElement().nodeName().equals(name)) {
tb.error(this);
- return false;
- } else {
- tb.generateImpliedEndTags();
- if (!tb.currentElement().nodeName().equals(name))
- tb.error(this);
- tb.popStackToClose(name);
}
- } else if (name.equals("form")) {
- Element currentForm = tb.getFormElement();
- tb.setFormElement(null);
- if (currentForm == null || !tb.inScope(name)) {
+ // remove currentForm from stack. will shift anything
+ // under up.
+ tb.removeFromStack(currentForm);
+ }
+ } else if (name.equals("p")) {
+ if (!tb.inButtonScope(name)) {
+ tb.error(this);
+ tb.process(new Token.StartTag(name)); // if no p to
+ // close, creates
+ // an empty
+ // <p></p>
+ return tb.process(endTag);
+ } else {
+ tb.generateImpliedEndTags(name);
+ if (!tb.currentElement().nodeName().equals(name)) {
tb.error(this);
- return false;
- } else {
- tb.generateImpliedEndTags();
- if (!tb.currentElement().nodeName().equals(name))
- tb.error(this);
- // remove currentForm from stack. will shift anything under up.
- tb.removeFromStack(currentForm);
}
- } else if (name.equals("p")) {
- if (!tb.inButtonScope(name)) {
+ tb.popStackToClose(name);
+ }
+ } else if (name.equals("li")) {
+ if (!tb.inListItemScope(name)) {
+ tb.error(this);
+ return false;
+ } else {
+ tb.generateImpliedEndTags(name);
+ if (!tb.currentElement().nodeName().equals(name)) {
tb.error(this);
- tb.process(new Token.StartTag(name)); // if no p to close, creates an empty <p></p>
- return tb.process(endTag);
- } else {
- tb.generateImpliedEndTags(name);
- if (!tb.currentElement().nodeName().equals(name))
- tb.error(this);
- tb.popStackToClose(name);
}
- } else if (name.equals("li")) {
- if (!tb.inListItemScope(name)) {
+ tb.popStackToClose(name);
+ }
+ } else if (StringUtil.in(name, "dd", "dt")) {
+ if (!tb.inScope(name)) {
+ tb.error(this);
+ return false;
+ } else {
+ tb.generateImpliedEndTags(name);
+ if (!tb.currentElement().nodeName().equals(name)) {
tb.error(this);
- return false;
- } else {
- tb.generateImpliedEndTags(name);
- if (!tb.currentElement().nodeName().equals(name))
- tb.error(this);
- tb.popStackToClose(name);
}
- } else if (StringUtil.in(name, "dd", "dt")) {
- if (!tb.inScope(name)) {
+ tb.popStackToClose(name);
+ }
+ } else if (StringUtil.in(name, "h1", "h2", "h3", "h4", "h5",
+ "h6")) {
+ if (!tb.inScope(new String[] { "h1", "h2", "h3", "h4",
+ "h5", "h6" })) {
+ tb.error(this);
+ return false;
+ } else {
+ tb.generateImpliedEndTags(name);
+ if (!tb.currentElement().nodeName().equals(name)) {
tb.error(this);
- return false;
- } else {
- tb.generateImpliedEndTags(name);
- if (!tb.currentElement().nodeName().equals(name))
- tb.error(this);
- tb.popStackToClose(name);
}
- } else if (StringUtil.in(name, "h1", "h2", "h3", "h4", "h5", "h6")) {
- if (!tb.inScope(new String[]{"h1", "h2", "h3", "h4", "h5", "h6"})) {
+ tb.popStackToClose("h1", "h2", "h3", "h4", "h5", "h6");
+ }
+ } else if (name.equals("sarcasm")) {
+ // *sigh*
+ return anyOtherEndTag(t, tb);
+ } else if (StringUtil.in(name, "a", "b", "big", "code", "em",
+ "font", "i", "nobr", "s", "small", "strike", "strong",
+ "tt", "u")) {
+ // Adoption Agency Algorithm.
+ OUTER: for (int i = 0; i < 8; i++) {
+ Element formatEl = tb.getActiveFormattingElement(name);
+ if (formatEl == null) {
+ return anyOtherEndTag(t, tb);
+ } else if (!tb.onStack(formatEl)) {
+ tb.error(this);
+ tb.removeFromActiveFormattingElements(formatEl);
+ return true;
+ } else if (!tb.inScope(formatEl.nodeName())) {
tb.error(this);
return false;
- } else {
- tb.generateImpliedEndTags(name);
- if (!tb.currentElement().nodeName().equals(name))
- tb.error(this);
- tb.popStackToClose("h1", "h2", "h3", "h4", "h5", "h6");
+ } else if (tb.currentElement() != formatEl) {
+ tb.error(this);
+ }
+
+ Element furthestBlock = null;
+ Element commonAncestor = null;
+ boolean seenFormattingElement = false;
+ LinkedList<Element> stack = tb.getStack();
+ for (int si = 0; si < stack.size(); si++) {
+ Element el = stack.get(si);
+ if (el == formatEl) {
+ commonAncestor = stack.get(si - 1);
+ seenFormattingElement = true;
+ } else if (seenFormattingElement
+ && tb.isSpecial(el)) {
+ furthestBlock = el;
+ break;
+ }
+ }
+ if (furthestBlock == null) {
+ tb.popStackToClose(formatEl.nodeName());
+ tb.removeFromActiveFormattingElements(formatEl);
+ return true;
}
- } else if (name.equals("sarcasm")) {
- // *sigh*
- return anyOtherEndTag(t, tb);
- } else if (StringUtil.in(name,
- "a", "b", "big", "code", "em", "font", "i", "nobr", "s", "small", "strike", "strong", "tt", "u")) {
- // Adoption Agency Algorithm.
- OUTER:
- for (int i = 0; i < 8; i++) {
- Element formatEl = tb.getActiveFormattingElement(name);
- if (formatEl == null)
- return anyOtherEndTag(t, tb);
- else if (!tb.onStack(formatEl)) {
- tb.error(this);
- tb.removeFromActiveFormattingElements(formatEl);
- return true;
- } else if (!tb.inScope(formatEl.nodeName())) {
- tb.error(this);
- return false;
- } else if (tb.currentElement() != formatEl)
- tb.error(this);
- Element furthestBlock = null;
- Element commonAncestor = null;
- boolean seenFormattingElement = false;
- LinkedList<Element> stack = tb.getStack();
- for (int si = 0; si < stack.size(); si++) {
- Element el = stack.get(si);
- if (el == formatEl) {
- commonAncestor = stack.get(si - 1);
- seenFormattingElement = true;
- } else if (seenFormattingElement && tb.isSpecial(el)) {
- furthestBlock = el;
- break;
- }
+ // todo: Let a bookmark note the position of the
+ // formatting element in the list of active formatting
+ // elements relative to the elements on either side of
+ // it in the list.
+ // does that mean: int pos of format el in list?
+ Element node = furthestBlock;
+ Element lastNode = furthestBlock;
+ INNER: for (int j = 0; j < 3; j++) {
+ if (tb.onStack(node)) {
+ node = tb.aboveOnStack(node);
}
- if (furthestBlock == null) {
- tb.popStackToClose(formatEl.nodeName());
- tb.removeFromActiveFormattingElements(formatEl);
- return true;
+ if (!tb.isInActiveFormattingElements(node)) { // note
+ // no
+ // bookmark
+ // check
+ tb.removeFromStack(node);
+ continue INNER;
+ } else if (node == formatEl) {
+ break INNER;
}
- // todo: Let a bookmark note the position of the formatting element in the list of active formatting elements relative to the elements on either side of it in the list.
- // does that mean: int pos of format el in list?
- Element node = furthestBlock;
- Element lastNode = furthestBlock;
- INNER:
- for (int j = 0; j < 3; j++) {
- if (tb.onStack(node))
- node = tb.aboveOnStack(node);
- if (!tb.isInActiveFormattingElements(node)) { // note no bookmark check
- tb.removeFromStack(node);
- continue INNER;
- } else if (node == formatEl)
- break INNER;
+ Element replacement = new Element(Tag.valueOf(node
+ .nodeName()), tb.getBaseUri());
+ tb.replaceActiveFormattingElement(node, replacement);
+ tb.replaceOnStack(node, replacement);
+ node = replacement;
- Element replacement = new Element(Tag.valueOf(node.nodeName()), tb.getBaseUri());
- tb.replaceActiveFormattingElement(node, replacement);
- tb.replaceOnStack(node, replacement);
- node = replacement;
+ if (lastNode == furthestBlock) {
+ // todo: move the aforementioned bookmark to be
+ // immediately after the new node in the list of
+ // active formatting elements.
+ // not getting how this bookmark both straddles
+ // the element above, but is inbetween here...
+ }
+ if (lastNode.parent() != null) {
+ lastNode.remove();
+ }
+ node.appendChild(lastNode);
- if (lastNode == furthestBlock) {
- // todo: move the aforementioned bookmark to be immediately after the new node in the list of active formatting elements.
- // not getting how this bookmark both straddles the element above, but is inbetween here...
- }
- if (lastNode.parent() != null)
- lastNode.remove();
- node.appendChild(lastNode);
+ lastNode = node;
+ }
- lastNode = node;
+ if (StringUtil.in(commonAncestor.nodeName(), "table",
+ "tbody", "tfoot", "thead", "tr")) {
+ if (lastNode.parent() != null) {
+ lastNode.remove();
}
-
- if (StringUtil.in(commonAncestor.nodeName(), "table", "tbody", "tfoot", "thead", "tr")) {
- if (lastNode.parent() != null)
- lastNode.remove();
- tb.insertInFosterParent(lastNode);
- } else {
- if (lastNode.parent() != null)
- lastNode.remove();
- commonAncestor.appendChild(lastNode);
+ tb.insertInFosterParent(lastNode);
+ } else {
+ if (lastNode.parent() != null) {
+ lastNode.remove();
}
+ commonAncestor.appendChild(lastNode);
+ }
- Element adopter = new Element(Tag.valueOf(name), tb.getBaseUri());
- Node[] childNodes = furthestBlock.childNodes().toArray(new Node[furthestBlock.childNodes().size()]);
- for (Node childNode : childNodes) {
- adopter.appendChild(childNode); // append will reparent. thus the clone to avoid concurrent mod.
- }
- furthestBlock.appendChild(adopter);
- tb.removeFromActiveFormattingElements(formatEl);
- // todo: insert the new element into the list of active formatting elements at the position of the aforementioned bookmark.
- tb.removeFromStack(formatEl);
- tb.insertOnStackAfter(furthestBlock, adopter);
+ Element adopter = new Element(Tag.valueOf(name),
+ tb.getBaseUri());
+ Node[] childNodes = furthestBlock.childNodes().toArray(
+ new Node[furthestBlock.childNodes().size()]);
+ for (Node childNode : childNodes) {
+ adopter.appendChild(childNode); // append will
+ // reparent. thus
+ // the clone to
+ // avoid concurrent
+ // mod.
}
- } else if (StringUtil.in(name, "applet", "marquee", "object")) {
- if (!tb.inScope("name")) {
- if (!tb.inScope(name)) {
- tb.error(this);
- return false;
- }
- tb.generateImpliedEndTags();
- if (!tb.currentElement().nodeName().equals(name))
- tb.error(this);
- tb.popStackToClose(name);
- tb.clearFormattingElementsToLastMarker();
+ furthestBlock.appendChild(adopter);
+ tb.removeFromActiveFormattingElements(formatEl);
+ // todo: insert the new element into the list of active
+ // formatting elements at the position of the
+ // aforementioned bookmark.
+ tb.removeFromStack(formatEl);
+ tb.insertOnStackAfter(furthestBlock, adopter);
+ }
+ } else if (StringUtil.in(name, "applet", "marquee", "object")) {
+ if (!tb.inScope("name")) {
+ if (!tb.inScope(name)) {
+ tb.error(this);
+ return false;
}
- } else if (name.equals("br")) {
- tb.error(this);
- tb.process(new Token.StartTag("br"));
- return false;
- } else {
- return anyOtherEndTag(t, tb);
+ tb.generateImpliedEndTags();
+ if (!tb.currentElement().nodeName().equals(name)) {
+ tb.error(this);
+ }
+ tb.popStackToClose(name);
+ tb.clearFormattingElementsToLastMarker();
}
+ } else if (name.equals("br")) {
+ tb.error(this);
+ tb.process(new Token.StartTag("br"));
+ return false;
+ } else {
+ return anyOtherEndTag(t, tb);
+ }
- break;
- case EOF:
- // todo: error if stack contains something not dd, dt, li, p, tbody, td, tfoot, th, thead, tr, body, html
- // stop parsing
- break;
+ break;
+ case EOF:
+ // todo: error if stack contains something not dd, dt, li, p,
+ // tbody, td, tfoot, th, thead, tr, body, html
+ // stop parsing
+ break;
}
return true;
}
@@ -768,8 +880,9 @@ enum HtmlTreeBuilderState {
Element node = it.next();
if (node.nodeName().equals(name)) {
tb.generateImpliedEndTags(name);
- if (!name.equals(tb.currentElement().nodeName()))
+ if (!name.equals(tb.currentElement().nodeName())) {
tb.error(this);
+ }
tb.popStackToClose(name);
break;
} else {
@@ -784,6 +897,7 @@ enum HtmlTreeBuilderState {
},
Text {
// in script, style etc. normally treated as data tags
+ @Override
boolean process(Token t, HtmlTreeBuilder tb) {
if (t.isCharacter()) {
tb.insert(t.asCharacter());
@@ -794,7 +908,8 @@ enum HtmlTreeBuilderState {
tb.transition(tb.originalState());
return tb.process(t);
} else if (t.isEndTag()) {
- // if: An end tag whose tag name is "script" -- scripting nesting level, if evaluating scripts
+ // if: An end tag whose tag name is "script" -- scripting
+ // nesting level, if evaluating scripts
tb.pop();
tb.transition(tb.originalState());
}
@@ -802,6 +917,7 @@ enum HtmlTreeBuilderState {
}
},
InTable {
+ @Override
boolean process(Token t, HtmlTreeBuilder tb) {
if (t.isCharacter()) {
tb.newPendingTableCharacters();
@@ -839,21 +955,23 @@ enum HtmlTreeBuilderState {
} else if (name.equals("table")) {
tb.error(this);
boolean processed = tb.process(new Token.EndTag("table"));
- if (processed) // only ignored if in fragment
+ if (processed) {
return tb.process(t);
+ }
} else if (StringUtil.in(name, "style", "script")) {
return tb.process(t, InHead);
} else if (name.equals("input")) {
- if (!startTag.attributes.get("type").equalsIgnoreCase("hidden")) {
+ if (!startTag.attributes.get("type").equalsIgnoreCase(
+ "hidden")) {
return anythingElse(t, tb);
} else {
tb.insertEmpty(startTag);
}
} else if (name.equals("form")) {
tb.error(this);
- if (tb.getFormElement() != null)
+ if (tb.getFormElement() != null) {
return false;
- else {
+ } else {
Element form = tb.insertEmpty(startTag);
tb.setFormElement(form);
}
@@ -872,16 +990,18 @@ enum HtmlTreeBuilderState {
tb.popStackToClose("table");
}
tb.resetInsertionMode();
- } else if (StringUtil.in(name,
- "body", "caption", "col", "colgroup", "html", "tbody", "td", "tfoot", "th", "thead", "tr")) {
+ } else if (StringUtil.in(name, "body", "caption", "col",
+ "colgroup", "html", "tbody", "td", "tfoot", "th",
+ "thead", "tr")) {
tb.error(this);
return false;
} else {
return anythingElse(t, tb);
}
} else if (t.isEOF()) {
- if (tb.currentElement().nodeName().equals("html"))
+ if (tb.currentElement().nodeName().equals("html")) {
tb.error(this);
+ }
return true; // stops parsing
}
return anythingElse(t, tb);
@@ -890,7 +1010,8 @@ enum HtmlTreeBuilderState {
boolean anythingElse(Token t, HtmlTreeBuilder tb) {
tb.error(this);
boolean processed = true;
- if (StringUtil.in(tb.currentElement().nodeName(), "table", "tbody", "tfoot", "thead", "tr")) {
+ if (StringUtil.in(tb.currentElement().nodeName(), "table", "tbody",
+ "tfoot", "thead", "tr")) {
tb.setFosterInserts(true);
processed = tb.process(t, InBody);
tb.setFosterInserts(false);
@@ -901,42 +1022,47 @@ enum HtmlTreeBuilderState {
}
},
InTableText {
+ @Override
boolean process(Token t, HtmlTreeBuilder tb) {
switch (t.type) {
- case Character:
- Token.Character c = t.asCharacter();
- if (c.getData().equals(nullString)) {
- tb.error(this);
- return false;
- } else {
- tb.getPendingTableCharacters().add(c);
- }
- break;
- default:
- if (tb.getPendingTableCharacters().size() > 0) {
- for (Token.Character character : tb.getPendingTableCharacters()) {
- if (!isWhitespace(character)) {
- // InTable anything else section:
- tb.error(this);
- if (StringUtil.in(tb.currentElement().nodeName(), "table", "tbody", "tfoot", "thead", "tr")) {
- tb.setFosterInserts(true);
- tb.process(character, InBody);
- tb.setFosterInserts(false);
- } else {
- tb.process(character, InBody);
- }
- } else
- tb.insert(character);
+ case Character:
+ Token.Character c = t.asCharacter();
+ if (c.getData().equals(nullString)) { // data equal to nullString: flag an error and return false
+ tb.error(this);
+ return false;
+ } else {
+ tb.getPendingTableCharacters().add(c); // buffer the character; it is handled in the default branch
+ }
+ break;
+ default: // any non-character token: handle the buffered characters first
+ if (tb.getPendingTableCharacters().size() > 0) {
+ for (Token.Character character : tb
+ .getPendingTableCharacters()) {
+ if (!isWhitespace(character)) {
+ // InTable anything else section:
+ tb.error(this);
+ if (StringUtil.in(tb.currentElement().nodeName(),
+ "table", "tbody", "tfoot", "thead", "tr")) {
+ tb.setFosterInserts(true); // foster inserts are on only for this one InBody call
+ tb.process(character, InBody);
+ tb.setFosterInserts(false);
+ } else {
+ tb.process(character, InBody);
+ }
+ } else {
+ tb.insert(character); // whitespace is inserted directly, without the InBody detour
}
- tb.newPendingTableCharacters();
}
- tb.transition(tb.originalState());
- return tb.process(t);
+ tb.newPendingTableCharacters(); // presumably replaces the pending buffer with a fresh one - confirm in HtmlTreeBuilder
+ }
+ tb.transition(tb.originalState()); // switch to tb.originalState() and reprocess t there
+ return tb.process(t);
}
return true;
}
},
InCaption {
+ @Override
boolean process(Token t, HtmlTreeBuilder tb) {
if (t.isEndTag() && t.asEndTag().name().equals("caption")) {
Token.EndTag endTag = t.asEndTag();
@@ -946,23 +1072,27 @@ enum HtmlTreeBuilderState {
return false;
} else {
tb.generateImpliedEndTags();
- if (!tb.currentElement().nodeName().equals("caption"))
+ if (!tb.currentElement().nodeName().equals("caption")) {
tb.error(this);
+ }
tb.popStackToClose("caption");
tb.clearFormattingElementsToLastMarker();
tb.transition(InTable);
}
- } else if ((
- t.isStartTag() && StringUtil.in(t.asStartTag().name(),
- "caption", "col", "colgroup", "tbody", "td", "tfoot", "th", "thead", "tr") ||
- t.isEndTag() && t.asEndTag().name().equals("table"))
- ) {
+ } else if ((t.isStartTag()
+ && StringUtil.in(t.asStartTag().name(), "caption", "col",
+ "colgroup", "tbody", "td", "tfoot", "th", "thead",
+ "tr") || t.isEndTag()
+ && t.asEndTag().name().equals("table"))) {
tb.error(this);
boolean processed = tb.process(new Token.EndTag("caption"));
- if (processed)
+ if (processed) {
return tb.process(t);
- } else if (t.isEndTag() && StringUtil.in(t.asEndTag().name(),
- "body", "col", "colgroup", "html", "tbody", "td", "tfoot", "th", "thead", "tr")) {
+ }
+ } else if (t.isEndTag()
+ && StringUtil.in(t.asEndTag().name(), "body", "col",
+ "colgroup", "html", "tbody", "td", "tfoot", "th",
+ "thead", "tr")) {
tb.error(this);
return false;
} else {
@@ -972,113 +1102,127 @@ enum HtmlTreeBuilderState {
}
},
InColumnGroup {
+ @Override
boolean process(Token t, HtmlTreeBuilder tb) {
if (isWhitespace(t)) {
tb.insert(t.asCharacter());
return true;
}
switch (t.type) {
- case Comment:
- tb.insert(t.asComment());
- break;
- case Doctype:
- tb.error(this);
- break;
- case StartTag:
- Token.StartTag startTag = t.asStartTag();
- String name = startTag.name();
- if (name.equals("html"))
- return tb.process(t, InBody);
- else if (name.equals("col"))
- tb.insertEmpty(startTag);
- else
- return anythingElse(t, tb);
- break;
- case EndTag:
- Token.EndTag endTag = t.asEndTag();
- name = endTag.name();
- if (name.equals("colgroup")) {
- if (tb.currentElement().nodeName().equals("html")) { // frag case
- tb.error(this);
- return false;
- } else {
- tb.pop();
- tb.transition(InTable);
- }
- } else
- return anythingElse(t, tb);
- break;
- case EOF:
- if (tb.currentElement().nodeName().equals("html"))
- return true; // stop parsing; frag case
- else
- return anythingElse(t, tb);
- default:
+ case Comment:
+ tb.insert(t.asComment());
+ break;
+ case Doctype:
+ tb.error(this); // a doctype token here is only flagged as an error
+ break;
+ case StartTag:
+ Token.StartTag startTag = t.asStartTag();
+ String name = startTag.name();
+ if (name.equals("html")) {
+ return tb.process(t, InBody); // <html> is delegated to the InBody handler
+ } else if (name.equals("col")) {
+ tb.insertEmpty(startTag);
+ } else {
+ return anythingElse(t, tb);
+ }
+ break;
+ case EndTag:
+ Token.EndTag endTag = t.asEndTag();
+ name = endTag.name();
+ if (name.equals("colgroup")) {
+ if (tb.currentElement().nodeName().equals("html")) { // frag case
+ // current node is html, so nothing is popped
+ tb.error(this);
+ return false;
+ } else {
+ tb.pop(); // pop the current node, then continue in InTable
+ tb.transition(InTable);
+ }
+ } else {
+ return anythingElse(t, tb);
+ }
+ break;
+ case EOF:
+ if (tb.currentElement().nodeName().equals("html")) {
+ return true; // stop parsing; frag case
+ } else {
return anythingElse(t, tb);
+ }
+ default:
+ return anythingElse(t, tb);
}
return true;
}
private boolean anythingElse(Token t, TreeBuilder tb) {
boolean processed = tb.process(new Token.EndTag("colgroup"));
- if (processed) // only ignored in frag case
+ if (processed) { // only ignored in frag case
return tb.process(t);
+ }
return true;
}
},
InTableBody {
+ @Override
boolean process(Token t, HtmlTreeBuilder tb) {
switch (t.type) {
- case StartTag:
- Token.StartTag startTag = t.asStartTag();
- String name = startTag.name();
- if (name.equals("tr")) {
- tb.clearStackToTableBodyContext();
- tb.insert(startTag);
- tb.transition(InRow);
- } else if (StringUtil.in(name, "th", "td")) {
- tb.error(this);
- tb.process(new Token.StartTag("tr"));
- return tb.process(startTag);
- } else if (StringUtil.in(name, "caption", "col", "colgroup", "tbody", "tfoot", "thead")) {
- return exitTableBody(t, tb);
- } else
- return anythingElse(t, tb);
- break;
- case EndTag:
- Token.EndTag endTag = t.asEndTag();
- name = endTag.name();
- if (StringUtil.in(name, "tbody", "tfoot", "thead")) {
- if (!tb.inTableScope(name)) {
- tb.error(this);
- return false;
- } else {
- tb.clearStackToTableBodyContext();
- tb.pop();
- tb.transition(InTable);
- }
- } else if (name.equals("table")) {
- return exitTableBody(t, tb);
- } else if (StringUtil.in(name, "body", "caption", "col", "colgroup", "html", "td", "th", "tr")) {
+ case StartTag:
+ Token.StartTag startTag = t.asStartTag();
+ String name = startTag.name();
+ if (name.equals("tr")) {
+ tb.clearStackToTableBodyContext();
+ tb.insert(startTag);
+ tb.transition(InRow); // the row's content is handled by InRow
+ } else if (StringUtil.in(name, "th", "td")) {
+ tb.error(this);
+ tb.process(new Token.StartTag("tr")); // synthesize the missing <tr>, then reprocess the cell tag
+ return tb.process(startTag);
+ } else if (StringUtil.in(name, "caption", "col", "colgroup",
+ "tbody", "tfoot", "thead")) {
+ return exitTableBody(t, tb); // these start tags go through exitTableBody
+ } else {
+ return anythingElse(t, tb);
+ }
+ break;
+ case EndTag:
+ Token.EndTag endTag = t.asEndTag();
+ name = endTag.name();
+ if (StringUtil.in(name, "tbody", "tfoot", "thead")) {
+ if (!tb.inTableScope(name)) { // no matching section in table scope: error, return false
tb.error(this);
return false;
- } else
- return anythingElse(t, tb);
- break;
- default:
+ } else {
+ tb.clearStackToTableBodyContext();
+ tb.pop(); // pop the current node, then continue in InTable
+ tb.transition(InTable);
+ }
+ } else if (name.equals("table")) {
+ return exitTableBody(t, tb);
+ } else if (StringUtil.in(name, "body", "caption", "col",
+ "colgroup", "html", "td", "th", "tr")) {
+ tb.error(this); // these end tags are flagged and reported as unhandled
+ return false;
+ } else {
return anythingElse(t, tb);
+ }
+ break;
+ default: // every other token type goes to anythingElse
+ return anythingElse(t, tb);
}
return true;
}
private boolean exitTableBody(Token t, HtmlTreeBuilder tb) {
- if (!(tb.inTableScope("tbody") || tb.inTableScope("thead") || tb.inScope("tfoot"))) {
+ if (!(tb.inTableScope("tbody") || tb.inTableScope("thead") || tb
+ .inTableScope("tfoot"))) { // all three sections are tested in table scope
// frag case
tb.error(this);
return false;
}
tb.clearStackToTableBodyContext();
- tb.process(new Token.EndTag(tb.currentElement().nodeName())); // tbody, tfoot, thead
+ // Emit an end tag for the current node, which is expected to
+ // be one of tbody, tfoot or thead, then reprocess t.
+ tb.process(new Token.EndTag(tb.currentElement().nodeName()));
return tb.process(t);
}
@@ -1087,6 +1231,7 @@ enum HtmlTreeBuilderState {
}
},
InRow {
+ @Override
boolean process(Token t, HtmlTreeBuilder tb) {
if (t.isStartTag()) {
Token.StartTag startTag = t.asStartTag();
@@ -1097,7 +1242,8 @@ enum HtmlTreeBuilderState {
tb.insert(startTag);
tb.transition(InCell);
tb.insertMarkerToFormattingElements();
- } else if (StringUtil.in(name, "caption", "col", "colgroup", "tbody", "tfoot", "thead", "tr")) {
+ } else if (StringUtil.in(name, "caption", "col", "colgroup",
+ "tbody", "tfoot", "thead", "tr")) {
return handleMissingTr(t, tb);
} else {
return anythingElse(t, tb);
@@ -1123,7 +1269,8 @@ enum HtmlTreeBuilderState {
}
tb.process(new Token.EndTag("tr"));
return tb.process(t);
- } else if (StringUtil.in(name, "body", "caption", "col", "colgroup", "html", "td", "th")) {
+ } else if (StringUtil.in(name, "body", "caption", "col",
+ "colgroup", "html", "td", "th")) {
tb.error(this);
return false;
} else {
@@ -1141,13 +1288,15 @@ enum HtmlTreeBuilderState {
private boolean handleMissingTr(Token t, TreeBuilder tb) {
boolean processed = tb.process(new Token.EndTag("tr"));
- if (processed)
+ if (processed) {
return tb.process(t);
- else
+ } else {
return false;
+ }
}
},
InCell {
+ @Override
boolean process(Token t, HtmlTreeBuilder tb) {
if (t.isEndTag()) {
Token.EndTag endTag = t.asEndTag();
@@ -1156,19 +1305,24 @@ enum HtmlTreeBuilderState {
if (StringUtil.in(name, "td", "th")) {
if (!tb.inTableScope(name)) {
tb.error(this);
- tb.transition(InRow); // might not be in scope if empty: <td /> and processing fake end tag
+ tb.transition(InRow); // might not be in scope if empty:
+ // <td /> and processing fake end
+ // tag
return false;
}
tb.generateImpliedEndTags();
- if (!tb.currentElement().nodeName().equals(name))
+ if (!tb.currentElement().nodeName().equals(name)) {
tb.error(this);
+ }
tb.popStackToClose(name);
tb.clearFormattingElementsToLastMarker();
tb.transition(InRow);
- } else if (StringUtil.in(name, "body", "caption", "col", "colgroup", "html")) {
+ } else if (StringUtil.in(name, "body", "caption", "col",
+ "colgroup", "html")) {
tb.error(this);
return false;
- } else if (StringUtil.in(name, "table", "tbody", "tfoot", "thead", "tr")) {
+ } else if (StringUtil.in(name, "table", "tbody", "tfoot",
+ "thead", "tr")) {
if (!tb.inTableScope(name)) {
tb.error(this);
return false;
@@ -1178,9 +1332,10 @@ enum HtmlTreeBuilderState {
} else {
return anythingElse(t, tb);
}
- } else if (t.isStartTag() &&
- StringUtil.in(t.asStartTag().name(),
- "caption", "col", "colgroup", "tbody", "td", "tfoot", "th", "thead", "tr")) {
+ } else if (t.isStartTag()
+ && StringUtil.in(t.asStartTag().name(), "caption", "col",
+ "colgroup", "tbody", "td", "tfoot", "th", "thead",
+ "tr")) {
if (!(tb.inTableScope("td") || tb.inTableScope("th"))) {
tb.error(this);
return false;
@@ -1198,91 +1353,105 @@ enum HtmlTreeBuilderState {
}
private void closeCell(HtmlTreeBuilder tb) {
- if (tb.inTableScope("td"))
+ if (tb.inTableScope("td")) {
tb.process(new Token.EndTag("td"));
- else
- tb.process(new Token.EndTag("th")); // only here if th or td in scope
+ } else {
+ tb.process(new Token.EndTag("th")); // only here if th or td in
+ // scope
+ }
}
},
InSelect {
+ @Override
boolean process(Token t, HtmlTreeBuilder tb) {
switch (t.type) {
- case Character:
- Token.Character c = t.asCharacter();
- if (c.getData().equals(nullString)) {
- tb.error(this);
- return false;
- } else {
- tb.insert(c);
- }
- break;
- case Comment:
- tb.insert(t.asComment());
- break;
- case Doctype:
+ case Character:
+ Token.Character c = t.asCharacter();
+ if (c.getData().equals(nullString)) {
tb.error(this);
return false;
- case StartTag:
- Token.StartTag start = t.asStartTag();
- String name = start.name();
- if (name.equals("html"))
- return tb.process(start, InBody);
- else if (name.equals("option")) {
+ } else {
+ tb.insert(c);
+ }
+ break;
+ case Comment:
+ tb.insert(t.asComment());
+ break;
+ case Doctype:
+ tb.error(this);
+ return false;
+ case StartTag:
+ Token.StartTag start = t.asStartTag();
+ String name = start.name();
+ if (name.equals("html")) {
+ return tb.process(start, InBody);
+ } else if (name.equals("option")) {
+ tb.process(new Token.EndTag("option"));
+ tb.insert(start);
+ } else if (name.equals("optgroup")) {
+ if (tb.currentElement().nodeName().equals("option")) {
tb.process(new Token.EndTag("option"));
- tb.insert(start);
- } else if (name.equals("optgroup")) {
- if (tb.currentElement().nodeName().equals("option"))
- tb.process(new Token.EndTag("option"));
- else if (tb.currentElement().nodeName().equals("optgroup"))
- tb.process(new Token.EndTag("optgroup"));
- tb.insert(start);
- } else if (name.equals("select")) {
- tb.error(this);
- return tb.process(new Token.EndTag("select"));
- } else if (StringUtil.in(name, "input", "keygen", "textarea")) {
+ } else if (tb.currentElement().nodeName()
+ .equals("optgroup")) {
+ tb.process(new Token.EndTag("optgroup"));
+ }
+ tb.insert(start);
+ } else if (name.equals("select")) {
+ tb.error(this);
+ return tb.process(new Token.EndTag("select"));
+ } else if (StringUtil.in(name, "input", "keygen", "textarea")) {
+ tb.error(this);
+ if (!tb.inSelectScope("select")) {
+ return false; // frag
+ }
+ tb.process(new Token.EndTag("select"));
+ return tb.process(start);
+ } else if (name.equals("script")) {
+ return tb.process(t, InHead);
+ } else {
+ return anythingElse(t, tb);
+ }
+ break;
+ case EndTag:
+ Token.EndTag end = t.asEndTag();
+ name = end.name();
+ if (name.equals("optgroup")) {
+ if (tb.currentElement().nodeName().equals("option")
+ && tb.aboveOnStack(tb.currentElement()) != null
+ && tb.aboveOnStack(tb.currentElement()).nodeName()
+ .equals("optgroup")) {
+ tb.process(new Token.EndTag("option"));
+ }
+ if (tb.currentElement().nodeName().equals("optgroup")) {
+ tb.pop();
+ } else {
tb.error(this);
- if (!tb.inSelectScope("select"))
- return false; // frag
- tb.process(new Token.EndTag("select"));
- return tb.process(start);
- } else if (name.equals("script")) {
- return tb.process(t, InHead);
+ }
+ } else if (name.equals("option")) {
+ if (tb.currentElement().nodeName().equals("option")) {
+ tb.pop();
} else {
- return anythingElse(t, tb);
+ tb.error(this);
}
- break;
- case EndTag:
- Token.EndTag end = t.asEndTag();
- name = end.name();
- if (name.equals("optgroup")) {
- if (tb.currentElement().nodeName().equals("option") && tb.aboveOnStack(tb.currentElement()) != null && tb.aboveOnStack(tb.currentElement()).nodeName().equals("optgroup"))
- tb.process(new Token.EndTag("option"));
- if (tb.currentElement().nodeName().equals("optgroup"))
- tb.pop();
- else
- tb.error(this);
- } else if (name.equals("option")) {
- if (tb.currentElement().nodeName().equals("option"))
- tb.pop();
- else
- tb.error(this);
- } else if (name.equals("select")) {
- if (!tb.inSelectScope(name)) {
- tb.error(this);
- return false;
- } else {
- tb.popStackToClose(name);
- tb.resetInsertionMode();
- }
- } else
- return anythingElse(t, tb);
- break;
- case EOF:
- if (!tb.currentElement().nodeName().equals("html"))
+ } else if (name.equals("select")) {
+ if (!tb.inSelectScope(name)) {
tb.error(this);
- break;
- default:
+ return false;
+ } else {
+ tb.popStackToClose(name);
+ tb.resetInsertionMode();
+ }
+ } else {
return anythingElse(t, tb);
+ }
+ break;
+ case EOF:
+ if (!tb.currentElement().nodeName().equals("html")) {
+ tb.error(this);
+ }
+ break;
+ default:
+ return anythingElse(t, tb);
}
return true;
}
@@ -1293,24 +1462,31 @@ enum HtmlTreeBuilderState {
}
},
InSelectInTable {
+ @Override
boolean process(Token t, HtmlTreeBuilder tb) {
- if (t.isStartTag() && StringUtil.in(t.asStartTag().name(), "caption", "table", "tbody", "tfoot", "thead", "tr", "td", "th")) {
+ if (t.isStartTag()
+ && StringUtil.in(t.asStartTag().name(), "caption", "table",
+ "tbody", "tfoot", "thead", "tr", "td", "th")) {
tb.error(this);
tb.process(new Token.EndTag("select"));
return tb.process(t);
- } else if (t.isEndTag() && StringUtil.in(t.asEndTag().name(), "caption", "table", "tbody", "tfoot", "thead", "tr", "td", "th")) {
+ } else if (t.isEndTag()
+ && StringUtil.in(t.asEndTag().name(), "caption", "table",
+ "tbody", "tfoot", "thead", "tr", "td", "th")) {
tb.error(this);
if (tb.inTableScope(t.asEndTag().name())) {
tb.process(new Token.EndTag("select"));
return (tb.process(t));
- } else
+ } else {
return false;
+ }
} else {
return tb.process(t, InSelect);
}
}
},
AfterBody {
+ @Override
boolean process(Token t, HtmlTreeBuilder tb) {
if (isWhitespace(t)) {
return tb.process(t, InBody);
@@ -1339,6 +1515,7 @@ enum HtmlTreeBuilderState {
}
},
InFrameset {
+ @Override
boolean process(Token t, HtmlTreeBuilder tb) {
if (isWhitespace(t)) {
tb.insert(t.asCharacter());
@@ -1368,7 +1545,9 @@ enum HtmlTreeBuilderState {
return false;
} else {
tb.pop();
- if (!tb.isFragmentParsing() && !tb.currentElement().nodeName().equals("frameset")) {
+ if (!tb.isFragmentParsing()
+ && !tb.currentElement().nodeName()
+ .equals("frameset")) {
tb.transition(AfterFrameset);
}
}
@@ -1385,6 +1564,7 @@ enum HtmlTreeBuilderState {
}
},
AfterFrameset {
+ @Override
boolean process(Token t, HtmlTreeBuilder tb) {
if (isWhitespace(t)) {
tb.insert(t.asCharacter());
@@ -1397,7 +1577,8 @@ enum HtmlTreeBuilderState {
return tb.process(t, InBody);
} else if (t.isEndTag() && t.asEndTag().name().equals("html")) {
tb.transition(AfterAfterFrameset);
- } else if (t.isStartTag() && t.asStartTag().name().equals("noframes")) {
+ } else if (t.isStartTag()
+ && t.asStartTag().name().equals("noframes")) {
return tb.process(t, InHead);
} else if (t.isEOF()) {
// cool your heels, we're complete
@@ -1409,10 +1590,12 @@ enum HtmlTreeBuilderState {
}
},
AfterAfterBody {
+ @Override
boolean process(Token t, HtmlTreeBuilder tb) {
if (t.isComment()) {
tb.insert(t.asComment());
- } else if (t.isDoctype() || isWhitespace(t) || (t.isStartTag() && t.asStartTag().name().equals("html"))) {
+ } else if (t.isDoctype() || isWhitespace(t)
+ || (t.isStartTag() && t.asStartTag().name().equals("html"))) {
return tb.process(t, InBody);
} else if (t.isEOF()) {
// nice work chuck
@@ -1425,14 +1608,17 @@ enum HtmlTreeBuilderState {
}
},
AfterAfterFrameset {
+ @Override
boolean process(Token t, HtmlTreeBuilder tb) {
if (t.isComment()) {
tb.insert(t.asComment());
- } else if (t.isDoctype() || isWhitespace(t) || (t.isStartTag() && t.asStartTag().name().equals("html"))) {
+ } else if (t.isDoctype() || isWhitespace(t)
+ || (t.isStartTag() && t.asStartTag().name().equals("html"))) {
return tb.process(t, InBody);
} else if (t.isEOF()) {
// nice work chuck
- } else if (t.isStartTag() && t.asStartTag().name().equals("noframes")) {
+ } else if (t.isStartTag()
+ && t.asStartTag().name().equals("noframes")) {
return tb.process(t, InHead);
} else {
tb.error(this);
@@ -1442,6 +1628,7 @@ enum HtmlTreeBuilderState {
}
},
ForeignContent {
+ @Override
boolean process(Token t, HtmlTreeBuilder tb) {
return true;
// todo: implement. Also; how do we get here?
@@ -1458,8 +1645,9 @@ enum HtmlTreeBuilderState {
// todo: this checks more than spec - "\t", "\n", "\f", "\r", " "
for (int i = 0; i < data.length(); i++) {
char c = data.charAt(i);
- if (!StringUtil.isWhitespace(c))
+ if (!StringUtil.isWhitespace(c)) {
return false;
+ }
}
return true;
}
@@ -1473,7 +1661,8 @@ enum HtmlTreeBuilderState {
tb.transition(Text);
}
- private static void handleRawtext(Token.StartTag startTag, HtmlTreeBuilder tb) {
+ private static void handleRawtext(Token.StartTag startTag,
+ HtmlTreeBuilder tb) {
tb.insert(startTag);
tb.tokeniser.transition(TokeniserState.Rawtext);
tb.markInsertionMode();
diff --git a/server/src/org/jsoup/parser/ParseError.java b/server/src/org/jsoup/parser/ParseError.java
index dfa090051b..eb3c240a59 100644
--- a/server/src/org/jsoup/parser/ParseError.java
+++ b/server/src/org/jsoup/parser/ParseError.java
@@ -1,7 +1,8 @@
package org.jsoup.parser;
/**
- * A Parse Error records an error in the input HTML that occurs in either the tokenisation or the tree building phase.
+ * A Parse Error records an error in the input HTML that occurs in either the
+ * tokenisation or the tree building phase.
*/
public class ParseError {
private int pos;
@@ -13,12 +14,13 @@ public class ParseError {
}
ParseError(int pos, String errorFormat, Object... args) {
- this.errorMsg = String.format(errorFormat, args);
+ errorMsg = String.format(errorFormat, args);
this.pos = pos;
}
/**
* Retrieve the error message.
+ *
* @return the error message.
*/
public String getErrorMessage() {
@@ -27,6 +29,7 @@ public class ParseError {
/**
* Retrieves the offset of the error.
+ *
* @return error offset within input
*/
public int getPosition() {
diff --git a/server/src/org/jsoup/parser/ParseErrorList.java b/server/src/org/jsoup/parser/ParseErrorList.java
index 3824ffbc4e..773dfcae24 100644
--- a/server/src/org/jsoup/parser/ParseErrorList.java
+++ b/server/src/org/jsoup/parser/ParseErrorList.java
@@ -7,15 +7,15 @@ import java.util.ArrayList;
*
* @author Jonathan Hedley
*/
-class ParseErrorList extends ArrayList<ParseError>{
+class ParseErrorList extends ArrayList<ParseError> {
private static final int INITIAL_CAPACITY = 16;
private final int maxSize;
-
+
ParseErrorList(int initialCapacity, int maxSize) {
super(initialCapacity);
this.maxSize = maxSize;
}
-
+
boolean canAddError() {
return size() < maxSize;
}
@@ -27,7 +27,7 @@ class ParseErrorList extends ArrayList<ParseError>{
static ParseErrorList noTracking() {
return new ParseErrorList(0, 0);
}
-
+
static ParseErrorList tracking(int maxSize) {
return new ParseErrorList(INITIAL_CAPACITY, maxSize);
}
diff --git a/server/src/org/jsoup/parser/Parser.java b/server/src/org/jsoup/parser/Parser.java
index 2236219c06..a1f6fd5184 100644
--- a/server/src/org/jsoup/parser/Parser.java
+++ b/server/src/org/jsoup/parser/Parser.java
@@ -1,32 +1,36 @@
package org.jsoup.parser;
+import java.util.List;
+
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.nodes.Node;
-import java.util.List;
-
/**
- * Parses HTML into a {@link org.jsoup.nodes.Document}. Generally best to use one of the more convenient parse methods
- * in {@link org.jsoup.Jsoup}.
+ * Parses HTML into a {@link org.jsoup.nodes.Document}. Generally best to use
+ * one of the more convenient parse methods in {@link org.jsoup.Jsoup}.
*/
public class Parser {
- private static final int DEFAULT_MAX_ERRORS = 0; // by default, error tracking is disabled.
-
+ private static final int DEFAULT_MAX_ERRORS = 0; // by default, error
+ // tracking is disabled.
+
private TreeBuilder treeBuilder;
private int maxErrors = DEFAULT_MAX_ERRORS;
private ParseErrorList errors;
/**
* Create a new Parser, using the specified TreeBuilder
- * @param treeBuilder TreeBuilder to use to parse input into Documents.
+ *
+ * @param treeBuilder
+ * TreeBuilder to use to parse input into Documents.
*/
public Parser(TreeBuilder treeBuilder) {
this.treeBuilder = treeBuilder;
}
-
+
public Document parseInput(String html, String baseUri) {
- errors = isTrackErrors() ? ParseErrorList.tracking(maxErrors) : ParseErrorList.noTracking();
+ errors = isTrackErrors() ? ParseErrorList.tracking(maxErrors)
+ : ParseErrorList.noTracking();
Document doc = treeBuilder.parse(html, baseUri, errors);
return doc;
}
@@ -34,6 +38,7 @@ public class Parser {
// gets & sets
/**
* Get the TreeBuilder currently in use.
+ *
* @return current TreeBuilder.
*/
public TreeBuilder getTreeBuilder() {
@@ -42,7 +47,9 @@ public class Parser {
/**
* Update the TreeBuilder used when parsing content.
- * @param treeBuilder current TreeBuilder
+ *
+ * @param treeBuilder
+ * current TreeBuilder
* @return this, for chaining
*/
public Parser setTreeBuilder(TreeBuilder treeBuilder) {
@@ -52,6 +59,7 @@ public class Parser {
/**
* Check if parse error tracking is enabled.
+ *
* @return current track error state.
*/
public boolean isTrackErrors() {
@@ -60,7 +68,9 @@ public class Parser {
/**
* Enable or disable parse error tracking for the next parse.
- * @param maxErrors the maximum number of errors to track. Set to 0 to disable.
+ *
+ * @param maxErrors
+ * the maximum number of errors to track. Set to 0 to disable.
* @return this, for chaining
*/
public Parser setTrackErrors(int maxErrors) {
@@ -70,7 +80,9 @@ public class Parser {
/**
* Retrieve the parse errors, if any, from the last parse.
- * @return list of parse errors, up to the size of the maximum errors tracked.
+ *
+ * @return list of parse errors, up to the size of the maximum errors
+ * tracked.
*/
public List<ParseError> getErrors() {
return errors;
@@ -79,10 +91,13 @@ public class Parser {
// static parse functions below
/**
* Parse HTML into a Document.
- *
- * @param html HTML to parse
- * @param baseUri base URI of document (i.e. original fetch location), for resolving relative URLs.
- *
+ *
+ * @param html
+ * HTML to parse
+ * @param baseUri
+ * base URI of document (i.e. original fetch location), for
+ * resolving relative URLs.
+ *
* @return parsed Document
*/
public static Document parse(String html, String baseUri) {
@@ -91,33 +106,49 @@ public class Parser {
}
/**
- * Parse a fragment of HTML into a list of nodes. The context element, if supplied, supplies parsing context.
- *
- * @param fragmentHtml the fragment of HTML to parse
- * @param context (optional) the element that this HTML fragment is being parsed for (i.e. for inner HTML). This
- * provides stack context (for implicit element creation).
- * @param baseUri base URI of document (i.e. original fetch location), for resolving relative URLs.
- *
- * @return list of nodes parsed from the input HTML. Note that the context element, if supplied, is not modified.
+ * Parse a fragment of HTML into a list of nodes. The context element, if
+ * supplied, supplies parsing context.
+ *
+ * @param fragmentHtml
+ * the fragment of HTML to parse
+ * @param context
+ * (optional) the element that this HTML fragment is being parsed
+ * for (i.e. for inner HTML). This provides stack context (for
+ * implicit element creation).
+ * @param baseUri
+ * base URI of document (i.e. original fetch location), for
+ * resolving relative URLs.
+ *
+ * @return list of nodes parsed from the input HTML. Note that the context
+ * element, if supplied, is not modified.
*/
- public static List<Node> parseFragment(String fragmentHtml, Element context, String baseUri) {
+ public static List<Node> parseFragment(String fragmentHtml,
+ Element context, String baseUri) {
HtmlTreeBuilder treeBuilder = new HtmlTreeBuilder();
- return treeBuilder.parseFragment(fragmentHtml, context, baseUri, ParseErrorList.noTracking());
+ return treeBuilder.parseFragment(fragmentHtml, context, baseUri,
+ ParseErrorList.noTracking());
}
/**
* Parse a fragment of HTML into the {@code body} of a Document.
- *
- * @param bodyHtml fragment of HTML
- * @param baseUri base URI of document (i.e. original fetch location), for resolving relative URLs.
- *
+ *
+ * @param bodyHtml
+ * fragment of HTML
+ * @param baseUri
+ * base URI of document (i.e. original fetch location), for
+ * resolving relative URLs.
+ *
* @return Document, with empty head, and HTML parsed into body
*/
public static Document parseBodyFragment(String bodyHtml, String baseUri) {
Document doc = Document.createShell(baseUri);
Element body = doc.body();
List<Node> nodeList = parseFragment(bodyHtml, body, baseUri);
- Node[] nodes = nodeList.toArray(new Node[nodeList.size()]); // the node list gets modified when re-parented
+ Node[] nodes = nodeList.toArray(new Node[nodeList.size()]); // the node
+ // list gets
+ // modified
+ // when
+ // re-parented
for (Node node : nodes) {
body.appendChild(node);
}
@@ -125,21 +156,29 @@ public class Parser {
}
/**
- * @param bodyHtml HTML to parse
- * @param baseUri baseUri base URI of document (i.e. original fetch location), for resolving relative URLs.
- *
+ * @param bodyHtml
+ * HTML to parse
+ * @param baseUri
+ * baseUri base URI of document (i.e. original fetch location),
+ * for resolving relative URLs.
+ *
* @return parsed Document
- * @deprecated Use {@link #parseBodyFragment} or {@link #parseFragment} instead.
+ * @deprecated Use {@link #parseBodyFragment} or {@link #parseFragment}
+ * instead.
*/
- public static Document parseBodyFragmentRelaxed(String bodyHtml, String baseUri) {
+ @Deprecated
+ public static Document parseBodyFragmentRelaxed(String bodyHtml,
+ String baseUri) {
return parse(bodyHtml, baseUri);
}
-
+
// builders
/**
- * Create a new HTML parser. This parser treats input as HTML5, and enforces the creation of a normalised document,
- * based on a knowledge of the semantics of the incoming tags.
+ * Create a new HTML parser. This parser treats input as HTML5, and enforces
+ * the creation of a normalised document, based on a knowledge of the
+ * semantics of the incoming tags.
+ *
* @return a new HTML parser.
*/
public static Parser htmlParser() {
@@ -147,8 +186,10 @@ public class Parser {
}
/**
- * Create a new XML parser. This parser assumes no knowledge of the incoming tags and does not treat it as HTML,
- * rather creates a simple tree directly from the input.
+ * Create a new XML parser. This parser assumes no knowledge of the incoming
+ * tags and does not treat it as HTML, rather creates a simple tree directly
+ * from the input.
+ *
* @return a new simple XML parser.
*/
public static Parser xmlParser() {
diff --git a/server/src/org/jsoup/parser/Tag.java b/server/src/org/jsoup/parser/Tag.java
index 40b7557b39..c43f27aff3 100644
--- a/server/src/org/jsoup/parser/Tag.java
+++ b/server/src/org/jsoup/parser/Tag.java
@@ -1,25 +1,31 @@
package org.jsoup.parser;
-import org.jsoup.helper.Validate;
-
import java.util.HashMap;
import java.util.Map;
+import org.jsoup.helper.Validate;
+
/**
* HTML Tag capabilities.
- *
+ *
* @author Jonathan Hedley, jonathan@hedley.net
*/
public class Tag {
- private static final Map<String, Tag> tags = new HashMap<String, Tag>(); // map of known tags
+ private static final Map<String, Tag> tags = new HashMap<String, Tag>(); // map
+ // of
+ // known
+ // tags
private String tagName;
private boolean isBlock = true; // block or inline
private boolean formatAsBlock = true; // should be formatted as a block
- private boolean canContainBlock = true; // Can this tag hold block level tags?
+ private boolean canContainBlock = true; // Can this tag hold block level
+ // tags?
private boolean canContainInline = true; // only pcdata if not
private boolean empty = false; // can hold nothing; e.g. img
- private boolean selfClosing = false; // can self close (<foo />). used for unknown tags that self close, without forcing them as empty.
+ private boolean selfClosing = false; // can self close (<foo />). used for
+ // unknown tags that self close,
+ // without forcing them as empty.
private boolean preserveWhitespace = false; // for pre, textarea, script etc
private Tag(String tagName) {
@@ -28,7 +34,7 @@ public class Tag {
/**
* Get this tag's name.
- *
+ *
* @return the tag's name
*/
public String getName() {
@@ -36,11 +42,14 @@ public class Tag {
}
/**
- * Get a Tag by name. If not previously defined (unknown), returns a new generic tag, that can do anything.
+ * Get a Tag by name. If not previously defined (unknown), returns a new
+ * generic tag, that can do anything.
* <p/>
- * Pre-defined tags (P, DIV etc) will be ==, but unknown tags are not registered and will only .equals().
- *
- * @param tagName Name of tag, e.g. "p". Case insensitive.
+ * Pre-defined tags (P, DIV etc) will be ==, but unknown tags are not
+ * registered and will only .equals().
+ *
+ * @param tagName
+ * Name of tag, e.g. "p". Case insensitive.
* @return The tag, either defined or new generic.
*/
public static Tag valueOf(String tagName) {
@@ -51,7 +60,8 @@ public class Tag {
synchronized (tags) {
Tag tag = tags.get(tagName);
if (tag == null) {
- // not defined: create default; go anywhere, do anything! (incl be inside a <p>)
+ // not defined: create default; go anywhere, do anything! (incl
+ // be inside a <p>)
tag = new Tag(tagName);
tag.isBlock = false;
tag.canContainBlock = true;
@@ -62,7 +72,7 @@ public class Tag {
/**
* Gets if this is a block tag.
- *
+ *
* @return if block tag
*/
public boolean isBlock() {
@@ -71,7 +81,7 @@ public class Tag {
/**
* Gets if this tag should be formatted as a block (or as inline)
- *
+ *
* @return if should be formatted as block or inline
*/
public boolean formatAsBlock() {
@@ -80,7 +90,7 @@ public class Tag {
/**
* Gets if this tag can contain block tags.
- *
+ *
* @return if tag can contain block tags
*/
public boolean canContainBlock() {
@@ -89,7 +99,7 @@ public class Tag {
/**
* Gets if this tag is an inline tag.
- *
+ *
* @return if this tag is an inline tag.
*/
public boolean isInline() {
@@ -98,7 +108,7 @@ public class Tag {
/**
* Gets if this tag is a data only tag.
- *
+ *
* @return if this tag is a data only tag
*/
public boolean isData() {
@@ -107,7 +117,7 @@ public class Tag {
/**
* Get if this is an empty tag
- *
+ *
* @return if this is an empty tag
*/
public boolean isEmpty() {
@@ -116,7 +126,7 @@ public class Tag {
/**
* Get if this tag is self closing.
- *
+ *
* @return if this tag should be output as self closing.
*/
public boolean isSelfClosing() {
@@ -125,7 +135,7 @@ public class Tag {
/**
* Get if this is a pre-defined tag, or was auto created on parsing.
- *
+ *
* @return if a known tag
*/
public boolean isKnownTag() {
@@ -134,8 +144,9 @@ public class Tag {
/**
* Check if this tagname is a known tag.
- *
- * @param tagName name of tag
+ *
+ * @param tagName
+ * name of tag
* @return if known HTML tag
*/
public static boolean isKnownTag(String tagName) {
@@ -144,7 +155,7 @@ public class Tag {
/**
* Get if this tag should preserve whitespace within child text nodes.
- *
+ *
* @return if preserve whitepace
*/
public boolean preserveWhitespace() {
@@ -158,19 +169,39 @@ public class Tag {
@Override
public boolean equals(Object o) {
- if (this == o) return true;
- if (!(o instanceof Tag)) return false;
+ if (this == o) {
+ return true;
+ }
+ if (!(o instanceof Tag)) {
+ return false;
+ }
Tag tag = (Tag) o;
- if (canContainBlock != tag.canContainBlock) return false;
- if (canContainInline != tag.canContainInline) return false;
- if (empty != tag.empty) return false;
- if (formatAsBlock != tag.formatAsBlock) return false;
- if (isBlock != tag.isBlock) return false;
- if (preserveWhitespace != tag.preserveWhitespace) return false;
- if (selfClosing != tag.selfClosing) return false;
- if (!tagName.equals(tag.tagName)) return false;
+ if (canContainBlock != tag.canContainBlock) {
+ return false;
+ }
+ if (canContainInline != tag.canContainInline) {
+ return false;
+ }
+ if (empty != tag.empty) {
+ return false;
+ }
+ if (formatAsBlock != tag.formatAsBlock) {
+ return false;
+ }
+ if (isBlock != tag.isBlock) {
+ return false;
+ }
+ if (preserveWhitespace != tag.preserveWhitespace) {
+ return false;
+ }
+ if (selfClosing != tag.selfClosing) {
+ return false;
+ }
+ if (!tagName.equals(tag.tagName)) {
+ return false;
+ }
return true;
}
@@ -188,34 +219,39 @@ public class Tag {
return result;
}
+ @Override
public String toString() {
return tagName;
}
// internal static initialisers:
- // prepped from http://www.w3.org/TR/REC-html40/sgml/dtd.html and other sources
- private static final String[] blockTags = {
- "html", "head", "body", "frameset", "script", "noscript", "style", "meta", "link", "title", "frame",
- "noframes", "section", "nav", "aside", "hgroup", "header", "footer", "p", "h1", "h2", "h3", "h4", "h5", "h6",
- "ul", "ol", "pre", "div", "blockquote", "hr", "address", "figure", "figcaption", "form", "fieldset", "ins",
- "del", "dl", "dt", "dd", "li", "table", "caption", "thead", "tfoot", "tbody", "colgroup", "col", "tr", "th",
- "td", "video", "audio", "canvas", "details", "menu", "plaintext"
- };
- private static final String[] inlineTags = {
- "object", "base", "font", "tt", "i", "b", "u", "big", "small", "em", "strong", "dfn", "code", "samp", "kbd",
- "var", "cite", "abbr", "time", "acronym", "mark", "ruby", "rt", "rp", "a", "img", "br", "wbr", "map", "q",
- "sub", "sup", "bdo", "iframe", "embed", "span", "input", "select", "textarea", "label", "button", "optgroup",
- "option", "legend", "datalist", "keygen", "output", "progress", "meter", "area", "param", "source", "track",
- "summary", "command", "device"
- };
- private static final String[] emptyTags = {
- "meta", "link", "base", "frame", "img", "br", "wbr", "embed", "hr", "input", "keygen", "col", "command",
- "device"
- };
- private static final String[] formatAsInlineTags = {
- "title", "a", "p", "h1", "h2", "h3", "h4", "h5", "h6", "pre", "address", "li", "th", "td", "script", "style"
- };
- private static final String[] preserveWhitespaceTags = {"pre", "plaintext", "title"};
+ // prepped from http://www.w3.org/TR/REC-html40/sgml/dtd.html and other
+ // sources
+ private static final String[] blockTags = { "html", "head", "body",
+ "frameset", "script", "noscript", "style", "meta", "link", "title",
+ "frame", "noframes", "section", "nav", "aside", "hgroup", "header",
+ "footer", "p", "h1", "h2", "h3", "h4", "h5", "h6", "ul", "ol",
+ "pre", "div", "blockquote", "hr", "address", "figure",
+ "figcaption", "form", "fieldset", "ins", "del", "dl", "dt", "dd",
+ "li", "table", "caption", "thead", "tfoot", "tbody", "colgroup",
+ "col", "tr", "th", "td", "video", "audio", "canvas", "details",
+ "menu", "plaintext" };
+ private static final String[] inlineTags = { "object", "base", "font",
+ "tt", "i", "b", "u", "big", "small", "em", "strong", "dfn", "code",
+ "samp", "kbd", "var", "cite", "abbr", "time", "acronym", "mark",
+ "ruby", "rt", "rp", "a", "img", "br", "wbr", "map", "q", "sub",
+ "sup", "bdo", "iframe", "embed", "span", "input", "select",
+ "textarea", "label", "button", "optgroup", "option", "legend",
+ "datalist", "keygen", "output", "progress", "meter", "area",
+ "param", "source", "track", "summary", "command", "device" };
+ private static final String[] emptyTags = { "meta", "link", "base",
+ "frame", "img", "br", "wbr", "embed", "hr", "input", "keygen",
+ "col", "command", "device" };
+ private static final String[] formatAsInlineTags = { "title", "a", "p",
+ "h1", "h2", "h3", "h4", "h5", "h6", "pre", "address", "li", "th",
+ "td", "script", "style" };
+ private static final String[] preserveWhitespaceTags = { "pre",
+ "plaintext", "title" };
static {
// creates
diff --git a/server/src/org/jsoup/parser/Token.java b/server/src/org/jsoup/parser/Token.java
index 9f4f9e250d..e465eb74e3 100644
--- a/server/src/org/jsoup/parser/Token.java
+++ b/server/src/org/jsoup/parser/Token.java
@@ -12,7 +12,7 @@ abstract class Token {
private Token() {
}
-
+
String tokenType() {
return this.getClass().getSimpleName();
}
@@ -50,13 +50,16 @@ abstract class Token {
private String pendingAttributeValue;
boolean selfClosing = false;
- Attributes attributes = new Attributes(); // todo: allow nodes to not have attributes
+ Attributes attributes = new Attributes(); // todo: allow nodes to not
+ // have attributes
void newAttribute() {
if (pendingAttributeName != null) {
- if (pendingAttributeValue == null)
+ if (pendingAttributeValue == null) {
pendingAttributeValue = "";
- Attribute attribute = new Attribute(pendingAttributeName, pendingAttributeValue);
+ }
+ Attribute attribute = new Attribute(pendingAttributeName,
+ pendingAttributeValue);
attributes.put(attribute);
}
pendingAttributeName = null;
@@ -85,12 +88,13 @@ abstract class Token {
return selfClosing;
}
- @SuppressWarnings({"TypeMayBeWeakened"})
+ @SuppressWarnings({ "TypeMayBeWeakened" })
Attributes getAttributes() {
return attributes;
}
- // these appenders are rarely hit in not null state-- caused by null chars.
+ // these appenders are rarely hit in not null state-- caused by null
+ // chars.
void appendTagName(String append) {
tagName = tagName == null ? append : tagName.concat(append);
}
@@ -100,7 +104,8 @@ abstract class Token {
}
void appendAttributeName(String append) {
- pendingAttributeName = pendingAttributeName == null ? append : pendingAttributeName.concat(append);
+ pendingAttributeName = pendingAttributeName == null ? append
+ : pendingAttributeName.concat(append);
}
void appendAttributeName(char append) {
@@ -108,7 +113,8 @@ abstract class Token {
}
void appendAttributeValue(String append) {
- pendingAttributeValue = pendingAttributeValue == null ? append : pendingAttributeValue.concat(append);
+ pendingAttributeValue = pendingAttributeValue == null ? append
+ : pendingAttributeValue.concat(append);
}
void appendAttributeValue(char append) {
@@ -124,12 +130,12 @@ abstract class Token {
StartTag(String name) {
this();
- this.tagName = name;
+ tagName = name;
}
StartTag(String name, Attributes attributes) {
this();
- this.tagName = name;
+ tagName = name;
this.attributes = attributes;
}
@@ -139,7 +145,7 @@ abstract class Token {
}
}
- static class EndTag extends Tag{
+ static class EndTag extends Tag {
EndTag() {
super();
type = TokenType.EndTag;
@@ -147,7 +153,7 @@ abstract class Token {
EndTag(String name) {
this();
- this.tagName = name;
+ tagName = name;
}
@Override
@@ -242,11 +248,6 @@ abstract class Token {
}
enum TokenType {
- Doctype,
- StartTag,
- EndTag,
- Comment,
- Character,
- EOF
+ Doctype, StartTag, EndTag, Comment, Character, EOF
}
}
diff --git a/server/src/org/jsoup/parser/TokenQueue.java b/server/src/org/jsoup/parser/TokenQueue.java
index a2fdfe621a..3e7127e640 100644
--- a/server/src/org/jsoup/parser/TokenQueue.java
+++ b/server/src/org/jsoup/parser/TokenQueue.java
@@ -5,18 +5,20 @@ import org.jsoup.helper.Validate;
/**
* A character queue with parsing helpers.
- *
+ *
* @author Jonathan Hedley
*/
public class TokenQueue {
private String queue;
private int pos = 0;
-
+
private static final char ESC = '\\'; // escape char for chomp balanced.
/**
- Create a new TokenQueue.
- @param data string of data to back queue.
+ * Create a new TokenQueue.
+ *
+ * @param data
+ * string of data to back queue.
*/
public TokenQueue(String data) {
Validate.notNull(data);
@@ -25,18 +27,20 @@ public class TokenQueue {
/**
* Is the queue empty?
+ *
* @return true if no data left in queue.
*/
public boolean isEmpty() {
return remainingLength() == 0;
}
-
+
private int remainingLength() {
return queue.length() - pos;
}
/**
* Retrieves but does not remove the first character from the queue.
+ *
* @return First character, or 0 if empty.
*/
public char peek() {
@@ -44,16 +48,21 @@ public class TokenQueue {
}
/**
- Add a character to the start of the queue (will be the next character retrieved).
- @param c character to add
+ * Add a character to the start of the queue (will be the next character
+ * retrieved).
+ *
+ * @param c
+ * character to add
*/
public void addFirst(Character c) {
addFirst(c.toString());
}
/**
- Add a string to the start of the queue.
- @param seq string to add.
+ * Add a string to the start of the queue.
+ *
+ * @param seq
+ * string to add.
*/
public void addFirst(String seq) {
// not very performant, but an edge case
@@ -62,8 +71,11 @@ public class TokenQueue {
}
/**
- * Tests if the next characters on the queue match the sequence. Case insensitive.
- * @param seq String to check queue for.
+ * Tests if the next characters on the queue match the sequence. Case
+ * insensitive.
+ *
+ * @param seq
+ * String to check queue for.
* @return true if the next characters match.
*/
public boolean matches(String seq) {
@@ -72,47 +84,57 @@ public class TokenQueue {
/**
* Case sensitive match test.
- * @param seq string to case sensitively check for
+ *
+ * @param seq
+ * string to case sensitively check for
* @return true if matched, false if not
*/
public boolean matchesCS(String seq) {
return queue.startsWith(seq, pos);
}
-
/**
- Tests if the next characters match any of the sequences. Case insensitive.
- @param seq list of strings to case insensitively check for
- @return true of any matched, false if none did
+ * Tests if the next characters match any of the sequences. Case
+ * insensitive.
+ *
+ * @param seq
+ * list of strings to case insensitively check for
+ * @return true if any matched, false if none did
*/
public boolean matchesAny(String... seq) {
for (String s : seq) {
- if (matches(s))
+ if (matches(s)) {
return true;
+ }
}
return false;
}
public boolean matchesAny(char... seq) {
- if (isEmpty())
+ if (isEmpty()) {
return false;
+ }
- for (char c: seq) {
- if (queue.charAt(pos) == c)
+ for (char c : seq) {
+ if (queue.charAt(pos) == c) {
return true;
+ }
}
return false;
}
public boolean matchesStartTag() {
// micro opt for matching "<x"
- return (remainingLength() >= 2 && queue.charAt(pos) == '<' && Character.isLetter(queue.charAt(pos+1)));
+ return (remainingLength() >= 2 && queue.charAt(pos) == '<' && Character
+ .isLetter(queue.charAt(pos + 1)));
}
/**
- * Tests if the queue matches the sequence (as with match), and if they do, removes the matched string from the
- * queue.
- * @param seq String to search for, and if found, remove from queue.
+ * Tests if the queue matches the sequence (as with match), and if they do,
+ * removes the matched string from the queue.
+ *
+ * @param seq
+ * String to search for, and if found, remove from queue.
* @return true if found and removed, false if not found.
*/
public boolean matchChomp(String seq) {
@@ -125,16 +147,18 @@ public class TokenQueue {
}
/**
- Tests if queue starts with a whitespace character.
- @return if starts with whitespace
+ * Tests if queue starts with a whitespace character.
+ *
+ * @return if starts with whitespace
*/
public boolean matchesWhitespace() {
return !isEmpty() && StringUtil.isWhitespace(queue.charAt(pos));
}
/**
- Test if the queue matches a word character (letter or digit).
- @return if matches a word character
+ * Test if the queue matches a word character (letter or digit).
+ *
+ * @return if matches a word character
*/
public boolean matchesWord() {
return !isEmpty() && Character.isLetterOrDigit(queue.charAt(pos));
@@ -144,11 +168,14 @@ public class TokenQueue {
* Drops the next character off the queue.
*/
public void advance() {
- if (!isEmpty()) pos++;
+ if (!isEmpty()) {
+ pos++;
+ }
}
/**
* Consume one character off queue.
+ *
* @return first character on queue.
*/
public char consume() {
@@ -156,25 +183,36 @@ public class TokenQueue {
}
/**
- * Consumes the supplied sequence of the queue. If the queue does not start with the supplied sequence, will
- * throw an illegal state exception -- but you should be running match() against that condition.
- <p>
- Case insensitive.
- * @param seq sequence to remove from head of queue.
+ * Consumes the supplied sequence of the queue. If the queue does not start
+ * with the supplied sequence, will throw an illegal state exception -- but
+ * you should be running match() against that condition.
+ * <p>
+ * Case insensitive.
+ *
+ * @param seq
+ * sequence to remove from head of queue.
*/
public void consume(String seq) {
- if (!matches(seq))
- throw new IllegalStateException("Queue did not match expected sequence");
+ if (!matches(seq)) {
+ throw new IllegalStateException(
+ "Queue did not match expected sequence");
+ }
int len = seq.length();
- if (len > remainingLength())
- throw new IllegalStateException("Queue not long enough to consume sequence");
-
+ if (len > remainingLength()) {
+ throw new IllegalStateException(
+ "Queue not long enough to consume sequence");
+ }
+
pos += len;
}
/**
- * Pulls a string off the queue, up to but exclusive of the match sequence, or to the queue running out.
- * @param seq String to end on (and not include in return, but leave on queue). <b>Case sensitive.</b>
+ * Pulls a string off the queue, up to but exclusive of the match sequence,
+ * or to the queue running out.
+ *
+ * @param seq
+ * String to end on (and not include in return, but leave on
+ * queue). <b>Case sensitive.</b>
* @return The matched data consumed from queue.
*/
public String consumeTo(String seq) {
@@ -187,38 +225,52 @@ public class TokenQueue {
return remainder();
}
}
-
+
public String consumeToIgnoreCase(String seq) {
int start = pos;
String first = seq.substring(0, 1);
- boolean canScan = first.toLowerCase().equals(first.toUpperCase()); // if first is not cased, use index of
+ boolean canScan = first.toLowerCase().equals(first.toUpperCase()); // if
+ // first
+ // is
+ // not
+ // cased,
+ // use
+ // index
+ // of
while (!isEmpty()) {
- if (matches(seq))
+ if (matches(seq)) {
break;
-
+ }
+
if (canScan) {
int skip = queue.indexOf(first, pos) - pos;
- if (skip == 0) // this char is the skip char, but not match, so force advance of pos
+ if (skip == 0) {
pos++;
- else if (skip < 0) // no chance of finding, grab to end
+ } else if (skip < 0) {
pos = queue.length();
- else
+ } else {
pos += skip;
- }
- else
+ }
+ } else {
pos++;
+ }
}
- String data = queue.substring(start, pos);
- return data;
+ String data = queue.substring(start, pos);
+ return data;
}
/**
- Consumes to the first sequence provided, or to the end of the queue. Leaves the terminator on the queue.
- @param seq any number of terminators to consume to. <b>Case insensitive.</b>
- @return consumed string
+ * Consumes to the first sequence provided, or to the end of the queue.
+ * Leaves the terminator on the queue.
+ *
+ * @param seq
+ * any number of terminators to consume to. <b>Case
+ * insensitive.</b>
+ * @return consumed string
*/
- // todo: method name. not good that consumeTo cares for case, and consume to any doesn't. And the only use for this
+ // todo: method name. not good that consumeTo cares for case, and consume to
+ // any doesn't. And the only use for this
// is is a case sensitive time...
public String consumeToAny(String... seq) {
int start = pos;
@@ -226,16 +278,20 @@ public class TokenQueue {
pos++;
}
- String data = queue.substring(start, pos);
- return data;
+ String data = queue.substring(start, pos);
+ return data;
}
/**
- * Pulls a string off the queue (like consumeTo), and then pulls off the matched string (but does not return it).
+ * Pulls a string off the queue (like consumeTo), and then pulls off the
+ * matched string (but does not return it).
* <p>
- * If the queue runs out of characters before finding the seq, will return as much as it can (and queue will go
- * isEmpty() == true).
- * @param seq String to match up to, and not include in return, and to pull off queue. <b>Case sensitive.</b>
+ * If the queue runs out of characters before finding the seq, will return
+ * as much as it can (and queue will go isEmpty() == true).
+ *
+ * @param seq
+ * String to match up to, and not include in return, and to pull
+ * off queue. <b>Case sensitive.</b>
* @return Data matched from queue.
*/
public String chompTo(String seq) {
@@ -243,7 +299,7 @@ public class TokenQueue {
matchChomp(seq);
return data;
}
-
+
public String chompToIgnoreCase(String seq) {
String data = consumeToIgnoreCase(seq); // case insensitive scan
matchChomp(seq);
@@ -251,12 +307,17 @@ public class TokenQueue {
}
/**
- * Pulls a balanced string off the queue. E.g. if queue is "(one (two) three) four", (,) will return "one (two) three",
- * and leave " four" on the queue. Unbalanced openers and closers can be escaped (with \). Those escapes will be left
- * in the returned string, which is suitable for regexes (where we need to preserve the escape), but unsuitable for
+ * Pulls a balanced string off the queue. E.g. if queue is
+ * "(one (two) three) four", (,) will return "one (two) three", and leave
+ * " four" on the queue. Unbalanced openers and closers can be escaped (with
+ * \). Those escapes will be left in the returned string, which is suitable
+ * for regexes (where we need to preserve the escape), but unsuitable for
* contains text strings; use unescape for that.
- * @param open opener
- * @param close closer
+ *
+ * @param open
+ * opener
+ * @param close
+ * closer
* @return data matched from the queue
*/
public String chompBalanced(char open, char close) {
@@ -265,25 +326,32 @@ public class TokenQueue {
char last = 0;
do {
- if (isEmpty()) break;
+ if (isEmpty()) {
+ break;
+ }
Character c = consume();
if (last == 0 || last != ESC) {
- if (c.equals(open))
+ if (c.equals(open)) {
depth++;
- else if (c.equals(close))
+ } else if (c.equals(close)) {
depth--;
+ }
}
- if (depth > 0 && last != 0)
- accum.append(c); // don't include the outer match pair in the return
+ if (depth > 0 && last != 0) {
+ accum.append(c); // don't include the outer match pair in the
+ // return
+ }
last = c;
} while (depth > 0);
return accum.toString();
}
-
+
/**
* Unescaped a \ escaped string.
- * @param in backslash escaped string
+ *
+ * @param in
+ * backslash escaped string
* @return unescaped string
*/
public static String unescape(String in) {
@@ -291,11 +359,12 @@ public class TokenQueue {
char last = 0;
for (char c : in.toCharArray()) {
if (c == ESC) {
- if (last != 0 && last == ESC)
+ if (last != 0 && last == ESC) {
out.append(c);
- }
- else
+ }
+ } else {
out.append(c);
+ }
last = c;
}
return out.toString();
@@ -315,15 +384,17 @@ public class TokenQueue {
/**
* Retrieves the next run of word type (letter or digit) off the queue.
+ *
* @return String of word characters from queue, or empty string if none.
*/
public String consumeWord() {
int start = pos;
- while (matchesWord())
+ while (matchesWord()) {
pos++;
+ }
return queue.substring(start, pos);
}
-
+
/**
* Consume an tag name off the queue (word or :, _, -)
*
@@ -331,53 +402,61 @@ public class TokenQueue {
*/
public String consumeTagName() {
int start = pos;
- while (!isEmpty() && (matchesWord() || matchesAny(':', '_', '-')))
+ while (!isEmpty() && (matchesWord() || matchesAny(':', '_', '-'))) {
pos++;
-
+ }
+
return queue.substring(start, pos);
}
-
+
/**
- * Consume a CSS element selector (tag name, but | instead of : for namespaces, to not conflict with :pseudo selects).
+ * Consume a CSS element selector (tag name, but | instead of : for
+ * namespaces, to not conflict with :pseudo selects).
*
* @return tag name
*/
public String consumeElementSelector() {
int start = pos;
- while (!isEmpty() && (matchesWord() || matchesAny('|', '_', '-')))
+ while (!isEmpty() && (matchesWord() || matchesAny('|', '_', '-'))) {
pos++;
-
+ }
+
return queue.substring(start, pos);
}
/**
- Consume a CSS identifier (ID or class) off the queue (letter, digit, -, _)
- http://www.w3.org/TR/CSS2/syndata.html#value-def-identifier
- @return identifier
+ * Consume a CSS identifier (ID or class) off the queue (letter, digit, -,
+ * _) http://www.w3.org/TR/CSS2/syndata.html#value-def-identifier
+ *
+ * @return identifier
*/
public String consumeCssIdentifier() {
int start = pos;
- while (!isEmpty() && (matchesWord() || matchesAny('-', '_')))
+ while (!isEmpty() && (matchesWord() || matchesAny('-', '_'))) {
pos++;
+ }
return queue.substring(start, pos);
}
/**
- Consume an attribute key off the queue (letter, digit, -, _, :")
- @return attribute key
+ * Consume an attribute key off the queue (letter, digit, -, _, :)
+ *
+ * @return attribute key
*/
public String consumeAttributeKey() {
int start = pos;
- while (!isEmpty() && (matchesWord() || matchesAny('-', '_', ':')))
+ while (!isEmpty() && (matchesWord() || matchesAny('-', '_', ':'))) {
pos++;
-
+ }
+
return queue.substring(start, pos);
}
/**
- Consume and return whatever is left on the queue.
- @return remained of queue.
+ * Consume and return whatever is left on the queue.
+ *
+ * @return remainder of queue.
*/
public String remainder() {
StringBuilder accum = new StringBuilder();
@@ -386,7 +465,8 @@ public class TokenQueue {
}
return accum.toString();
}
-
+
+ @Override
public String toString() {
return queue.substring(pos);
}
diff --git a/server/src/org/jsoup/parser/Tokeniser.java b/server/src/org/jsoup/parser/Tokeniser.java
index ce6ee690d6..f46c962281 100644
--- a/server/src/org/jsoup/parser/Tokeniser.java
+++ b/server/src/org/jsoup/parser/Tokeniser.java
@@ -3,9 +3,6 @@ package org.jsoup.parser;
import org.jsoup.helper.Validate;
import org.jsoup.nodes.Entities;
-import java.util.ArrayList;
-import java.util.List;
-
/**
* Readers the input stream into tokens.
*/
@@ -15,16 +12,21 @@ class Tokeniser {
private CharacterReader reader; // html input
private ParseErrorList errors; // errors found while tokenising
- private TokeniserState state = TokeniserState.Data; // current tokenisation state
+ private TokeniserState state = TokeniserState.Data; // current tokenisation
+ // state
private Token emitPending; // the token we are about to emit on next read
private boolean isEmitPending = false;
- private StringBuilder charBuffer = new StringBuilder(); // buffers characters to output as one token
+ private StringBuilder charBuffer = new StringBuilder(); // buffers
+ // characters to
+ // output as one
+ // token
StringBuilder dataBuffer; // buffers data looking for </script>
Token.Tag tagPending; // tag we are building up
Token.Doctype doctypePending; // doctype building up
Token.Comment commentPending; // comment building up
- private Token.StartTag lastStartTag; // the last start tag emitted, to test appropriate end tag
+ private Token.StartTag lastStartTag; // the last start tag emitted, to test
+ // appropriate end tag
private boolean selfClosingFlagAcknowledged = true;
Tokeniser(CharacterReader reader, ParseErrorList errors) {
@@ -38,10 +40,12 @@ class Tokeniser {
selfClosingFlagAcknowledged = true;
}
- while (!isEmitPending)
+ while (!isEmitPending) {
state.read(this, reader);
+ }
- // if emit is pending, a non-character token was found: return any chars in buffer, and leave token for next read:
+ // if emit is pending, a non-character token was found: return any chars
+ // in buffer, and leave token for next read:
if (charBuffer.length() > 0) {
String str = charBuffer.toString();
charBuffer.delete(0, charBuffer.length());
@@ -61,17 +65,20 @@ class Tokeniser {
if (token.type == Token.TokenType.StartTag) {
Token.StartTag startTag = (Token.StartTag) token;
lastStartTag = startTag;
- if (startTag.selfClosing)
+ if (startTag.selfClosing) {
selfClosingFlagAcknowledged = false;
+ }
} else if (token.type == Token.TokenType.EndTag) {
Token.EndTag endTag = (Token.EndTag) token;
- if (endTag.attributes.size() > 0)
+ if (endTag.attributes.size() > 0) {
error("Attributes incorrectly present on end tag");
+ }
}
}
void emit(String str) {
- // buffer strings up until last string token found, to emit only one token for a run of character refs etc.
+ // buffer strings up until last string token found, to emit only one
+ // token for a run of character refs etc.
// does not set isEmitPending; read checks that
charBuffer.append(str);
}
@@ -97,32 +104,40 @@ class Tokeniser {
selfClosingFlagAcknowledged = true;
}
- Character consumeCharacterReference(Character additionalAllowedCharacter, boolean inAttribute) {
- if (reader.isEmpty())
+ Character consumeCharacterReference(Character additionalAllowedCharacter,
+ boolean inAttribute) {
+ if (reader.isEmpty()) {
return null;
- if (additionalAllowedCharacter != null && additionalAllowedCharacter == reader.current())
+ }
+ if (additionalAllowedCharacter != null
+ && additionalAllowedCharacter == reader.current()) {
return null;
- if (reader.matchesAny('\t', '\n', '\f', ' ', '<', '&'))
+ }
+ if (reader.matchesAny('\t', '\n', '\f', ' ', '<', '&')) {
return null;
+ }
reader.mark();
if (reader.matchConsume("#")) { // numbered
boolean isHexMode = reader.matchConsumeIgnoreCase("X");
- String numRef = isHexMode ? reader.consumeHexSequence() : reader.consumeDigitSequence();
+ String numRef = isHexMode ? reader.consumeHexSequence() : reader
+ .consumeDigitSequence();
if (numRef.length() == 0) { // didn't match anything
characterReferenceError("numeric reference with no numerals");
reader.rewindToMark();
return null;
}
- if (!reader.matchConsume(";"))
+ if (!reader.matchConsume(";")) {
characterReferenceError("missing semicolon"); // missing semi
+ }
int charval = -1;
try {
int base = isHexMode ? 16 : 10;
charval = Integer.valueOf(numRef, base);
} catch (NumberFormatException e) {
} // skip
- if (charval == -1 || (charval >= 0xD800 && charval <= 0xDFFF) || charval > 0x10FFFF) {
+ if (charval == -1 || (charval >= 0xD800 && charval <= 0xDFFF)
+ || charval > 0x10FFFF) {
characterReferenceError("character outside of valid range");
return replacementChar;
} else {
@@ -131,32 +146,40 @@ class Tokeniser {
return (char) charval;
}
} else { // named
- // get as many letters as possible, and look for matching entities. unconsume backwards till a match is found
+ // get as many letters as possible, and look for matching entities.
+ // unconsume backwards till a match is found
String nameRef = reader.consumeLetterThenDigitSequence();
- String origNameRef = new String(nameRef); // for error reporting. nameRef gets chomped looking for matches
+ String origNameRef = new String(nameRef); // for error reporting.
+ // nameRef gets chomped
+ // looking for matches
boolean looksLegit = reader.matches(';');
boolean found = false;
while (nameRef.length() > 0 && !found) {
- if (Entities.isNamedEntity(nameRef))
+ if (Entities.isNamedEntity(nameRef)) {
found = true;
- else {
- nameRef = nameRef.substring(0, nameRef.length()-1);
+ } else {
+ nameRef = nameRef.substring(0, nameRef.length() - 1);
reader.unconsume();
}
}
if (!found) {
- if (looksLegit) // named with semicolon
- characterReferenceError(String.format("invalid named referenece '%s'", origNameRef));
+ if (looksLegit) {
+ characterReferenceError(String.format(
+ "invalid named referenece '%s'", origNameRef));
+ }
reader.rewindToMark();
return null;
}
- if (inAttribute && (reader.matchesLetter() || reader.matchesDigit() || reader.matchesAny('=', '-', '_'))) {
+ if (inAttribute
+ && (reader.matchesLetter() || reader.matchesDigit() || reader
+ .matchesAny('=', '-', '_'))) {
// don't want that to match
reader.rewindToMark();
return null;
}
- if (!reader.matchConsume(";"))
+ if (!reader.matchConsume(";")) {
characterReferenceError("missing semicolon"); // missing semi
+ }
return Entities.getCharacterByName(nameRef);
}
}
@@ -192,8 +215,9 @@ class Tokeniser {
}
boolean isAppropriateEndTagToken() {
- if (lastStartTag == null)
+ if (lastStartTag == null) {
return false;
+ }
return tagPending.tagName.equals(lastStartTag.tagName);
}
@@ -202,23 +226,33 @@ class Tokeniser {
}
void error(TokeniserState state) {
- if (errors.canAddError())
- errors.add(new ParseError(reader.pos(), "Unexpected character '%s' in input state [%s]", reader.current(), state));
+ if (errors.canAddError()) {
+ errors.add(new ParseError(reader.pos(),
+ "Unexpected character '%s' in input state [%s]", reader
+ .current(), state));
+ }
}
void eofError(TokeniserState state) {
- if (errors.canAddError())
- errors.add(new ParseError(reader.pos(), "Unexpectedly reached end of file (EOF) in input state [%s]", state));
+ if (errors.canAddError()) {
+ errors.add(new ParseError(
+ reader.pos(),
+ "Unexpectedly reached end of file (EOF) in input state [%s]",
+ state));
+ }
}
private void characterReferenceError(String message) {
- if (errors.canAddError())
- errors.add(new ParseError(reader.pos(), "Invalid character reference: %s", message));
+ if (errors.canAddError()) {
+ errors.add(new ParseError(reader.pos(),
+ "Invalid character reference: %s", message));
+ }
}
private void error(String errorMsg) {
- if (errors.canAddError())
+ if (errors.canAddError()) {
errors.add(new ParseError(reader.pos(), errorMsg));
+ }
}
boolean currentNodeInHtmlNS() {
diff --git a/server/src/org/jsoup/parser/TokeniserState.java b/server/src/org/jsoup/parser/TokeniserState.java
index e3013c73e9..7f7315d769 100644
--- a/server/src/org/jsoup/parser/TokeniserState.java
+++ b/server/src/org/jsoup/parser/TokeniserState.java
@@ -5,162 +5,174 @@ package org.jsoup.parser;
*/
enum TokeniserState {
Data {
- // in data state, gather characters until a character reference or tag is found
+ // in data state, gather characters until a character reference or tag
+ // is found
+ @Override
void read(Tokeniser t, CharacterReader r) {
switch (r.current()) {
- case '&':
- t.advanceTransition(CharacterReferenceInData);
- break;
- case '<':
- t.advanceTransition(TagOpen);
- break;
- case nullChar:
- t.error(this); // NOT replacement character (oddly?)
- t.emit(r.consume());
- break;
- case eof:
- t.emit(new Token.EOF());
- break;
- default:
- String data = r.consumeToAny('&', '<', nullChar);
- t.emit(data);
- break;
+ case '&':
+ t.advanceTransition(CharacterReferenceInData);
+ break;
+ case '<':
+ t.advanceTransition(TagOpen);
+ break;
+ case nullChar:
+ t.error(this); // NOT replacement character (oddly?)
+ t.emit(r.consume());
+ break;
+ case eof:
+ t.emit(new Token.EOF());
+ break;
+ default:
+ String data = r.consumeToAny('&', '<', nullChar);
+ t.emit(data);
+ break;
}
}
},
CharacterReferenceInData {
// from & in data
+ @Override
void read(Tokeniser t, CharacterReader r) {
Character c = t.consumeCharacterReference(null, false);
- if (c == null)
+ if (c == null) {
t.emit('&');
- else
+ } else {
t.emit(c);
+ }
t.transition(Data);
}
},
Rcdata {
- /// handles data in title, textarea etc
+ // handles data in title, textarea etc
+ @Override
void read(Tokeniser t, CharacterReader r) {
switch (r.current()) {
- case '&':
- t.advanceTransition(CharacterReferenceInRcdata);
- break;
- case '<':
- t.advanceTransition(RcdataLessthanSign);
- break;
- case nullChar:
- t.error(this);
- r.advance();
- t.emit(replacementChar);
- break;
- case eof:
- t.emit(new Token.EOF());
- break;
- default:
- String data = r.consumeToAny('&', '<', nullChar);
- t.emit(data);
- break;
+ case '&':
+ t.advanceTransition(CharacterReferenceInRcdata);
+ break;
+ case '<':
+ t.advanceTransition(RcdataLessthanSign);
+ break;
+ case nullChar:
+ t.error(this);
+ r.advance();
+ t.emit(replacementChar);
+ break;
+ case eof:
+ t.emit(new Token.EOF());
+ break;
+ default:
+ String data = r.consumeToAny('&', '<', nullChar);
+ t.emit(data);
+ break;
}
}
},
CharacterReferenceInRcdata {
+ @Override
void read(Tokeniser t, CharacterReader r) {
Character c = t.consumeCharacterReference(null, false);
- if (c == null)
+ if (c == null) {
t.emit('&');
- else
+ } else {
t.emit(c);
+ }
t.transition(Rcdata);
}
},
Rawtext {
+ @Override
void read(Tokeniser t, CharacterReader r) {
switch (r.current()) {
- case '<':
- t.advanceTransition(RawtextLessthanSign);
- break;
- case nullChar:
- t.error(this);
- r.advance();
- t.emit(replacementChar);
- break;
- case eof:
- t.emit(new Token.EOF());
- break;
- default:
- String data = r.consumeToAny('<', nullChar);
- t.emit(data);
- break;
+ case '<':
+ t.advanceTransition(RawtextLessthanSign);
+ break;
+ case nullChar:
+ t.error(this);
+ r.advance();
+ t.emit(replacementChar);
+ break;
+ case eof:
+ t.emit(new Token.EOF());
+ break;
+ default:
+ String data = r.consumeToAny('<', nullChar);
+ t.emit(data);
+ break;
}
}
},
ScriptData {
+ @Override
void read(Tokeniser t, CharacterReader r) {
switch (r.current()) {
- case '<':
- t.advanceTransition(ScriptDataLessthanSign);
- break;
- case nullChar:
- t.error(this);
- r.advance();
- t.emit(replacementChar);
- break;
- case eof:
- t.emit(new Token.EOF());
- break;
- default:
- String data = r.consumeToAny('<', nullChar);
- t.emit(data);
- break;
+ case '<':
+ t.advanceTransition(ScriptDataLessthanSign);
+ break;
+ case nullChar:
+ t.error(this);
+ r.advance();
+ t.emit(replacementChar);
+ break;
+ case eof:
+ t.emit(new Token.EOF());
+ break;
+ default:
+ String data = r.consumeToAny('<', nullChar);
+ t.emit(data);
+ break;
}
}
},
PLAINTEXT {
+ @Override
void read(Tokeniser t, CharacterReader r) {
switch (r.current()) {
- case nullChar:
- t.error(this);
- r.advance();
- t.emit(replacementChar);
- break;
- case eof:
- t.emit(new Token.EOF());
- break;
- default:
- String data = r.consumeTo(nullChar);
- t.emit(data);
- break;
+ case nullChar:
+ t.error(this);
+ r.advance();
+ t.emit(replacementChar);
+ break;
+ case eof:
+ t.emit(new Token.EOF());
+ break;
+ default:
+ String data = r.consumeTo(nullChar);
+ t.emit(data);
+ break;
}
}
},
TagOpen {
// from < in data
+ @Override
void read(Tokeniser t, CharacterReader r) {
switch (r.current()) {
- case '!':
- t.advanceTransition(MarkupDeclarationOpen);
- break;
- case '/':
- t.advanceTransition(EndTagOpen);
- break;
- case '?':
- t.advanceTransition(BogusComment);
- break;
- default:
- if (r.matchesLetter()) {
- t.createTagPending(true);
- t.transition(TagName);
- } else {
- t.error(this);
- t.emit('<'); // char that got us here
- t.transition(Data);
- }
- break;
+ case '!':
+ t.advanceTransition(MarkupDeclarationOpen);
+ break;
+ case '/':
+ t.advanceTransition(EndTagOpen);
+ break;
+ case '?':
+ t.advanceTransition(BogusComment);
+ break;
+ default:
+ if (r.matchesLetter()) {
+ t.createTagPending(true);
+ t.transition(TagName);
+ } else {
+ t.error(this);
+ t.emit('<'); // char that got us here
+ t.transition(Data);
+ }
+ break;
}
}
},
EndTagOpen {
+ @Override
void read(Tokeniser t, CharacterReader r) {
if (r.isEmpty()) {
t.eofError(this);
@@ -180,43 +192,49 @@ enum TokeniserState {
},
TagName {
// from < or </ in data, will have start or end tag pending
+ @Override
void read(Tokeniser t, CharacterReader r) {
- // previous TagOpen state did NOT consume, will have a letter char in current
- String tagName = r.consumeToAny('\t', '\n', '\f', ' ', '/', '>', nullChar).toLowerCase();
+ // previous TagOpen state did NOT consume, will have a letter char
+ // in current
+ String tagName = r.consumeToAny('\t', '\n', '\f', ' ', '/', '>',
+ nullChar).toLowerCase();
t.tagPending.appendTagName(tagName);
switch (r.consume()) {
- case '\t':
- case '\n':
- case '\f':
- case ' ':
- t.transition(BeforeAttributeName);
- break;
- case '/':
- t.transition(SelfClosingStartTag);
- break;
- case '>':
- t.emitTagPending();
- t.transition(Data);
- break;
- case nullChar: // replacement
- t.tagPending.appendTagName(replacementStr);
- break;
- case eof: // should emit pending tag?
- t.eofError(this);
- t.transition(Data);
+ case '\t':
+ case '\n':
+ case '\f':
+ case ' ':
+ t.transition(BeforeAttributeName);
+ break;
+ case '/':
+ t.transition(SelfClosingStartTag);
+ break;
+ case '>':
+ t.emitTagPending();
+ t.transition(Data);
+ break;
+ case nullChar: // replacement
+ t.tagPending.appendTagName(replacementStr);
+ break;
+ case eof: // should emit pending tag?
+ t.eofError(this);
+ t.transition(Data);
// no default, as covered with above consumeToAny
}
}
},
RcdataLessthanSign {
// from < in rcdata
+ @Override
void read(Tokeniser t, CharacterReader r) {
if (r.matches('/')) {
t.createTempBuffer();
t.advanceTransition(RCDATAEndTagOpen);
- } else if (r.matchesLetter() && !r.containsIgnoreCase("</" + t.appropriateEndTagName())) {
- // diverge from spec: got a start tag, but there's no appropriate end tag (</title>), so rather than
+ } else if (r.matchesLetter()
+ && !r.containsIgnoreCase("</" + t.appropriateEndTagName())) {
+ // diverge from spec: got a start tag, but there's no
+ // appropriate end tag (</title>), so rather than
// consuming to EOF; break out here
t.tagPending = new Token.EndTag(t.appropriateEndTagName());
t.emitTagPending();
@@ -229,6 +247,7 @@ enum TokeniserState {
}
},
RCDATAEndTagOpen {
+ @Override
void read(Tokeniser t, CharacterReader r) {
if (r.matchesLetter()) {
t.createTagPending(false);
@@ -242,6 +261,7 @@ enum TokeniserState {
}
},
RCDATAEndTagName {
+ @Override
void read(Tokeniser t, CharacterReader r) {
if (r.matchesLetter()) {
String name = r.consumeLetterSequence();
@@ -252,31 +272,33 @@ enum TokeniserState {
char c = r.consume();
switch (c) {
- case '\t':
- case '\n':
- case '\f':
- case ' ':
- if (t.isAppropriateEndTagToken())
- t.transition(BeforeAttributeName);
- else
- anythingElse(t, r);
- break;
- case '/':
- if (t.isAppropriateEndTagToken())
- t.transition(SelfClosingStartTag);
- else
- anythingElse(t, r);
- break;
- case '>':
- if (t.isAppropriateEndTagToken()) {
- t.emitTagPending();
- t.transition(Data);
- }
- else
- anythingElse(t, r);
- break;
- default:
+ case '\t':
+ case '\n':
+ case '\f':
+ case ' ':
+ if (t.isAppropriateEndTagToken()) {
+ t.transition(BeforeAttributeName);
+ } else {
+ anythingElse(t, r);
+ }
+ break;
+ case '/':
+ if (t.isAppropriateEndTagToken()) {
+ t.transition(SelfClosingStartTag);
+ } else {
anythingElse(t, r);
+ }
+ break;
+ case '>':
+ if (t.isAppropriateEndTagToken()) {
+ t.emitTagPending();
+ t.transition(Data);
+ } else {
+ anythingElse(t, r);
+ }
+ break;
+ default:
+ anythingElse(t, r);
}
}
@@ -286,6 +308,7 @@ enum TokeniserState {
}
},
RawtextLessthanSign {
+ @Override
void read(Tokeniser t, CharacterReader r) {
if (r.matches('/')) {
t.createTempBuffer();
@@ -297,6 +320,7 @@ enum TokeniserState {
}
},
RawtextEndTagOpen {
+ @Override
void read(Tokeniser t, CharacterReader r) {
if (r.matchesLetter()) {
t.createTagPending(false);
@@ -308,6 +332,7 @@ enum TokeniserState {
}
},
RawtextEndTagName {
+ @Override
void read(Tokeniser t, CharacterReader r) {
if (r.matchesLetter()) {
String name = r.consumeLetterSequence();
@@ -319,25 +344,26 @@ enum TokeniserState {
if (t.isAppropriateEndTagToken() && !r.isEmpty()) {
char c = r.consume();
switch (c) {
- case '\t':
- case '\n':
- case '\f':
- case ' ':
- t.transition(BeforeAttributeName);
- break;
- case '/':
- t.transition(SelfClosingStartTag);
- break;
- case '>':
- t.emitTagPending();
- t.transition(Data);
- break;
- default:
- t.dataBuffer.append(c);
- anythingElse(t, r);
+ case '\t':
+ case '\n':
+ case '\f':
+ case ' ':
+ t.transition(BeforeAttributeName);
+ break;
+ case '/':
+ t.transition(SelfClosingStartTag);
+ break;
+ case '>':
+ t.emitTagPending();
+ t.transition(Data);
+ break;
+ default:
+ t.dataBuffer.append(c);
+ anythingElse(t, r);
}
- } else
+ } else {
anythingElse(t, r);
+ }
}
private void anythingElse(Tokeniser t, CharacterReader r) {
@@ -346,24 +372,26 @@ enum TokeniserState {
}
},
ScriptDataLessthanSign {
+ @Override
void read(Tokeniser t, CharacterReader r) {
switch (r.consume()) {
- case '/':
- t.createTempBuffer();
- t.transition(ScriptDataEndTagOpen);
- break;
- case '!':
- t.emit("<!");
- t.transition(ScriptDataEscapeStart);
- break;
- default:
- t.emit("<");
- r.unconsume();
- t.transition(ScriptData);
+ case '/':
+ t.createTempBuffer();
+ t.transition(ScriptDataEndTagOpen);
+ break;
+ case '!':
+ t.emit("<!");
+ t.transition(ScriptDataEscapeStart);
+ break;
+ default:
+ t.emit("<");
+ r.unconsume();
+ t.transition(ScriptData);
}
}
},
ScriptDataEndTagOpen {
+ @Override
void read(Tokeniser t, CharacterReader r) {
if (r.matchesLetter()) {
t.createTagPending(false);
@@ -376,6 +404,7 @@ enum TokeniserState {
}
},
ScriptDataEndTagName {
+ @Override
void read(Tokeniser t, CharacterReader r) {
if (r.matchesLetter()) {
String name = r.consumeLetterSequence();
@@ -387,22 +416,22 @@ enum TokeniserState {
if (t.isAppropriateEndTagToken() && !r.isEmpty()) {
char c = r.consume();
switch (c) {
- case '\t':
- case '\n':
- case '\f':
- case ' ':
- t.transition(BeforeAttributeName);
- break;
- case '/':
- t.transition(SelfClosingStartTag);
- break;
- case '>':
- t.emitTagPending();
- t.transition(Data);
- break;
- default:
- t.dataBuffer.append(c);
- anythingElse(t, r);
+ case '\t':
+ case '\n':
+ case '\f':
+ case ' ':
+ t.transition(BeforeAttributeName);
+ break;
+ case '/':
+ t.transition(SelfClosingStartTag);
+ break;
+ case '>':
+ t.emitTagPending();
+ t.transition(Data);
+ break;
+ default:
+ t.dataBuffer.append(c);
+ anythingElse(t, r);
}
} else {
anythingElse(t, r);
@@ -415,6 +444,7 @@ enum TokeniserState {
}
},
ScriptDataEscapeStart {
+ @Override
void read(Tokeniser t, CharacterReader r) {
if (r.matches('-')) {
t.emit('-');
@@ -425,6 +455,7 @@ enum TokeniserState {
}
},
ScriptDataEscapeStartDash {
+ @Override
void read(Tokeniser t, CharacterReader r) {
if (r.matches('-')) {
t.emit('-');
@@ -435,6 +466,7 @@ enum TokeniserState {
}
},
ScriptDataEscaped {
+ @Override
void read(Tokeniser t, CharacterReader r) {
if (r.isEmpty()) {
t.eofError(this);
@@ -443,25 +475,26 @@ enum TokeniserState {
}
switch (r.current()) {
- case '-':
- t.emit('-');
- t.advanceTransition(ScriptDataEscapedDash);
- break;
- case '<':
- t.advanceTransition(ScriptDataEscapedLessthanSign);
- break;
- case nullChar:
- t.error(this);
- r.advance();
- t.emit(replacementChar);
- break;
- default:
- String data = r.consumeToAny('-', '<', nullChar);
- t.emit(data);
+ case '-':
+ t.emit('-');
+ t.advanceTransition(ScriptDataEscapedDash);
+ break;
+ case '<':
+ t.advanceTransition(ScriptDataEscapedLessthanSign);
+ break;
+ case nullChar:
+ t.error(this);
+ r.advance();
+ t.emit(replacementChar);
+ break;
+ default:
+ String data = r.consumeToAny('-', '<', nullChar);
+ t.emit(data);
}
}
},
ScriptDataEscapedDash {
+ @Override
void read(Tokeniser t, CharacterReader r) {
if (r.isEmpty()) {
t.eofError(this);
@@ -471,25 +504,26 @@ enum TokeniserState {
char c = r.consume();
switch (c) {
- case '-':
- t.emit(c);
- t.transition(ScriptDataEscapedDashDash);
- break;
- case '<':
- t.transition(ScriptDataEscapedLessthanSign);
- break;
- case nullChar:
- t.error(this);
- t.emit(replacementChar);
- t.transition(ScriptDataEscaped);
- break;
- default:
- t.emit(c);
- t.transition(ScriptDataEscaped);
+ case '-':
+ t.emit(c);
+ t.transition(ScriptDataEscapedDashDash);
+ break;
+ case '<':
+ t.transition(ScriptDataEscapedLessthanSign);
+ break;
+ case nullChar:
+ t.error(this);
+ t.emit(replacementChar);
+ t.transition(ScriptDataEscaped);
+ break;
+ default:
+ t.emit(c);
+ t.transition(ScriptDataEscaped);
}
}
},
ScriptDataEscapedDashDash {
+ @Override
void read(Tokeniser t, CharacterReader r) {
if (r.isEmpty()) {
t.eofError(this);
@@ -499,28 +533,29 @@ enum TokeniserState {
char c = r.consume();
switch (c) {
- case '-':
- t.emit(c);
- break;
- case '<':
- t.transition(ScriptDataEscapedLessthanSign);
- break;
- case '>':
- t.emit(c);
- t.transition(ScriptData);
- break;
- case nullChar:
- t.error(this);
- t.emit(replacementChar);
- t.transition(ScriptDataEscaped);
- break;
- default:
- t.emit(c);
- t.transition(ScriptDataEscaped);
+ case '-':
+ t.emit(c);
+ break;
+ case '<':
+ t.transition(ScriptDataEscapedLessthanSign);
+ break;
+ case '>':
+ t.emit(c);
+ t.transition(ScriptData);
+ break;
+ case nullChar:
+ t.error(this);
+ t.emit(replacementChar);
+ t.transition(ScriptDataEscaped);
+ break;
+ default:
+ t.emit(c);
+ t.transition(ScriptDataEscaped);
}
}
},
ScriptDataEscapedLessthanSign {
+ @Override
void read(Tokeniser t, CharacterReader r) {
if (r.matchesLetter()) {
t.createTempBuffer();
@@ -537,6 +572,7 @@ enum TokeniserState {
}
},
ScriptDataEscapedEndTagOpen {
+ @Override
void read(Tokeniser t, CharacterReader r) {
if (r.matchesLetter()) {
t.createTagPending(false);
@@ -550,6 +586,7 @@ enum TokeniserState {
}
},
ScriptDataEscapedEndTagName {
+ @Override
void read(Tokeniser t, CharacterReader r) {
if (r.matchesLetter()) {
String name = r.consumeLetterSequence();
@@ -561,35 +598,36 @@ enum TokeniserState {
if (t.isAppropriateEndTagToken() && !r.isEmpty()) {
char c = r.consume();
switch (c) {
- case '\t':
- case '\n':
- case '\f':
- case ' ':
- t.transition(BeforeAttributeName);
- break;
- case '/':
- t.transition(SelfClosingStartTag);
- break;
- case '>':
- t.emitTagPending();
- t.transition(Data);
- break;
- default:
- t.dataBuffer.append(c);
- anythingElse(t, r);
- break;
+ case '\t':
+ case '\n':
+ case '\f':
+ case ' ':
+ t.transition(BeforeAttributeName);
+ break;
+ case '/':
+ t.transition(SelfClosingStartTag);
+ break;
+ case '>':
+ t.emitTagPending();
+ t.transition(Data);
+ break;
+ default:
+ t.dataBuffer.append(c);
+ anythingElse(t, r);
+ break;
}
} else {
anythingElse(t, r);
}
}
-
+
private void anythingElse(Tokeniser t, CharacterReader r) {
t.emit("</" + t.dataBuffer.toString());
t.transition(ScriptDataEscaped);
}
},
ScriptDataDoubleEscapeStart {
+ @Override
void read(Tokeniser t, CharacterReader r) {
if (r.matchesLetter()) {
String name = r.consumeLetterSequence();
@@ -600,109 +638,114 @@ enum TokeniserState {
char c = r.consume();
switch (c) {
- case '\t':
- case '\n':
- case '\f':
- case ' ':
- case '/':
- case '>':
- if (t.dataBuffer.toString().equals("script"))
- t.transition(ScriptDataDoubleEscaped);
- else
- t.transition(ScriptDataEscaped);
- t.emit(c);
- break;
- default:
- r.unconsume();
+ case '\t':
+ case '\n':
+ case '\f':
+ case ' ':
+ case '/':
+ case '>':
+ if (t.dataBuffer.toString().equals("script")) {
+ t.transition(ScriptDataDoubleEscaped);
+ } else {
t.transition(ScriptDataEscaped);
+ }
+ t.emit(c);
+ break;
+ default:
+ r.unconsume();
+ t.transition(ScriptDataEscaped);
}
}
},
ScriptDataDoubleEscaped {
+ @Override
void read(Tokeniser t, CharacterReader r) {
char c = r.current();
switch (c) {
- case '-':
- t.emit(c);
- t.advanceTransition(ScriptDataDoubleEscapedDash);
- break;
- case '<':
- t.emit(c);
- t.advanceTransition(ScriptDataDoubleEscapedLessthanSign);
- break;
- case nullChar:
- t.error(this);
- r.advance();
- t.emit(replacementChar);
- break;
- case eof:
- t.eofError(this);
- t.transition(Data);
- break;
- default:
- String data = r.consumeToAny('-', '<', nullChar);
- t.emit(data);
+ case '-':
+ t.emit(c);
+ t.advanceTransition(ScriptDataDoubleEscapedDash);
+ break;
+ case '<':
+ t.emit(c);
+ t.advanceTransition(ScriptDataDoubleEscapedLessthanSign);
+ break;
+ case nullChar:
+ t.error(this);
+ r.advance();
+ t.emit(replacementChar);
+ break;
+ case eof:
+ t.eofError(this);
+ t.transition(Data);
+ break;
+ default:
+ String data = r.consumeToAny('-', '<', nullChar);
+ t.emit(data);
}
}
},
ScriptDataDoubleEscapedDash {
+ @Override
void read(Tokeniser t, CharacterReader r) {
char c = r.consume();
switch (c) {
- case '-':
- t.emit(c);
- t.transition(ScriptDataDoubleEscapedDashDash);
- break;
- case '<':
- t.emit(c);
- t.transition(ScriptDataDoubleEscapedLessthanSign);
- break;
- case nullChar:
- t.error(this);
- t.emit(replacementChar);
- t.transition(ScriptDataDoubleEscaped);
- break;
- case eof:
- t.eofError(this);
- t.transition(Data);
- break;
- default:
- t.emit(c);
- t.transition(ScriptDataDoubleEscaped);
+ case '-':
+ t.emit(c);
+ t.transition(ScriptDataDoubleEscapedDashDash);
+ break;
+ case '<':
+ t.emit(c);
+ t.transition(ScriptDataDoubleEscapedLessthanSign);
+ break;
+ case nullChar:
+ t.error(this);
+ t.emit(replacementChar);
+ t.transition(ScriptDataDoubleEscaped);
+ break;
+ case eof:
+ t.eofError(this);
+ t.transition(Data);
+ break;
+ default:
+ t.emit(c);
+ t.transition(ScriptDataDoubleEscaped);
}
}
},
ScriptDataDoubleEscapedDashDash {
+ @Override
void read(Tokeniser t, CharacterReader r) {
char c = r.consume();
switch (c) {
- case '-':
- t.emit(c);
- break;
- case '<':
- t.emit(c);
- t.transition(ScriptDataDoubleEscapedLessthanSign);
- break;
- case '>':
- t.emit(c);
- t.transition(ScriptData);
- break;
- case nullChar:
- t.error(this);
- t.emit(replacementChar);
- t.transition(ScriptDataDoubleEscaped);
- break;
- case eof:
- t.eofError(this);
- t.transition(Data);
- break;
- default:
- t.emit(c);
- t.transition(ScriptDataDoubleEscaped);
+ case '-':
+ t.emit(c);
+ break;
+ case '<':
+ t.emit(c);
+ t.transition(ScriptDataDoubleEscapedLessthanSign);
+ break;
+ case '>':
+ t.emit(c);
+ t.transition(ScriptData);
+ break;
+ case nullChar:
+ t.error(this);
+ t.emit(replacementChar);
+ t.transition(ScriptDataDoubleEscaped);
+ break;
+ case eof:
+ t.eofError(this);
+ t.transition(Data);
+ break;
+ default:
+ t.emit(c);
+ t.transition(ScriptDataDoubleEscaped);
}
}
},
ScriptDataDoubleEscapedLessthanSign {
+ @Override
void read(Tokeniser t, CharacterReader r) {
if (r.matches('/')) {
t.emit('/');
@@ -714,6 +757,7 @@ enum TokeniserState {
}
},
ScriptDataDoubleEscapeEnd {
+ @Override
void read(Tokeniser t, CharacterReader r) {
if (r.matchesLetter()) {
String name = r.consumeLetterSequence();
@@ -724,357 +768,377 @@ enum TokeniserState {
char c = r.consume();
switch (c) {
- case '\t':
- case '\n':
- case '\f':
- case ' ':
- case '/':
- case '>':
- if (t.dataBuffer.toString().equals("script"))
- t.transition(ScriptDataEscaped);
- else
- t.transition(ScriptDataDoubleEscaped);
- t.emit(c);
- break;
- default:
- r.unconsume();
+ case '\t':
+ case '\n':
+ case '\f':
+ case ' ':
+ case '/':
+ case '>':
+ if (t.dataBuffer.toString().equals("script")) {
+ t.transition(ScriptDataEscaped);
+ } else {
t.transition(ScriptDataDoubleEscaped);
+ }
+ t.emit(c);
+ break;
+ default:
+ r.unconsume();
+ t.transition(ScriptDataDoubleEscaped);
}
}
},
BeforeAttributeName {
// from tagname <xxx
+ @Override
void read(Tokeniser t, CharacterReader r) {
char c = r.consume();
switch (c) {
- case '\t':
- case '\n':
- case '\f':
- case ' ':
- break; // ignore whitespace
- case '/':
- t.transition(SelfClosingStartTag);
- break;
- case '>':
- t.emitTagPending();
- t.transition(Data);
- break;
- case nullChar:
- t.error(this);
- t.tagPending.newAttribute();
- r.unconsume();
- t.transition(AttributeName);
- break;
- case eof:
- t.eofError(this);
- t.transition(Data);
- break;
- case '"':
- case '\'':
- case '<':
- case '=':
- t.error(this);
- t.tagPending.newAttribute();
- t.tagPending.appendAttributeName(c);
- t.transition(AttributeName);
- break;
- default: // A-Z, anything else
- t.tagPending.newAttribute();
- r.unconsume();
- t.transition(AttributeName);
+ case '\t':
+ case '\n':
+ case '\f':
+ case ' ':
+ break; // ignore whitespace
+ case '/':
+ t.transition(SelfClosingStartTag);
+ break;
+ case '>':
+ t.emitTagPending();
+ t.transition(Data);
+ break;
+ case nullChar:
+ t.error(this);
+ t.tagPending.newAttribute();
+ r.unconsume();
+ t.transition(AttributeName);
+ break;
+ case eof:
+ t.eofError(this);
+ t.transition(Data);
+ break;
+ case '"':
+ case '\'':
+ case '<':
+ case '=':
+ t.error(this);
+ t.tagPending.newAttribute();
+ t.tagPending.appendAttributeName(c);
+ t.transition(AttributeName);
+ break;
+ default: // A-Z, anything else
+ t.tagPending.newAttribute();
+ r.unconsume();
+ t.transition(AttributeName);
}
}
},
AttributeName {
// from before attribute name
+ @Override
void read(Tokeniser t, CharacterReader r) {
- String name = r.consumeToAny('\t', '\n', '\f', ' ', '/', '=', '>', nullChar, '"', '\'', '<');
+ String name = r.consumeToAny('\t', '\n', '\f', ' ', '/', '=', '>',
+ nullChar, '"', '\'', '<');
t.tagPending.appendAttributeName(name.toLowerCase());
char c = r.consume();
switch (c) {
- case '\t':
- case '\n':
- case '\f':
- case ' ':
- t.transition(AfterAttributeName);
- break;
- case '/':
- t.transition(SelfClosingStartTag);
- break;
- case '=':
- t.transition(BeforeAttributeValue);
- break;
- case '>':
- t.emitTagPending();
- t.transition(Data);
- break;
- case nullChar:
- t.error(this);
- t.tagPending.appendAttributeName(replacementChar);
- break;
- case eof:
- t.eofError(this);
- t.transition(Data);
- break;
- case '"':
- case '\'':
- case '<':
- t.error(this);
- t.tagPending.appendAttributeName(c);
+ case '\t':
+ case '\n':
+ case '\f':
+ case ' ':
+ t.transition(AfterAttributeName);
+ break;
+ case '/':
+ t.transition(SelfClosingStartTag);
+ break;
+ case '=':
+ t.transition(BeforeAttributeValue);
+ break;
+ case '>':
+ t.emitTagPending();
+ t.transition(Data);
+ break;
+ case nullChar:
+ t.error(this);
+ t.tagPending.appendAttributeName(replacementChar);
+ break;
+ case eof:
+ t.eofError(this);
+ t.transition(Data);
+ break;
+ case '"':
+ case '\'':
+ case '<':
+ t.error(this);
+ t.tagPending.appendAttributeName(c);
// no default, as covered in consumeToAny
}
}
},
AfterAttributeName {
+ @Override
void read(Tokeniser t, CharacterReader r) {
char c = r.consume();
switch (c) {
- case '\t':
- case '\n':
- case '\f':
- case ' ':
- // ignore
- break;
- case '/':
- t.transition(SelfClosingStartTag);
- break;
- case '=':
- t.transition(BeforeAttributeValue);
- break;
- case '>':
- t.emitTagPending();
- t.transition(Data);
- break;
- case nullChar:
- t.error(this);
- t.tagPending.appendAttributeName(replacementChar);
- t.transition(AttributeName);
- break;
- case eof:
- t.eofError(this);
- t.transition(Data);
- break;
- case '"':
- case '\'':
- case '<':
- t.error(this);
- t.tagPending.newAttribute();
- t.tagPending.appendAttributeName(c);
- t.transition(AttributeName);
- break;
- default: // A-Z, anything else
- t.tagPending.newAttribute();
- r.unconsume();
- t.transition(AttributeName);
+ case '\t':
+ case '\n':
+ case '\f':
+ case ' ':
+ // ignore
+ break;
+ case '/':
+ t.transition(SelfClosingStartTag);
+ break;
+ case '=':
+ t.transition(BeforeAttributeValue);
+ break;
+ case '>':
+ t.emitTagPending();
+ t.transition(Data);
+ break;
+ case nullChar:
+ t.error(this);
+ t.tagPending.appendAttributeName(replacementChar);
+ t.transition(AttributeName);
+ break;
+ case eof:
+ t.eofError(this);
+ t.transition(Data);
+ break;
+ case '"':
+ case '\'':
+ case '<':
+ t.error(this);
+ t.tagPending.newAttribute();
+ t.tagPending.appendAttributeName(c);
+ t.transition(AttributeName);
+ break;
+ default: // A-Z, anything else
+ t.tagPending.newAttribute();
+ r.unconsume();
+ t.transition(AttributeName);
}
}
},
BeforeAttributeValue {
+ @Override
void read(Tokeniser t, CharacterReader r) {
char c = r.consume();
switch (c) {
- case '\t':
- case '\n':
- case '\f':
- case ' ':
- // ignore
- break;
- case '"':
- t.transition(AttributeValue_doubleQuoted);
- break;
- case '&':
- r.unconsume();
- t.transition(AttributeValue_unquoted);
- break;
- case '\'':
- t.transition(AttributeValue_singleQuoted);
- break;
- case nullChar:
- t.error(this);
- t.tagPending.appendAttributeValue(replacementChar);
- t.transition(AttributeValue_unquoted);
- break;
- case eof:
- t.eofError(this);
- t.transition(Data);
- break;
- case '>':
- t.error(this);
- t.emitTagPending();
- t.transition(Data);
- break;
- case '<':
- case '=':
- case '`':
- t.error(this);
- t.tagPending.appendAttributeValue(c);
- t.transition(AttributeValue_unquoted);
- break;
- default:
- r.unconsume();
- t.transition(AttributeValue_unquoted);
+ case '\t':
+ case '\n':
+ case '\f':
+ case ' ':
+ // ignore
+ break;
+ case '"':
+ t.transition(AttributeValue_doubleQuoted);
+ break;
+ case '&':
+ r.unconsume();
+ t.transition(AttributeValue_unquoted);
+ break;
+ case '\'':
+ t.transition(AttributeValue_singleQuoted);
+ break;
+ case nullChar:
+ t.error(this);
+ t.tagPending.appendAttributeValue(replacementChar);
+ t.transition(AttributeValue_unquoted);
+ break;
+ case eof:
+ t.eofError(this);
+ t.transition(Data);
+ break;
+ case '>':
+ t.error(this);
+ t.emitTagPending();
+ t.transition(Data);
+ break;
+ case '<':
+ case '=':
+ case '`':
+ t.error(this);
+ t.tagPending.appendAttributeValue(c);
+ t.transition(AttributeValue_unquoted);
+ break;
+ default:
+ r.unconsume();
+ t.transition(AttributeValue_unquoted);
}
}
},
AttributeValue_doubleQuoted {
+ @Override
void read(Tokeniser t, CharacterReader r) {
String value = r.consumeToAny('"', '&', nullChar);
- if (value.length() > 0)
+ if (value.length() > 0) {
t.tagPending.appendAttributeValue(value);
+ }
char c = r.consume();
switch (c) {
- case '"':
- t.transition(AfterAttributeValue_quoted);
- break;
- case '&':
- Character ref = t.consumeCharacterReference('"', true);
- if (ref != null)
- t.tagPending.appendAttributeValue(ref);
- else
- t.tagPending.appendAttributeValue('&');
- break;
- case nullChar:
- t.error(this);
- t.tagPending.appendAttributeValue(replacementChar);
- break;
- case eof:
- t.eofError(this);
- t.transition(Data);
- break;
- // no default, handled in consume to any above
+ case '"':
+ t.transition(AfterAttributeValue_quoted);
+ break;
+ case '&':
+ Character ref = t.consumeCharacterReference('"', true);
+ if (ref != null) {
+ t.tagPending.appendAttributeValue(ref);
+ } else {
+ t.tagPending.appendAttributeValue('&');
+ }
+ break;
+ case nullChar:
+ t.error(this);
+ t.tagPending.appendAttributeValue(replacementChar);
+ break;
+ case eof:
+ t.eofError(this);
+ t.transition(Data);
+ break;
+ // no default, handled in consume to any above
}
}
},
AttributeValue_singleQuoted {
+ @Override
void read(Tokeniser t, CharacterReader r) {
String value = r.consumeToAny('\'', '&', nullChar);
- if (value.length() > 0)
+ if (value.length() > 0) {
t.tagPending.appendAttributeValue(value);
+ }
char c = r.consume();
switch (c) {
- case '\'':
- t.transition(AfterAttributeValue_quoted);
- break;
- case '&':
- Character ref = t.consumeCharacterReference('\'', true);
- if (ref != null)
- t.tagPending.appendAttributeValue(ref);
- else
- t.tagPending.appendAttributeValue('&');
- break;
- case nullChar:
- t.error(this);
- t.tagPending.appendAttributeValue(replacementChar);
- break;
- case eof:
- t.eofError(this);
- t.transition(Data);
- break;
- // no default, handled in consume to any above
+ case '\'':
+ t.transition(AfterAttributeValue_quoted);
+ break;
+ case '&':
+ Character ref = t.consumeCharacterReference('\'', true);
+ if (ref != null) {
+ t.tagPending.appendAttributeValue(ref);
+ } else {
+ t.tagPending.appendAttributeValue('&');
+ }
+ break;
+ case nullChar:
+ t.error(this);
+ t.tagPending.appendAttributeValue(replacementChar);
+ break;
+ case eof:
+ t.eofError(this);
+ t.transition(Data);
+ break;
+ // no default, handled in consume to any above
}
}
},
AttributeValue_unquoted {
+ @Override
void read(Tokeniser t, CharacterReader r) {
- String value = r.consumeToAny('\t', '\n', '\f', ' ', '&', '>', nullChar, '"', '\'', '<', '=', '`');
- if (value.length() > 0)
+ String value = r.consumeToAny('\t', '\n', '\f', ' ', '&', '>',
+ nullChar, '"', '\'', '<', '=', '`');
+ if (value.length() > 0) {
t.tagPending.appendAttributeValue(value);
+ }
char c = r.consume();
switch (c) {
- case '\t':
- case '\n':
- case '\f':
- case ' ':
- t.transition(BeforeAttributeName);
- break;
- case '&':
- Character ref = t.consumeCharacterReference('>', true);
- if (ref != null)
- t.tagPending.appendAttributeValue(ref);
- else
- t.tagPending.appendAttributeValue('&');
- break;
- case '>':
- t.emitTagPending();
- t.transition(Data);
- break;
- case nullChar:
- t.error(this);
- t.tagPending.appendAttributeValue(replacementChar);
- break;
- case eof:
- t.eofError(this);
- t.transition(Data);
- break;
- case '"':
- case '\'':
- case '<':
- case '=':
- case '`':
- t.error(this);
- t.tagPending.appendAttributeValue(c);
- break;
- // no default, handled in consume to any above
+ case '\t':
+ case '\n':
+ case '\f':
+ case ' ':
+ t.transition(BeforeAttributeName);
+ break;
+ case '&':
+ Character ref = t.consumeCharacterReference('>', true);
+ if (ref != null) {
+ t.tagPending.appendAttributeValue(ref);
+ } else {
+ t.tagPending.appendAttributeValue('&');
+ }
+ break;
+ case '>':
+ t.emitTagPending();
+ t.transition(Data);
+ break;
+ case nullChar:
+ t.error(this);
+ t.tagPending.appendAttributeValue(replacementChar);
+ break;
+ case eof:
+ t.eofError(this);
+ t.transition(Data);
+ break;
+ case '"':
+ case '\'':
+ case '<':
+ case '=':
+ case '`':
+ t.error(this);
+ t.tagPending.appendAttributeValue(c);
+ break;
+ // no default, handled in consume to any above
}
}
},
// CharacterReferenceInAttributeValue state handled inline
AfterAttributeValue_quoted {
+ @Override
void read(Tokeniser t, CharacterReader r) {
char c = r.consume();
switch (c) {
- case '\t':
- case '\n':
- case '\f':
- case ' ':
- t.transition(BeforeAttributeName);
- break;
- case '/':
- t.transition(SelfClosingStartTag);
- break;
- case '>':
- t.emitTagPending();
- t.transition(Data);
- break;
- case eof:
- t.eofError(this);
- t.transition(Data);
- break;
- default:
- t.error(this);
- r.unconsume();
- t.transition(BeforeAttributeName);
+ case '\t':
+ case '\n':
+ case '\f':
+ case ' ':
+ t.transition(BeforeAttributeName);
+ break;
+ case '/':
+ t.transition(SelfClosingStartTag);
+ break;
+ case '>':
+ t.emitTagPending();
+ t.transition(Data);
+ break;
+ case eof:
+ t.eofError(this);
+ t.transition(Data);
+ break;
+ default:
+ t.error(this);
+ r.unconsume();
+ t.transition(BeforeAttributeName);
}
}
},
SelfClosingStartTag {
+ @Override
void read(Tokeniser t, CharacterReader r) {
char c = r.consume();
switch (c) {
- case '>':
- t.tagPending.selfClosing = true;
- t.emitTagPending();
- t.transition(Data);
- break;
- case eof:
- t.eofError(this);
- t.transition(Data);
- break;
- default:
- t.error(this);
- t.transition(BeforeAttributeName);
+ case '>':
+ t.tagPending.selfClosing = true;
+ t.emitTagPending();
+ t.transition(Data);
+ break;
+ case eof:
+ t.eofError(this);
+ t.transition(Data);
+ break;
+ default:
+ t.error(this);
+ t.transition(BeforeAttributeName);
}
}
},
BogusComment {
+ @Override
void read(Tokeniser t, CharacterReader r) {
- // todo: handle bogus comment starting from eof. when does that trigger?
+ // todo: handle bogus comment starting from eof. when does that
+ // trigger?
// rewind to capture character that lead us here
r.unconsume();
Token.Comment comment = new Token.Comment();
@@ -1085,6 +1149,7 @@ enum TokeniserState {
}
},
MarkupDeclarationOpen {
+ @Override
void read(Tokeniser t, CharacterReader r) {
if (r.matchConsume("--")) {
t.createCommentPending();
@@ -1092,202 +1157,214 @@ enum TokeniserState {
} else if (r.matchConsumeIgnoreCase("DOCTYPE")) {
t.transition(Doctype);
} else if (r.matchConsume("[CDATA[")) {
- // todo: should actually check current namepspace, and only non-html allows cdata. until namespace
+ // todo: should actually check current namespace, and only
+ // non-html allows cdata. until namespace
// is implemented properly, keep handling as cdata
- //} else if (!t.currentNodeInHtmlNS() && r.matchConsume("[CDATA[")) {
+ // } else if (!t.currentNodeInHtmlNS() &&
+ // r.matchConsume("[CDATA[")) {
t.transition(CdataSection);
} else {
t.error(this);
- t.advanceTransition(BogusComment); // advance so this character gets in bogus comment data's rewind
+ t.advanceTransition(BogusComment); // advance so this character
+ // gets in bogus comment
+ // data's rewind
}
}
},
CommentStart {
+ @Override
void read(Tokeniser t, CharacterReader r) {
char c = r.consume();
switch (c) {
- case '-':
- t.transition(CommentStartDash);
- break;
- case nullChar:
- t.error(this);
- t.commentPending.data.append(replacementChar);
- t.transition(Comment);
- break;
- case '>':
- t.error(this);
- t.emitCommentPending();
- t.transition(Data);
- break;
- case eof:
- t.eofError(this);
- t.emitCommentPending();
- t.transition(Data);
- break;
- default:
- t.commentPending.data.append(c);
- t.transition(Comment);
+ case '-':
+ t.transition(CommentStartDash);
+ break;
+ case nullChar:
+ t.error(this);
+ t.commentPending.data.append(replacementChar);
+ t.transition(Comment);
+ break;
+ case '>':
+ t.error(this);
+ t.emitCommentPending();
+ t.transition(Data);
+ break;
+ case eof:
+ t.eofError(this);
+ t.emitCommentPending();
+ t.transition(Data);
+ break;
+ default:
+ t.commentPending.data.append(c);
+ t.transition(Comment);
}
}
},
CommentStartDash {
+ @Override
void read(Tokeniser t, CharacterReader r) {
char c = r.consume();
switch (c) {
- case '-':
- t.transition(CommentStartDash);
- break;
- case nullChar:
- t.error(this);
- t.commentPending.data.append(replacementChar);
- t.transition(Comment);
- break;
- case '>':
- t.error(this);
- t.emitCommentPending();
- t.transition(Data);
- break;
- case eof:
- t.eofError(this);
- t.emitCommentPending();
- t.transition(Data);
- break;
- default:
- t.commentPending.data.append(c);
- t.transition(Comment);
+ case '-':
+ t.transition(CommentEnd); // HTML spec: second '-' moves to comment end state (was a self-loop)
+ break;
+ case nullChar:
+ t.error(this);
+ t.commentPending.data.append(replacementChar);
+ t.transition(Comment);
+ break;
+ case '>':
+ t.error(this);
+ t.emitCommentPending();
+ t.transition(Data);
+ break;
+ case eof:
+ t.eofError(this);
+ t.emitCommentPending();
+ t.transition(Data);
+ break;
+ default:
+ t.commentPending.data.append(c);
+ t.transition(Comment);
}
}
},
Comment {
+ @Override
void read(Tokeniser t, CharacterReader r) {
char c = r.current();
switch (c) {
- case '-':
- t.advanceTransition(CommentEndDash);
- break;
- case nullChar:
- t.error(this);
- r.advance();
- t.commentPending.data.append(replacementChar);
- break;
- case eof:
- t.eofError(this);
- t.emitCommentPending();
- t.transition(Data);
- break;
- default:
- t.commentPending.data.append(r.consumeToAny('-', nullChar));
+ case '-':
+ t.advanceTransition(CommentEndDash);
+ break;
+ case nullChar:
+ t.error(this);
+ r.advance();
+ t.commentPending.data.append(replacementChar);
+ break;
+ case eof:
+ t.eofError(this);
+ t.emitCommentPending();
+ t.transition(Data);
+ break;
+ default:
+ t.commentPending.data.append(r.consumeToAny('-', nullChar));
}
}
},
CommentEndDash {
+ @Override
void read(Tokeniser t, CharacterReader r) {
char c = r.consume();
switch (c) {
- case '-':
- t.transition(CommentEnd);
- break;
- case nullChar:
- t.error(this);
- t.commentPending.data.append('-').append(replacementChar);
- t.transition(Comment);
- break;
- case eof:
- t.eofError(this);
- t.emitCommentPending();
- t.transition(Data);
- break;
- default:
- t.commentPending.data.append('-').append(c);
- t.transition(Comment);
+ case '-':
+ t.transition(CommentEnd);
+ break;
+ case nullChar:
+ t.error(this);
+ t.commentPending.data.append('-').append(replacementChar);
+ t.transition(Comment);
+ break;
+ case eof:
+ t.eofError(this);
+ t.emitCommentPending();
+ t.transition(Data);
+ break;
+ default:
+ t.commentPending.data.append('-').append(c);
+ t.transition(Comment);
}
}
},
CommentEnd {
+ @Override
void read(Tokeniser t, CharacterReader r) {
char c = r.consume();
switch (c) {
- case '>':
- t.emitCommentPending();
- t.transition(Data);
- break;
- case nullChar:
- t.error(this);
- t.commentPending.data.append("--").append(replacementChar);
- t.transition(Comment);
- break;
- case '!':
- t.error(this);
- t.transition(CommentEndBang);
- break;
- case '-':
- t.error(this);
- t.commentPending.data.append('-');
- break;
- case eof:
- t.eofError(this);
- t.emitCommentPending();
- t.transition(Data);
- break;
- default:
- t.error(this);
- t.commentPending.data.append("--").append(c);
- t.transition(Comment);
+ case '>':
+ t.emitCommentPending();
+ t.transition(Data);
+ break;
+ case nullChar:
+ t.error(this);
+ t.commentPending.data.append("--").append(replacementChar);
+ t.transition(Comment);
+ break;
+ case '!':
+ t.error(this);
+ t.transition(CommentEndBang);
+ break;
+ case '-':
+ t.error(this);
+ t.commentPending.data.append('-');
+ break;
+ case eof:
+ t.eofError(this);
+ t.emitCommentPending();
+ t.transition(Data);
+ break;
+ default:
+ t.error(this);
+ t.commentPending.data.append("--").append(c);
+ t.transition(Comment);
}
}
},
CommentEndBang {
+ @Override
void read(Tokeniser t, CharacterReader r) {
char c = r.consume();
switch (c) {
- case '-':
- t.commentPending.data.append("--!");
- t.transition(CommentEndDash);
- break;
- case '>':
- t.emitCommentPending();
- t.transition(Data);
- break;
- case nullChar:
- t.error(this);
- t.commentPending.data.append("--!").append(replacementChar);
- t.transition(Comment);
- break;
- case eof:
- t.eofError(this);
- t.emitCommentPending();
- t.transition(Data);
- break;
- default:
- t.commentPending.data.append("--!").append(c);
- t.transition(Comment);
+ case '-':
+ t.commentPending.data.append("--!");
+ t.transition(CommentEndDash);
+ break;
+ case '>':
+ t.emitCommentPending();
+ t.transition(Data);
+ break;
+ case nullChar:
+ t.error(this);
+ t.commentPending.data.append("--!").append(replacementChar);
+ t.transition(Comment);
+ break;
+ case eof:
+ t.eofError(this);
+ t.emitCommentPending();
+ t.transition(Data);
+ break;
+ default:
+ t.commentPending.data.append("--!").append(c);
+ t.transition(Comment);
}
}
},
Doctype {
+ @Override
void read(Tokeniser t, CharacterReader r) {
char c = r.consume();
switch (c) {
- case '\t':
- case '\n':
- case '\f':
- case ' ':
- t.transition(BeforeDoctypeName);
- break;
- case eof:
- t.eofError(this);
- t.createDoctypePending();
- t.doctypePending.forceQuirks = true;
- t.emitDoctypePending();
- t.transition(Data);
- break;
- default:
- t.error(this);
- t.transition(BeforeDoctypeName);
+ case '\t':
+ case '\n':
+ case '\f':
+ case ' ':
+ t.transition(BeforeDoctypeName);
+ break;
+ case eof:
+ t.eofError(this);
+ t.createDoctypePending();
+ t.doctypePending.forceQuirks = true;
+ t.emitDoctypePending();
+ t.transition(Data);
+ break;
+ default:
+ t.error(this);
+ t.transition(BeforeDoctypeName);
}
}
},
BeforeDoctypeName {
+ @Override
void read(Tokeniser t, CharacterReader r) {
if (r.matchesLetter()) {
t.createDoctypePending();
@@ -1296,31 +1373,32 @@ enum TokeniserState {
}
char c = r.consume();
switch (c) {
- case '\t':
- case '\n':
- case '\f':
- case ' ':
- break; // ignore whitespace
- case nullChar:
- t.error(this);
- t.doctypePending.name.append(replacementChar);
- t.transition(DoctypeName);
- break;
- case eof:
- t.eofError(this);
- t.createDoctypePending();
- t.doctypePending.forceQuirks = true;
- t.emitDoctypePending();
- t.transition(Data);
- break;
- default:
- t.createDoctypePending();
- t.doctypePending.name.append(c);
- t.transition(DoctypeName);
+ case '\t':
+ case '\n':
+ case '\f':
+ case ' ':
+ break; // ignore whitespace
+ case nullChar:
+ t.error(this);
+ t.doctypePending.name.append(replacementChar);
+ t.transition(DoctypeName);
+ break;
+ case eof:
+ t.eofError(this);
+ t.createDoctypePending();
+ t.doctypePending.forceQuirks = true;
+ t.emitDoctypePending();
+ t.transition(Data);
+ break;
+ default:
+ t.createDoctypePending();
+ t.doctypePending.name.append(c);
+ t.transition(DoctypeName);
}
}
},
DoctypeName {
+ @Override
void read(Tokeniser t, CharacterReader r) {
if (r.matchesLetter()) {
String name = r.consumeLetterSequence();
@@ -1329,32 +1407,33 @@ enum TokeniserState {
}
char c = r.consume();
switch (c) {
- case '>':
- t.emitDoctypePending();
- t.transition(Data);
- break;
- case '\t':
- case '\n':
- case '\f':
- case ' ':
- t.transition(AfterDoctypeName);
- break;
- case nullChar:
- t.error(this);
- t.doctypePending.name.append(replacementChar);
- break;
- case eof:
- t.eofError(this);
- t.doctypePending.forceQuirks = true;
- t.emitDoctypePending();
- t.transition(Data);
- break;
- default:
- t.doctypePending.name.append(c);
+ case '>':
+ t.emitDoctypePending();
+ t.transition(Data);
+ break;
+ case '\t':
+ case '\n':
+ case '\f':
+ case ' ':
+ t.transition(AfterDoctypeName);
+ break;
+ case nullChar:
+ t.error(this);
+ t.doctypePending.name.append(replacementChar);
+ break;
+ case eof:
+ t.eofError(this);
+ t.doctypePending.forceQuirks = true;
+ t.emitDoctypePending();
+ t.transition(Data);
+ break;
+ default:
+ t.doctypePending.name.append(c);
}
}
},
AfterDoctypeName {
+ @Override
void read(Tokeniser t, CharacterReader r) {
if (r.isEmpty()) {
t.eofError(this);
@@ -1363,9 +1442,9 @@ enum TokeniserState {
t.transition(Data);
return;
}
- if (r.matchesAny('\t', '\n', '\f', ' '))
+ if (r.matchesAny('\t', '\n', '\f', ' ')) {
r.advance(); // ignore whitespace
- else if (r.matches('>')) {
+ } else if (r.matches('>')) {
t.emitDoctypePending();
t.advanceTransition(Data);
} else if (r.matchConsumeIgnoreCase("PUBLIC")) {
@@ -1381,385 +1460,398 @@ enum TokeniserState {
}
},
AfterDoctypePublicKeyword {
+ @Override
void read(Tokeniser t, CharacterReader r) {
char c = r.consume();
switch (c) {
- case '\t':
- case '\n':
- case '\f':
- case ' ':
- t.transition(BeforeDoctypePublicIdentifier);
- break;
- case '"':
- t.error(this);
- // set public id to empty string
- t.transition(DoctypePublicIdentifier_doubleQuoted);
- break;
- case '\'':
- t.error(this);
- // set public id to empty string
- t.transition(DoctypePublicIdentifier_singleQuoted);
- break;
- case '>':
- t.error(this);
- t.doctypePending.forceQuirks = true;
- t.emitDoctypePending();
- t.transition(Data);
- break;
- case eof:
- t.eofError(this);
- t.doctypePending.forceQuirks = true;
- t.emitDoctypePending();
- t.transition(Data);
- break;
- default:
- t.error(this);
- t.doctypePending.forceQuirks = true;
- t.transition(BogusDoctype);
+ case '\t':
+ case '\n':
+ case '\f':
+ case ' ':
+ t.transition(BeforeDoctypePublicIdentifier);
+ break;
+ case '"':
+ t.error(this);
+ // set public id to empty string
+ t.transition(DoctypePublicIdentifier_doubleQuoted);
+ break;
+ case '\'':
+ t.error(this);
+ // set public id to empty string
+ t.transition(DoctypePublicIdentifier_singleQuoted);
+ break;
+ case '>':
+ t.error(this);
+ t.doctypePending.forceQuirks = true;
+ t.emitDoctypePending();
+ t.transition(Data);
+ break;
+ case eof:
+ t.eofError(this);
+ t.doctypePending.forceQuirks = true;
+ t.emitDoctypePending();
+ t.transition(Data);
+ break;
+ default:
+ t.error(this);
+ t.doctypePending.forceQuirks = true;
+ t.transition(BogusDoctype);
}
}
},
BeforeDoctypePublicIdentifier {
+ @Override
void read(Tokeniser t, CharacterReader r) {
char c = r.consume();
switch (c) {
- case '\t':
- case '\n':
- case '\f':
- case ' ':
- break;
- case '"':
- // set public id to empty string
- t.transition(DoctypePublicIdentifier_doubleQuoted);
- break;
- case '\'':
- // set public id to empty string
- t.transition(DoctypePublicIdentifier_singleQuoted);
- break;
- case '>':
- t.error(this);
- t.doctypePending.forceQuirks = true;
- t.emitDoctypePending();
- t.transition(Data);
- break;
- case eof:
- t.eofError(this);
- t.doctypePending.forceQuirks = true;
- t.emitDoctypePending();
- t.transition(Data);
- break;
- default:
- t.error(this);
- t.doctypePending.forceQuirks = true;
- t.transition(BogusDoctype);
+ case '\t':
+ case '\n':
+ case '\f':
+ case ' ':
+ break;
+ case '"':
+ // set public id to empty string
+ t.transition(DoctypePublicIdentifier_doubleQuoted);
+ break;
+ case '\'':
+ // set public id to empty string
+ t.transition(DoctypePublicIdentifier_singleQuoted);
+ break;
+ case '>':
+ t.error(this);
+ t.doctypePending.forceQuirks = true;
+ t.emitDoctypePending();
+ t.transition(Data);
+ break;
+ case eof:
+ t.eofError(this);
+ t.doctypePending.forceQuirks = true;
+ t.emitDoctypePending();
+ t.transition(Data);
+ break;
+ default:
+ t.error(this);
+ t.doctypePending.forceQuirks = true;
+ t.transition(BogusDoctype);
}
}
},
DoctypePublicIdentifier_doubleQuoted {
+ @Override
void read(Tokeniser t, CharacterReader r) {
char c = r.consume();
switch (c) {
- case '"':
- t.transition(AfterDoctypePublicIdentifier);
- break;
- case nullChar:
- t.error(this);
- t.doctypePending.publicIdentifier.append(replacementChar);
- break;
- case '>':
- t.error(this);
- t.doctypePending.forceQuirks = true;
- t.emitDoctypePending();
- t.transition(Data);
- break;
- case eof:
- t.eofError(this);
- t.doctypePending.forceQuirks = true;
- t.emitDoctypePending();
- t.transition(Data);
- break;
- default:
- t.doctypePending.publicIdentifier.append(c);
+ case '"':
+ t.transition(AfterDoctypePublicIdentifier);
+ break;
+ case nullChar:
+ t.error(this);
+ t.doctypePending.publicIdentifier.append(replacementChar);
+ break;
+ case '>':
+ t.error(this);
+ t.doctypePending.forceQuirks = true;
+ t.emitDoctypePending();
+ t.transition(Data);
+ break;
+ case eof:
+ t.eofError(this);
+ t.doctypePending.forceQuirks = true;
+ t.emitDoctypePending();
+ t.transition(Data);
+ break;
+ default:
+ t.doctypePending.publicIdentifier.append(c);
}
}
},
DoctypePublicIdentifier_singleQuoted {
+ @Override
void read(Tokeniser t, CharacterReader r) {
char c = r.consume();
switch (c) {
- case '\'':
- t.transition(AfterDoctypePublicIdentifier);
- break;
- case nullChar:
- t.error(this);
- t.doctypePending.publicIdentifier.append(replacementChar);
- break;
- case '>':
- t.error(this);
- t.doctypePending.forceQuirks = true;
- t.emitDoctypePending();
- t.transition(Data);
- break;
- case eof:
- t.eofError(this);
- t.doctypePending.forceQuirks = true;
- t.emitDoctypePending();
- t.transition(Data);
- break;
- default:
- t.doctypePending.publicIdentifier.append(c);
+ case '\'':
+ t.transition(AfterDoctypePublicIdentifier);
+ break;
+ case nullChar:
+ t.error(this);
+ t.doctypePending.publicIdentifier.append(replacementChar);
+ break;
+ case '>':
+ t.error(this);
+ t.doctypePending.forceQuirks = true;
+ t.emitDoctypePending();
+ t.transition(Data);
+ break;
+ case eof:
+ t.eofError(this);
+ t.doctypePending.forceQuirks = true;
+ t.emitDoctypePending();
+ t.transition(Data);
+ break;
+ default:
+ t.doctypePending.publicIdentifier.append(c);
}
}
},
AfterDoctypePublicIdentifier {
+ @Override
void read(Tokeniser t, CharacterReader r) {
char c = r.consume();
switch (c) {
- case '\t':
- case '\n':
- case '\f':
- case ' ':
- t.transition(BetweenDoctypePublicAndSystemIdentifiers);
- break;
- case '>':
- t.emitDoctypePending();
- t.transition(Data);
- break;
- case '"':
- t.error(this);
- // system id empty
- t.transition(DoctypeSystemIdentifier_doubleQuoted);
- break;
- case '\'':
- t.error(this);
- // system id empty
- t.transition(DoctypeSystemIdentifier_singleQuoted);
- break;
- case eof:
- t.eofError(this);
- t.doctypePending.forceQuirks = true;
- t.emitDoctypePending();
- t.transition(Data);
- break;
- default:
- t.error(this);
- t.doctypePending.forceQuirks = true;
- t.transition(BogusDoctype);
+ case '\t':
+ case '\n':
+ case '\f':
+ case ' ':
+ t.transition(BetweenDoctypePublicAndSystemIdentifiers);
+ break;
+ case '>':
+ t.emitDoctypePending();
+ t.transition(Data);
+ break;
+ case '"':
+ t.error(this);
+ // system id empty
+ t.transition(DoctypeSystemIdentifier_doubleQuoted);
+ break;
+ case '\'':
+ t.error(this);
+ // system id empty
+ t.transition(DoctypeSystemIdentifier_singleQuoted);
+ break;
+ case eof:
+ t.eofError(this);
+ t.doctypePending.forceQuirks = true;
+ t.emitDoctypePending();
+ t.transition(Data);
+ break;
+ default:
+ t.error(this);
+ t.doctypePending.forceQuirks = true;
+ t.transition(BogusDoctype);
}
}
},
BetweenDoctypePublicAndSystemIdentifiers {
+ @Override
void read(Tokeniser t, CharacterReader r) {
char c = r.consume();
switch (c) {
- case '\t':
- case '\n':
- case '\f':
- case ' ':
- break;
- case '>':
- t.emitDoctypePending();
- t.transition(Data);
- break;
- case '"':
- t.error(this);
- // system id empty
- t.transition(DoctypeSystemIdentifier_doubleQuoted);
- break;
- case '\'':
- t.error(this);
- // system id empty
- t.transition(DoctypeSystemIdentifier_singleQuoted);
- break;
- case eof:
- t.eofError(this);
- t.doctypePending.forceQuirks = true;
- t.emitDoctypePending();
- t.transition(Data);
- break;
- default:
- t.error(this);
- t.doctypePending.forceQuirks = true;
- t.transition(BogusDoctype);
+ case '\t':
+ case '\n':
+ case '\f':
+ case ' ':
+ break;
+ case '>':
+ t.emitDoctypePending();
+ t.transition(Data);
+ break;
+ case '"':
+ t.error(this);
+ // system id empty
+ t.transition(DoctypeSystemIdentifier_doubleQuoted);
+ break;
+ case '\'':
+ t.error(this);
+ // system id empty
+ t.transition(DoctypeSystemIdentifier_singleQuoted);
+ break;
+ case eof:
+ t.eofError(this);
+ t.doctypePending.forceQuirks = true;
+ t.emitDoctypePending();
+ t.transition(Data);
+ break;
+ default:
+ t.error(this);
+ t.doctypePending.forceQuirks = true;
+ t.transition(BogusDoctype);
}
}
},
AfterDoctypeSystemKeyword {
+ @Override
void read(Tokeniser t, CharacterReader r) {
char c = r.consume();
switch (c) {
- case '\t':
- case '\n':
- case '\f':
- case ' ':
- t.transition(BeforeDoctypeSystemIdentifier);
- break;
- case '>':
- t.error(this);
- t.doctypePending.forceQuirks = true;
- t.emitDoctypePending();
- t.transition(Data);
- break;
- case '"':
- t.error(this);
- // system id empty
- t.transition(DoctypeSystemIdentifier_doubleQuoted);
- break;
- case '\'':
- t.error(this);
- // system id empty
- t.transition(DoctypeSystemIdentifier_singleQuoted);
- break;
- case eof:
- t.eofError(this);
- t.doctypePending.forceQuirks = true;
- t.emitDoctypePending();
- t.transition(Data);
- break;
- default:
- t.error(this);
- t.doctypePending.forceQuirks = true;
- t.emitDoctypePending();
+ case '\t':
+ case '\n':
+ case '\f':
+ case ' ':
+ t.transition(BeforeDoctypeSystemIdentifier);
+ break;
+ case '>':
+ t.error(this);
+ t.doctypePending.forceQuirks = true;
+ t.emitDoctypePending();
+ t.transition(Data);
+ break;
+ case '"':
+ t.error(this);
+ // system id empty
+ t.transition(DoctypeSystemIdentifier_doubleQuoted);
+ break;
+ case '\'':
+ t.error(this);
+ // system id empty
+ t.transition(DoctypeSystemIdentifier_singleQuoted);
+ break;
+ case eof:
+ t.eofError(this);
+ t.doctypePending.forceQuirks = true;
+ t.emitDoctypePending();
+ t.transition(Data);
+ break;
+ default:
+ t.error(this);
+ t.doctypePending.forceQuirks = true;
+ t.emitDoctypePending();
}
}
},
BeforeDoctypeSystemIdentifier {
+ @Override
void read(Tokeniser t, CharacterReader r) {
char c = r.consume();
switch (c) {
- case '\t':
- case '\n':
- case '\f':
- case ' ':
- break;
- case '"':
- // set system id to empty string
- t.transition(DoctypeSystemIdentifier_doubleQuoted);
- break;
- case '\'':
- // set public id to empty string
- t.transition(DoctypeSystemIdentifier_singleQuoted);
- break;
- case '>':
- t.error(this);
- t.doctypePending.forceQuirks = true;
- t.emitDoctypePending();
- t.transition(Data);
- break;
- case eof:
- t.eofError(this);
- t.doctypePending.forceQuirks = true;
- t.emitDoctypePending();
- t.transition(Data);
- break;
- default:
- t.error(this);
- t.doctypePending.forceQuirks = true;
- t.transition(BogusDoctype);
+ case '\t':
+ case '\n':
+ case '\f':
+ case ' ':
+ break;
+ case '"':
+ // set system id to empty string
+ t.transition(DoctypeSystemIdentifier_doubleQuoted);
+ break;
+ case '\'':
+ // set system id to empty string
+ t.transition(DoctypeSystemIdentifier_singleQuoted);
+ break;
+ case '>':
+ t.error(this);
+ t.doctypePending.forceQuirks = true;
+ t.emitDoctypePending();
+ t.transition(Data);
+ break;
+ case eof:
+ t.eofError(this);
+ t.doctypePending.forceQuirks = true;
+ t.emitDoctypePending();
+ t.transition(Data);
+ break;
+ default:
+ t.error(this);
+ t.doctypePending.forceQuirks = true;
+ t.transition(BogusDoctype);
}
}
},
DoctypeSystemIdentifier_doubleQuoted {
+ @Override
void read(Tokeniser t, CharacterReader r) {
char c = r.consume();
switch (c) {
- case '"':
- t.transition(AfterDoctypeSystemIdentifier);
- break;
- case nullChar:
- t.error(this);
- t.doctypePending.systemIdentifier.append(replacementChar);
- break;
- case '>':
- t.error(this);
- t.doctypePending.forceQuirks = true;
- t.emitDoctypePending();
- t.transition(Data);
- break;
- case eof:
- t.eofError(this);
- t.doctypePending.forceQuirks = true;
- t.emitDoctypePending();
- t.transition(Data);
- break;
- default:
- t.doctypePending.systemIdentifier.append(c);
+ case '"':
+ t.transition(AfterDoctypeSystemIdentifier);
+ break;
+ case nullChar:
+ t.error(this);
+ t.doctypePending.systemIdentifier.append(replacementChar);
+ break;
+ case '>':
+ t.error(this);
+ t.doctypePending.forceQuirks = true;
+ t.emitDoctypePending();
+ t.transition(Data);
+ break;
+ case eof:
+ t.eofError(this);
+ t.doctypePending.forceQuirks = true;
+ t.emitDoctypePending();
+ t.transition(Data);
+ break;
+ default:
+ t.doctypePending.systemIdentifier.append(c);
}
}
},
DoctypeSystemIdentifier_singleQuoted {
+ @Override
void read(Tokeniser t, CharacterReader r) {
char c = r.consume();
switch (c) {
- case '\'':
- t.transition(AfterDoctypeSystemIdentifier);
- break;
- case nullChar:
- t.error(this);
- t.doctypePending.systemIdentifier.append(replacementChar);
- break;
- case '>':
- t.error(this);
- t.doctypePending.forceQuirks = true;
- t.emitDoctypePending();
- t.transition(Data);
- break;
- case eof:
- t.eofError(this);
- t.doctypePending.forceQuirks = true;
- t.emitDoctypePending();
- t.transition(Data);
- break;
- default:
- t.doctypePending.systemIdentifier.append(c);
+ case '\'':
+ t.transition(AfterDoctypeSystemIdentifier);
+ break;
+ case nullChar:
+ t.error(this);
+ t.doctypePending.systemIdentifier.append(replacementChar);
+ break;
+ case '>':
+ t.error(this);
+ t.doctypePending.forceQuirks = true;
+ t.emitDoctypePending();
+ t.transition(Data);
+ break;
+ case eof:
+ t.eofError(this);
+ t.doctypePending.forceQuirks = true;
+ t.emitDoctypePending();
+ t.transition(Data);
+ break;
+ default:
+ t.doctypePending.systemIdentifier.append(c);
}
}
},
AfterDoctypeSystemIdentifier {
+ @Override
void read(Tokeniser t, CharacterReader r) {
char c = r.consume();
switch (c) {
- case '\t':
- case '\n':
- case '\f':
- case ' ':
- break;
- case '>':
- t.emitDoctypePending();
- t.transition(Data);
- break;
- case eof:
- t.eofError(this);
- t.doctypePending.forceQuirks = true;
- t.emitDoctypePending();
- t.transition(Data);
- break;
- default:
- t.error(this);
- t.transition(BogusDoctype);
- // NOT force quirks
+ case '\t':
+ case '\n':
+ case '\f':
+ case ' ':
+ break;
+ case '>':
+ t.emitDoctypePending();
+ t.transition(Data);
+ break;
+ case eof:
+ t.eofError(this);
+ t.doctypePending.forceQuirks = true;
+ t.emitDoctypePending();
+ t.transition(Data);
+ break;
+ default:
+ t.error(this);
+ t.transition(BogusDoctype);
+ // NOT force quirks
}
}
},
BogusDoctype {
+ @Override
void read(Tokeniser t, CharacterReader r) {
char c = r.consume();
switch (c) {
- case '>':
- t.emitDoctypePending();
- t.transition(Data);
- break;
- case eof:
- t.emitDoctypePending();
- t.transition(Data);
- break;
- default:
- // ignore char
- break;
+ case '>':
+ t.emitDoctypePending();
+ t.transition(Data);
+ break;
+ case eof:
+ t.emitDoctypePending();
+ t.transition(Data);
+ break;
+ default:
+ // ignore char
+ break;
}
}
},
CdataSection {
+ @Override
void read(Tokeniser t, CharacterReader r) {
String data = r.consumeTo("]]>");
t.emit(data);
@@ -1768,11 +1860,11 @@ enum TokeniserState {
}
};
-
abstract void read(Tokeniser t, CharacterReader r);
private static final char nullChar = '\u0000';
private static final char replacementChar = Tokeniser.replacementChar;
- private static final String replacementStr = String.valueOf(Tokeniser.replacementChar);
+ private static final String replacementStr = String
+ .valueOf(Tokeniser.replacementChar);
private static final char eof = CharacterReader.EOF;
}
diff --git a/server/src/org/jsoup/parser/TreeBuilder.java b/server/src/org/jsoup/parser/TreeBuilder.java
index e06caad501..5e2dbebc66 100644
--- a/server/src/org/jsoup/parser/TreeBuilder.java
+++ b/server/src/org/jsoup/parser/TreeBuilder.java
@@ -5,9 +5,6 @@ import org.jsoup.helper.Validate;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
-import java.util.ArrayList;
-import java.util.List;
-
/**
* @author Jonathan Hedley
*/
@@ -15,12 +12,15 @@ abstract class TreeBuilder {
CharacterReader reader;
Tokeniser tokeniser;
protected Document doc; // current doc we are building into
- protected DescendableLinkedList<Element> stack; // the stack of open elements
+ protected DescendableLinkedList<Element> stack; // the stack of open
+ // elements
protected String baseUri; // current base uri, for creating new elements
- protected Token currentToken; // currentToken is used only for error tracking.
+ protected Token currentToken; // currentToken is used only for error
+ // tracking.
protected ParseErrorList errors; // null when not tracking errors
- protected void initialiseParse(String input, String baseUri, ParseErrorList errors) {
+ protected void initialiseParse(String input, String baseUri,
+ ParseErrorList errors) {
Validate.notNull(input, "String input must not be null");
Validate.notNull(baseUri, "BaseURI must not be null");
@@ -47,8 +47,9 @@ abstract class TreeBuilder {
Token token = tokeniser.read();
process(token);
- if (token.type == Token.TokenType.EOF)
+ if (token.type == Token.TokenType.EOF) {
break;
+ }
}
}
diff --git a/server/src/org/jsoup/parser/XmlTreeBuilder.java b/server/src/org/jsoup/parser/XmlTreeBuilder.java
index 3f03ad26ac..c2a3635b3d 100644
--- a/server/src/org/jsoup/parser/XmlTreeBuilder.java
+++ b/server/src/org/jsoup/parser/XmlTreeBuilder.java
@@ -1,43 +1,49 @@
package org.jsoup.parser;
-import org.jsoup.helper.Validate;
-import org.jsoup.nodes.*;
-
import java.util.Iterator;
+import org.jsoup.helper.Validate;
+import org.jsoup.nodes.Comment;
+import org.jsoup.nodes.DocumentType;
+import org.jsoup.nodes.Element;
+import org.jsoup.nodes.Node;
+import org.jsoup.nodes.TextNode;
+
/**
* @author Jonathan Hedley
*/
public class XmlTreeBuilder extends TreeBuilder {
@Override
- protected void initialiseParse(String input, String baseUri, ParseErrorList errors) {
+ protected void initialiseParse(String input, String baseUri,
+ ParseErrorList errors) {
super.initialiseParse(input, baseUri, errors);
- stack.add(doc); // place the document onto the stack. differs from HtmlTreeBuilder (not on stack)
+ stack.add(doc); // place the document onto the stack. differs from
+ // HtmlTreeBuilder (not on stack)
}
@Override
protected boolean process(Token token) {
// start tag, end tag, doctype, comment, character, eof
switch (token.type) {
- case StartTag:
- insert(token.asStartTag());
- break;
- case EndTag:
- popStackToClose(token.asEndTag());
- break;
- case Comment:
- insert(token.asComment());
- break;
- case Character:
- insert(token.asCharacter());
- break;
- case Doctype:
- insert(token.asDoctype());
- break;
- case EOF: // could put some normalisation here if desired
- break;
- default:
- Validate.fail("Unexpected token type: " + token.type);
+ case StartTag:
+ insert(token.asStartTag());
+ break;
+ case EndTag:
+ popStackToClose(token.asEndTag());
+ break;
+ case Comment:
+ insert(token.asComment());
+ break;
+ case Character:
+ insert(token.asCharacter());
+ break;
+ case Doctype:
+ insert(token.asDoctype());
+ break;
+ case EOF: // could put some normalisation here if desired
+ break;
+ default:
+ Validate.fail("Unexpected token type: " + token.type);
}
return true;
}
@@ -48,13 +54,15 @@ public class XmlTreeBuilder extends TreeBuilder {
Element insert(Token.StartTag startTag) {
Tag tag = Tag.valueOf(startTag.name());
- // todo: wonder if for xml parsing, should treat all tags as unknown? because it's not html.
+ // todo: wonder if for xml parsing, should treat all tags as unknown?
+ // because it's not html.
Element el = new Element(tag, baseUri, startTag.attributes);
insertNode(el);
if (startTag.isSelfClosing()) {
tokeniser.acknowledgeSelfClosingFlag();
- if (!tag.isKnownTag()) // unknown tag, remember this is self closing for output. see above.
+ if (!tag.isKnownTag()) {
tag.setSelfClosing();
+ }
} else {
stack.add(el);
}
@@ -72,14 +80,15 @@ public class XmlTreeBuilder extends TreeBuilder {
}
void insert(Token.Doctype d) {
- DocumentType doctypeNode = new DocumentType(d.getName(), d.getPublicIdentifier(), d.getSystemIdentifier(), baseUri);
+ DocumentType doctypeNode = new DocumentType(d.getName(),
+ d.getPublicIdentifier(), d.getSystemIdentifier(), baseUri);
insertNode(doctypeNode);
}
/**
- * If the stack contains an element with this tag's name, pop up the stack to remove the first occurrence. If not
- * found, skips.
- *
+ * If the stack contains an element with this tag's name, pop up the stack
+ * to remove the first occurrence. If not found, skips.
+ *
* @param endTag
*/
private void popStackToClose(Token.EndTag endTag) {
@@ -94,8 +103,9 @@ public class XmlTreeBuilder extends TreeBuilder {
break;
}
}
- if (firstFound == null)
+ if (firstFound == null) {
return; // not found, skip
+ }
it = stack.descendingIterator();
while (it.hasNext()) {
diff --git a/server/src/org/jsoup/parser/package-info.java b/server/src/org/jsoup/parser/package-info.java
index 168fdf4086..c6c3d9a029 100644
--- a/server/src/org/jsoup/parser/package-info.java
+++ b/server/src/org/jsoup/parser/package-info.java
@@ -2,3 +2,4 @@
Contains the HTML parser, tag specifications, and HTML tokeniser.
*/
package org.jsoup.parser;
+
diff --git a/server/src/org/jsoup/safety/Cleaner.java b/server/src/org/jsoup/safety/Cleaner.java
index eda67df86b..046efbbaa8 100644
--- a/server/src/org/jsoup/safety/Cleaner.java
+++ b/server/src/org/jsoup/safety/Cleaner.java
@@ -1,29 +1,41 @@
package org.jsoup.safety;
+import java.util.List;
+
import org.jsoup.helper.Validate;
-import org.jsoup.nodes.*;
+import org.jsoup.nodes.Attribute;
+import org.jsoup.nodes.Attributes;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.nodes.Node;
+import org.jsoup.nodes.TextNode;
import org.jsoup.parser.Tag;
-import java.util.List;
-
/**
- The whitelist based HTML cleaner. Use to ensure that end-user provided HTML contains only the elements and attributes
- that you are expecting; no junk, and no cross-site scripting attacks!
- <p/>
- The HTML cleaner parses the input as HTML and then runs it through a white-list, so the output HTML can only contain
- HTML that is allowed by the whitelist.
- <p/>
- It is assumed that the input HTML is a body fragment; the clean methods only pull from the source's body, and the
- canned white-lists only allow body contained tags.
- <p/>
- Rather than interacting directly with a Cleaner object, generally see the {@code clean} methods in {@link org.jsoup.Jsoup}.
+ * The whitelist based HTML cleaner. Use to ensure that end-user provided HTML
+ * contains only the elements and attributes that you are expecting; no junk,
+ * and no cross-site scripting attacks!
+ * <p/>
+ * The HTML cleaner parses the input as HTML and then runs it through a
+ * white-list, so the output HTML can only contain HTML that is allowed by the
+ * whitelist.
+ * <p/>
+ * It is assumed that the input HTML is a body fragment; the clean methods only
+ * pull from the source's body, and the canned white-lists only allow body
+ * contained tags.
+ * <p/>
+ * Rather than interacting directly with a Cleaner object, generally see the
+ * {@code clean} methods in {@link org.jsoup.Jsoup}.
*/
public class Cleaner {
private Whitelist whitelist;
/**
- Create a new cleaner, that sanitizes documents using the supplied whitelist.
- @param whitelist white-list to clean with
+ * Create a new cleaner, that sanitizes documents using the supplied
+ * whitelist.
+ *
+ * @param whitelist
+ * white-list to clean with
*/
public Cleaner(Whitelist whitelist) {
Validate.notNull(whitelist);
@@ -31,10 +43,14 @@ public class Cleaner {
}
/**
- Creates a new, clean document, from the original dirty document, containing only elements allowed by the whitelist.
- The original document is not modified. Only elements from the dirt document's <code>body</code> are used.
- @param dirtyDocument Untrusted base document to clean.
- @return cleaned document.
+ * Creates a new, clean document, from the original dirty document,
+ * containing only elements allowed by the whitelist. The original document
+ * is not modified. Only elements from the dirty document's <code>body</code>
+ * are used.
+ *
+ * @param dirtyDocument
+ * Untrusted base document to clean.
+ * @return cleaned document.
*/
public Document clean(Document dirtyDocument) {
Validate.notNull(dirtyDocument);
@@ -46,14 +62,20 @@ public class Cleaner {
}
/**
- Determines if the input document is valid, against the whitelist. It is considered valid if all the tags and attributes
- in the input HTML are allowed by the whitelist.
- <p/>
- This method can be used as a validator for user input forms. An invalid document will still be cleaned successfully
- using the {@link #clean(Document)} document. If using as a validator, it is recommended to still clean the document
- to ensure enforced attributes are set correctly, and that the output is tidied.
- @param dirtyDocument document to test
- @return true if no tags or attributes need to be removed; false if they do
+ * Determines if the input document is valid, against the whitelist. It is
+ * considered valid if all the tags and attributes in the input HTML are
+ * allowed by the whitelist.
+ * <p/>
+ * This method can be used as a validator for user input forms. An invalid
+ * document will still be cleaned successfully using the
+ * {@link #clean(Document)} method. If using as a validator, it is
+ * recommended to still clean the document to ensure enforced attributes are
+ * set correctly, and that the output is tidied.
+ *
+ * @param dirtyDocument
+ * document to test
+ * @return true if no tags or attributes need to be removed; false if they
+ * do
*/
public boolean isValid(Document dirtyDocument) {
Validate.notNull(dirtyDocument);
@@ -64,10 +86,14 @@ public class Cleaner {
}
/**
- Iterates the input and copies trusted nodes (tags, attributes, text) into the destination.
- @param source source of HTML
- @param dest destination element to copy into
- @return number of discarded elements (that were considered unsafe)
+ * Iterates the input and copies trusted nodes (tags, attributes, text) into
+ * the destination.
+ *
+ * @param source
+ * source of HTML
+ * @param dest
+ * destination element to copy into
+ * @return number of discarded elements (that were considered unsafe)
*/
private int copySafeNodes(Element source, Element dest) {
List<Node> sourceChildren = source.childNodes();
@@ -77,20 +103,24 @@ public class Cleaner {
if (sourceChild instanceof Element) {
Element sourceEl = (Element) sourceChild;
- if (whitelist.isSafeTag(sourceEl.tagName())) { // safe, clone and copy safe attrs
+ if (whitelist.isSafeTag(sourceEl.tagName())) { // safe, clone
+ // and copy safe
+ // attrs
ElementMeta meta = createSafeElement(sourceEl);
Element destChild = meta.el;
dest.appendChild(destChild);
numDiscarded += meta.numAttribsDiscarded;
numDiscarded += copySafeNodes(sourceEl, destChild); // recurs
- } else { // not a safe tag, but it may have children (els or text) that are, so recurse
+ } else { // not a safe tag, but it may have children (els or
+ // text) that are, so recurse
numDiscarded++;
numDiscarded += copySafeNodes(sourceEl, dest);
}
} else if (sourceChild instanceof TextNode) {
TextNode sourceText = (TextNode) sourceChild;
- TextNode destText = new TextNode(sourceText.getWholeText(), sourceChild.baseUri());
+ TextNode destText = new TextNode(sourceText.getWholeText(),
+ sourceChild.baseUri());
dest.appendChild(destText);
} // else, we don't care about comments, xml proc instructions, etc
}
@@ -100,15 +130,17 @@ public class Cleaner {
private ElementMeta createSafeElement(Element sourceEl) {
String sourceTag = sourceEl.tagName();
Attributes destAttrs = new Attributes();
- Element dest = new Element(Tag.valueOf(sourceTag), sourceEl.baseUri(), destAttrs);
+ Element dest = new Element(Tag.valueOf(sourceTag), sourceEl.baseUri(),
+ destAttrs);
int numDiscarded = 0;
Attributes sourceAttrs = sourceEl.attributes();
for (Attribute sourceAttr : sourceAttrs) {
- if (whitelist.isSafeAttribute(sourceTag, sourceEl, sourceAttr))
+ if (whitelist.isSafeAttribute(sourceTag, sourceEl, sourceAttr)) {
destAttrs.put(sourceAttr);
- else
+ } else {
numDiscarded++;
+ }
}
Attributes enforcedAttrs = whitelist.getEnforcedAttributes(sourceTag);
destAttrs.addAll(enforcedAttrs);
diff --git a/server/src/org/jsoup/safety/Whitelist.java b/server/src/org/jsoup/safety/Whitelist.java
index 2c1150ce9e..b86cb5c6cf 100644
--- a/server/src/org/jsoup/safety/Whitelist.java
+++ b/server/src/org/jsoup/safety/Whitelist.java
@@ -1,171 +1,187 @@
package org.jsoup.safety;
/*
- Thank you to Ryan Grove (wonko.com) for the Ruby HTML cleaner http://github.com/rgrove/sanitize/, which inspired
- this whitelist configuration, and the initial defaults.
+ Thank you to Ryan Grove (wonko.com) for the Ruby HTML cleaner http://github.com/rgrove/sanitize/, which inspired
+ this whitelist configuration, and the initial defaults.
*/
-import org.jsoup.helper.Validate;
-import org.jsoup.nodes.Attribute;
-import org.jsoup.nodes.Attributes;
-import org.jsoup.nodes.Element;
-
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
+import org.jsoup.helper.Validate;
+import org.jsoup.nodes.Attribute;
+import org.jsoup.nodes.Attributes;
+import org.jsoup.nodes.Element;
/**
- Whitelists define what HTML (elements and attributes) to allow through the cleaner. Everything else is removed.
- <p/>
- Start with one of the defaults:
- <ul>
- <li>{@link #none}
- <li>{@link #simpleText}
- <li>{@link #basic}
- <li>{@link #basicWithImages}
- <li>{@link #relaxed}
- </ul>
- <p/>
- If you need to allow more through (please be careful!), tweak a base whitelist with:
- <ul>
- <li>{@link #addTags}
- <li>{@link #addAttributes}
- <li>{@link #addEnforcedAttribute}
- <li>{@link #addProtocols}
- </ul>
- <p/>
- The cleaner and these whitelists assume that you want to clean a <code>body</code> fragment of HTML (to add user
- supplied HTML into a templated page), and not to clean a full HTML document. If the latter is the case, either wrap the
- document HTML around the cleaned body HTML, or create a whitelist that allows <code>html</code> and <code>head</code>
- elements as appropriate.
- <p/>
- If you are going to extend a whitelist, please be very careful. Make sure you understand what attributes may lead to
- XSS attack vectors. URL attributes are particularly vulnerable and require careful validation. See
- http://ha.ckers.org/xss.html for some XSS attack examples.
-
- @author Jonathan Hedley
+ * Whitelists define what HTML (elements and attributes) to allow through the
+ * cleaner. Everything else is removed.
+ * <p/>
+ * Start with one of the defaults:
+ * <ul>
+ * <li>{@link #none}
+ * <li>{@link #simpleText}
+ * <li>{@link #basic}
+ * <li>{@link #basicWithImages}
+ * <li>{@link #relaxed}
+ * </ul>
+ * <p/>
+ * If you need to allow more through (please be careful!), tweak a base
+ * whitelist with:
+ * <ul>
+ * <li>{@link #addTags}
+ * <li>{@link #addAttributes}
+ * <li>{@link #addEnforcedAttribute}
+ * <li>{@link #addProtocols}
+ * </ul>
+ * <p/>
+ * The cleaner and these whitelists assume that you want to clean a
+ * <code>body</code> fragment of HTML (to add user supplied HTML into a
+ * templated page), and not to clean a full HTML document. If the latter is the
+ * case, either wrap the document HTML around the cleaned body HTML, or create a
+ * whitelist that allows <code>html</code> and <code>head</code> elements as
+ * appropriate.
+ * <p/>
+ * If you are going to extend a whitelist, please be very careful. Make sure you
+ * understand what attributes may lead to XSS attack vectors. URL attributes are
+ * particularly vulnerable and require careful validation. See
+ * http://ha.ckers.org/xss.html for some XSS attack examples.
+ *
+ * @author Jonathan Hedley
*/
public class Whitelist {
- private Set<TagName> tagNames; // tags allowed, lower case. e.g. [p, br, span]
- private Map<TagName, Set<AttributeKey>> attributes; // tag -> attribute[]. allowed attributes [href] for a tag.
- private Map<TagName, Map<AttributeKey, AttributeValue>> enforcedAttributes; // always set these attribute values
- private Map<TagName, Map<AttributeKey, Set<Protocol>>> protocols; // allowed URL protocols for attributes
+ private Set<TagName> tagNames; // tags allowed, lower case. e.g. [p, br,
+ // span]
+ private Map<TagName, Set<AttributeKey>> attributes; // tag -> attribute[].
+ // allowed attributes
+ // [href] for a tag.
+ private Map<TagName, Map<AttributeKey, AttributeValue>> enforcedAttributes; // always
+ // set
+ // these
+ // attribute
+ // values
+ private Map<TagName, Map<AttributeKey, Set<Protocol>>> protocols; // allowed
+ // URL
+ // protocols
+ // for
+ // attributes
private boolean preserveRelativeLinks; // option to preserve relative links
/**
- This whitelist allows only text nodes: all HTML will be stripped.
-
- @return whitelist
+ * This whitelist allows only text nodes: all HTML will be stripped.
+ *
+ * @return whitelist
*/
public static Whitelist none() {
return new Whitelist();
}
/**
- This whitelist allows only simple text formatting: <code>b, em, i, strong, u</code>. All other HTML (tags and
- attributes) will be removed.
-
- @return whitelist
+ * This whitelist allows only simple text formatting:
+ * <code>b, em, i, strong, u</code>. All other HTML (tags and attributes)
+ * will be removed.
+ *
+ * @return whitelist
*/
public static Whitelist simpleText() {
- return new Whitelist()
- .addTags("b", "em", "i", "strong", "u")
- ;
+ return new Whitelist().addTags("b", "em", "i", "strong", "u");
}
/**
- This whitelist allows a fuller range of text nodes: <code>a, b, blockquote, br, cite, code, dd, dl, dt, em, i, li,
- ol, p, pre, q, small, strike, strong, sub, sup, u, ul</code>, and appropriate attributes.
- <p/>
- Links (<code>a</code> elements) can point to <code>http, https, ftp, mailto</code>, and have an enforced
- <code>rel=nofollow</code> attribute.
- <p/>
- Does not allow images.
-
- @return whitelist
+ * This whitelist allows a fuller range of text nodes:
+ * <code>a, b, blockquote, br, cite, code, dd, dl, dt, em, i, li,
+ ol, p, pre, q, small, strike, strong, sub, sup, u, ul</code>, and
+ * appropriate attributes.
+ * <p/>
+ * Links (<code>a</code> elements) can point to
+ * <code>http, https, ftp, mailto</code>, and have an enforced
+ * <code>rel=nofollow</code> attribute.
+ * <p/>
+ * Does not allow images.
+ *
+ * @return whitelist
*/
public static Whitelist basic() {
return new Whitelist()
- .addTags(
- "a", "b", "blockquote", "br", "cite", "code", "dd", "dl", "dt", "em",
- "i", "li", "ol", "p", "pre", "q", "small", "strike", "strong", "sub",
- "sup", "u", "ul")
+ .addTags("a", "b", "blockquote", "br", "cite", "code", "dd",
+ "dl", "dt", "em", "i", "li", "ol", "p", "pre", "q",
+ "small", "strike", "strong", "sub", "sup", "u", "ul")
- .addAttributes("a", "href")
- .addAttributes("blockquote", "cite")
+ .addAttributes("a", "href").addAttributes("blockquote", "cite")
.addAttributes("q", "cite")
.addProtocols("a", "href", "ftp", "http", "https", "mailto")
.addProtocols("blockquote", "cite", "http", "https")
.addProtocols("cite", "cite", "http", "https")
- .addEnforcedAttribute("a", "rel", "nofollow")
- ;
+ .addEnforcedAttribute("a", "rel", "nofollow");
}
/**
- This whitelist allows the same text tags as {@link #basic}, and also allows <code>img</code> tags, with appropriate
- attributes, with <code>src</code> pointing to <code>http</code> or <code>https</code>.
-
- @return whitelist
+ * This whitelist allows the same text tags as {@link #basic}, and also
+ * allows <code>img</code> tags, with appropriate attributes, with
+ * <code>src</code> pointing to <code>http</code> or <code>https</code>.
+ *
+ * @return whitelist
*/
public static Whitelist basicWithImages() {
return basic()
.addTags("img")
- .addAttributes("img", "align", "alt", "height", "src", "title", "width")
- .addProtocols("img", "src", "http", "https")
- ;
+ .addAttributes("img", "align", "alt", "height", "src", "title",
+ "width").addProtocols("img", "src", "http", "https");
}
/**
- This whitelist allows a full range of text and structural body HTML: <code>a, b, blockquote, br, caption, cite,
+ * This whitelist allows a full range of text and structural body HTML:
+ * <code>a, b, blockquote, br, caption, cite,
code, col, colgroup, dd, dl, dt, em, h1, h2, h3, h4, h5, h6, i, img, li, ol, p, pre, q, small, strike, strong, sub,
sup, table, tbody, td, tfoot, th, thead, tr, u, ul</code>
- <p/>
- Links do not have an enforced <code>rel=nofollow</code> attribute, but you can add that if desired.
-
- @return whitelist
+ * <p/>
+ * Links do not have an enforced <code>rel=nofollow</code> attribute, but
+ * you can add that if desired.
+ *
+ * @return whitelist
*/
public static Whitelist relaxed() {
return new Whitelist()
- .addTags(
- "a", "b", "blockquote", "br", "caption", "cite", "code", "col",
- "colgroup", "dd", "div", "dl", "dt", "em", "h1", "h2", "h3", "h4", "h5", "h6",
- "i", "img", "li", "ol", "p", "pre", "q", "small", "strike", "strong",
- "sub", "sup", "table", "tbody", "td", "tfoot", "th", "thead", "tr", "u",
- "ul")
+ .addTags("a", "b", "blockquote", "br", "caption", "cite",
+ "code", "col", "colgroup", "dd", "div", "dl", "dt",
+ "em", "h1", "h2", "h3", "h4", "h5", "h6", "i", "img",
+ "li", "ol", "p", "pre", "q", "small", "strike",
+ "strong", "sub", "sup", "table", "tbody", "td",
+ "tfoot", "th", "thead", "tr", "u", "ul")
.addAttributes("a", "href", "title")
.addAttributes("blockquote", "cite")
.addAttributes("col", "span", "width")
.addAttributes("colgroup", "span", "width")
- .addAttributes("img", "align", "alt", "height", "src", "title", "width")
+ .addAttributes("img", "align", "alt", "height", "src", "title",
+ "width")
.addAttributes("ol", "start", "type")
.addAttributes("q", "cite")
.addAttributes("table", "summary", "width")
- .addAttributes("td", "abbr", "axis", "colspan", "rowspan", "width")
- .addAttributes(
- "th", "abbr", "axis", "colspan", "rowspan", "scope",
+ .addAttributes("td", "abbr", "axis", "colspan", "rowspan",
"width")
- .addAttributes("ul", "type")
+ .addAttributes("th", "abbr", "axis", "colspan", "rowspan",
+ "scope", "width").addAttributes("ul", "type")
.addProtocols("a", "href", "ftp", "http", "https", "mailto")
.addProtocols("blockquote", "cite", "http", "https")
.addProtocols("img", "src", "http", "https")
- .addProtocols("q", "cite", "http", "https")
- ;
+ .addProtocols("q", "cite", "http", "https");
}
/**
- Create a new, empty whitelist. Generally it will be better to start with a default prepared whitelist instead.
-
- @see #basic()
- @see #basicWithImages()
- @see #simpleText()
- @see #relaxed()
+ * Create a new, empty whitelist. Generally it will be better to start with
+ * a default prepared whitelist instead.
+ *
+ * @see #basic()
+ * @see #basicWithImages()
+ * @see #simpleText()
+ * @see #relaxed()
*/
public Whitelist() {
tagNames = new HashSet<TagName>();
@@ -176,10 +192,12 @@ public class Whitelist {
}
/**
- Add a list of allowed elements to a whitelist. (If a tag is not allowed, it will be removed from the HTML.)
-
- @param tags tag names to allow
- @return this (for chaining)
+ * Add a list of allowed elements to a whitelist. (If a tag is not allowed,
+ * it will be removed from the HTML.)
+ *
+ * @param tags
+ * tag names to allow
+ * @return this (for chaining)
*/
public Whitelist addTags(String... tags) {
Validate.notNull(tags);
@@ -192,17 +210,22 @@ public class Whitelist {
}
/**
- Add a list of allowed attributes to a tag. (If an attribute is not allowed on an element, it will be removed.)
- <p/>
- E.g.: <code>addAttributes("a", "href", "class")</code> allows <code>href</code> and <code>class</code> attributes
- on <code>a</code> tags.
- <p/>
- To make an attribute valid for <b>all tags</b>, use the pseudo tag <code>:all</code>, e.g.
- <code>addAttributes(":all", "class")</code>.
-
- @param tag The tag the attributes are for. The tag will be added to the allowed tag list if necessary.
- @param keys List of valid attributes for the tag
- @return this (for chaining)
+ * Add a list of allowed attributes to a tag. (If an attribute is not
+ * allowed on an element, it will be removed.)
+ * <p/>
+ * E.g.: <code>addAttributes("a", "href", "class")</code> allows
+ * <code>href</code> and <code>class</code> attributes on <code>a</code>
+ * tags.
+ * <p/>
+ * To make an attribute valid for <b>all tags</b>, use the pseudo tag
+ * <code>:all</code>, e.g. <code>addAttributes(":all", "class")</code>.
+ *
+ * @param tag
+ * The tag the attributes are for. The tag will be added to the
+ * allowed tag list if necessary.
+ * @param keys
+ * List of valid attributes for the tag
+ * @return this (for chaining)
*/
public Whitelist addAttributes(String tag, String... keys) {
Validate.notEmpty(tag);
@@ -210,8 +233,9 @@ public class Whitelist {
Validate.isTrue(keys.length > 0, "No attributes supplied.");
TagName tagName = TagName.valueOf(tag);
- if (!tagNames.contains(tagName))
+ if (!tagNames.contains(tagName)) {
tagNames.add(tagName);
+ }
Set<AttributeKey> attributeSet = new HashSet<AttributeKey>();
for (String key : keys) {
Validate.notEmpty(key);
@@ -227,16 +251,22 @@ public class Whitelist {
}
/**
- Add an enforced attribute to a tag. An enforced attribute will always be added to the element. If the element
- already has the attribute set, it will be overridden.
- <p/>
- E.g.: <code>addEnforcedAttribute("a", "rel", "nofollow")</code> will make all <code>a</code> tags output as
- <code>&lt;a href="..." rel="nofollow"></code>
-
- @param tag The tag the enforced attribute is for. The tag will be added to the allowed tag list if necessary.
- @param key The attribute key
- @param value The enforced attribute value
- @return this (for chaining)
+ * Add an enforced attribute to a tag. An enforced attribute will always be
+ * added to the element. If the element already has the attribute set, it
+ * will be overridden.
+ * <p/>
+ * E.g.: <code>addEnforcedAttribute("a", "rel", "nofollow")</code> will make
+ * all <code>a</code> tags output as
+ * <code>&lt;a href="..." rel="nofollow"></code>
+ *
+ * @param tag
+ * The tag the enforced attribute is for. The tag will be added
+ * to the allowed tag list if necessary.
+ * @param key
+ * The attribute key
+ * @param value
+ * The enforced attribute value
+ * @return this (for chaining)
*/
public Whitelist addEnforcedAttribute(String tag, String key, String value) {
Validate.notEmpty(tag);
@@ -244,8 +274,9 @@ public class Whitelist {
Validate.notEmpty(value);
TagName tagName = TagName.valueOf(tag);
- if (!tagNames.contains(tagName))
+ if (!tagNames.contains(tagName)) {
tagNames.add(tagName);
+ }
AttributeKey attrKey = AttributeKey.valueOf(key);
AttributeValue attrVal = AttributeValue.valueOf(value);
@@ -260,16 +291,21 @@ public class Whitelist {
}
/**
- * Configure this Whitelist to preserve relative links in an element's URL attribute, or convert them to absolute
- * links. By default, this is <b>false</b>: URLs will be made absolute (e.g. start with an allowed protocol, like
- * e.g. {@code http://}.
+ * Configure this Whitelist to preserve relative links in an element's URL
+ * attribute, or convert them to absolute links. By default, this is
+ * <b>false</b>: URLs will be made absolute (i.e. start with an allowed
+ * protocol, such as {@code http://}).
* <p />
- * Note that when handling relative links, the input document must have an appropriate {@code base URI} set when
- * parsing, so that the link's protocol can be confirmed. Regardless of the setting of the {@code preserve relative
- * links} option, the link must be resolvable against the base URI to an allowed protocol; otherwise the attribute
- * will be removed.
- *
- * @param preserve {@code true} to allow relative links, {@code false} (default) to deny
+ * Note that when handling relative links, the input document must have an
+ * appropriate {@code base URI} set when parsing, so that the link's
+ * protocol can be confirmed. Regardless of the setting of the
+ * {@code preserve relative
+ * links} option, the link must be resolvable against the base URI to an
+ * allowed protocol; otherwise the attribute will be removed.
+ *
+ * @param preserve
+ * {@code true} to allow relative links, {@code false} (default)
+ * to deny
* @return this Whitelist, for chaining.
* @see #addProtocols
*/
@@ -279,15 +315,18 @@ public class Whitelist {
}
/**
- Add allowed URL protocols for an element's URL attribute. This restricts the possible values of the attribute to
- URLs with the defined protocol.
- <p/>
- E.g.: <code>addProtocols("a", "href", "ftp", "http", "https")</code>
-
- @param tag Tag the URL protocol is for
- @param key Attribute key
- @param protocols List of valid protocols
- @return this, for chaining
+ * Add allowed URL protocols for an element's URL attribute. This restricts
+ * the possible values of the attribute to URLs with the defined protocol.
+ * <p/>
+ * E.g.: <code>addProtocols("a", "href", "ftp", "http", "https")</code>
+ *
+ * @param tag
+ * Tag the URL protocol is for
+ * @param key
+ * Attribute key
+ * @param protocols
+ * List of valid protocols
+ * @return this, for chaining
*/
public Whitelist addProtocols(String tag, String key, String... protocols) {
Validate.notEmpty(tag);
@@ -330,9 +369,11 @@ public class Whitelist {
if (attributes.containsKey(tag)) {
if (attributes.get(tag).contains(key)) {
if (protocols.containsKey(tag)) {
- Map<AttributeKey, Set<Protocol>> attrProts = protocols.get(tag);
+ Map<AttributeKey, Set<Protocol>> attrProts = protocols
+ .get(tag);
// ok if not defined protocol; otherwise test
- return !attrProts.containsKey(key) || testValidProtocol(el, attr, attrProts.get(key));
+ return !attrProts.containsKey(key)
+ || testValidProtocol(el, attr, attrProts.get(key));
} else { // attribute found, no protocols defined, so OK
return true;
}
@@ -342,15 +383,20 @@ public class Whitelist {
return !tagName.equals(":all") && isSafeAttribute(":all", el, attr);
}
- private boolean testValidProtocol(Element el, Attribute attr, Set<Protocol> protocols) {
- // try to resolve relative urls to abs, and optionally update the attribute so output html has abs.
+ private boolean testValidProtocol(Element el, Attribute attr,
+ Set<Protocol> protocols) {
+ // try to resolve relative urls to abs, and optionally update the
+ // attribute so output html has abs.
// rels without a baseuri get removed
String value = el.absUrl(attr.getKey());
- if (value.length() == 0)
- value = attr.getValue(); // if it could not be made abs, run as-is to allow custom unknown protocols
- if (!preserveRelativeLinks)
+ if (value.length() == 0) {
+ value = attr.getValue(); // if it could not be made abs, run as-is
+ // to allow custom unknown protocols
+ }
+ if (!preserveRelativeLinks) {
attr.setValue(value);
-
+ }
+
for (Protocol protocol : protocols) {
String prot = protocol.toString() + ":";
if (value.toLowerCase().startsWith(prot)) {
@@ -364,14 +410,17 @@ public class Whitelist {
Attributes attrs = new Attributes();
TagName tag = TagName.valueOf(tagName);
if (enforcedAttributes.containsKey(tag)) {
- Map<AttributeKey, AttributeValue> keyVals = enforcedAttributes.get(tag);
- for (Map.Entry<AttributeKey, AttributeValue> entry : keyVals.entrySet()) {
- attrs.put(entry.getKey().toString(), entry.getValue().toString());
+ Map<AttributeKey, AttributeValue> keyVals = enforcedAttributes
+ .get(tag);
+ for (Map.Entry<AttributeKey, AttributeValue> entry : keyVals
+ .entrySet()) {
+ attrs.put(entry.getKey().toString(), entry.getValue()
+ .toString());
}
}
return attrs;
}
-
+
// named types for config. All just hold strings, but here for my sanity.
static class TagName extends TypedValue {
@@ -432,13 +481,23 @@ public class Whitelist {
@Override
public boolean equals(Object obj) {
- if (this == obj) return true;
- if (obj == null) return false;
- if (getClass() != obj.getClass()) return false;
+ if (this == obj) {
+ return true;
+ }
+ if (obj == null) {
+ return false;
+ }
+ if (getClass() != obj.getClass()) {
+ return false;
+ }
TypedValue other = (TypedValue) obj;
if (value == null) {
- if (other.value != null) return false;
- } else if (!value.equals(other.value)) return false;
+ if (other.value != null) {
+ return false;
+ }
+ } else if (!value.equals(other.value)) {
+ return false;
+ }
return true;
}
@@ -448,4 +507,3 @@ public class Whitelist {
}
}
}
-
diff --git a/server/src/org/jsoup/safety/package-info.java b/server/src/org/jsoup/safety/package-info.java
index ac890f0607..acbff6665f 100644
--- a/server/src/org/jsoup/safety/package-info.java
+++ b/server/src/org/jsoup/safety/package-info.java
@@ -2,3 +2,4 @@
Contains the jsoup HTML cleaner, and whitelist definitions.
*/
package org.jsoup.safety;
+
diff --git a/server/src/org/jsoup/select/Collector.java b/server/src/org/jsoup/select/Collector.java
index 8f01045768..20554e8653 100644
--- a/server/src/org/jsoup/select/Collector.java
+++ b/server/src/org/jsoup/select/Collector.java
@@ -5,7 +5,7 @@ import org.jsoup.nodes.Node;
/**
* Collects a list of elements that match the supplied criteria.
- *
+ *
* @author Jonathan Hedley
*/
public class Collector {
@@ -14,12 +14,16 @@ public class Collector {
}
/**
- Build a list of elements, by visiting root and every descendant of root, and testing it against the evaluator.
- @param eval Evaluator to test elements against
- @param root root of tree to descend
- @return list of matches; empty if none
+ * Build a list of elements, by visiting root and every descendant of root,
+ * and testing it against the evaluator.
+ *
+ * @param eval
+ * Evaluator to test elements against
+ * @param root
+ * root of tree to descend
+ * @return list of matches; empty if none
*/
- public static Elements collect (Evaluator eval, Element root) {
+ public static Elements collect(Evaluator eval, Element root) {
Elements elements = new Elements();
new NodeTraversor(new Accumulator(root, elements, eval)).traverse(root);
return elements;
@@ -36,14 +40,17 @@ public class Collector {
this.eval = eval;
}
+ @Override
public void head(Node node, int depth) {
if (node instanceof Element) {
Element el = (Element) node;
- if (eval.matches(root, el))
+ if (eval.matches(root, el)) {
elements.add(el);
+ }
}
}
+ @Override
public void tail(Node node, int depth) {
// void
}
diff --git a/server/src/org/jsoup/select/CombiningEvaluator.java b/server/src/org/jsoup/select/CombiningEvaluator.java
index a31ed2636f..c3f9a8af2e 100644
--- a/server/src/org/jsoup/select/CombiningEvaluator.java
+++ b/server/src/org/jsoup/select/CombiningEvaluator.java
@@ -1,13 +1,13 @@
package org.jsoup.select;
-import org.jsoup.helper.StringUtil;
-import org.jsoup.nodes.Element;
-
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.List;
+import org.jsoup.helper.StringUtil;
+import org.jsoup.nodes.Element;
+
/**
* Base combining (and, or) evaluator.
*/
@@ -25,9 +25,10 @@ abstract class CombiningEvaluator extends Evaluator {
}
Evaluator rightMostEvaluator() {
- return evaluators.size() > 0 ? evaluators.get(evaluators.size() - 1) : null;
+ return evaluators.size() > 0 ? evaluators.get(evaluators.size() - 1)
+ : null;
}
-
+
void replaceRightMostEvaluator(Evaluator replacement) {
evaluators.set(evaluators.size() - 1, replacement);
}
@@ -44,8 +45,9 @@ abstract class CombiningEvaluator extends Evaluator {
@Override
public boolean matches(Element root, Element node) {
for (Evaluator s : evaluators) {
- if (!s.matches(root, node))
+ if (!s.matches(root, node)) {
return false;
+ }
}
return true;
}
@@ -58,15 +60,20 @@ abstract class CombiningEvaluator extends Evaluator {
static final class Or extends CombiningEvaluator {
/**
- * Create a new Or evaluator. The initial evaluators are ANDed together and used as the first clause of the OR.
- * @param evaluators initial OR clause (these are wrapped into an AND evaluator).
+ * Create a new Or evaluator. The initial evaluators are ANDed together
+ * and used as the first clause of the OR.
+ *
+ * @param evaluators
+ * initial OR clause (these are wrapped into an AND
+ * evaluator).
*/
Or(Collection<Evaluator> evaluators) {
super();
- if (evaluators.size() > 1)
+ if (evaluators.size() > 1) {
this.evaluators.add(new And(evaluators));
- else // 0 or 1
+ } else {
this.evaluators.addAll(evaluators);
+ }
}
Or() {
@@ -80,8 +87,9 @@ abstract class CombiningEvaluator extends Evaluator {
@Override
public boolean matches(Element root, Element node) {
for (Evaluator s : evaluators) {
- if (s.matches(root, node))
+ if (s.matches(root, node)) {
return true;
+ }
}
return false;
}
diff --git a/server/src/org/jsoup/select/Elements.java b/server/src/org/jsoup/select/Elements.java
index 8302da1e53..cddea67d96 100644
--- a/server/src/org/jsoup/select/Elements.java
+++ b/server/src/org/jsoup/select/Elements.java
@@ -1,17 +1,26 @@
package org.jsoup.select;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.LinkedHashSet;
+import java.util.List;
+import java.util.ListIterator;
+
import org.jsoup.helper.Validate;
import org.jsoup.nodes.Element;
import org.jsoup.nodes.Node;
-import java.util.*;
-
/**
- A list of {@link Element Elements}, with methods that act on every element in the list.
- <p/>
- To get an Elements object, use the {@link Element#select(String)} method.
-
- @author Jonathan Hedley, jonathan@hedley.net */
+ * A list of {@link Element Elements}, with methods that act on every element in
+ * the list.
+ * <p/>
+ * To get an Elements object, use the {@link Element#select(String)} method.
+ *
+ * @author Jonathan Hedley, jonathan@hedley.net
+ */
public class Elements implements List<Element>, Cloneable {
private List<Element> contents;
@@ -26,59 +35,70 @@ public class Elements implements List<Element>, Cloneable {
public Elements(Collection<Element> elements) {
contents = new ArrayList<Element>(elements);
}
-
+
public Elements(List<Element> elements) {
contents = elements;
}
-
+
public Elements(Element... elements) {
this(Arrays.asList(elements));
}
-
+
@Override
- public Elements clone() {
- List<Element> elements = new ArrayList<Element>();
-
- for(Element e : contents)
- elements.add(e.clone());
-
-
- return new Elements(elements);
- }
-
- // attribute methods
- /**
- Get an attribute value from the first matched element that has the attribute.
- @param attributeKey The attribute key.
- @return The attribute value from the first matched element that has the attribute.. If no elements were matched (isEmpty() == true),
- or if the no elements have the attribute, returns empty string.
- @see #hasAttr(String)
+ public Elements clone() {
+ List<Element> elements = new ArrayList<Element>();
+
+ for (Element e : contents) {
+ elements.add(e.clone());
+ }
+
+ return new Elements(elements);
+ }
+
+ // attribute methods
+ /**
+ * Get an attribute value from the first matched element that has the
+ * attribute.
+ *
+ * @param attributeKey
+ * The attribute key.
+ * @return The attribute value from the first matched element that has the
+ * attribute. If no elements were matched (isEmpty() == true), or
+ * if no elements have the attribute, returns an empty string.
+ * @see #hasAttr(String)
*/
public String attr(String attributeKey) {
for (Element element : contents) {
- if (element.hasAttr(attributeKey))
+ if (element.hasAttr(attributeKey)) {
return element.attr(attributeKey);
+ }
}
return "";
}
/**
- Checks if any of the matched elements have this attribute set.
- @param attributeKey attribute key
- @return true if any of the elements have the attribute; false if none do.
+ * Checks if any of the matched elements have this attribute set.
+ *
+ * @param attributeKey
+ * attribute key
+ * @return true if any of the elements have the attribute; false if none do.
*/
public boolean hasAttr(String attributeKey) {
for (Element element : contents) {
- if (element.hasAttr(attributeKey))
+ if (element.hasAttr(attributeKey)) {
return true;
+ }
}
return false;
}
/**
* Set an attribute on all matched elements.
- * @param attributeKey attribute key
- * @param attributeValue attribute value
+ *
+ * @param attributeKey
+ * attribute key
+ * @param attributeValue
+ * attribute value
* @return this
*/
public Elements attr(String attributeKey, String attributeValue) {
@@ -90,7 +110,9 @@ public class Elements implements List<Element>, Cloneable {
/**
* Remove an attribute from every matched element.
- * @param attributeKey The attribute to remove.
+ *
+ * @param attributeKey
+ * The attribute to remove.
* @return this (for chaining)
*/
public Elements removeAttr(String attributeKey) {
@@ -101,9 +123,11 @@ public class Elements implements List<Element>, Cloneable {
}
/**
- Add the class name to every matched element's {@code class} attribute.
- @param className class name to add
- @return this
+ * Add the class name to every matched element's {@code class} attribute.
+ *
+ * @param className
+ * class name to add
+ * @return this
*/
public Elements addClass(String className) {
for (Element element : contents) {
@@ -113,9 +137,12 @@ public class Elements implements List<Element>, Cloneable {
}
/**
- Remove the class name from every matched element's {@code class} attribute, if present.
- @param className class name to remove
- @return this
+ * Remove the class name from every matched element's {@code class}
+ * attribute, if present.
+ *
+ * @param className
+ * class name to remove
+ * @return this
*/
public Elements removeClass(String className) {
for (Element element : contents) {
@@ -125,9 +152,12 @@ public class Elements implements List<Element>, Cloneable {
}
/**
- Toggle the class name on every matched element's {@code class} attribute.
- @param className class name to add if missing, or remove if present, from every element.
- @return this
+ * Toggle the class name on every matched element's {@code class} attribute.
+ *
+ * @param className
+ * class name to add if missing, or remove if present, from every
+ * element.
+ * @return this
*/
public Elements toggleClass(String className) {
for (Element element : contents) {
@@ -137,69 +167,83 @@ public class Elements implements List<Element>, Cloneable {
}
/**
- Determine if any of the matched elements have this class name set in their {@code class} attribute.
- @param className class name to check for
- @return true if any do, false if none do
+ * Determine if any of the matched elements have this class name set in
+ * their {@code class} attribute.
+ *
+ * @param className
+ * class name to check for
+ * @return true if any do, false if none do
*/
public boolean hasClass(String className) {
for (Element element : contents) {
- if (element.hasClass(className))
+ if (element.hasClass(className)) {
return true;
+ }
}
return false;
}
-
+
/**
* Get the form element's value of the first matched element.
+ *
* @return The form element's value, or empty if not set.
* @see Element#val()
*/
public String val() {
- if (size() > 0)
+ if (size() > 0) {
return first().val();
- else
+ } else {
return "";
+ }
}
-
+
/**
* Set the form element's value in each of the matched elements.
- * @param value The value to set into each matched element
+ *
+ * @param value
+ * The value to set into each matched element
* @return this (for chaining)
*/
public Elements val(String value) {
- for (Element element : contents)
+ for (Element element : contents) {
element.val(value);
+ }
return this;
}
-
+
/**
* Get the combined text of all the matched elements.
* <p>
- * Note that it is possible to get repeats if the matched elements contain both parent elements and their own
- * children, as the Element.text() method returns the combined text of a parent and all its children.
+ * Note that it is possible to get repeats if the matched elements contain
+ * both parent elements and their own children, as the Element.text() method
+ * returns the combined text of a parent and all its children.
+ *
* @return string of all text: unescaped and no HTML.
* @see Element#text()
*/
public String text() {
StringBuilder sb = new StringBuilder();
for (Element element : contents) {
- if (sb.length() != 0)
+ if (sb.length() != 0) {
sb.append(" ");
+ }
sb.append(element.text());
}
return sb.toString();
}
public boolean hasText() {
- for (Element element: contents) {
- if (element.hasText())
+ for (Element element : contents) {
+ if (element.hasText()) {
return true;
+ }
}
return false;
}
-
+
/**
* Get the combined inner HTML of all matched elements.
+ *
* @return string of all element's inner HTML.
* @see #text()
* @see #outerHtml()
@@ -207,15 +251,17 @@ public class Elements implements List<Element>, Cloneable {
public String html() {
StringBuilder sb = new StringBuilder();
for (Element element : contents) {
- if (sb.length() != 0)
+ if (sb.length() != 0) {
sb.append("\n");
+ }
sb.append(element.html());
}
return sb.toString();
}
-
+
/**
* Get the combined outer HTML of all matched elements.
+ *
* @return string of all element's outer HTML.
* @see #text()
* @see #html()
@@ -223,27 +269,33 @@ public class Elements implements List<Element>, Cloneable {
public String outerHtml() {
StringBuilder sb = new StringBuilder();
for (Element element : contents) {
- if (sb.length() != 0)
+ if (sb.length() != 0) {
sb.append("\n");
+ }
sb.append(element.outerHtml());
}
return sb.toString();
}
/**
- * Get the combined outer HTML of all matched elements. Alias of {@link #outerHtml()}.
+ * Get the combined outer HTML of all matched elements. Alias of
+ * {@link #outerHtml()}.
+ *
* @return string of all element's outer HTML.
* @see #text()
* @see #html()
*/
+ @Override
public String toString() {
return outerHtml();
}
/**
- * Update the tag name of each matched element. For example, to change each {@code <i>} to a {@code <em>}, do
- * {@code doc.select("i").tagName("em");}
- * @param tagName the new tag name
+ * Update the tag name of each matched element. For example, to change each
+ * {@code <i>} to a {@code <em>}, do {@code doc.select("i").tagName("em");}
+ *
+ * @param tagName
+ * the new tag name
* @return this, for chaining
* @see Element#tagName(String)
*/
@@ -253,10 +305,12 @@ public class Elements implements List<Element>, Cloneable {
}
return this;
}
-
+
/**
* Set the inner HTML of each matched element.
- * @param html HTML to parse and set into each matched element.
+ *
+ * @param html
+ * HTML to parse and set into each matched element.
* @return this, for chaining
* @see Element#html(String)
*/
@@ -266,10 +320,12 @@ public class Elements implements List<Element>, Cloneable {
}
return this;
}
-
+
/**
* Add the supplied HTML to the start of each matched element's inner HTML.
- * @param html HTML to add inside each element, before the existing HTML
+ *
+ * @param html
+ * HTML to add inside each element, before the existing HTML
* @return this, for chaining
* @see Element#prepend(String)
*/
@@ -279,10 +335,12 @@ public class Elements implements List<Element>, Cloneable {
}
return this;
}
-
+
/**
* Add the supplied HTML to the end of each matched element's inner HTML.
- * @param html HTML to add inside each element, after the existing HTML
+ *
+ * @param html
+ * HTML to add inside each element, after the existing HTML
* @return this, for chaining
* @see Element#append(String)
*/
@@ -292,10 +350,12 @@ public class Elements implements List<Element>, Cloneable {
}
return this;
}
-
+
/**
* Insert the supplied HTML before each matched element's outer HTML.
- * @param html HTML to insert before each element
+ *
+ * @param html
+ * HTML to insert before each element
* @return this, for chaining
* @see Element#before(String)
*/
@@ -305,10 +365,12 @@ public class Elements implements List<Element>, Cloneable {
}
return this;
}
-
+
/**
* Insert the supplied HTML after each matched element's outer HTML.
- * @param html HTML to insert after each element
+ *
+ * @param html
+ * HTML to insert after each element
* @return this, for chaining
* @see Element#after(String)
*/
@@ -320,13 +382,16 @@ public class Elements implements List<Element>, Cloneable {
}
/**
- Wrap the supplied HTML around each matched elements. For example, with HTML
- {@code <p><b>This</b> is <b>Jsoup</b></p>},
- <code>doc.select("b").wrap("&lt;i&gt;&lt;/i&gt;");</code>
- becomes {@code <p><i><b>This</b></i> is <i><b>jsoup</b></i></p>}
- @param html HTML to wrap around each element, e.g. {@code <div class="head"></div>}. Can be arbitrarily deep.
- @return this (for chaining)
- @see Element#wrap
+ * Wrap the supplied HTML around each matched elements. For example, with
+ * HTML {@code <p><b>This</b> is <b>Jsoup</b></p>},
+ * <code>doc.select("b").wrap("&lt;i&gt;&lt;/i&gt;");</code> becomes
+ * {@code <p><i><b>This</b></i> is <i><b>jsoup</b></i></p>}
+ *
+ * @param html
+ * HTML to wrap around each element, e.g.
+ * {@code <div class="head"></div>}. Can be arbitrarily deep.
+ * @return this (for chaining)
+ * @see Element#wrap
*/
public Elements wrap(String html) {
Validate.notEmpty(html);
@@ -337,15 +402,18 @@ public class Elements implements List<Element>, Cloneable {
}
/**
- * Removes the matched elements from the DOM, and moves their children up into their parents. This has the effect of
- * dropping the elements but keeping their children.
+ * Removes the matched elements from the DOM, and moves their children up
+ * into their parents. This has the effect of dropping the elements but
+ * keeping their children.
* <p/>
- * This is useful for e.g removing unwanted formatting elements but keeping their contents.
+ * This is useful for e.g removing unwanted formatting elements but keeping
+ * their contents.
* <p/>
- * E.g. with HTML: {@code <div><font>One</font> <font><a href="/">Two</a></font></div>}<br/>
+ * E.g. with HTML:
+ * {@code <div><font>One</font> <font><a href="/">Two</a></font></div>}<br/>
* {@code doc.select("font").unwrap();}<br/>
* HTML = {@code <div>One <a href="/">Two</a></div>}
- *
+ *
* @return this (for chaining)
* @see Node#unwrap
*/
@@ -357,12 +425,16 @@ public class Elements implements List<Element>, Cloneable {
}
/**
- * Empty (remove all child nodes from) each matched element. This is similar to setting the inner HTML of each
- * element to nothing.
+ * Empty (remove all child nodes from) each matched element. This is similar
+ * to setting the inner HTML of each element to nothing.
* <p>
* E.g. HTML: {@code <div><p>Hello <b>there</b></p> <p>now</p></div>}<br>
* <code>doc.select("p").empty();</code><br>
- * HTML = {@code <div><p></p> <p></p></div>}
+ * HTML = {@code <div>
+ * <p></p>
+ * <p></p>
+ * </div>}
+ *
* @return this, for chaining
* @see Element#empty()
* @see #remove()
@@ -375,13 +447,16 @@ public class Elements implements List<Element>, Cloneable {
}
/**
- * Remove each matched element from the DOM. This is similar to setting the outer HTML of each element to nothing.
+ * Remove each matched element from the DOM. This is similar to setting the
+ * outer HTML of each element to nothing.
* <p>
* E.g. HTML: {@code <div><p>Hello</p> <p>there</p> <img /></div>}<br>
* <code>doc.select("p").remove();</code><br>
* HTML = {@code <div> <img /></div>}
* <p>
- * Note that this method should not be used to clean user-submitted HTML; rather, use {@link org.jsoup.safety.Cleaner} to clean HTML.
+ * Note that this method should not be used to clean user-submitted HTML;
+ * rather, use {@link org.jsoup.safety.Cleaner} to clean HTML.
+ *
* @return this, for chaining
* @see Element#empty()
* @see #empty()
@@ -392,12 +467,14 @@ public class Elements implements List<Element>, Cloneable {
}
return this;
}
-
+
// filters
-
+
/**
* Find matching elements within this element list.
- * @param query A {@link Selector} query
+ *
+ * @param query
+ * A {@link Selector} query
* @return the filtered list of elements, or an empty list if none match.
*/
public Elements select(String query) {
@@ -411,28 +488,37 @@ public class Elements implements List<Element>, Cloneable {
* <code>Elements divs = doc.select("div").not("#logo");</code><br>
* Result: {@code divs: [<div>Two</div>]}
* <p>
- * @param query the selector query whose results should be removed from these elements
+ *
+ * @param query
+ * the selector query whose results should be removed from these
+ * elements
* @return a new elements list that contains only the filtered results
*/
public Elements not(String query) {
Elements out = Selector.select(query, this);
return Selector.filterOut(this, out);
}
-
+
/**
* Get the <i>nth</i> matched element as an Elements object.
* <p>
* See also {@link #get(int)} to retrieve an Element.
- * @param index the (zero-based) index of the element in the list to retain
- * @return Elements containing only the specified element, or, if that element did not exist, an empty list.
+ *
+ * @param index
+ * the (zero-based) index of the element in the list to retain
+ * @return Elements containing only the specified element, or, if that
+ * element did not exist, an empty list.
*/
public Elements eq(int index) {
- return contents.size() > index ? new Elements(get(index)) : new Elements();
+ return contents.size() > index ? new Elements(get(index))
+ : new Elements();
}
-
+
/**
* Test if any of the matched elements match the supplied query.
- * @param query A selector
+ *
+ * @param query
+ * A selector
* @return true if at least one element in the list matches the query.
*/
public boolean is(String query) {
@@ -442,11 +528,12 @@ public class Elements implements List<Element>, Cloneable {
/**
* Get all of the parents and ancestor elements of the matched elements.
+ *
* @return all of the parents and ancestor elements of the matched elements
*/
public Elements parents() {
HashSet<Element> combo = new LinkedHashSet<Element>();
- for (Element e: contents) {
+ for (Element e : contents) {
combo.addAll(e.parents());
}
return new Elements(combo);
@@ -454,16 +541,20 @@ public class Elements implements List<Element>, Cloneable {
// list-like methods
/**
- Get the first matched element.
- @return The first matched element, or <code>null</code> if contents is empty;
+ * Get the first matched element.
+ *
+ * @return The first matched element, or <code>null</code> if contents is
+ * empty;
*/
public Element first() {
return contents.isEmpty() ? null : contents.get(0);
}
/**
- Get the last matched element.
- @return The last matched element, or <code>null</code> if contents is empty.
+ * Get the last matched element.
+ *
+ * @return The last matched element, or <code>null</code> if contents is
+ * empty.
*/
public Element last() {
return contents.isEmpty() ? null : contents.get(contents.size() - 1);
@@ -471,66 +562,143 @@ public class Elements implements List<Element>, Cloneable {
/**
* Perform a depth-first traversal on each of the selected elements.
- * @param nodeVisitor the visitor callbacks to perform on each node
+ *
+ * @param nodeVisitor
+ * the visitor callbacks to perform on each node
* @return this, for chaining
*/
public Elements traverse(NodeVisitor nodeVisitor) {
Validate.notNull(nodeVisitor);
NodeTraversor traversor = new NodeTraversor(nodeVisitor);
- for (Element el: contents) {
+ for (Element el : contents) {
traversor.traverse(el);
}
return this;
}
// implements List<Element> delegates:
- public int size() {return contents.size();}
+ @Override
+ public int size() {
+ return contents.size();
+ }
- public boolean isEmpty() {return contents.isEmpty();}
+ @Override
+ public boolean isEmpty() {
+ return contents.isEmpty();
+ }
- public boolean contains(Object o) {return contents.contains(o);}
+ @Override
+ public boolean contains(Object o) {
+ return contents.contains(o);
+ }
- public Iterator<Element> iterator() {return contents.iterator();}
+ @Override
+ public Iterator<Element> iterator() {
+ return contents.iterator();
+ }
- public Object[] toArray() {return contents.toArray();}
+ @Override
+ public Object[] toArray() {
+ return contents.toArray();
+ }
- public <T> T[] toArray(T[] a) {return contents.toArray(a);}
+ @Override
+ public <T> T[] toArray(T[] a) {
+ return contents.toArray(a);
+ }
- public boolean add(Element element) {return contents.add(element);}
+ @Override
+ public boolean add(Element element) {
+ return contents.add(element);
+ }
- public boolean remove(Object o) {return contents.remove(o);}
+ @Override
+ public boolean remove(Object o) {
+ return contents.remove(o);
+ }
- public boolean containsAll(Collection<?> c) {return contents.containsAll(c);}
+ @Override
+ public boolean containsAll(Collection<?> c) {
+ return contents.containsAll(c);
+ }
- public boolean addAll(Collection<? extends Element> c) {return contents.addAll(c);}
+ @Override
+ public boolean addAll(Collection<? extends Element> c) {
+ return contents.addAll(c);
+ }
- public boolean addAll(int index, Collection<? extends Element> c) {return contents.addAll(index, c);}
+ @Override
+ public boolean addAll(int index, Collection<? extends Element> c) {
+ return contents.addAll(index, c);
+ }
- public boolean removeAll(Collection<?> c) {return contents.removeAll(c);}
+ @Override
+ public boolean removeAll(Collection<?> c) {
+ return contents.removeAll(c);
+ }
- public boolean retainAll(Collection<?> c) {return contents.retainAll(c);}
+ @Override
+ public boolean retainAll(Collection<?> c) {
+ return contents.retainAll(c);
+ }
- public void clear() {contents.clear();}
+ @Override
+ public void clear() {
+ contents.clear();
+ }
- public boolean equals(Object o) {return contents.equals(o);}
+ @Override
+ public boolean equals(Object o) {
+ return contents.equals(o);
+ }
- public int hashCode() {return contents.hashCode();}
+ @Override
+ public int hashCode() {
+ return contents.hashCode();
+ }
- public Element get(int index) {return contents.get(index);}
+ @Override
+ public Element get(int index) {
+ return contents.get(index);
+ }
- public Element set(int index, Element element) {return contents.set(index, element);}
+ @Override
+ public Element set(int index, Element element) {
+ return contents.set(index, element);
+ }
- public void add(int index, Element element) {contents.add(index, element);}
+ @Override
+ public void add(int index, Element element) {
+ contents.add(index, element);
+ }
- public Element remove(int index) {return contents.remove(index);}
+ @Override
+ public Element remove(int index) {
+ return contents.remove(index);
+ }
- public int indexOf(Object o) {return contents.indexOf(o);}
+ @Override
+ public int indexOf(Object o) {
+ return contents.indexOf(o);
+ }
- public int lastIndexOf(Object o) {return contents.lastIndexOf(o);}
+ @Override
+ public int lastIndexOf(Object o) {
+ return contents.lastIndexOf(o);
+ }
- public ListIterator<Element> listIterator() {return contents.listIterator();}
+ @Override
+ public ListIterator<Element> listIterator() {
+ return contents.listIterator();
+ }
- public ListIterator<Element> listIterator(int index) {return contents.listIterator(index);}
+ @Override
+ public ListIterator<Element> listIterator(int index) {
+ return contents.listIterator(index);
+ }
- public List<Element> subList(int fromIndex, int toIndex) {return contents.subList(fromIndex, toIndex);}
+ @Override
+ public List<Element> subList(int fromIndex, int toIndex) {
+ return contents.subList(fromIndex, toIndex);
+ }
}
diff --git a/server/src/org/jsoup/select/Evaluator.java b/server/src/org/jsoup/select/Evaluator.java
index bd0cee481d..5dd4c91616 100644
--- a/server/src/org/jsoup/select/Evaluator.java
+++ b/server/src/org/jsoup/select/Evaluator.java
@@ -1,12 +1,11 @@
package org.jsoup.select;
-import org.jsoup.helper.Validate;
-import org.jsoup.nodes.Element;
-
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
+import org.jsoup.helper.Validate;
+import org.jsoup.nodes.Element;
/**
* Evaluates that an element matches the selector.
@@ -17,9 +16,11 @@ public abstract class Evaluator {
/**
* Test if the element meets the evaluator's requirements.
- *
- * @param root UI of the matching subtree
- * @param element tested element
+ *
+ * @param root
+ * UI of the matching subtree
+ * @param element
+ * tested element
*/
public abstract boolean matches(Element root, Element element);
@@ -122,10 +123,12 @@ public abstract class Evaluator {
@Override
public boolean matches(Element root, Element element) {
- List<org.jsoup.nodes.Attribute> values = element.attributes().asList();
+ List<org.jsoup.nodes.Attribute> values = element.attributes()
+ .asList();
for (org.jsoup.nodes.Attribute attribute : values) {
- if (attribute.getKey().startsWith(keyPrefix))
+ if (attribute.getKey().startsWith(keyPrefix)) {
return true;
+ }
}
return false;
}
@@ -147,7 +150,8 @@ public abstract class Evaluator {
@Override
public boolean matches(Element root, Element element) {
- return element.hasAttr(key) && value.equalsIgnoreCase(element.attr(key));
+ return element.hasAttr(key)
+ && value.equalsIgnoreCase(element.attr(key));
}
@Override
@@ -180,14 +184,20 @@ public abstract class Evaluator {
/**
* Evaluator for attribute name/value matching (value prefix)
*/
- public static final class AttributeWithValueStarting extends AttributeKeyPair {
+ public static final class AttributeWithValueStarting extends
+ AttributeKeyPair {
public AttributeWithValueStarting(String key, String value) {
super(key, value);
}
@Override
public boolean matches(Element root, Element element) {
- return element.hasAttr(key) && element.attr(key).toLowerCase().startsWith(value); // value is lower case already
+ return element.hasAttr(key)
+ && element.attr(key).toLowerCase().startsWith(value); // value
+ // is
+ // lower
+ // case
+ // already
}
@Override
@@ -207,7 +217,11 @@ public abstract class Evaluator {
@Override
public boolean matches(Element root, Element element) {
- return element.hasAttr(key) && element.attr(key).toLowerCase().endsWith(value); // value is lower case
+ return element.hasAttr(key)
+ && element.attr(key).toLowerCase().endsWith(value); // value
+ // is
+ // lower
+ // case
}
@Override
@@ -220,14 +234,19 @@ public abstract class Evaluator {
/**
* Evaluator for attribute name/value matching (value containing)
*/
- public static final class AttributeWithValueContaining extends AttributeKeyPair {
+ public static final class AttributeWithValueContaining extends
+ AttributeKeyPair {
public AttributeWithValueContaining(String key, String value) {
super(key, value);
}
@Override
public boolean matches(Element root, Element element) {
- return element.hasAttr(key) && element.attr(key).toLowerCase().contains(value); // value is lower case
+ return element.hasAttr(key)
+ && element.attr(key).toLowerCase().contains(value); // value
+ // is
+ // lower
+ // case
}
@Override
@@ -251,7 +270,8 @@ public abstract class Evaluator {
@Override
public boolean matches(Element root, Element element) {
- return element.hasAttr(key) && pattern.matcher(element.attr(key)).find();
+ return element.hasAttr(key)
+ && pattern.matcher(element.attr(key)).find();
}
@Override
@@ -355,7 +375,7 @@ public abstract class Evaluator {
/**
* Abstract evaluator for sibling index matching
- *
+ *
* @author ant
*/
public abstract static class IndexEvaluator extends Evaluator {
diff --git a/server/src/org/jsoup/select/NodeTraversor.java b/server/src/org/jsoup/select/NodeTraversor.java
index 9bb081e56c..f94a7762fc 100644
--- a/server/src/org/jsoup/select/NodeTraversor.java
+++ b/server/src/org/jsoup/select/NodeTraversor.java
@@ -3,16 +3,21 @@ package org.jsoup.select;
import org.jsoup.nodes.Node;
/**
- * Depth-first node traversor. Use to iterate through all nodes under and including the specified root node.
+ * Depth-first node traversor. Use to iterate through all nodes under and
+ * including the specified root node.
* <p/>
- * This implementation does not use recursion, so a deep DOM does not risk blowing the stack.
+ * This implementation does not use recursion, so a deep DOM does not risk
+ * blowing the stack.
*/
public class NodeTraversor {
private NodeVisitor visitor;
/**
* Create a new traversor.
- * @param visitor a class implementing the {@link NodeVisitor} interface, to be called when visiting each node.
+ *
+ * @param visitor
+ * a class implementing the {@link NodeVisitor} interface, to be
+ * called when visiting each node.
*/
public NodeTraversor(NodeVisitor visitor) {
this.visitor = visitor;
@@ -20,12 +25,14 @@ public class NodeTraversor {
/**
* Start a depth-first traverse of the root and all of its descendants.
- * @param root the root node point to traverse.
+ *
+ * @param root
+ * the root node point to traverse.
*/
public void traverse(Node root) {
Node node = root;
int depth = 0;
-
+
while (node != null) {
visitor.head(node, depth);
if (node.childNodes().size() > 0) {
@@ -38,8 +45,9 @@ public class NodeTraversor {
depth--;
}
visitor.tail(node, depth);
- if (node == root)
+ if (node == root) {
break;
+ }
node = node.nextSibling();
}
}
diff --git a/server/src/org/jsoup/select/NodeVisitor.java b/server/src/org/jsoup/select/NodeVisitor.java
index 20112e8d29..9e827d6c55 100644
--- a/server/src/org/jsoup/select/NodeVisitor.java
+++ b/server/src/org/jsoup/select/NodeVisitor.java
@@ -3,28 +3,37 @@ package org.jsoup.select;
import org.jsoup.nodes.Node;
/**
- * Node visitor interface. Provide an implementing class to {@link NodeTraversor} to iterate through nodes.
+ * Node visitor interface. Provide an implementing class to
+ * {@link NodeTraversor} to iterate through nodes.
* <p/>
- * This interface provides two methods, {@code head} and {@code tail}. The head method is called when the node is first
- * seen, and the tail method when all of the node's children have been visited. As an example, head can be used to
+ * This interface provides two methods, {@code head} and {@code tail}. The head
+ * method is called when the node is first seen, and the tail method when all of
+ * the node's children have been visited. As an example, head can be used to
* create a start tag for a node, and tail to create the end tag.
*/
public interface NodeVisitor {
/**
* Callback for when a node is first visited.
- *
- * @param node the node being visited.
- * @param depth the depth of the node, relative to the root node. E.g., the root node has depth 0, and a child node
- * of that will have depth 1.
+ *
+ * @param node
+ * the node being visited.
+ * @param depth
+ * the depth of the node, relative to the root node. E.g., the
+ * root node has depth 0, and a child node of that will have
+ * depth 1.
*/
public void head(Node node, int depth);
/**
- * Callback for when a node is last visited, after all of its descendants have been visited.
- *
- * @param node the node being visited.
- * @param depth the depth of the node, relative to the root node. E.g., the root node has depth 0, and a child node
- * of that will have depth 1.
+ * Callback for when a node is last visited, after all of its descendants
+ * have been visited.
+ *
+ * @param node
+ * the node being visited.
+ * @param depth
+ * the depth of the node, relative to the root node. E.g., the
+ * root node has depth 0, and a child node of that will have
+ * depth 1.
*/
public void tail(Node node, int depth);
}
diff --git a/server/src/org/jsoup/select/QueryParser.java b/server/src/org/jsoup/select/QueryParser.java
index d3cc36f91c..7a04899d82 100644
--- a/server/src/org/jsoup/select/QueryParser.java
+++ b/server/src/org/jsoup/select/QueryParser.java
@@ -12,7 +12,7 @@ import org.jsoup.parser.TokenQueue;
* Parses a CSS selector into an Evaluator tree.
*/
class QueryParser {
- private final static String[] combinators = {",", ">", "+", "~", " "};
+ private final static String[] combinators = { ",", ">", "+", "~", " " };
private TokenQueue tq;
private String query;
@@ -20,16 +20,20 @@ class QueryParser {
/**
* Create a new QueryParser.
- * @param query CSS query
+ *
+ * @param query
+ * CSS query
*/
private QueryParser(String query) {
this.query = query;
- this.tq = new TokenQueue(query);
+ tq = new TokenQueue(query);
}
/**
* Parse a CSS query into an Evaluator.
- * @param query CSS query
+ *
+ * @param query
+ * CSS query
* @return Evaluator
*/
public static Evaluator parse(String query) {
@@ -39,12 +43,14 @@ class QueryParser {
/**
* Parse the query
+ *
* @return Evaluator
*/
Evaluator parse() {
tq.consumeWhitespace();
- if (tq.matchesAny(combinators)) { // if starts with a combinator, use root as elements
+ if (tq.matchesAny(combinators)) { // if starts with a combinator, use
+ // root as elements
evals.add(new StructuralEvaluator.Root());
combinator(tq.consume());
} else {
@@ -64,8 +70,9 @@ class QueryParser {
}
}
- if (evals.size() == 1)
+ if (evals.size() == 1) {
return evals.get(0);
+ }
return new CombiningEvaluator.And(evals);
}
@@ -75,33 +82,41 @@ class QueryParser {
String subQuery = consumeSubQuery(); // support multi > childs
Evaluator rootEval; // the new topmost evaluator
- Evaluator currentEval; // the evaluator the new eval will be combined to. could be root, or rightmost or.
- Evaluator newEval = parse(subQuery); // the evaluator to add into target evaluator
+ Evaluator currentEval; // the evaluator the new eval will be combined
+ // to. could be root, or rightmost or.
+ Evaluator newEval = parse(subQuery); // the evaluator to add into target
+ // evaluator
boolean replaceRightMost = false;
if (evals.size() == 1) {
rootEval = currentEval = evals.get(0);
// make sure OR (,) has precedence:
if (rootEval instanceof CombiningEvaluator.Or && combinator != ',') {
- currentEval = ((CombiningEvaluator.Or) currentEval).rightMostEvaluator();
+ currentEval = ((CombiningEvaluator.Or) currentEval)
+ .rightMostEvaluator();
replaceRightMost = true;
}
- }
- else {
+ } else {
rootEval = currentEval = new CombiningEvaluator.And(evals);
}
evals.clear();
- // for most combinators: change the current eval into an AND of the current eval and the new eval
- if (combinator == '>')
- currentEval = new CombiningEvaluator.And(newEval, new StructuralEvaluator.ImmediateParent(currentEval));
- else if (combinator == ' ')
- currentEval = new CombiningEvaluator.And(newEval, new StructuralEvaluator.Parent(currentEval));
- else if (combinator == '+')
- currentEval = new CombiningEvaluator.And(newEval, new StructuralEvaluator.ImmediatePreviousSibling(currentEval));
- else if (combinator == '~')
- currentEval = new CombiningEvaluator.And(newEval, new StructuralEvaluator.PreviousSibling(currentEval));
- else if (combinator == ',') { // group or.
+ // for most combinators: change the current eval into an AND of the
+ // current eval and the new eval
+ if (combinator == '>') {
+ currentEval = new CombiningEvaluator.And(newEval,
+ new StructuralEvaluator.ImmediateParent(currentEval));
+ } else if (combinator == ' ') {
+ currentEval = new CombiningEvaluator.And(newEval,
+ new StructuralEvaluator.Parent(currentEval));
+ } else if (combinator == '+') {
+ currentEval = new CombiningEvaluator.And(newEval,
+ new StructuralEvaluator.ImmediatePreviousSibling(
+ currentEval));
+ } else if (combinator == '~') {
+ currentEval = new CombiningEvaluator.And(newEval,
+ new StructuralEvaluator.PreviousSibling(currentEval));
+ } else if (combinator == ',') { // group or.
CombiningEvaluator.Or or;
if (currentEval instanceof CombiningEvaluator.Or) {
or = (CombiningEvaluator.Or) currentEval;
@@ -112,62 +127,70 @@ class QueryParser {
or.add(newEval);
}
currentEval = or;
+ } else {
+ throw new Selector.SelectorParseException("Unknown combinator: "
+ + combinator);
}
- else
- throw new Selector.SelectorParseException("Unknown combinator: " + combinator);
- if (replaceRightMost)
- ((CombiningEvaluator.Or) rootEval).replaceRightMostEvaluator(currentEval);
- else rootEval = currentEval;
+ if (replaceRightMost) {
+ ((CombiningEvaluator.Or) rootEval)
+ .replaceRightMostEvaluator(currentEval);
+ } else {
+ rootEval = currentEval;
+ }
evals.add(rootEval);
}
private String consumeSubQuery() {
StringBuilder sq = new StringBuilder();
while (!tq.isEmpty()) {
- if (tq.matches("("))
+ if (tq.matches("(")) {
sq.append("(").append(tq.chompBalanced('(', ')')).append(")");
- else if (tq.matches("["))
+ } else if (tq.matches("[")) {
sq.append("[").append(tq.chompBalanced('[', ']')).append("]");
- else if (tq.matchesAny(combinators))
+ } else if (tq.matchesAny(combinators)) {
break;
- else
+ } else {
sq.append(tq.consume());
+ }
}
return sq.toString();
}
private void findElements() {
- if (tq.matchChomp("#"))
+ if (tq.matchChomp("#")) {
byId();
- else if (tq.matchChomp("."))
+ } else if (tq.matchChomp(".")) {
byClass();
- else if (tq.matchesWord())
+ } else if (tq.matchesWord()) {
byTag();
- else if (tq.matches("["))
+ } else if (tq.matches("[")) {
byAttribute();
- else if (tq.matchChomp("*"))
+ } else if (tq.matchChomp("*")) {
allElements();
- else if (tq.matchChomp(":lt("))
+ } else if (tq.matchChomp(":lt(")) {
indexLessThan();
- else if (tq.matchChomp(":gt("))
+ } else if (tq.matchChomp(":gt(")) {
indexGreaterThan();
- else if (tq.matchChomp(":eq("))
+ } else if (tq.matchChomp(":eq(")) {
indexEquals();
- else if (tq.matches(":has("))
+ } else if (tq.matches(":has(")) {
has();
- else if (tq.matches(":contains("))
+ } else if (tq.matches(":contains(")) {
contains(false);
- else if (tq.matches(":containsOwn("))
+ } else if (tq.matches(":containsOwn(")) {
contains(true);
- else if (tq.matches(":matches("))
+ } else if (tq.matches(":matches(")) {
matches(false);
- else if (tq.matches(":matchesOwn("))
+ } else if (tq.matches(":matchesOwn(")) {
matches(true);
- else if (tq.matches(":not("))
+ } else if (tq.matches(":not(")) {
not();
- else // unhandled
- throw new Selector.SelectorParseException("Could not parse query '%s': unexpected token at '%s'", query, tq.remainder());
+ } else {
+ throw new Selector.SelectorParseException(
+ "Could not parse query '%s': unexpected token at '%s'",
+ query, tq.remainder());
+ }
}
@@ -187,44 +210,58 @@ class QueryParser {
String tagName = tq.consumeElementSelector();
Validate.notEmpty(tagName);
- // namespaces: if element name is "abc:def", selector must be "abc|def", so flip:
- if (tagName.contains("|"))
+ // namespaces: if element name is "abc:def", selector must be "abc|def",
+ // so flip:
+ if (tagName.contains("|")) {
tagName = tagName.replace("|", ":");
+ }
evals.add(new Evaluator.Tag(tagName.trim().toLowerCase()));
}
private void byAttribute() {
- TokenQueue cq = new TokenQueue(tq.chompBalanced('[', ']')); // content queue
- String key = cq.consumeToAny("=", "!=", "^=", "$=", "*=", "~="); // eq, not, start, end, contain, match, (no val)
+ TokenQueue cq = new TokenQueue(tq.chompBalanced('[', ']')); // content
+ // queue
+ String key = cq.consumeToAny("=", "!=", "^=", "$=", "*=", "~="); // eq,
+ // not,
+ // start,
+ // end,
+ // contain,
+ // match,
+ // (no
+ // val)
Validate.notEmpty(key);
cq.consumeWhitespace();
if (cq.isEmpty()) {
- if (key.startsWith("^"))
+ if (key.startsWith("^")) {
evals.add(new Evaluator.AttributeStarting(key.substring(1)));
- else
+ } else {
evals.add(new Evaluator.Attribute(key));
+ }
} else {
- if (cq.matchChomp("="))
+ if (cq.matchChomp("=")) {
evals.add(new Evaluator.AttributeWithValue(key, cq.remainder()));
-
- else if (cq.matchChomp("!="))
- evals.add(new Evaluator.AttributeWithValueNot(key, cq.remainder()));
-
- else if (cq.matchChomp("^="))
- evals.add(new Evaluator.AttributeWithValueStarting(key, cq.remainder()));
-
- else if (cq.matchChomp("$="))
- evals.add(new Evaluator.AttributeWithValueEnding(key, cq.remainder()));
-
- else if (cq.matchChomp("*="))
- evals.add(new Evaluator.AttributeWithValueContaining(key, cq.remainder()));
-
- else if (cq.matchChomp("~="))
- evals.add(new Evaluator.AttributeWithValueMatching(key, Pattern.compile(cq.remainder())));
- else
- throw new Selector.SelectorParseException("Could not parse attribute query '%s': unexpected token at '%s'", query, cq.remainder());
+ } else if (cq.matchChomp("!=")) {
+ evals.add(new Evaluator.AttributeWithValueNot(key, cq
+ .remainder()));
+ } else if (cq.matchChomp("^=")) {
+ evals.add(new Evaluator.AttributeWithValueStarting(key, cq
+ .remainder()));
+ } else if (cq.matchChomp("$=")) {
+ evals.add(new Evaluator.AttributeWithValueEnding(key, cq
+ .remainder()));
+ } else if (cq.matchChomp("*=")) {
+ evals.add(new Evaluator.AttributeWithValueContaining(key, cq
+ .remainder()));
+ } else if (cq.matchChomp("~=")) {
+ evals.add(new Evaluator.AttributeWithValueMatching(key, Pattern
+ .compile(cq.remainder())));
+ } else {
+ throw new Selector.SelectorParseException(
+ "Could not parse attribute query '%s': unexpected token at '%s'",
+ query, cq.remainder());
+ }
}
}
@@ -264,29 +301,33 @@ class QueryParser {
tq.consume(own ? ":containsOwn" : ":contains");
String searchText = TokenQueue.unescape(tq.chompBalanced('(', ')'));
Validate.notEmpty(searchText, ":contains(text) query must not be empty");
- if (own)
+ if (own) {
evals.add(new Evaluator.ContainsOwnText(searchText));
- else
+ } else {
evals.add(new Evaluator.ContainsText(searchText));
+ }
}
// :matches(regex), matchesOwn(regex)
private void matches(boolean own) {
tq.consume(own ? ":matchesOwn" : ":matches");
- String regex = tq.chompBalanced('(', ')'); // don't unescape, as regex bits will be escaped
+ String regex = tq.chompBalanced('(', ')'); // don't unescape, as regex
+ // bits will be escaped
Validate.notEmpty(regex, ":matches(regex) query must not be empty");
- if (own)
+ if (own) {
evals.add(new Evaluator.MatchesOwn(Pattern.compile(regex)));
- else
+ } else {
evals.add(new Evaluator.Matches(Pattern.compile(regex)));
+ }
}
// :not(selector)
private void not() {
tq.consume(":not");
String subQuery = tq.chompBalanced('(', ')');
- Validate.notEmpty(subQuery, ":not(selector) subselect must not be empty");
+ Validate.notEmpty(subQuery,
+ ":not(selector) subselect must not be empty");
evals.add(new StructuralEvaluator.Not(parse(subQuery)));
}
diff --git a/server/src/org/jsoup/select/Selector.java b/server/src/org/jsoup/select/Selector.java
index 8fc6286798..d5ea6f2dc9 100644
--- a/server/src/org/jsoup/select/Selector.java
+++ b/server/src/org/jsoup/select/Selector.java
@@ -1,55 +1,201 @@
package org.jsoup.select;
-import org.jsoup.helper.Validate;
-import org.jsoup.nodes.Element;
-
import java.util.Collection;
import java.util.LinkedHashSet;
+import org.jsoup.helper.Validate;
+import org.jsoup.nodes.Element;
+
/**
* CSS-like element selector, that finds elements matching a query.
* <p/>
* <h2>Selector syntax</h2>
- * A selector is a chain of simple selectors, separated by combinators. Selectors are case insensitive (including against
- * elements, attributes, and attribute values).
+ * A selector is a chain of simple selectors, separated by combinators.
+ * Selectors are case insensitive (including against elements, attributes, and
+ * attribute values).
* <p/>
- * The universal selector (*) is implicit when no element selector is supplied (i.e. {@code *.header} and {@code .header}
- * is equivalent).
+ * The universal selector (*) is implicit when no element selector is supplied
+ * (i.e. {@code *.header} and {@code .header} is equivalent).
* <p/>
* <table>
- * <tr><th>Pattern</th><th>Matches</th><th>Example</th></tr>
- * <tr><td><code>*</code></td><td>any element</td><td><code>*</code></td></tr>
- * <tr><td><code>tag</code></td><td>elements with the given tag name</td><td><code>div</code></td></tr>
- * <tr><td><code>ns|E</code></td><td>elements of type E in the namespace <i>ns</i></td><td><code>fb|name</code> finds <code>&lt;fb:name></code> elements</td></tr>
- * <tr><td><code>#id</code></td><td>elements with attribute ID of "id"</td><td><code>div#wrap</code>, <code>#logo</code></td></tr>
- * <tr><td><code>.class</code></td><td>elements with a class name of "class"</td><td><code>div.left</code>, <code>.result</code></td></tr>
- * <tr><td><code>[attr]</code></td><td>elements with an attribute named "attr" (with any value)</td><td><code>a[href]</code>, <code>[title]</code></td></tr>
- * <tr><td><code>[^attrPrefix]</code></td><td>elements with an attribute name starting with "attrPrefix". Use to find elements with HTML5 datasets</td><td><code>[^data-]</code>, <code>div[^data-]</code></td></tr>
- * <tr><td><code>[attr=val]</code></td><td>elements with an attribute named "attr", and value equal to "val"</td><td><code>img[width=500]</code>, <code>a[rel=nofollow]</code></td></tr>
- * <tr><td><code>[attr^=valPrefix]</code></td><td>elements with an attribute named "attr", and value starting with "valPrefix"</td><td><code>a[href^=http:]</code></code></td></tr>
- * <tr><td><code>[attr$=valSuffix]</code></td><td>elements with an attribute named "attr", and value ending with "valSuffix"</td><td><code>img[src$=.png]</code></td></tr>
- * <tr><td><code>[attr*=valContaining]</code></td><td>elements with an attribute named "attr", and value containing "valContaining"</td><td><code>a[href*=/search/]</code></td></tr>
- * <tr><td><code>[attr~=<em>regex</em>]</code></td><td>elements with an attribute named "attr", and value matching the regular expression</td><td><code>img[src~=(?i)\\.(png|jpe?g)]</code></td></tr>
- * <tr><td></td><td>The above may be combined in any order</td><td><code>div.header[title]</code></td></tr>
- * <tr><td><td colspan="3"><h3>Combinators</h3></td></tr>
- * <tr><td><code>E F</code></td><td>an F element descended from an E element</td><td><code>div a</code>, <code>.logo h1</code></td></tr>
- * <tr><td><code>E > F</code></td><td>an F direct child of E</td><td><code>ol > li</code></td></tr>
- * <tr><td><code>E + F</code></td><td>an F element immediately preceded by sibling E</td><td><code>li + li</code>, <code>div.head + div</code></td></tr>
- * <tr><td><code>E ~ F</code></td><td>an F element preceded by sibling E</td><td><code>h1 ~ p</code></td></tr>
- * <tr><td><code>E, F, G</code></td><td>all matching elements E, F, or G</td><td><code>a[href], div, h3</code></td></tr>
- * <tr><td><td colspan="3"><h3>Pseudo selectors</h3></td></tr>
- * <tr><td><code>:lt(<em>n</em>)</code></td><td>elements whose sibling index is less than <em>n</em></td><td><code>td:lt(3)</code> finds the first 2 cells of each row</td></tr>
- * <tr><td><code>:gt(<em>n</em>)</code></td><td>elements whose sibling index is greater than <em>n</em></td><td><code>td:gt(1)</code> finds cells after skipping the first two</td></tr>
- * <tr><td><code>:eq(<em>n</em>)</code></td><td>elements whose sibling index is equal to <em>n</em></td><td><code>td:eq(0)</code> finds the first cell of each row</td></tr>
- * <tr><td><code>:has(<em>selector</em>)</code></td><td>elements that contains at least one element matching the <em>selector</em></td><td><code>div:has(p)</code> finds divs that contain p elements </td></tr>
- * <tr><td><code>:not(<em>selector</em>)</code></td><td>elements that do not match the <em>selector</em>. See also {@link Elements#not(String)}</td><td><code>div:not(.logo)</code> finds all divs that do not have the "logo" class.<br /><code>div:not(:has(div))</code> finds divs that do not contain divs.</code></td></tr>
- * <tr><td><code>:contains(<em>text</em>)</code></td><td>elements that contains the specified text. The search is case insensitive. The text may appear in the found element, or any of its descendants.</td><td><code>p:contains(jsoup)</code> finds p elements containing the text "jsoup".</td></tr>
- * <tr><td><code>:matches(<em>regex</em>)</code></td><td>elements whose text matches the specified regular expression. The text may appear in the found element, or any of its descendants.</td><td><code>td:matches(\\d+)</code> finds table cells containing digits. <code>div:matches((?i)login)</code> finds divs containing the text, case insensitively.</td></tr>
- * <tr><td><code>:containsOwn(<em>text</em>)</code></td><td>elements that directly contains the specified text. The search is case insensitive. The text must appear in the found element, not any of its descendants.</td><td><code>p:containsOwn(jsoup)</code> finds p elements with own text "jsoup".</td></tr>
- * <tr><td><code>:matchesOwn(<em>regex</em>)</code></td><td>elements whose own text matches the specified regular expression. The text must appear in the found element, not any of its descendants.</td><td><code>td:matchesOwn(\\d+)</code> finds table cells directly containing digits. <code>div:matchesOwn((?i)login)</code> finds divs containing the text, case insensitively.</td></tr>
- * <tr><td></td><td>The above may be combined in any order and with other selectors</td><td><code>.light:contains(name):eq(0)</code></td></tr>
+ * <tr>
+ * <th>Pattern</th>
+ * <th>Matches</th>
+ * <th>Example</th>
+ * </tr>
+ * <tr>
+ * <td><code>*</code></td>
+ * <td>any element</td>
+ * <td><code>*</code></td>
+ * </tr>
+ * <tr>
+ * <td><code>tag</code></td>
+ * <td>elements with the given tag name</td>
+ * <td><code>div</code></td>
+ * </tr>
+ * <tr>
+ * <td><code>ns|E</code></td>
+ * <td>elements of type E in the namespace <i>ns</i></td>
+ * <td><code>fb|name</code> finds <code>&lt;fb:name></code> elements</td>
+ * </tr>
+ * <tr>
+ * <td><code>#id</code></td>
+ * <td>elements with attribute ID of "id"</td>
+ * <td><code>div#wrap</code>, <code>#logo</code></td>
+ * </tr>
+ * <tr>
+ * <td><code>.class</code></td>
+ * <td>elements with a class name of "class"</td>
+ * <td><code>div.left</code>, <code>.result</code></td>
+ * </tr>
+ * <tr>
+ * <td><code>[attr]</code></td>
+ * <td>elements with an attribute named "attr" (with any value)</td>
+ * <td><code>a[href]</code>, <code>[title]</code></td>
+ * </tr>
+ * <tr>
+ * <td><code>[^attrPrefix]</code></td>
+ * <td>elements with an attribute name starting with "attrPrefix". Use to find
+ * elements with HTML5 datasets</td>
+ * <td><code>[^data-]</code>, <code>div[^data-]</code></td>
+ * </tr>
+ * <tr>
+ * <td><code>[attr=val]</code></td>
+ * <td>elements with an attribute named "attr", and value equal to "val"</td>
+ * <td><code>img[width=500]</code>, <code>a[rel=nofollow]</code></td>
+ * </tr>
+ * <tr>
+ * <td><code>[attr^=valPrefix]</code></td>
+ * <td>elements with an attribute named "attr", and value starting with
+ * "valPrefix"</td>
+ * <td><code>a[href^=http:]</code></code></td>
+ * </tr>
+ * <tr>
+ * <td><code>[attr$=valSuffix]</code></td>
+ * <td>elements with an attribute named "attr", and value ending with
+ * "valSuffix"</td>
+ * <td><code>img[src$=.png]</code></td>
+ * </tr>
+ * <tr>
+ * <td><code>[attr*=valContaining]</code></td>
+ * <td>elements with an attribute named "attr", and value containing
+ * "valContaining"</td>
+ * <td><code>a[href*=/search/]</code></td>
+ * </tr>
+ * <tr>
+ * <td><code>[attr~=<em>regex</em>]</code></td>
+ * <td>elements with an attribute named "attr", and value matching the regular
+ * expression</td>
+ * <td><code>img[src~=(?i)\\.(png|jpe?g)]</code></td>
+ * </tr>
+ * <tr>
+ * <td></td>
+ * <td>The above may be combined in any order</td>
+ * <td><code>div.header[title]</code></td>
+ * </tr>
+ * <tr>
+ * <td>
+ * <td colspan="3">
+ * <h3>Combinators</h3></td>
+ * </tr>
+ * <tr>
+ * <td><code>E F</code></td>
+ * <td>an F element descended from an E element</td>
+ * <td><code>div a</code>, <code>.logo h1</code></td>
+ * </tr>
+ * <tr>
+ * <td><code>E > F</code></td>
+ * <td>an F direct child of E</td>
+ * <td><code>ol > li</code></td>
+ * </tr>
+ * <tr>
+ * <td><code>E + F</code></td>
+ * <td>an F element immediately preceded by sibling E</td>
+ * <td><code>li + li</code>, <code>div.head + div</code></td>
+ * </tr>
+ * <tr>
+ * <td><code>E ~ F</code></td>
+ * <td>an F element preceded by sibling E</td>
+ * <td><code>h1 ~ p</code></td>
+ * </tr>
+ * <tr>
+ * <td><code>E, F, G</code></td>
+ * <td>all matching elements E, F, or G</td>
+ * <td><code>a[href], div, h3</code></td>
+ * </tr>
+ * <tr>
+ * <td>
+ * <td colspan="3">
+ * <h3>Pseudo selectors</h3></td>
+ * </tr>
+ * <tr>
+ * <td><code>:lt(<em>n</em>)</code></td>
+ * <td>elements whose sibling index is less than <em>n</em></td>
+ * <td><code>td:lt(3)</code> finds the first 2 cells of each row</td>
+ * </tr>
+ * <tr>
+ * <td><code>:gt(<em>n</em>)</code></td>
+ * <td>elements whose sibling index is greater than <em>n</em></td>
+ * <td><code>td:gt(1)</code> finds cells after skipping the first two</td>
+ * </tr>
+ * <tr>
+ * <td><code>:eq(<em>n</em>)</code></td>
+ * <td>elements whose sibling index is equal to <em>n</em></td>
+ * <td><code>td:eq(0)</code> finds the first cell of each row</td>
+ * </tr>
+ * <tr>
+ * <td><code>:has(<em>selector</em>)</code></td>
+ * <td>elements that contains at least one element matching the
+ * <em>selector</em></td>
+ * <td><code>div:has(p)</code> finds divs that contain p elements</td>
+ * </tr>
+ * <tr>
+ * <td><code>:not(<em>selector</em>)</code></td>
+ * <td>elements that do not match the <em>selector</em>. See also
+ * {@link Elements#not(String)}</td>
+ * <td><code>div:not(.logo)</code> finds all divs that do not have the "logo"
+ * class.<br />
+ * <code>div:not(:has(div))</code> finds divs that do not contain divs.</code></td>
+ * </tr>
+ * <tr>
+ * <td><code>:contains(<em>text</em>)</code></td>
+ * <td>elements that contains the specified text. The search is case
+ * insensitive. The text may appear in the found element, or any of its
+ * descendants.</td>
+ * <td><code>p:contains(jsoup)</code> finds p elements containing the text
+ * "jsoup".</td>
+ * </tr>
+ * <tr>
+ * <td><code>:matches(<em>regex</em>)</code></td>
+ * <td>elements whose text matches the specified regular expression. The text
+ * may appear in the found element, or any of its descendants.</td>
+ * <td><code>td:matches(\\d+)</code> finds table cells containing digits.
+ * <code>div:matches((?i)login)</code> finds divs containing the text, case
+ * insensitively.</td>
+ * </tr>
+ * <tr>
+ * <td><code>:containsOwn(<em>text</em>)</code></td>
+ * <td>elements that directly contains the specified text. The search is case
+ * insensitive. The text must appear in the found element, not any of its
+ * descendants.</td>
+ * <td><code>p:containsOwn(jsoup)</code> finds p elements with own text "jsoup".
+ * </td>
+ * </tr>
+ * <tr>
+ * <td><code>:matchesOwn(<em>regex</em>)</code></td>
+ * <td>elements whose own text matches the specified regular expression. The
+ * text must appear in the found element, not any of its descendants.</td>
+ * <td><code>td:matchesOwn(\\d+)</code> finds table cells directly containing
+ * digits. <code>div:matchesOwn((?i)login)</code> finds divs containing the
+ * text, case insensitively.</td>
+ * </tr>
+ * <tr>
+ * <td></td>
+ * <td>The above may be combined in any order and with other selectors</td>
+ * <td><code>.light:contains(name):eq(0)</code></td>
+ * </tr>
* </table>
- *
+ *
* @author Jonathan Hedley, jonathan@hedley.net
* @see Element#select(String)
*/
@@ -63,16 +209,18 @@ public class Selector {
Validate.notEmpty(query);
Validate.notNull(root);
- this.evaluator = QueryParser.parse(query);
+ evaluator = QueryParser.parse(query);
this.root = root;
}
/**
* Find elements matching selector.
- *
- * @param query CSS selector
- * @param root root element to descend into
+ *
+ * @param query
+ * CSS selector
+ * @param root
+ * root element to descend into
* @return matching elements, empty if not
*/
public static Elements select(String query, Element root) {
@@ -81,9 +229,11 @@ public class Selector {
/**
* Find elements matching selector.
- *
- * @param query CSS selector
- * @param roots root elements to descend into
+ *
+ * @param query
+ * CSS selector
+ * @param roots
+ * root elements to descend into
* @return matching elements, empty if not
*/
public static Elements select(String query, Iterable<Element> roots) {
@@ -102,7 +252,8 @@ public class Selector {
}
// exclude set. package open so that Elements can implement .not() selector.
- static Elements filterOut(Collection<Element> elements, Collection<Element> outs) {
+ static Elements filterOut(Collection<Element> elements,
+ Collection<Element> outs) {
Elements output = new Elements();
for (Element el : elements) {
boolean found = false;
@@ -112,8 +263,9 @@ public class Selector {
break;
}
}
- if (!found)
+ if (!found) {
output.add(el);
+ }
}
return output;
}
diff --git a/server/src/org/jsoup/select/StructuralEvaluator.java b/server/src/org/jsoup/select/StructuralEvaluator.java
index 69e8a62e58..dea2413fb8 100644
--- a/server/src/org/jsoup/select/StructuralEvaluator.java
+++ b/server/src/org/jsoup/select/StructuralEvaluator.java
@@ -9,6 +9,7 @@ abstract class StructuralEvaluator extends Evaluator {
Evaluator evaluator;
static class Root extends Evaluator {
+ @Override
public boolean matches(Element root, Element element) {
return root == element;
}
@@ -19,14 +20,17 @@ abstract class StructuralEvaluator extends Evaluator {
this.evaluator = evaluator;
}
+ @Override
public boolean matches(Element root, Element element) {
for (Element e : element.getAllElements()) {
- if (e != element && evaluator.matches(root, e))
+ if (e != element && evaluator.matches(root, e)) {
return true;
+ }
}
return false;
}
+ @Override
public String toString() {
return String.format(":has(%s)", evaluator);
}
@@ -37,10 +41,12 @@ abstract class StructuralEvaluator extends Evaluator {
this.evaluator = evaluator;
}
+ @Override
public boolean matches(Element root, Element node) {
return !evaluator.matches(root, node);
}
+ @Override
public String toString() {
return String.format(":not%s", evaluator);
}
@@ -51,19 +57,23 @@ abstract class StructuralEvaluator extends Evaluator {
this.evaluator = evaluator;
}
+ @Override
public boolean matches(Element root, Element element) {
- if (root == element)
+ if (root == element) {
return false;
+ }
Element parent = element.parent();
while (parent != root) {
- if (evaluator.matches(root, parent))
+ if (evaluator.matches(root, parent)) {
return true;
+ }
parent = parent.parent();
}
return false;
}
+ @Override
public String toString() {
return String.format(":parent%s", evaluator);
}
@@ -74,14 +84,17 @@ abstract class StructuralEvaluator extends Evaluator {
this.evaluator = evaluator;
}
+ @Override
public boolean matches(Element root, Element element) {
- if (root == element)
+ if (root == element) {
return false;
+ }
Element parent = element.parent();
return parent != null && evaluator.matches(root, parent);
}
+ @Override
public String toString() {
return String.format(":ImmediateParent%s", evaluator);
}
@@ -92,21 +105,25 @@ abstract class StructuralEvaluator extends Evaluator {
this.evaluator = evaluator;
}
+ @Override
public boolean matches(Element root, Element element) {
- if (root == element)
+ if (root == element) {
return false;
+ }
Element prev = element.previousElementSibling();
while (prev != null) {
- if (evaluator.matches(root, prev))
+ if (evaluator.matches(root, prev)) {
return true;
+ }
prev = prev.previousElementSibling();
}
return false;
}
+ @Override
public String toString() {
return String.format(":prev*%s", evaluator);
}
@@ -117,14 +134,17 @@ abstract class StructuralEvaluator extends Evaluator {
this.evaluator = evaluator;
}
+ @Override
public boolean matches(Element root, Element element) {
- if (root == element)
+ if (root == element) {
return false;
+ }
Element prev = element.previousElementSibling();
return prev != null && evaluator.matches(root, prev);
}
+ @Override
public String toString() {
return String.format(":prev%s", evaluator);
}