diff options
Diffstat (limited to 'server/src/org/jsoup/parser/Parser.java')
-rw-r--r-- | server/src/org/jsoup/parser/Parser.java | 123 |
1 files changed, 82 insertions, 41 deletions
diff --git a/server/src/org/jsoup/parser/Parser.java b/server/src/org/jsoup/parser/Parser.java index 2236219c06..a1f6fd5184 100644 --- a/server/src/org/jsoup/parser/Parser.java +++ b/server/src/org/jsoup/parser/Parser.java @@ -1,32 +1,36 @@ package org.jsoup.parser; +import java.util.List; + import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.nodes.Node; -import java.util.List; - /** - * Parses HTML into a {@link org.jsoup.nodes.Document}. Generally best to use one of the more convenient parse methods - * in {@link org.jsoup.Jsoup}. + * Parses HTML into a {@link org.jsoup.nodes.Document}. Generally best to use + * one of the more convenient parse methods in {@link org.jsoup.Jsoup}. */ public class Parser { - private static final int DEFAULT_MAX_ERRORS = 0; // by default, error tracking is disabled. - + private static final int DEFAULT_MAX_ERRORS = 0; // by default, error + // tracking is disabled. + private TreeBuilder treeBuilder; private int maxErrors = DEFAULT_MAX_ERRORS; private ParseErrorList errors; /** * Create a new Parser, using the specified TreeBuilder - * @param treeBuilder TreeBuilder to use to parse input into Documents. + * + * @param treeBuilder + * TreeBuilder to use to parse input into Documents. */ public Parser(TreeBuilder treeBuilder) { this.treeBuilder = treeBuilder; } - + public Document parseInput(String html, String baseUri) { - errors = isTrackErrors() ? ParseErrorList.tracking(maxErrors) : ParseErrorList.noTracking(); + errors = isTrackErrors() ? ParseErrorList.tracking(maxErrors) + : ParseErrorList.noTracking(); Document doc = treeBuilder.parse(html, baseUri, errors); return doc; } @@ -34,6 +38,7 @@ public class Parser { // gets & sets /** * Get the TreeBuilder currently in use. + * * @return current TreeBuilder. */ public TreeBuilder getTreeBuilder() { @@ -42,7 +47,9 @@ public class Parser { /** * Update the TreeBuilder used when parsing content. - * @param treeBuilder current TreeBuilder + * + * @param treeBuilder + * current TreeBuilder * @return this, for chaining */ public Parser setTreeBuilder(TreeBuilder treeBuilder) { @@ -52,6 +59,7 @@ public class Parser { /** * Check if parse error tracking is enabled. + * * @return current track error state. */ public boolean isTrackErrors() { @@ -60,7 +68,9 @@ public class Parser { /** * Enable or disable parse error tracking for the next parse. - * @param maxErrors the maximum number of errors to track. Set to 0 to disable. + * + * @param maxErrors + * the maximum number of errors to track. Set to 0 to disable. * @return this, for chaining */ public Parser setTrackErrors(int maxErrors) { @@ -70,7 +80,9 @@ public class Parser { /** * Retrieve the parse errors, if any, from the last parse. - * @return list of parse errors, up to the size of the maximum errors tracked. + * + * @return list of parse errors, up to the size of the maximum errors + * tracked. */ public List<ParseError> getErrors() { return errors; @@ -79,10 +91,13 @@ public class Parser { // static parse functions below /** * Parse HTML into a Document. - * - * @param html HTML to parse - * @param baseUri base URI of document (i.e. original fetch location), for resolving relative URLs. - * + * + * @param html + * HTML to parse + * @param baseUri + * base URI of document (i.e. original fetch location), for + * resolving relative URLs. + * * @return parsed Document */ public static Document parse(String html, String baseUri) { @@ -91,33 +106,49 @@ public class Parser { } /** - * Parse a fragment of HTML into a list of nodes. The context element, if supplied, supplies parsing context. - * - * @param fragmentHtml the fragment of HTML to parse - * @param context (optional) the element that this HTML fragment is being parsed for (i.e. for inner HTML). This - * provides stack context (for implicit element creation). - * @param baseUri base URI of document (i.e. original fetch location), for resolving relative URLs. - * - * @return list of nodes parsed from the input HTML. Note that the context element, if supplied, is not modified. + * Parse a fragment of HTML into a list of nodes. The context element, if + * supplied, supplies parsing context. + * + * @param fragmentHtml + * the fragment of HTML to parse + * @param context + * (optional) the element that this HTML fragment is being parsed + * for (i.e. for inner HTML). This provides stack context (for + * implicit element creation). + * @param baseUri + * base URI of document (i.e. original fetch location), for + * resolving relative URLs. + * + * @return list of nodes parsed from the input HTML. Note that the context + * element, if supplied, is not modified. */ - public static List<Node> parseFragment(String fragmentHtml, Element context, String baseUri) { + public static List<Node> parseFragment(String fragmentHtml, + Element context, String baseUri) { HtmlTreeBuilder treeBuilder = new HtmlTreeBuilder(); - return treeBuilder.parseFragment(fragmentHtml, context, baseUri, ParseErrorList.noTracking()); + return treeBuilder.parseFragment(fragmentHtml, context, baseUri, + ParseErrorList.noTracking()); } /** * Parse a fragment of HTML into the {@code body} of a Document. - * - * @param bodyHtml fragment of HTML - * @param baseUri base URI of document (i.e. original fetch location), for resolving relative URLs. - * + * + * @param bodyHtml + * fragment of HTML + * @param baseUri + * base URI of document (i.e. original fetch location), for + * resolving relative URLs. + * * @return Document, with empty head, and HTML parsed into body */ public static Document parseBodyFragment(String bodyHtml, String baseUri) { Document doc = Document.createShell(baseUri); Element body = doc.body(); List<Node> nodeList = parseFragment(bodyHtml, body, baseUri); - Node[] nodes = nodeList.toArray(new Node[nodeList.size()]); // the node list gets modified when re-parented + Node[] nodes = nodeList.toArray(new Node[nodeList.size()]); // the node + // list gets + // modified + // when + // re-parented for (Node node : nodes) { body.appendChild(node); } @@ -125,21 +156,29 @@ public class Parser { } /** - * @param bodyHtml HTML to parse - * @param baseUri baseUri base URI of document (i.e. original fetch location), for resolving relative URLs. - * + * @param bodyHtml + * HTML to parse + * @param baseUri + * baseUri base URI of document (i.e. original fetch location), + * for resolving relative URLs. + * * @return parsed Document - * @deprecated Use {@link #parseBodyFragment} or {@link #parseFragment} instead. + * @deprecated Use {@link #parseBodyFragment} or {@link #parseFragment} + * instead. */ - public static Document parseBodyFragmentRelaxed(String bodyHtml, String baseUri) { + @Deprecated + public static Document parseBodyFragmentRelaxed(String bodyHtml, + String baseUri) { return parse(bodyHtml, baseUri); } - + // builders /** - * Create a new HTML parser. This parser treats input as HTML5, and enforces the creation of a normalised document, - * based on a knowledge of the semantics of the incoming tags. + * Create a new HTML parser. This parser treats input as HTML5, and enforces + * the creation of a normalised document, based on a knowledge of the + * semantics of the incoming tags. + * * @return a new HTML parser. */ public static Parser htmlParser() { @@ -147,8 +186,10 @@ public class Parser { } /** - * Create a new XML parser. This parser assumes no knowledge of the incoming tags and does not treat it as HTML, - * rather creates a simple tree directly from the input. + * Create a new XML parser. This parser assumes no knowledge of the incoming + * tags and does not treat it as HTML, rather creates a simple tree directly + * from the input. + * * @return a new simple XML parser. */ public static Parser xmlParser() { |