summaryrefslogtreecommitdiffstats
path: root/server/src/org/jsoup/helper
diff options
context:
space:
mode:
Diffstat (limited to 'server/src/org/jsoup/helper')
-rw-r--r--server/src/org/jsoup/helper/DataUtil.java135
-rw-r--r--server/src/org/jsoup/helper/DescendableLinkedList.java82
-rw-r--r--server/src/org/jsoup/helper/HttpConnection.java658
-rw-r--r--server/src/org/jsoup/helper/StringUtil.java140
-rw-r--r--server/src/org/jsoup/helper/Validate.java112
5 files changed, 1127 insertions, 0 deletions
diff --git a/server/src/org/jsoup/helper/DataUtil.java b/server/src/org/jsoup/helper/DataUtil.java
new file mode 100644
index 0000000000..9adfe42153
--- /dev/null
+++ b/server/src/org/jsoup/helper/DataUtil.java
@@ -0,0 +1,135 @@
+package org.jsoup.helper;
+
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.parser.Parser;
+
+import java.io.*;
+import java.nio.ByteBuffer;
+import java.nio.charset.Charset;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * Internal static utilities for handling data.
+ *
+ */
+public class DataUtil {
+ private static final Pattern charsetPattern = Pattern.compile("(?i)\\bcharset=\\s*\"?([^\\s;\"]*)");
+ static final String defaultCharset = "UTF-8"; // used if not found in header or meta charset
+ private static final int bufferSize = 0x20000; // ~130K.
+
+ private DataUtil() {}
+
+ /**
+ * Loads a file to a Document.
+ * @param in file to load
+ * @param charsetName character set of input
+ * @param baseUri base URI of document, to resolve relative links against
+ * @return Document
+ * @throws IOException on IO error
+ */
+ public static Document load(File in, String charsetName, String baseUri) throws IOException {
+ FileInputStream inStream = null;
+ try {
+ inStream = new FileInputStream(in);
+ ByteBuffer byteData = readToByteBuffer(inStream);
+ return parseByteData(byteData, charsetName, baseUri, Parser.htmlParser());
+ } finally {
+ if (inStream != null)
+ inStream.close();
+ }
+ }
+
+ /**
+ * Parses a Document from an input steam.
+ * @param in input stream to parse. You will need to close it.
+ * @param charsetName character set of input
+ * @param baseUri base URI of document, to resolve relative links against
+ * @return Document
+ * @throws IOException on IO error
+ */
+ public static Document load(InputStream in, String charsetName, String baseUri) throws IOException {
+ ByteBuffer byteData = readToByteBuffer(in);
+ return parseByteData(byteData, charsetName, baseUri, Parser.htmlParser());
+ }
+
+ /**
+ * Parses a Document from an input steam, using the provided Parser.
+ * @param in input stream to parse. You will need to close it.
+ * @param charsetName character set of input
+ * @param baseUri base URI of document, to resolve relative links against
+ * @param parser alternate {@link Parser#xmlParser() parser} to use.
+ * @return Document
+ * @throws IOException on IO error
+ */
+ public static Document load(InputStream in, String charsetName, String baseUri, Parser parser) throws IOException {
+ ByteBuffer byteData = readToByteBuffer(in);
+ return parseByteData(byteData, charsetName, baseUri, parser);
+ }
+
+ // reads bytes first into a buffer, then decodes with the appropriate charset. done this way to support
+ // switching the chartset midstream when a meta http-equiv tag defines the charset.
+ static Document parseByteData(ByteBuffer byteData, String charsetName, String baseUri, Parser parser) {
+ String docData;
+ Document doc = null;
+ if (charsetName == null) { // determine from meta. safe parse as UTF-8
+ // look for <meta http-equiv="Content-Type" content="text/html;charset=gb2312"> or HTML5 <meta charset="gb2312">
+ docData = Charset.forName(defaultCharset).decode(byteData).toString();
+ doc = parser.parseInput(docData, baseUri);
+ Element meta = doc.select("meta[http-equiv=content-type], meta[charset]").first();
+ if (meta != null) { // if not found, will keep utf-8 as best attempt
+ String foundCharset = meta.hasAttr("http-equiv") ? getCharsetFromContentType(meta.attr("content")) : meta.attr("charset");
+ if (foundCharset != null && foundCharset.length() != 0 && !foundCharset.equals(defaultCharset)) { // need to re-decode
+ charsetName = foundCharset;
+ byteData.rewind();
+ docData = Charset.forName(foundCharset).decode(byteData).toString();
+ doc = null;
+ }
+ }
+ } else { // specified by content type header (or by user on file load)
+ Validate.notEmpty(charsetName, "Must set charset arg to character set of file to parse. Set to null to attempt to detect from HTML");
+ docData = Charset.forName(charsetName).decode(byteData).toString();
+ }
+ if (doc == null) {
+ // there are times where there is a spurious byte-order-mark at the start of the text. Shouldn't be present
+ // in utf-8. If after decoding, there is a BOM, strip it; otherwise will cause the parser to go straight
+ // into head mode
+ if (docData.charAt(0) == 65279)
+ docData = docData.substring(1);
+
+ doc = parser.parseInput(docData, baseUri);
+ doc.outputSettings().charset(charsetName);
+ }
+ return doc;
+ }
+
+ static ByteBuffer readToByteBuffer(InputStream inStream) throws IOException {
+ byte[] buffer = new byte[bufferSize];
+ ByteArrayOutputStream outStream = new ByteArrayOutputStream(bufferSize);
+ int read;
+ while(true) {
+ read = inStream.read(buffer);
+ if (read == -1) break;
+ outStream.write(buffer, 0, read);
+ }
+ ByteBuffer byteData = ByteBuffer.wrap(outStream.toByteArray());
+ return byteData;
+ }
+
+ /**
+ * Parse out a charset from a content type header.
+ * @param contentType e.g. "text/html; charset=EUC-JP"
+ * @return "EUC-JP", or null if not found. Charset is trimmed and uppercased.
+ */
+ static String getCharsetFromContentType(String contentType) {
+ if (contentType == null) return null;
+ Matcher m = charsetPattern.matcher(contentType);
+ if (m.find()) {
+ return m.group(1).trim().toUpperCase();
+ }
+ return null;
+ }
+
+
+}
diff --git a/server/src/org/jsoup/helper/DescendableLinkedList.java b/server/src/org/jsoup/helper/DescendableLinkedList.java
new file mode 100644
index 0000000000..28ca1971eb
--- /dev/null
+++ b/server/src/org/jsoup/helper/DescendableLinkedList.java
@@ -0,0 +1,82 @@
+package org.jsoup.helper;
+
+import java.util.Iterator;
+import java.util.LinkedList;
+import java.util.ListIterator;
+
+/**
+ * Provides a descending iterator and other 1.6 methods to allow support on the 1.5 JRE.
+ */
+public class DescendableLinkedList<E> extends LinkedList<E> {
+
+    /**
+     * Create a new DescendableLinkedList.
+     */
+    public DescendableLinkedList() {
+        super();
+    }
+
+    /**
+     * Add a new element to the start of the list.
+     * @param e element to add
+     */
+    public void push(E e) {
+        addFirst(e);
+    }
+
+    /**
+     * Look at the last element, if there is one.
+     * @return the last element, or null
+     */
+    public E peekLast() {
+        return size() == 0 ? null : getLast();
+    }
+
+    /**
+     * Remove and return the last element, if there is one
+     * @return the last element, or null
+     */
+    public E pollLast() {
+        return size() == 0 ? null : removeLast();
+    }
+
+    /**
+     * Get an iterator that starts at the end of the list and works towards the start.
+     * @return an iterator that starts at the end of the list and works towards the start.
+     */
+    public Iterator<E> descendingIterator() {
+        return new DescendingIterator(size());
+    }
+
+    // Deliberately not generic: it reuses the enclosing list's E, so no type parameter is shadowed and
+    // the list iterator can be stored without an unchecked cast.
+    private class DescendingIterator implements Iterator<E> {
+        private final ListIterator<E> iter;
+
+        // index is the cursor position to start from; size() positions the cursor after the last element
+        private DescendingIterator(int index) {
+            iter = listIterator(index);
+        }
+
+        /**
+         * Check if there is another element on the list.
+         * @return if another element
+         */
+        public boolean hasNext() {
+            return iter.hasPrevious();
+        }
+
+        /**
+         * Get the next element.
+         * @return the next element.
+         */
+        public E next() {
+            return iter.previous();
+        }
+
+        /**
+         * Remove the current element.
+         */
+        public void remove() {
+            iter.remove();
+        }
+    }
+}
diff --git a/server/src/org/jsoup/helper/HttpConnection.java b/server/src/org/jsoup/helper/HttpConnection.java
new file mode 100644
index 0000000000..06200a2547
--- /dev/null
+++ b/server/src/org/jsoup/helper/HttpConnection.java
@@ -0,0 +1,658 @@
+package org.jsoup.helper;
+
+import org.jsoup.Connection;
+import org.jsoup.nodes.Document;
+import org.jsoup.parser.Parser;
+import org.jsoup.parser.TokenQueue;
+
+import java.io.*;
+import java.net.HttpURLConnection;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.net.URLEncoder;
+import java.nio.ByteBuffer;
+import java.nio.charset.Charset;
+import java.util.*;
+import java.util.zip.GZIPInputStream;
+
+/**
+ * Implementation of {@link Connection}: a fluent builder around a {@link Request}, plus the {@link Response}
+ * produced by the most recent execution.
+ * @see org.jsoup.Jsoup#connect(String)
+ */
+public class HttpConnection implements Connection {
+    /**
+     * Create a new connection for the given URL string.
+     * @param url URL to connect to
+     * @return the new connection
+     */
+    public static Connection connect(String url) {
+        Connection con = new HttpConnection();
+        con.url(url);
+        return con;
+    }
+
+    /**
+     * Create a new connection for the given URL.
+     * @param url URL to connect to
+     * @return the new connection
+     */
+    public static Connection connect(URL url) {
+        Connection con = new HttpConnection();
+        con.url(url);
+        return con;
+    }
+
+    private Connection.Request req;
+    private Connection.Response res;
+
+    private HttpConnection() {
+        req = new Request();
+        res = new Response();
+    }
+
+    public Connection url(URL url) {
+        req.url(url);
+        return this;
+    }
+
+    /**
+     * Set the request URL.
+     * @param url URL to fetch; must not be empty
+     * @return this connection, for chaining
+     * @throws IllegalArgumentException if the URL is empty or malformed
+     */
+    public Connection url(String url) {
+        Validate.notEmpty(url, "Must supply a valid URL");
+        try {
+            req.url(new URL(url));
+        } catch (MalformedURLException e) {
+            throw new IllegalArgumentException("Malformed URL: " + url, e);
+        }
+        return this;
+    }
+
+    public Connection userAgent(String userAgent) {
+        Validate.notNull(userAgent, "User agent must not be null");
+        req.header("User-Agent", userAgent);
+        return this;
+    }
+
+    public Connection timeout(int millis) {
+        req.timeout(millis);
+        return this;
+    }
+
+    public Connection followRedirects(boolean followRedirects) {
+        req.followRedirects(followRedirects);
+        return this;
+    }
+
+    public Connection referrer(String referrer) {
+        Validate.notNull(referrer, "Referrer must not be null");
+        req.header("Referer", referrer);
+        return this;
+    }
+
+    public Connection method(Method method) {
+        req.method(method);
+        return this;
+    }
+
+    public Connection ignoreHttpErrors(boolean ignoreHttpErrors) {
+        req.ignoreHttpErrors(ignoreHttpErrors);
+        return this;
+    }
+
+    public Connection ignoreContentType(boolean ignoreContentType) {
+        req.ignoreContentType(ignoreContentType);
+        return this;
+    }
+
+    public Connection data(String key, String value) {
+        req.data(KeyVal.create(key, value));
+        return this;
+    }
+
+    public Connection data(Map<String, String> data) {
+        Validate.notNull(data, "Data map must not be null");
+        for (Map.Entry<String, String> entry : data.entrySet()) {
+            req.data(KeyVal.create(entry.getKey(), entry.getValue()));
+        }
+        return this;
+    }
+
+    /**
+     * Add request data as alternating keys and values.
+     * @param keyvals key, value, key, value, ...; must contain an even number of entries
+     * @return this connection, for chaining
+     */
+    public Connection data(String... keyvals) {
+        Validate.notNull(keyvals, "Data key value pairs must not be null");
+        Validate.isTrue(keyvals.length %2 == 0, "Must supply an even number of key value pairs");
+        for (int i = 0; i < keyvals.length; i += 2) {
+            String key = keyvals[i];
+            String value = keyvals[i+1];
+            Validate.notEmpty(key, "Data key must not be empty");
+            Validate.notNull(value, "Data value must not be null");
+            req.data(KeyVal.create(key, value));
+        }
+        return this;
+    }
+
+    public Connection header(String name, String value) {
+        req.header(name, value);
+        return this;
+    }
+
+    public Connection cookie(String name, String value) {
+        req.cookie(name, value);
+        return this;
+    }
+
+    public Connection cookies(Map<String, String> cookies) {
+        Validate.notNull(cookies, "Cookie map must not be null");
+        for (Map.Entry<String, String> entry : cookies.entrySet()) {
+            req.cookie(entry.getKey(), entry.getValue());
+        }
+        return this;
+    }
+
+    public Connection parser(Parser parser) {
+        req.parser(parser);
+        return this;
+    }
+
+    /**
+     * Execute the request as a GET and parse the result.
+     * @return parsed Document
+     * @throws IOException on error
+     */
+    public Document get() throws IOException {
+        req.method(Method.GET);
+        execute();
+        return res.parse();
+    }
+
+    /**
+     * Execute the request as a POST and parse the result.
+     * @return parsed Document
+     * @throws IOException on error
+     */
+    public Document post() throws IOException {
+        req.method(Method.POST);
+        execute();
+        return res.parse();
+    }
+
+    /**
+     * Execute the request and keep the response on this connection.
+     * @return the response
+     * @throws IOException on error
+     */
+    public Connection.Response execute() throws IOException {
+        res = Response.execute(req);
+        return res;
+    }
+
+    public Connection.Request request() {
+        return req;
+    }
+
+    public Connection request(Connection.Request request) {
+        req = request;
+        return this;
+    }
+
+    public Connection.Response response() {
+        return res;
+    }
+
+    public Connection response(Connection.Response response) {
+        res = response;
+        return this;
+    }
+
+    // State shared by requests and responses: URL, method, headers and cookies. Header lookups are
+    // case insensitive; cookie names are matched exactly.
+    @SuppressWarnings({"unchecked"})
+    private static abstract class Base<T extends Connection.Base> implements Connection.Base<T> {
+        URL url;
+        Method method;
+        Map<String, String> headers;
+        Map<String, String> cookies;
+
+        private Base() {
+            headers = new LinkedHashMap<String, String>();
+            cookies = new LinkedHashMap<String, String>();
+        }
+
+        public URL url() {
+            return url;
+        }
+
+        public T url(URL url) {
+            Validate.notNull(url, "URL must not be null");
+            this.url = url;
+            return (T) this;
+        }
+
+        public Method method() {
+            return method;
+        }
+
+        public T method(Method method) {
+            Validate.notNull(method, "Method must not be null");
+            this.method = method;
+            return (T) this;
+        }
+
+        public String header(String name) {
+            Validate.notNull(name, "Header name must not be null");
+            return getHeaderCaseInsensitive(name);
+        }
+
+        public T header(String name, String value) {
+            Validate.notEmpty(name, "Header name must not be empty");
+            Validate.notNull(value, "Header value must not be null");
+            removeHeader(name); // ensures we don't get an "accept-encoding" and a "Accept-Encoding"
+            headers.put(name, value);
+            return (T) this;
+        }
+
+        public boolean hasHeader(String name) {
+            Validate.notEmpty(name, "Header name must not be empty");
+            return getHeaderCaseInsensitive(name) != null;
+        }
+
+        public T removeHeader(String name) {
+            Validate.notEmpty(name, "Header name must not be empty");
+            Map.Entry<String, String> entry = scanHeaders(name); // remove is case insensitive too
+            if (entry != null)
+                headers.remove(entry.getKey()); // ensures correct case
+            return (T) this;
+        }
+
+        public Map<String, String> headers() {
+            return headers;
+        }
+
+        private String getHeaderCaseInsensitive(String name) {
+            Validate.notNull(name, "Header name must not be null");
+            // quick evals for common case of title case, lower case, then scan for mixed
+            String value = headers.get(name);
+            if (value == null)
+                value = headers.get(name.toLowerCase());
+            if (value == null) {
+                Map.Entry<String, String> entry = scanHeaders(name);
+                if (entry != null)
+                    value = entry.getValue();
+            }
+            return value;
+        }
+
+        // finds the stored header whose name matches ignoring case, or null if there is none
+        private Map.Entry<String, String> scanHeaders(String name) {
+            for (Map.Entry<String, String> entry : headers.entrySet()) {
+                // equalsIgnoreCase does not depend on the default locale, unlike toLowerCase()
+                if (entry.getKey().equalsIgnoreCase(name))
+                    return entry;
+            }
+            return null;
+        }
+
+        public String cookie(String name) {
+            Validate.notNull(name, "Cookie name must not be null");
+            return cookies.get(name);
+        }
+
+        public T cookie(String name, String value) {
+            Validate.notEmpty(name, "Cookie name must not be empty");
+            Validate.notNull(value, "Cookie value must not be null");
+            cookies.put(name, value);
+            return (T) this;
+        }
+
+        public boolean hasCookie(String name) {
+            // validate the name itself (previously the message literal was validated, so nothing was checked)
+            Validate.notEmpty(name, "Cookie name must not be empty");
+            return cookies.containsKey(name);
+        }
+
+        public T removeCookie(String name) {
+            // validate the name itself (previously the message literal was validated, so nothing was checked)
+            Validate.notEmpty(name, "Cookie name must not be empty");
+            cookies.remove(name);
+            return (T) this;
+        }
+
+        public Map<String, String> cookies() {
+            return cookies;
+        }
+    }
+
+    /**
+     * Request settings. Defaults set by the constructor: GET, 3000 ms timeout, follow redirects,
+     * "Accept-Encoding: gzip", HTML parser.
+     */
+    public static class Request extends Base<Connection.Request> implements Connection.Request {
+        private int timeoutMilliseconds;
+        private boolean followRedirects;
+        private Collection<Connection.KeyVal> data;
+        private boolean ignoreHttpErrors = false;
+        private boolean ignoreContentType = false;
+        private Parser parser;
+
+        private Request() {
+            timeoutMilliseconds = 3000;
+            followRedirects = true;
+            data = new ArrayList<Connection.KeyVal>();
+            method = Connection.Method.GET;
+            headers.put("Accept-Encoding", "gzip");
+            parser = Parser.htmlParser();
+        }
+
+        public int timeout() {
+            return timeoutMilliseconds;
+        }
+
+        public Request timeout(int millis) {
+            Validate.isTrue(millis >= 0, "Timeout milliseconds must be 0 (infinite) or greater");
+            timeoutMilliseconds = millis;
+            return this;
+        }
+
+        public boolean followRedirects() {
+            return followRedirects;
+        }
+
+        public Connection.Request followRedirects(boolean followRedirects) {
+            this.followRedirects = followRedirects;
+            return this;
+        }
+
+        public boolean ignoreHttpErrors() {
+            return ignoreHttpErrors;
+        }
+
+        public Connection.Request ignoreHttpErrors(boolean ignoreHttpErrors) {
+            this.ignoreHttpErrors = ignoreHttpErrors;
+            return this;
+        }
+
+        public boolean ignoreContentType() {
+            return ignoreContentType;
+        }
+
+        public Connection.Request ignoreContentType(boolean ignoreContentType) {
+            this.ignoreContentType = ignoreContentType;
+            return this;
+        }
+
+        public Request data(Connection.KeyVal keyval) {
+            Validate.notNull(keyval, "Key val must not be null");
+            data.add(keyval);
+            return this;
+        }
+
+        public Collection<Connection.KeyVal> data() {
+            return data;
+        }
+
+        public Request parser(Parser parser) {
+            this.parser = parser;
+            return this;
+        }
+
+        public Parser parser() {
+            return parser;
+        }
+    }
+
+    /**
+     * The result of executing a {@link Request}: status, headers, cookies and the fully buffered body.
+     */
+    public static class Response extends Base<Connection.Response> implements Connection.Response {
+        private static final int MAX_REDIRECTS = 20;
+        private int statusCode;
+        private String statusMessage;
+        private ByteBuffer byteData;
+        private String charset;
+        private String contentType;
+        private boolean executed = false;
+        private int numRedirects = 0;
+        private Connection.Request req;
+
+        Response() {
+            super();
+        }
+
+        // continues a redirect chain: counts one more hop than previousResponse and fails once the limit is hit
+        private Response(Response previousResponse) throws IOException {
+            super();
+            if (previousResponse != null) {
+                numRedirects = previousResponse.numRedirects + 1;
+                if (numRedirects >= MAX_REDIRECTS)
+                    throw new IOException(String.format("Too many redirects occurred trying to load URL %s", previousResponse.url()));
+            }
+        }
+
+        static Response execute(Connection.Request req) throws IOException {
+            return execute(req, null);
+        }
+
+        // Executes the request and buffers the body. For 301/302/303 with followRedirects set, re-issues the
+        // request as a GET to the Location target, carrying the cookies received so far. Note that the
+        // request object is modified in place (method, data, url, cookies).
+        static Response execute(Connection.Request req, Response previousResponse) throws IOException {
+            Validate.notNull(req, "Request must not be null");
+            String protocol = req.url().getProtocol();
+            Validate
+                .isTrue(protocol.equals("http") || protocol.equals("https"), "Only http & https protocols supported");
+
+            // set up the request for execution
+            if (req.method() == Connection.Method.GET && req.data().size() > 0)
+                serialiseRequestUrl(req); // appends query string
+            HttpURLConnection conn = createConnection(req);
+            conn.connect();
+            if (req.method() == Connection.Method.POST)
+                writePost(req.data(), conn.getOutputStream());
+
+            int status = conn.getResponseCode();
+            boolean needsRedirect = false;
+            if (status != HttpURLConnection.HTTP_OK) {
+                if (status == HttpURLConnection.HTTP_MOVED_TEMP || status == HttpURLConnection.HTTP_MOVED_PERM || status == HttpURLConnection.HTTP_SEE_OTHER)
+                    needsRedirect = true;
+                else if (!req.ignoreHttpErrors())
+                    throw new IOException(status + " error loading URL " + req.url().toString());
+            }
+            Response res = new Response(previousResponse);
+            res.setupFromConnection(conn, previousResponse);
+            if (needsRedirect && req.followRedirects()) {
+                req.method(Method.GET); // always redirect with a get. any data param from original req are dropped.
+                req.data().clear();
+                req.url(new URL(req.url(), res.header("Location")));
+                for (Map.Entry<String, String> cookie : res.cookies.entrySet()) { // add response cookies to request (for e.g. login posts)
+                    req.cookie(cookie.getKey(), cookie.getValue());
+                }
+                return execute(req, res);
+            }
+            res.req = req;
+
+            InputStream bodyStream = null;
+            InputStream dataStream = null;
+            try {
+                // the error stream carries the body of non-2xx responses (reachable when ignoreHttpErrors is set)
+                dataStream = conn.getErrorStream() != null ? conn.getErrorStream() : conn.getInputStream();
+                bodyStream = res.hasHeader("Content-Encoding") && res.header("Content-Encoding").equalsIgnoreCase("gzip") ?
+                        new BufferedInputStream(new GZIPInputStream(dataStream)) :
+                        new BufferedInputStream(dataStream);
+
+                res.byteData = DataUtil.readToByteBuffer(bodyStream);
+                res.charset = DataUtil.getCharsetFromContentType(res.contentType); // may be null, readInputStream deals with it
+            } finally {
+                if (bodyStream != null) bodyStream.close();
+                if (dataStream != null) dataStream.close();
+            }
+
+            res.executed = true;
+            return res;
+        }
+
+        public int statusCode() {
+            return statusCode;
+        }
+
+        public String statusMessage() {
+            return statusMessage;
+        }
+
+        public String charset() {
+            return charset;
+        }
+
+        public String contentType() {
+            return contentType;
+        }
+
+        /**
+         * Parse the buffered body into a Document, using the request's parser.
+         * @return parsed Document
+         * @throws IOException if the content type is not text/*, application/xml or application/xhtml+xml and
+         * the request does not ignore content types
+         */
+        public Document parse() throws IOException {
+            Validate.isTrue(executed, "Request must be executed (with .execute(), .get(), or .post() before parsing response");
+            if (!req.ignoreContentType() && (contentType == null || !(contentType.startsWith("text/") || contentType.startsWith("application/xml") || contentType.startsWith("application/xhtml+xml"))))
+                throw new IOException(String.format("Unhandled content type \"%s\" on URL %s. Must be text/*, application/xml, or application/xhtml+xml",
+                    contentType, url.toString()));
+            Document doc = DataUtil.parseByteData(byteData, charset, url.toExternalForm(), req.parser());
+            byteData.rewind(); // leave the buffer readable for later body() / parse() calls
+            charset = doc.outputSettings().charset().name(); // update charset from meta-equiv, possibly
+            return doc;
+        }
+
+        /**
+         * Get the body decoded as a string, using the response charset if known, else UTF-8.
+         * @return body text
+         */
+        public String body() {
+            Validate.isTrue(executed, "Request must be executed (with .execute(), .get(), or .post() before getting response body");
+            // charset gets set from header on execute, and from meta-equiv on parse. parse may not have happened yet
+            String body;
+            if (charset == null)
+                body = Charset.forName(DataUtil.defaultCharset).decode(byteData).toString();
+            else
+                body = Charset.forName(charset).decode(byteData).toString();
+            byteData.rewind();
+            return body;
+        }
+
+        /**
+         * Get the body as raw bytes. This is the buffer's backing array, not a copy.
+         * @return body bytes
+         */
+        public byte[] bodyAsBytes() {
+            Validate.isTrue(executed, "Request must be executed (with .execute(), .get(), or .post() before getting response body");
+            return byteData.array();
+        }
+
+        // set up connection defaults, and details from request
+        private static HttpURLConnection createConnection(Connection.Request req) throws IOException {
+            HttpURLConnection conn = (HttpURLConnection) req.url().openConnection();
+            conn.setRequestMethod(req.method().name());
+            conn.setInstanceFollowRedirects(false); // don't rely on native redirection support
+            conn.setConnectTimeout(req.timeout());
+            conn.setReadTimeout(req.timeout());
+            if (req.method() == Method.POST)
+                conn.setDoOutput(true);
+            if (req.cookies().size() > 0)
+                conn.addRequestProperty("Cookie", getRequestCookieString(req));
+            for (Map.Entry<String, String> header : req.headers().entrySet()) {
+                conn.addRequestProperty(header.getKey(), header.getValue());
+            }
+            return conn;
+        }
+
+        // set up url, method, header, cookies
+        private void setupFromConnection(HttpURLConnection conn, Connection.Response previousResponse) throws IOException {
+            method = Connection.Method.valueOf(conn.getRequestMethod());
+            url = conn.getURL();
+            statusCode = conn.getResponseCode();
+            statusMessage = conn.getResponseMessage();
+            contentType = conn.getContentType();
+
+            Map<String, List<String>> resHeaders = conn.getHeaderFields();
+            processResponseHeaders(resHeaders);
+
+            // if from a redirect, map previous response cookies into this response
+            if (previousResponse != null) {
+                for (Map.Entry<String, String> prevCookie : previousResponse.cookies().entrySet()) {
+                    if (!hasCookie(prevCookie.getKey()))
+                        cookie(prevCookie.getKey(), prevCookie.getValue());
+                }
+            }
+        }
+
+        // copies response headers into this response; Set-Cookie values become cookies (name and value only)
+        void processResponseHeaders(Map<String, List<String>> resHeaders) {
+            for (Map.Entry<String, List<String>> entry : resHeaders.entrySet()) {
+                String name = entry.getKey();
+                if (name == null)
+                    continue; // http/1.1 line
+
+                List<String> values = entry.getValue();
+                if (name.equalsIgnoreCase("Set-Cookie")) {
+                    for (String value : values) {
+                        if (value == null)
+                            continue;
+                        TokenQueue cd = new TokenQueue(value);
+                        String cookieName = cd.chompTo("=").trim();
+                        String cookieVal = cd.consumeTo(";").trim();
+                        // ignores path, date, domain, secure et al. req'd?
+                        // both strings are non-null here (trim() was just called on them); skip blank names
+                        if (cookieName.length() > 0)
+                            cookie(cookieName, cookieVal);
+                    }
+                } else { // only take the first instance of each header
+                    if (!values.isEmpty())
+                        header(name, values.get(0));
+                }
+            }
+        }
+
+        // writes the data as a url-encoded form body, then closes the stream (also when a write fails)
+        private static void writePost(Collection<Connection.KeyVal> data, OutputStream outputStream) throws IOException {
+            OutputStreamWriter w = new OutputStreamWriter(outputStream, DataUtil.defaultCharset);
+            try {
+                boolean first = true;
+                for (Connection.KeyVal keyVal : data) {
+                    if (!first)
+                        w.append('&');
+                    else
+                        first = false;
+
+                    w.write(URLEncoder.encode(keyVal.key(), DataUtil.defaultCharset));
+                    w.write('=');
+                    w.write(URLEncoder.encode(keyVal.value(), DataUtil.defaultCharset));
+                }
+            } finally {
+                w.close();
+            }
+        }
+
+        // builds the Cookie request header value: "name=value; name2=value2"
+        private static String getRequestCookieString(Connection.Request req) {
+            StringBuilder sb = new StringBuilder();
+            boolean first = true;
+            for (Map.Entry<String, String> cookie : req.cookies().entrySet()) {
+                if (!first)
+                    sb.append("; ");
+                else
+                    first = false;
+                sb.append(cookie.getKey()).append('=').append(cookie.getValue());
+                // todo: spec says only ascii, no escaping / encoding defined. validate on set? or escape somehow here?
+            }
+            return sb.toString();
+        }
+
+        // for get url reqs, serialise the data map into the url
+        private static void serialiseRequestUrl(Connection.Request req) throws IOException {
+            URL in = req.url();
+            StringBuilder url = new StringBuilder();
+            boolean first = true;
+            // reconstitute the query, ready for appends
+            url
+                .append(in.getProtocol())
+                .append("://")
+                .append(in.getAuthority()) // includes host, port
+                .append(in.getPath())
+                .append("?");
+            if (in.getQuery() != null) {
+                url.append(in.getQuery());
+                first = false;
+            }
+            for (Connection.KeyVal keyVal : req.data()) {
+                if (!first)
+                    url.append('&');
+                else
+                    first = false;
+                url
+                    .append(URLEncoder.encode(keyVal.key(), DataUtil.defaultCharset))
+                    .append('=')
+                    .append(URLEncoder.encode(keyVal.value(), DataUtil.defaultCharset));
+            }
+            req.url(new URL(url.toString()));
+            req.data().clear(); // moved into url as get params
+        }
+    }
+
+    /**
+     * A request data key/value pair. The key must not be empty and the value must not be null.
+     */
+    public static class KeyVal implements Connection.KeyVal {
+        private String key;
+        private String value;
+
+        /**
+         * Create a validated key/value pair.
+         * @param key data key; must not be empty
+         * @param value data value; must not be null
+         * @return the new pair
+         */
+        public static KeyVal create(String key, String value) {
+            Validate.notEmpty(key, "Data key must not be empty");
+            Validate.notNull(value, "Data value must not be null");
+            return new KeyVal(key, value);
+        }
+
+        private KeyVal(String key, String value) {
+            this.key = key;
+            this.value = value;
+        }
+
+        public KeyVal key(String key) {
+            Validate.notEmpty(key, "Data key must not be empty");
+            this.key = key;
+            return this;
+        }
+
+        public String key() {
+            return key;
+        }
+
+        public KeyVal value(String value) {
+            Validate.notNull(value, "Data value must not be null");
+            this.value = value;
+            return this;
+        }
+
+        public String value() {
+            return value;
+        }
+
+        @Override
+        public String toString() {
+            return key + "=" + value;
+        }
+    }
+}
diff --git a/server/src/org/jsoup/helper/StringUtil.java b/server/src/org/jsoup/helper/StringUtil.java
new file mode 100644
index 0000000000..071a92c7a5
--- /dev/null
+++ b/server/src/org/jsoup/helper/StringUtil.java
@@ -0,0 +1,140 @@
+package org.jsoup.helper;
+
+import java.util.Collection;
+import java.util.Iterator;
+
+/**
+ * A minimal String utility class. Designed for internal jsoup use only.
+ */
+public final class StringUtil {
+    // memoised padding up to 10: padding[i] is a string of exactly i spaces. Built in code so the widths
+    // cannot be corrupted by whitespace-collapsing tools.
+    private static final String[] padding = new String[11];
+    static {
+        StringBuilder spaces = new StringBuilder(padding.length);
+        for (int i = 0; i < padding.length; i++) {
+            padding[i] = spaces.toString();
+            spaces.append(' ');
+        }
+    }
+
+    /**
+     * Join a collection of strings by a separator
+     * @param strings collection of string objects
+     * @param sep string to place between strings
+     * @return joined string
+     */
+    public static String join(Collection<?> strings, String sep) {
+        return join(strings.iterator(), sep);
+    }
+
+    /**
+     * Join a collection of strings by a separator
+     * @param strings iterator of string objects
+     * @param sep string to place between strings
+     * @return joined string; empty if the iterator has no elements
+     */
+    public static String join(Iterator<?> strings, String sep) {
+        if (!strings.hasNext())
+            return "";
+
+        String start = strings.next().toString();
+        if (!strings.hasNext()) // only one, avoid builder
+            return start;
+
+        StringBuilder sb = new StringBuilder(64).append(start);
+        while (strings.hasNext()) {
+            sb.append(sep);
+            sb.append(strings.next());
+        }
+        return sb.toString();
+    }
+
+    /**
+     * Returns space padding
+     * @param width amount of padding desired; must be 0 or greater
+     * @return string of spaces * width
+     * @throws IllegalArgumentException if width is negative
+     */
+    public static String padding(int width) {
+        if (width < 0)
+            throw new IllegalArgumentException("width must be >= 0");
+
+        if (width < padding.length)
+            return padding[width];
+
+        char[] out = new char[width];
+        for (int i = 0; i < width; i++)
+            out[i] = ' ';
+        return String.valueOf(out);
+    }
+
+    /**
+     * Tests if a string is blank: null, empty, or only whitespace (" ", \r\n, \t, etc)
+     * @param string string to test
+     * @return if string is blank
+     */
+    public static boolean isBlank(String string) {
+        if (string == null || string.length() == 0)
+            return true;
+
+        int l = string.length();
+        // step by whole code points so a surrogate pair is examined once, not once per char
+        for (int i = 0; i < l; ) {
+            int c = string.codePointAt(i);
+            if (!StringUtil.isWhitespace(c))
+                return false;
+            i += Character.charCount(c);
+        }
+        return true;
+    }
+
+    /**
+     * Tests if a string is numeric, i.e. contains only digit characters
+     * @param string string to test
+     * @return true if only digit chars, false if empty or null or contains non-digit chrs
+     */
+    public static boolean isNumeric(String string) {
+        if (string == null || string.length() == 0)
+            return false;
+
+        int l = string.length();
+        // step by whole code points: otherwise the low surrogate of a supplementary digit is tested on
+        // its own and wrongly rejected
+        for (int i = 0; i < l; ) {
+            int c = string.codePointAt(i);
+            if (!Character.isDigit(c))
+                return false;
+            i += Character.charCount(c);
+        }
+        return true;
+    }
+
+    /**
+     * Tests if a code point is "whitespace" as defined in the HTML spec.
+     * @param c code point to test
+     * @return true if code point is whitespace, false otherwise
+     */
+    public static boolean isWhitespace(int c){
+        return c == ' ' || c == '\t' || c == '\n' || c == '\f' || c == '\r';
+    }
+
+    /**
+     * Collapses each run of whitespace (see {@link #isWhitespace(int)}) into a single space character.
+     * @param string string to normalise
+     * @return the normalised string, or the same instance if nothing needed to change
+     */
+    public static String normaliseWhitespace(String string) {
+        StringBuilder sb = new StringBuilder(string.length());
+
+        boolean lastWasWhite = false;
+        boolean modified = false;
+
+        int l = string.length();
+        // advance by Character.charCount so appendCodePoint is not followed by a second append of the
+        // pair's low surrogate, which used to corrupt supplementary characters
+        for (int i = 0; i < l; ) {
+            int c = string.codePointAt(i);
+            i += Character.charCount(c);
+            if (isWhitespace(c)) {
+                if (lastWasWhite) {
+                    modified = true;
+                    continue;
+                }
+                if (c != ' ')
+                    modified = true;
+                sb.append(' ');
+                lastWasWhite = true;
+            }
+            else {
+                sb.appendCodePoint(c);
+                lastWasWhite = false;
+            }
+        }
+        return modified ? sb.toString() : string;
+    }
+
+    /**
+     * Tests if the needle equals any of the haystack strings.
+     * @param needle string to look for
+     * @param haystack candidate strings; elements must not be null
+     * @return true if any haystack element equals the needle
+     */
+    public static boolean in(String needle, String... haystack) {
+        for (String hay : haystack) {
+            if (hay.equals(needle))
+                return true;
+        }
+        return false;
+    }
+}
diff --git a/server/src/org/jsoup/helper/Validate.java b/server/src/org/jsoup/helper/Validate.java
new file mode 100644
index 0000000000..814bcc3a40
--- /dev/null
+++ b/server/src/org/jsoup/helper/Validate.java
@@ -0,0 +1,112 @@
+package org.jsoup.helper;
+
+/**
+ * Simple validation methods. Designed for jsoup internal use
+ */
+public final class Validate {
+
+ private Validate() {}
+
+ /**
+ * Validates that the object is not null
+ * @param obj object to test
+ */
+ public static void notNull(Object obj) {
+ if (obj == null)
+ throw new IllegalArgumentException("Object must not be null");
+ }
+
+ /**
+ * Validates that the object is not null
+ * @param obj object to test
+ * @param msg message to output if validation fails
+ */
+ public static void notNull(Object obj, String msg) {
+ if (obj == null)
+ throw new IllegalArgumentException(msg);
+ }
+
+ /**
+ * Validates that the value is true
+ * @param val object to test
+ */
+ public static void isTrue(boolean val) {
+ if (!val)
+ throw new IllegalArgumentException("Must be true");
+ }
+
+ /**
+ * Validates that the value is true
+ * @param val object to test
+ * @param msg message to output if validation fails
+ */
+ public static void isTrue(boolean val, String msg) {
+ if (!val)
+ throw new IllegalArgumentException(msg);
+ }
+
+ /**
+ * Validates that the value is false
+ * @param val object to test
+ */
+ public static void isFalse(boolean val) {
+ if (val)
+ throw new IllegalArgumentException("Must be false");
+ }
+
+ /**
+ * Validates that the value is false
+ * @param val object to test
+ * @param msg message to output if validation fails
+ */
+ public static void isFalse(boolean val, String msg) {
+ if (val)
+ throw new IllegalArgumentException(msg);
+ }
+
+ /**
+ * Validates that the array contains no null elements
+ * @param objects the array to test
+ */
+ public static void noNullElements(Object[] objects) {
+ noNullElements(objects, "Array must not contain any null objects");
+ }
+
+ /**
+ * Validates that the array contains no null elements
+ * @param objects the array to test
+ * @param msg message to output if validation fails
+ */
+ public static void noNullElements(Object[] objects, String msg) {
+ for (Object obj : objects)
+ if (obj == null)
+ throw new IllegalArgumentException(msg);
+ }
+
+ /**
+ * Validates that the string is not empty
+ * @param string the string to test
+ */
+ public static void notEmpty(String string) {
+ if (string == null || string.length() == 0)
+ throw new IllegalArgumentException("String must not be empty");
+ }
+
+ /**
+ * Validates that the string is not empty
+ * @param string the string to test
+ * @param msg message to output if validation fails
+ */
+ public static void notEmpty(String string, String msg) {
+ if (string == null || string.length() == 0)
+ throw new IllegalArgumentException(msg);
+ }
+
+ /**
+ Cause a failure.
+ @param msg message to output.
+ */
+ public static void fail(String msg) {
+ throw new IllegalArgumentException(msg);
+ }
+}