1 files changed, 0 insertions, 186 deletions
diff --git a/server/src/org/jsoup/helper/DataUtil.java b/server/src/org/jsoup/helper/DataUtil.java
deleted file mode 100644
index 26b85ea7dc..0000000000
--- a/server/src/org/jsoup/helper/DataUtil.java
+++ /dev/null
@@ -1,186 +0,0 @@
-package org.jsoup.helper;
-
-import java.io.ByteArrayOutputStream;
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.IOException;
-import java.io.InputStream;
-import java.nio.ByteBuffer;
-import java.nio.charset.Charset;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-
-import org.jsoup.nodes.Document;
-import org.jsoup.nodes.Element;
-import org.jsoup.parser.Parser;
-
-/**
- * Internal static utilities for handling data.
- * 
- */
-public class DataUtil {
-    private static final Pattern charsetPattern = Pattern
-            .compile("(?i)\\bcharset=\\s*\"?([^\\s;\"]*)");
-    static final String defaultCharset = "UTF-8"; // used if not found in header
-                                                  // or meta charset
-    private static final int bufferSize = 0x20000; // ~130K.
-
-    private DataUtil() {
-    }
-
-    /**
-     * Loads a file to a Document.
-     * 
-     * @param in
-     *            file to load
-     * @param charsetName
-     *            character set of input
-     * @param baseUri
-     *            base URI of document, to resolve relative links against
-     * @return Document
-     * @throws IOException
-     *             on IO error
-     */
-    public static Document load(File in, String charsetName, String baseUri)
-            throws IOException {
-        FileInputStream inStream = null;
-        try {
-            inStream = new FileInputStream(in);
-            ByteBuffer byteData = readToByteBuffer(inStream);
-            return parseByteData(byteData, charsetName, baseUri,
-                    Parser.htmlParser());
-        } finally {
-            if (inStream != null) {
-                inStream.close();
-            }
-        }
-    }
-
-    /**
-     * Parses a Document from an input steam.
-     * 
-     * @param in
-     *            input stream to parse. You will need to close it.
-     * @param charsetName
-     *            character set of input
-     * @param baseUri
-     *            base URI of document, to resolve relative links against
-     * @return Document
-     * @throws IOException
-     *             on IO error
-     */
-    public static Document load(InputStream in, String charsetName,
-            String baseUri) throws IOException {
-        ByteBuffer byteData = readToByteBuffer(in);
-        return parseByteData(byteData, charsetName, baseUri,
-                Parser.htmlParser());
-    }
-
-    /**
-     * Parses a Document from an input steam, using the provided Parser.
-     * 
-     * @param in
-     *            input stream to parse. You will need to close it.
-     * @param charsetName
-     *            character set of input
-     * @param baseUri
-     *            base URI of document, to resolve relative links against
-     * @param parser
-     *            alternate {@link Parser#xmlParser() parser} to use.
-     * @return Document
-     * @throws IOException
-     *             on IO error
-     */
-    public static Document load(InputStream in, String charsetName,
-            String baseUri, Parser parser) throws IOException {
-        ByteBuffer byteData = readToByteBuffer(in);
-        return parseByteData(byteData, charsetName, baseUri, parser);
-    }
-
-    // reads bytes first into a buffer, then decodes with the appropriate
-    // charset. done this way to support
-    // switching the chartset midstream when a meta http-equiv tag defines the
-    // charset.
-    static Document parseByteData(ByteBuffer byteData, String charsetName,
-            String baseUri, Parser parser) {
-        String docData;
-        Document doc = null;
-        if (charsetName == null) { // determine from meta. safe parse as UTF-8
-            // look for <meta http-equiv="Content-Type"
-            // content="text/html;charset=gb2312"> or HTML5 <meta
-            // charset="gb2312">
-            docData = Charset.forName(defaultCharset).decode(byteData)
-                    .toString();
-            doc = parser.parseInput(docData, baseUri);
-            Element meta = doc.select(
-                    "meta[http-equiv=content-type], meta[charset]").first();
-            if (meta != null) { // if not found, will keep utf-8 as best attempt
-                String foundCharset = meta.hasAttr("http-equiv") ? getCharsetFromContentType(meta
-                        .attr("content")) : meta.attr("charset");
-                if (foundCharset != null && foundCharset.length() != 0
-                        && !foundCharset.equals(defaultCharset)) { // need to
-                                                                   // re-decode
-                    charsetName = foundCharset;
-                    byteData.rewind();
-                    docData = Charset.forName(foundCharset).decode(byteData)
-                            .toString();
-                    doc = null;
-                }
-            }
-        } else { // specified by content type header (or by user on file load)
-            Validate.notEmpty(
-                    charsetName,
-                    "Must set charset arg to character set of file to parse. Set to null to attempt to detect from HTML");
-            docData = Charset.forName(charsetName).decode(byteData).toString();
-        }
-        if (doc == null) {
-            // there are times where there is a spurious byte-order-mark at the
-            // start of the text. Shouldn't be present
-            // in utf-8. If after decoding, there is a BOM, strip it; otherwise
-            // will cause the parser to go straight
-            // into head mode
-            if (docData.charAt(0) == 65279) {
-                docData = docData.substring(1);
-            }
-
-            doc = parser.parseInput(docData, baseUri);
-            doc.outputSettings().charset(charsetName);
-        }
-        return doc;
-    }
-
-    static ByteBuffer readToByteBuffer(InputStream inStream) throws IOException {
-        byte[] buffer = new byte[bufferSize];
-        ByteArrayOutputStream outStream = new ByteArrayOutputStream(bufferSize);
-        int read;
-        while (true) {
-            read = inStream.read(buffer);
-            if (read == -1) {
-                break;
-            }
-            outStream.write(buffer, 0, read);
-        }
-        ByteBuffer byteData = ByteBuffer.wrap(outStream.toByteArray());
-        return byteData;
-    }
-
-    /**
-     * Parse out a charset from a content type header.
-     * 
-     * @param contentType
-     *            e.g. "text/html; charset=EUC-JP"
-     * @return "EUC-JP", or null if not found. Charset is trimmed and
-     *         uppercased.
-     */
-    static String getCharsetFromContentType(String contentType) {
-        if (contentType == null) {
-            return null;
-        }
-        Matcher m = charsetPattern.matcher(contentType);
-        if (m.find()) {
-            return m.group(1).trim().toUpperCase();
-        }
-        return null;
-    }
-
-}