summaryrefslogtreecommitdiffstats
path: root/server/src/org/jsoup/nodes/Entities.java
diff options
context:
space:
mode:
Diffstat (limited to 'server/src/org/jsoup/nodes/Entities.java')
-rw-r--r--server/src/org/jsoup/nodes/Entities.java217
1 files changed, 0 insertions, 217 deletions
diff --git a/server/src/org/jsoup/nodes/Entities.java b/server/src/org/jsoup/nodes/Entities.java
deleted file mode 100644
index 24b50d7344..0000000000
--- a/server/src/org/jsoup/nodes/Entities.java
+++ /dev/null
@@ -1,217 +0,0 @@
-package org.jsoup.nodes;
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.nio.charset.CharsetEncoder;
-import java.util.HashMap;
-import java.util.Map;
-import java.util.MissingResourceException;
-import java.util.Properties;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-
-/**
- * HTML entities, and escape routines. Source: <a href=
- * "http://www.w3.org/TR/html5/named-character-references.html#named-character-references"
- * >W3C HTML named character references</a>.
- */
-public class Entities {
-    /** Selects which character-to-entity-name table is used when escaping. */
-    public enum EscapeMode {
-        /**
-         * Restricted entities suitable for XHTML output: lt, gt, amp, apos, and
-         * quot only.
-         */
-        xhtml(xhtmlByVal),
-        /** Default HTML output entities. */
-        base(baseByVal),
-        /** Complete HTML entities. */
-        extended(fullByVal);
-
-        private final Map<Character, String> map;
-
-        EscapeMode(Map<Character, String> map) {
-            this.map = map;
-        }
-
-        /**
-         * @return the character-to-entity-name table for this mode. This is
-         *         the shared table itself, not a copy.
-         */
-        public Map<Character, String> getMap() {
-            return map;
-        }
-    }
-
-    private static final Map<String, Character> full;
-    private static final Map<Character, String> xhtmlByVal;
-    private static final Map<Character, String> baseByVal;
-    private static final Map<Character, String> fullByVal;
-    // Lenient form: the trailing ';' is optional.
-    private static final Pattern unescapePattern = Pattern
-            .compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);?");
-    // Strict form: the trailing ';' is required.
-    private static final Pattern strictUnescapePattern = Pattern
-            .compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);");
-
-    private Entities() {
-    }
-
-    /**
-     * Check if the input is a known named entity.
-     *
-     * @param name
-     *            the possible entity name (e.g. "lt" or "amp")
-     * @return true if a known named entity
-     */
-    public static boolean isNamedEntity(String name) {
-        return full.containsKey(name);
-    }
-
-    /**
-     * Get the Character value of the named entity.
-     *
-     * @param name
-     *            named entity (e.g. "lt" or "amp")
-     * @return the Character value of the named entity (e.g. '<' or '&'), or
-     *         null if the name is not in the full entity table
-     */
-    public static Character getCharacterByName(String name) {
-        return full.get(name);
-    }
-
-    /**
-     * Escape the input using the encoder and escape mode of the given output
-     * settings.
-     *
-     * @param string
-     *            text to escape
-     * @param out
-     *            supplies the encoder and the escape mode
-     * @return the escaped text
-     */
-    static String escape(String string, Document.OutputSettings out) {
-        return escape(string, out.encoder(), out.escapeMode());
-    }
-
-    /**
-     * Escape the input one char at a time: a char present in the escape
-     * mode's table becomes {@code &name;}, a char the encoder can encode is
-     * copied through, and any other char becomes a decimal numeric reference
-     * {@code &#N;}.
-     *
-     * @param string
-     *            text to escape
-     * @param encoder
-     *            decides which chars may be written literally
-     * @param escapeMode
-     *            selects the table of named entities
-     * @return the escaped text
-     */
-    static String escape(String string, CharsetEncoder encoder,
-            EscapeMode escapeMode) {
-        StringBuilder accum = new StringBuilder(string.length() * 2);
-        Map<Character, String> map = escapeMode.getMap();
-
-        for (int pos = 0; pos < string.length(); pos++) {
-            char c = string.charAt(pos);
-            String name = map.get(c); // single lookup; null means no named entity
-            if (name != null) {
-                accum.append('&').append(name).append(';');
-            } else if (encoder.canEncode(c)) {
-                accum.append(c);
-            } else {
-                accum.append("&#").append((int) c).append(';');
-            }
-        }
-
-        return accum.toString();
-    }
-
-    /**
-     * Unescape the input string, treating the trailing ';' of a reference as
-     * optional.
-     *
-     * @param string
-     *            text that may contain character references
-     * @return the text with recognised references replaced
-     */
-    static String unescape(String string) {
-        return unescape(string, false);
-    }
-
-    /**
-     * Unescape the input string.
-     *
-     * @param string
-     *            text that may contain character references
-     * @param strict
-     *            if "strict" (that is, requires trailing ';' char, otherwise
-     *            that's optional)
-     * @return the text with every recognised reference replaced by its
-     *         character. Unknown names, unparsable numbers and numeric
-     *         values above 0xFFFF are left exactly as they appeared in the
-     *         input.
-     */
-    static String unescape(String string, boolean strict) {
-        // todo: change this method to use Tokeniser.consumeCharacterReference
-        if (!string.contains("&")) {
-            return string;
-        }
-
-        // &(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);  -- ';' optional unless strict
-        Pattern pattern = strict ? strictUnescapePattern : unescapePattern;
-        Matcher m = pattern.matcher(string);
-        // StringBuffer because that is what m.appendReplacement is given here.
-        // todo: replace m.appendReplacement with own impl, so StringBuffer and
-        // quoteReplacement not required
-        StringBuffer accum = new StringBuffer(string.length());
-
-        while (m.find()) {
-            int charval = -1; // -1 means "no replacement known"
-            String num = m.group(3);
-            if (num != null) {
-                // group 2 is the x/X hex indicator
-                int radix = m.group(2) != null ? 16 : 10;
-                try {
-                    charval = Integer.parseInt(num, radix);
-                } catch (NumberFormatException ignored) {
-                    // Not a number in this radix (e.g. hex digits without the
-                    // x marker) or too large for an int: keep charval at -1 so
-                    // the original text is preserved.
-                }
-            } else {
-                Character named = full.get(m.group(1));
-                if (named != null) {
-                    charval = named.charValue();
-                }
-            }
-
-            String replacement;
-            if (charval != -1 && charval <= 0xFFFF) {
-                replacement = Character.toString((char) charval);
-            } else {
-                // Unknown or out of range for a single char: casting would
-                // narrow the value to an unrelated character, so replace with
-                // the original string instead.
-                replacement = m.group(0);
-            }
-            m.appendReplacement(accum, Matcher.quoteReplacement(replacement));
-        }
-        m.appendTail(accum);
-        return accum.toString();
-    }
-
-    // xhtml has restricted entities
-    private static final Object[][] xhtmlArray = { { "quot", 0x00022 },
-            { "amp", 0x00026 }, { "apos", 0x00027 }, { "lt", 0x0003C },
-            { "gt", 0x0003E } };
-
-    static {
-        xhtmlByVal = new HashMap<Character, String>();
-        // most common / default
-        baseByVal = toCharacterKey(loadEntities("entities-base.properties"));
-        // extended and overblown.
-        full = loadEntities("entities-full.properties");
-        fullByVal = toCharacterKey(full);
-
-        for (Object[] entity : xhtmlArray) {
-            Character c = Character.valueOf((char) ((Integer) entity[1])
-                    .intValue());
-            xhtmlByVal.put(c, ((String) entity[0]));
-        }
-    }
-
-    /**
-     * Load an entity table from a properties resource located relative to
-     * this class. Each key is used as the entity name and each value is
-     * parsed as a hexadecimal number.
-     *
-     * @param filename
-     *            resource name passed to {@code getResourceAsStream}
-     * @return map from entity name to character
-     * @throws MissingResourceException
-     *             if the resource is not found or cannot be read
-     */
-    private static Map<String, Character> loadEntities(String filename) {
-        Properties properties = new Properties();
-        Map<String, Character> entities = new HashMap<String, Character>();
-
-        InputStream in = Entities.class.getResourceAsStream(filename);
-        if (in == null) {
-            // getResourceAsStream signals "not found" with null, not an exception
-            throw new MissingResourceException(
-                    "Error loading entities resource: not found", "Entities",
-                    filename);
-        }
-        try {
-            try {
-                properties.load(in);
-            } finally {
-                in.close(); // close even when load fails
-            }
-        } catch (IOException e) {
-            MissingResourceException failure = new MissingResourceException(
-                    "Error loading entities resource: " + e.getMessage(),
-                    "Entities", filename);
-            failure.initCause(e); // no cause-taking constructor exists
-            throw failure;
-        }
-
-        for (Map.Entry<Object, Object> entry : properties.entrySet()) {
-            // NOTE(review): the (char) cast narrows any value above 0xFFFF;
-            // confirm the resource files only hold values that fit in a char.
-            Character val = Character.valueOf((char) Integer.parseInt(
-                    (String) entry.getValue(), 16));
-            String name = (String) entry.getKey();
-            entities.put(name, val);
-        }
-        return entities;
-    }
-
-    /**
-     * Invert a name-to-character table into a character-to-name table. When
-     * several names share a character, an all-lower-case name replaces
-     * whatever name was stored for that character before it.
-     *
-     * @param inMap
-     *            map from entity name to character
-     * @return map from character to entity name
-     */
-    private static Map<Character, String> toCharacterKey(
-            Map<String, Character> inMap) {
-        Map<Character, String> outMap = new HashMap<Character, String>();
-        for (Map.Entry<String, Character> entry : inMap.entrySet()) {
-            Character character = entry.getValue();
-            String name = entry.getKey();
-
-            if (outMap.containsKey(character)) {
-                // dupe, prefer the lower case version
-                if (name.toLowerCase().equals(name)) {
-                    outMap.put(character, name);
-                }
-            } else {
-                outMap.put(character, name);
-            }
-        }
-        return outMap;
-    }
-}