about summary refs log tree commit diff stats
path: root/server/src/org/jsoup/nodes/Entities.java
diff options
context:
space:
mode:
Diffstat (limited to 'server/src/org/jsoup/nodes/Entities.java')
-rw-r--r-- server/src/org/jsoup/nodes/Entities.java 111
1 files changed, 72 insertions, 39 deletions
diff --git a/server/src/org/jsoup/nodes/Entities.java b/server/src/org/jsoup/nodes/Entities.java
index 0ae83e1fc0..24b50d7344 100644
--- a/server/src/org/jsoup/nodes/Entities.java
+++ b/server/src/org/jsoup/nodes/Entities.java
@@ -3,18 +3,24 @@ package org.jsoup.nodes;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.CharsetEncoder;
-import java.util.*;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.MissingResourceException;
+import java.util.Properties;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
- * HTML entities, and escape routines.
- * Source: <a href="http://www.w3.org/TR/html5/named-character-references.html#named-character-references">W3C HTML
- * named character references</a>.
+ * HTML entities, and escape routines. Source: <a href=
+ * "http://www.w3.org/TR/html5/named-character-references.html#named-character-references"
+ * >W3C HTML named character references</a>.
*/
public class Entities {
public enum EscapeMode {
- /** Restricted entities suitable for XHTML output: lt, gt, amp, apos, and quot only. */
+ /**
+ * Restricted entities suitable for XHTML output: lt, gt, amp, apos, and
+ * quot only.
+ */
xhtml(xhtmlByVal),
/** Default HTML output entities. */
base(baseByVal),
@@ -36,21 +42,26 @@ public class Entities {
private static final Map<Character, String> xhtmlByVal;
private static final Map<Character, String> baseByVal;
private static final Map<Character, String> fullByVal;
- private static final Pattern unescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);?");
- private static final Pattern strictUnescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);");
+ private static final Pattern unescapePattern = Pattern
+ .compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);?");
+ private static final Pattern strictUnescapePattern = Pattern
+ .compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);");
- private Entities() {}
+ private Entities() {
+ }
/**
* Check if the input is a known named entity
- * @param name the possible entity name (e.g. "lt" or "amp"
+ *
+ * @param name
+ * the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity
*/
public static boolean isNamedEntity(String name) {
return full.containsKey(name);
}
- /**
+/**
* Get the Character value of the named entity
* @param name named entity (e.g. "lt" or "amp")
* @return the Character value of the named entity (e.g. '<' or '&')
@@ -58,23 +69,25 @@ public class Entities {
public static Character getCharacterByName(String name) {
return full.get(name);
}
-
+
static String escape(String string, Document.OutputSettings out) {
return escape(string, out.encoder(), out.escapeMode());
}
- static String escape(String string, CharsetEncoder encoder, EscapeMode escapeMode) {
+ static String escape(String string, CharsetEncoder encoder,
+ EscapeMode escapeMode) {
StringBuilder accum = new StringBuilder(string.length() * 2);
Map<Character, String> map = escapeMode.getMap();
for (int pos = 0; pos < string.length(); pos++) {
Character c = string.charAt(pos);
- if (map.containsKey(c))
+ if (map.containsKey(c)) {
accum.append('&').append(map.get(c)).append(';');
- else if (encoder.canEncode(c))
+ } else if (encoder.canEncode(c)) {
accum.append(c.charValue());
- else
+ } else {
accum.append("&#").append((int) c).append(';');
+ }
}
return accum.toString();
@@ -86,39 +99,53 @@ public class Entities {
/**
* Unescape the input string.
+ *
* @param string
- * @param strict if "strict" (that is, requires trailing ';' char, otherwise that's optional)
+ * @param strict
+ * if "strict" (that is, requires trailing ';' char, otherwise
+ * that's optional)
* @return
*/
static String unescape(String string, boolean strict) {
// todo: change this method to use Tokeniser.consumeCharacterReference
- if (!string.contains("&"))
+ if (!string.contains("&")) {
return string;
+ }
- Matcher m = strict? strictUnescapePattern.matcher(string) : unescapePattern.matcher(string); // &(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]\\d*);?
- StringBuffer accum = new StringBuffer(string.length()); // pity matcher can't use stringbuilder, avoid syncs
- // todo: replace m.appendReplacement with own impl, so StringBuilder and quoteReplacement not required
+ Matcher m = strict ? strictUnescapePattern.matcher(string)
+ : unescapePattern.matcher(string); // &(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);?
+ StringBuffer accum = new StringBuffer(string.length()); // Matcher.appendReplacement
+ // needs a StringBuffer; a
+ // StringBuilder would avoid
+ // the synchronization
+ // todo: replace m.appendReplacement with own impl, so StringBuilder and
+ // quoteReplacement not required
while (m.find()) {
int charval = -1;
String num = m.group(3);
if (num != null) {
try {
- int base = m.group(2) != null ? 16 : 10; // 2 is hex indicator
+ int base = m.group(2) != null ? 16 : 10; // 2 is hex
+ // indicator
charval = Integer.valueOf(num, base);
} catch (NumberFormatException e) {
} // skip
} else {
String name = m.group(1);
- if (full.containsKey(name))
+ if (full.containsKey(name)) {
charval = full.get(name);
+ }
}
if (charval != -1 || charval > 0xFFFF) { // out of range
String c = Character.toString((char) charval);
m.appendReplacement(accum, Matcher.quoteReplacement(c));
} else {
- m.appendReplacement(accum, Matcher.quoteReplacement(m.group(0))); // replace with original string
+ m.appendReplacement(accum, Matcher.quoteReplacement(m.group(0))); // replace
+ // with
+ // original
+ // string
}
}
m.appendTail(accum);
@@ -126,22 +153,23 @@ public class Entities {
}
// xhtml has restricted entities
- private static final Object[][] xhtmlArray = {
- {"quot", 0x00022},
- {"amp", 0x00026},
- {"apos", 0x00027},
- {"lt", 0x0003C},
- {"gt", 0x0003E}
- };
+ private static final Object[][] xhtmlArray = { { "quot", 0x00022 },
+ { "amp", 0x00026 }, { "apos", 0x00027 }, { "lt", 0x0003C },
+ { "gt", 0x0003E } };
static {
xhtmlByVal = new HashMap<Character, String>();
- baseByVal = toCharacterKey(loadEntities("entities-base.properties")); // most common / default
- full = loadEntities("entities-full.properties"); // extended and overblown.
+ baseByVal = toCharacterKey(loadEntities("entities-base.properties")); // most
+ // common
+ // /
+ // default
+ full = loadEntities("entities-full.properties"); // extended and
+ // overblown.
fullByVal = toCharacterKey(full);
for (Object[] entity : xhtmlArray) {
- Character c = Character.valueOf((char) ((Integer) entity[1]).intValue());
+ Character c = Character.valueOf((char) ((Integer) entity[1])
+ .intValue());
xhtmlByVal.put(c, ((String) entity[0]));
}
}
@@ -154,27 +182,32 @@ public class Entities {
properties.load(in);
in.close();
} catch (IOException e) {
- throw new MissingResourceException("Error loading entities resource: " + e.getMessage(), "Entities", filename);
+ throw new MissingResourceException(
+ "Error loading entities resource: " + e.getMessage(),
+ "Entities", filename);
}
- for (Map.Entry entry: properties.entrySet()) {
- Character val = Character.valueOf((char) Integer.parseInt((String) entry.getValue(), 16));
+ for (Map.Entry entry : properties.entrySet()) {
+ Character val = Character.valueOf((char) Integer.parseInt(
+ (String) entry.getValue(), 16));
String name = (String) entry.getKey();
entities.put(name, val);
}
return entities;
}
- private static Map<Character, String> toCharacterKey(Map<String, Character> inMap) {
+ private static Map<Character, String> toCharacterKey(
+ Map<String, Character> inMap) {
Map<Character, String> outMap = new HashMap<Character, String>();
- for (Map.Entry<String, Character> entry: inMap.entrySet()) {
+ for (Map.Entry<String, Character> entry : inMap.entrySet()) {
Character character = entry.getValue();
String name = entry.getKey();
if (outMap.containsKey(character)) {
// dupe, prefer the lower case version
- if (name.toLowerCase().equals(name))
+ if (name.toLowerCase().equals(name)) {
outMap.put(character, name);
+ }
} else {
outMap.put(character, name);
}