diff options
author | PJ Fanning <fanningpj@apache.org> | 2017-11-21 22:10:48 +0000 |
---|---|---|
committer | PJ Fanning <fanningpj@apache.org> | 2017-11-21 22:10:48 +0000 |
commit | acc7b356ee8310c3f4fc694426bec365d266c370 (patch) | |
tree | ffaa147b4547588cb25b3a623eace22267483c12 /src/java/org/apache/poi/util | |
parent | 936f29af69699ee620d2ce5fa4a7110aabf68ca1 (diff) | |
download | poi-acc7b356ee8310c3f4fc694426bec365d266c370.tar.gz poi-acc7b356ee8310c3f4fc694426bec365d266c370.zip |
add forbidden api checks for non-Locale toLowerCase and toUpperCase
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1815994 13f79535-47bb-0310-9956-ffa450edef68
Diffstat (limited to 'src/java/org/apache/poi/util')
-rw-r--r-- | src/java/org/apache/poi/util/StringUtil.java | 916 |
1 files changed, 470 insertions, 446 deletions
diff --git a/src/java/org/apache/poi/util/StringUtil.java b/src/java/org/apache/poi/util/StringUtil.java index 32b5acf9ab..8ba6be56ff 100644 --- a/src/java/org/apache/poi/util/StringUtil.java +++ b/src/java/org/apache/poi/util/StringUtil.java @@ -23,7 +23,7 @@ import java.util.Iterator; import java.util.Map; /** - * Collection of string handling utilities + * Collection of string handling utilities */ @Internal public class StringUtil { @@ -36,31 +36,31 @@ public class StringUtil { public static final Charset WIN_1252 = Charset.forName("cp1252"); public static final Charset BIG5 = Charset.forName("Big5"); - private static Map<Integer,Integer> msCodepointToUnicode; + private static Map<Integer, Integer> msCodepointToUnicode; private StringUtil() { // no instances of this class } /** - * Given a byte array of 16-bit unicode characters in Little Endian - * format (most important byte last), return a Java String representation - * of it. - * + * Given a byte array of 16-bit unicode characters in Little Endian + * format (most important byte last), return a Java String representation + * of it. + * <p> * { 0x16, 0x00 } -0x16 * - * @param string the byte array to be converted - * @param offset the initial offset into the - * byte array. it is assumed that string[ offset ] and string[ offset + - * 1 ] contain the first 16-bit unicode character - * @param len the length of the final string + * @param string the byte array to be converted + * @param offset the initial offset into the + * byte array. it is assumed that string[ offset ] and string[ offset + + * 1 ] contain the first 16-bit unicode character + * @param len the length of the final string * @return the converted string, never <code>null</code>. - * @exception ArrayIndexOutOfBoundsException if offset is out of bounds for - * the byte array (i.e., is negative or is greater than or equal to - * string.length) - * @exception IllegalArgumentException if len is too large (i.e., - * there is not enough data in string to create a String of that - * length) + * @throws ArrayIndexOutOfBoundsException if offset is out of bounds for + * the byte array (i.e., is negative or is greater than or equal to + * string.length) + * @throws IllegalArgumentException if len is too large (i.e., + * there is not enough data in string to create a String of that + * length) */ public static String getFromUnicodeLE( final byte[] string, @@ -78,17 +78,19 @@ public class StringUtil { } /** - * Given a byte array of 16-bit unicode characters in little endian - * format (most important byte last), return a Java String representation - * of it. - * + * Given a byte array of 16-bit unicode characters in little endian + * format (most important byte last), return a Java String representation + * of it. + * <p> * { 0x16, 0x00 } -0x16 * - * @param string the byte array to be converted + * @param string the byte array to be converted * @return the converted string, never <code>null</code> */ public static String getFromUnicodeLE(byte[] string) { - if(string.length == 0) { return ""; } + if (string.length == 0) { + return ""; + } return getFromUnicodeLE(string, 0, string.length / 2); } @@ -134,7 +136,7 @@ public class StringUtil { * <li>byte[]/char[] characterData</li> * </ol> * For this encoding, the is16BitFlag is always present even if nChars==0. - * + * <p> * This structure is also known as a XLUnicodeString. */ public static String readUnicodeString(LittleEndianInput in) { @@ -146,6 +148,7 @@ public class StringUtil { } return readUnicodeLE(in, nChars); } + /** * InputStream <tt>in</tt> is expected to contain: * <ol> @@ -165,6 +168,7 @@ public class StringUtil { } return readUnicodeLE(in, nChars); } + /** * OutputStream <tt>out</tt> will get: * <ol> @@ -185,114 +189,116 @@ public class StringUtil { putCompressedUnicode(value, out); } } - /** - * OutputStream <tt>out</tt> will get: - * <ol> - * <li>byte is16BitFlag</li> - * <li>byte[]/char[] characterData</li> - * </ol> - * For this encoding, the is16BitFlag is always present even if nChars==0. - * <br> - * This method should be used when the nChars field is <em>not</em> stored - * as a ushort immediately before the is16BitFlag. Otherwise, {@link - * #writeUnicodeString(LittleEndianOutput, String)} can be used. - */ - public static void writeUnicodeStringFlagAndData(LittleEndianOutput out, String value) { - boolean is16Bit = hasMultibyte(value); - out.writeByte(is16Bit ? 0x01 : 0x00); - if (is16Bit) { - putUnicodeLE(value, out); - } else { - putCompressedUnicode(value, out); - } - } - - /** - * @return the number of bytes that would be written by {@link #writeUnicodeString(LittleEndianOutput, String)} - */ - public static int getEncodedSize(String value) { - int result = 2 + 1; - result += value.length() * (StringUtil.hasMultibyte(value) ? 2 : 1); - return result; - } - - /** - * Takes a unicode (java) string, and returns it as 8 bit data (in ISO-8859-1 - * codepage). - * (In Excel terms, write compressed 8 bit unicode) - * - * @param input the String containing the data to be written - * @param output the byte array to which the data is to be written - * @param offset an offset into the byte arrat at which the data is start - * when written - */ - public static void putCompressedUnicode(String input, byte[] output, int offset) { - byte[] bytes = input.getBytes(ISO_8859_1); - System.arraycopy(bytes, 0, output, offset, bytes.length); - } - - public static void putCompressedUnicode(String input, LittleEndianOutput out) { - byte[] bytes = input.getBytes(ISO_8859_1); - out.write(bytes); - } - - /** - * Takes a unicode string, and returns it as little endian (most - * important byte last) bytes in the supplied byte array. - * (In Excel terms, write uncompressed unicode) - * - * @param input the String containing the unicode data to be written - * @param output the byte array to hold the uncompressed unicode, should be twice the length of the String - * @param offset the offset to start writing into the byte array - */ - public static void putUnicodeLE(String input, byte[] output, int offset) { - byte[] bytes = input.getBytes(UTF16LE); - System.arraycopy(bytes, 0, output, offset, bytes.length); - } - public static void putUnicodeLE(String input, LittleEndianOutput out) { - byte[] bytes = input.getBytes(UTF16LE); - out.write(bytes); - } - - public static String readUnicodeLE(LittleEndianInput in, int nChars) { - byte[] bytes = IOUtils.safelyAllocate(nChars*2, MAX_RECORD_LENGTH); + + /** + * OutputStream <tt>out</tt> will get: + * <ol> + * <li>byte is16BitFlag</li> + * <li>byte[]/char[] characterData</li> + * </ol> + * For this encoding, the is16BitFlag is always present even if nChars==0. + * <br> + * This method should be used when the nChars field is <em>not</em> stored + * as a ushort immediately before the is16BitFlag. Otherwise, {@link + * #writeUnicodeString(LittleEndianOutput, String)} can be used. + */ + public static void writeUnicodeStringFlagAndData(LittleEndianOutput out, String value) { + boolean is16Bit = hasMultibyte(value); + out.writeByte(is16Bit ? 0x01 : 0x00); + if (is16Bit) { + putUnicodeLE(value, out); + } else { + putCompressedUnicode(value, out); + } + } + + /** + * @return the number of bytes that would be written by {@link #writeUnicodeString(LittleEndianOutput, String)} + */ + public static int getEncodedSize(String value) { + int result = 2 + 1; + result += value.length() * (StringUtil.hasMultibyte(value) ? 2 : 1); + return result; + } + + /** + * Takes a unicode (java) string, and returns it as 8 bit data (in ISO-8859-1 + * codepage). + * (In Excel terms, write compressed 8 bit unicode) + * + * @param input the String containing the data to be written + * @param output the byte array to which the data is to be written + * @param offset an offset into the byte arrat at which the data is start + * when written + */ + public static void putCompressedUnicode(String input, byte[] output, int offset) { + byte[] bytes = input.getBytes(ISO_8859_1); + System.arraycopy(bytes, 0, output, offset, bytes.length); + } + + public static void putCompressedUnicode(String input, LittleEndianOutput out) { + byte[] bytes = input.getBytes(ISO_8859_1); + out.write(bytes); + } + + /** + * Takes a unicode string, and returns it as little endian (most + * important byte last) bytes in the supplied byte array. + * (In Excel terms, write uncompressed unicode) + * + * @param input the String containing the unicode data to be written + * @param output the byte array to hold the uncompressed unicode, should be twice the length of the String + * @param offset the offset to start writing into the byte array + */ + public static void putUnicodeLE(String input, byte[] output, int offset) { + byte[] bytes = input.getBytes(UTF16LE); + System.arraycopy(bytes, 0, output, offset, bytes.length); + } + + public static void putUnicodeLE(String input, LittleEndianOutput out) { + byte[] bytes = input.getBytes(UTF16LE); + out.write(bytes); + } + + public static String readUnicodeLE(LittleEndianInput in, int nChars) { + byte[] bytes = IOUtils.safelyAllocate(nChars * 2, MAX_RECORD_LENGTH); in.readFully(bytes); return new String(bytes, UTF16LE); - } - - /** - * @return the encoding we want to use, currently hardcoded to ISO-8859-1 - */ - public static String getPreferredEncoding() { - return ISO_8859_1.name(); - } - - /** - * check the parameter has multibyte character - * - * @param value string to check - * @return boolean result true:string has at least one multibyte character - */ - public static boolean hasMultibyte(String value) { - if (value == null) - return false; - for (char c : value.toCharArray()) { - if (c > 0xFF) { - return true; - } - } - return false; - } - - /** - * Checks to see if a given String needs to be represented as Unicode - * - * @param value The string to look at. - * @return true if string needs Unicode to be represented. - */ - public static boolean isUnicodeString(final String value) { + } + + /** + * @return the encoding we want to use, currently hardcoded to ISO-8859-1 + */ + public static String getPreferredEncoding() { + return ISO_8859_1.name(); + } + + /** + * check the parameter has multibyte character + * + * @param value string to check + * @return boolean result true:string has at least one multibyte character + */ + public static boolean hasMultibyte(String value) { + if (value == null) + return false; + for (char c : value.toCharArray()) { + if (c > 0xFF) { + return true; + } + } + return false; + } + + /** + * Checks to see if a given String needs to be represented as Unicode + * + * @param value The string to look at. + * @return true if string needs Unicode to be represented. + */ + public static boolean isUnicodeString(final String value) { return !value.equals(new String(value.getBytes(ISO_8859_1), ISO_8859_1)); - } + } /** * Tests if the string starts with the specified prefix, ignoring case consideration. @@ -300,7 +306,7 @@ public class StringUtil { public static boolean startsWithIgnoreCase(String haystack, String prefix) { return haystack.regionMatches(true, 0, prefix, 0, prefix.length()); } - + /** * Tests if the string ends with the specified suffix, ignoring case consideration. */ @@ -310,320 +316,338 @@ public class StringUtil { return haystack.regionMatches(true, start, suffix, 0, length); } - /** - * An Iterator over an array of Strings. - */ - public static class StringsIterator implements Iterator<String> { - private String[] strings = {}; - private int position; - public StringsIterator(String[] strings) { - if (strings != null) { - this.strings = strings.clone(); - } - } - - public boolean hasNext() { - return position < strings.length; - } - public String next() { - int ourPos = position++; - if(ourPos >= strings.length) { - throw new ArrayIndexOutOfBoundsException(ourPos); - } - return strings[ourPos]; - } - public void remove() {} - } - - - /** - * Some strings may contain encoded characters of the unicode private use area. - * Currently the characters of the symbol fonts are mapped to the corresponding - * characters in the normal unicode range. - * - * @param string the original string - * @return the string with mapped characters - * - * @see <a href="http://www.alanwood.net/unicode/private_use_area.html#symbol">Private Use Area (symbol)</a> - * @see <a href="http://www.alanwood.net/demos/symbol.html">Symbol font - Unicode alternatives for Greek and special characters in HTML</a> - */ - public static String mapMsCodepointString(String string) { - if (string == null || string.isEmpty()) return string; - initMsCodepointMap(); - - StringBuilder sb = new StringBuilder(); - final int length = string.length(); - for (int offset = 0; offset < length; ) { - Integer msCodepoint = string.codePointAt(offset); - Integer uniCodepoint = msCodepointToUnicode.get(msCodepoint); - sb.appendCodePoint(uniCodepoint == null ? msCodepoint : uniCodepoint); - offset += Character.charCount(msCodepoint); - } - - return sb.toString(); - } - - public static synchronized void mapMsCodepoint(int msCodepoint, int unicodeCodepoint) { - initMsCodepointMap(); - msCodepointToUnicode.put(msCodepoint, unicodeCodepoint); - } - - private static synchronized void initMsCodepointMap() { - if (msCodepointToUnicode != null) return; - msCodepointToUnicode = new HashMap<>(); - int i=0xF020; - for (int ch : symbolMap_f020) { - msCodepointToUnicode.put(i++, ch); - } - i = 0xf0a0; - for (int ch : symbolMap_f0a0) { - msCodepointToUnicode.put(i++, ch); - } - } - - private static final int symbolMap_f020[] = { - ' ', // 0xf020 space - '!', // 0xf021 exclam - 8704, // 0xf022 universal - '#', // 0xf023 numbersign - 8707, // 0xf024 existential - '%', // 0xf025 percent - '&', // 0xf026 ampersand - 8717, // 0xf027 suchthat - '(', // 0xf028 parenleft - ')', // 0xf029 parentright - 8727, // 0xf02a asteriskmath - '+', // 0xf02b plus - ',', // 0xf02c comma - 8722, // 0xf02d minus sign (long -) - '.', // 0xf02e period - '/', // 0xf02f slash - '0', // 0xf030 0 - '1', // 0xf031 1 - '2', // 0xf032 2 - '3', // 0xf033 3 - '4', // 0xf034 4 - '5', // 0xf035 5 - '6', // 0xf036 6 - '7', // 0xf037 7 - '8', // 0xf038 8 - '9', // 0xf039 9 - ':', // 0xf03a colon - ';', // 0xf03b semicolon - '<', // 0xf03c less - '=', // 0xf03d equal - '>', // 0xf03e greater - '?', // 0xf03f question - 8773, // 0xf040 congruent - 913, // 0xf041 alpha (upper) - 914, // 0xf042 beta (upper) - 935, // 0xf043 chi (upper) - 916, // 0xf044 delta (upper) - 917, // 0xf045 epsilon (upper) - 934, // 0xf046 phi (upper) - 915, // 0xf047 gamma (upper) - 919, // 0xf048 eta (upper) - 921, // 0xf049 iota (upper) - 977, // 0xf04a theta1 (lower) - 922, // 0xf04b kappa (upper) - 923, // 0xf04c lambda (upper) - 924, // 0xf04d mu (upper) - 925, // 0xf04e nu (upper) - 927, // 0xf04f omicron (upper) - 928, // 0xf050 pi (upper) - 920, // 0xf051 theta (upper) - 929, // 0xf052 rho (upper) - 931, // 0xf053 sigma (upper) - 932, // 0xf054 tau (upper) - 933, // 0xf055 upsilon (upper) - 962, // 0xf056 simga1 (lower) - 937, // 0xf057 omega (upper) - 926, // 0xf058 xi (upper) - 936, // 0xf059 psi (upper) - 918, // 0xf05a zeta (upper) - '[', // 0xf05b bracketleft - 8765, // 0xf05c therefore - ']', // 0xf05d bracketright - 8869, // 0xf05e perpendicular - '_', // 0xf05f underscore - ' ', // 0xf060 radicalex (doesn't exist in unicode) - 945, // 0xf061 alpha (lower) - 946, // 0xf062 beta (lower) - 967, // 0xf063 chi (lower) - 948, // 0xf064 delta (lower) - 949, // 0xf065 epsilon (lower) - 966, // 0xf066 phi (lower) - 947, // 0xf067 gamma (lower) - 951, // 0xf068 eta (lower) - 953, // 0xf069 iota (lower) - 981, // 0xf06a phi1 (lower) - 954, // 0xf06b kappa (lower) - 955, // 0xf06c lambda (lower) - 956, // 0xf06d mu (lower) - 957, // 0xf06e nu (lower) - 959, // 0xf06f omnicron (lower) - 960, // 0xf070 pi (lower) - 952, // 0xf071 theta (lower) - 961, // 0xf072 rho (lower) - 963, // 0xf073 sigma (lower) - 964, // 0xf074 tau (lower) - 965, // 0xf075 upsilon (lower) - 982, // 0xf076 piv (lower) - 969, // 0xf077 omega (lower) - 958, // 0xf078 xi (lower) - 968, // 0xf079 psi (lower) - 950, // 0xf07a zeta (lower) - '{', // 0xf07b braceleft - '|', // 0xf07c bar - '}', // 0xf07d braceright - 8764, // 0xf07e similar '~' - ' ', // 0xf07f not defined - }; - - private static final int symbolMap_f0a0[] = { - 8364, // 0xf0a0 not defined / euro symbol - 978, // 0xf0a1 upsilon1 (upper) - 8242, // 0xf0a2 minute - 8804, // 0xf0a3 lessequal - 8260, // 0xf0a4 fraction - 8734, // 0xf0a5 infinity - 402, // 0xf0a6 florin - 9827, // 0xf0a7 club - 9830, // 0xf0a8 diamond - 9829, // 0xf0a9 heart - 9824, // 0xf0aa spade - 8596, // 0xf0ab arrowboth - 8591, // 0xf0ac arrowleft - 8593, // 0xf0ad arrowup - 8594, // 0xf0ae arrowright - 8595, // 0xf0af arrowdown - 176, // 0xf0b0 degree - 177, // 0xf0b1 plusminus - 8243, // 0xf0b2 second - 8805, // 0xf0b3 greaterequal - 215, // 0xf0b4 multiply - 181, // 0xf0b5 proportional - 8706, // 0xf0b6 partialdiff - 8729, // 0xf0b7 bullet - 247, // 0xf0b8 divide - 8800, // 0xf0b9 notequal - 8801, // 0xf0ba equivalence - 8776, // 0xf0bb approxequal - 8230, // 0xf0bc ellipsis - 9168, // 0xf0bd arrowvertex - 9135, // 0xf0be arrowhorizex - 8629, // 0xf0bf carriagereturn - 8501, // 0xf0c0 aleph - 8475, // 0xf0c1 Ifraktur - 8476, // 0xf0c2 Rfraktur - 8472, // 0xf0c3 weierstrass - 8855, // 0xf0c4 circlemultiply - 8853, // 0xf0c5 circleplus - 8709, // 0xf0c6 emptyset - 8745, // 0xf0c7 intersection - 8746, // 0xf0c8 union - 8835, // 0xf0c9 propersuperset - 8839, // 0xf0ca reflexsuperset - 8836, // 0xf0cb notsubset - 8834, // 0xf0cc propersubset - 8838, // 0xf0cd reflexsubset - 8712, // 0xf0ce element - 8713, // 0xf0cf notelement - 8736, // 0xf0d0 angle - 8711, // 0xf0d1 gradient - 174, // 0xf0d2 registerserif - 169, // 0xf0d3 copyrightserif - 8482, // 0xf0d4 trademarkserif - 8719, // 0xf0d5 product - 8730, // 0xf0d6 radical - 8901, // 0xf0d7 dotmath - 172, // 0xf0d8 logicalnot - 8743, // 0xf0d9 logicaland - 8744, // 0xf0da logicalor - 8660, // 0xf0db arrowdblboth - 8656, // 0xf0dc arrowdblleft - 8657, // 0xf0dd arrowdblup - 8658, // 0xf0de arrowdblright - 8659, // 0xf0df arrowdbldown - 9674, // 0xf0e0 lozenge - 9001, // 0xf0e1 angleleft - 174, // 0xf0e2 registersans - 169, // 0xf0e3 copyrightsans - 8482, // 0xf0e4 trademarksans - 8721, // 0xf0e5 summation - 9115, // 0xf0e6 parenlefttp - 9116, // 0xf0e7 parenleftex - 9117, // 0xf0e8 parenleftbt - 9121, // 0xf0e9 bracketlefttp - 9122, // 0xf0ea bracketleftex - 9123, // 0xf0eb bracketleftbt - 9127, // 0xf0ec bracelefttp - 9128, // 0xf0ed braceleftmid - 9129, // 0xf0ee braceleftbt - 9130, // 0xf0ef braceex - ' ', // 0xf0f0 not defined - 9002, // 0xf0f1 angleright - 8747, // 0xf0f2 integral - 8992, // 0xf0f3 integraltp - 9134, // 0xf0f4 integralex - 8993, // 0xf0f5 integralbt - 9118, // 0xf0f6 parenrighttp - 9119, // 0xf0f7 parenrightex - 9120, // 0xf0f8 parenrightbt - 9124, // 0xf0f9 bracketrighttp - 9125, // 0xf0fa bracketrightex - 9126, // 0xf0fb bracketrightbt - 9131, // 0xf0fc bracerighttp - 9132, // 0xf0fd bracerightmid - 9133, // 0xf0fe bracerightbt - ' ', // 0xf0ff not defined - }; - - - // Could be replaced with org.apache.commons.lang3.StringUtils#join - @Internal - public static String join(Object[] array, String separator) { - if (array == null || array.length == 0) return ""; - StringBuilder sb = new StringBuilder(); - sb.append(array[0]); - for (int i=1; i<array.length; i++) { - sb.append(separator).append(array[i]); - } - return sb.toString(); - } - - @Internal - public static String join(Object[] array) { - if (array == null) return ""; - StringBuilder sb = new StringBuilder(); - for (Object o : array) { - sb.append(o); - } - return sb.toString(); - } - - @Internal - public static String join(String separator, Object... array) { - return join(array, separator); - } - - /** - * Count number of occurrences of needle in haystack - * Has same signature as org.apache.commons.lang3.StringUtils#countMatches - * - * @param haystack the CharSequence to check, may be null - * @param needle the character to count the quantity of - * @return the number of occurrences, 0 if the CharSequence is null - */ - public static int countMatches(CharSequence haystack, char needle) { - if (haystack == null) return 0; - int count = 0; - final int length = haystack.length(); - for (int i=0; i<length; i++) { - if (haystack.charAt(i) == needle) { - count++; - } - } - return count; - } + /** + * An Iterator over an array of Strings. + */ + public static class StringsIterator implements Iterator<String> { + private String[] strings = {}; + private int position; + + public StringsIterator(String[] strings) { + if (strings != null) { + this.strings = strings.clone(); + } + } + + public boolean hasNext() { + return position < strings.length; + } + + public String next() { + int ourPos = position++; + if (ourPos >= strings.length) { + throw new ArrayIndexOutOfBoundsException(ourPos); + } + return strings[ourPos]; + } + + public void remove() { + } + } + + @Internal + public static String toLowerCase(char c) { + return Character.toString(c).toLowerCase(LocaleUtil.getUserLocale()); + } + + @Internal + public static String toUpperCase(char c) { + return Character.toString(c).toUpperCase(LocaleUtil.getUserLocale()); + } + + @Internal + public static boolean isUpperCase(char c) { + String s = Character.toString(c); + return s.toUpperCase(LocaleUtil.getUserLocale()).equals(s); + } + + /** + * Some strings may contain encoded characters of the unicode private use area. + * Currently the characters of the symbol fonts are mapped to the corresponding + * characters in the normal unicode range. + * + * @param string the original string + * @return the string with mapped characters + * @see <a href="http://www.alanwood.net/unicode/private_use_area.html#symbol">Private Use Area (symbol)</a> + * @see <a href="http://www.alanwood.net/demos/symbol.html">Symbol font - Unicode alternatives for Greek and special characters in HTML</a> + */ + public static String mapMsCodepointString(String string) { + if (string == null || string.isEmpty()) return string; + initMsCodepointMap(); + + StringBuilder sb = new StringBuilder(); + final int length = string.length(); + for (int offset = 0; offset < length; ) { + Integer msCodepoint = string.codePointAt(offset); + Integer uniCodepoint = msCodepointToUnicode.get(msCodepoint); + sb.appendCodePoint(uniCodepoint == null ? msCodepoint : uniCodepoint); + offset += Character.charCount(msCodepoint); + } + + return sb.toString(); + } + + public static synchronized void mapMsCodepoint(int msCodepoint, int unicodeCodepoint) { + initMsCodepointMap(); + msCodepointToUnicode.put(msCodepoint, unicodeCodepoint); + } + + private static synchronized void initMsCodepointMap() { + if (msCodepointToUnicode != null) return; + msCodepointToUnicode = new HashMap<>(); + int i = 0xF020; + for (int ch : symbolMap_f020) { + msCodepointToUnicode.put(i++, ch); + } + i = 0xf0a0; + for (int ch : symbolMap_f0a0) { + msCodepointToUnicode.put(i++, ch); + } + } + + private static final int symbolMap_f020[] = { + ' ', // 0xf020 space + '!', // 0xf021 exclam + 8704, // 0xf022 universal + '#', // 0xf023 numbersign + 8707, // 0xf024 existential + '%', // 0xf025 percent + '&', // 0xf026 ampersand + 8717, // 0xf027 suchthat + '(', // 0xf028 parenleft + ')', // 0xf029 parentright + 8727, // 0xf02a asteriskmath + '+', // 0xf02b plus + ',', // 0xf02c comma + 8722, // 0xf02d minus sign (long -) + '.', // 0xf02e period + '/', // 0xf02f slash + '0', // 0xf030 0 + '1', // 0xf031 1 + '2', // 0xf032 2 + '3', // 0xf033 3 + '4', // 0xf034 4 + '5', // 0xf035 5 + '6', // 0xf036 6 + '7', // 0xf037 7 + '8', // 0xf038 8 + '9', // 0xf039 9 + ':', // 0xf03a colon + ';', // 0xf03b semicolon + '<', // 0xf03c less + '=', // 0xf03d equal + '>', // 0xf03e greater + '?', // 0xf03f question + 8773, // 0xf040 congruent + 913, // 0xf041 alpha (upper) + 914, // 0xf042 beta (upper) + 935, // 0xf043 chi (upper) + 916, // 0xf044 delta (upper) + 917, // 0xf045 epsilon (upper) + 934, // 0xf046 phi (upper) + 915, // 0xf047 gamma (upper) + 919, // 0xf048 eta (upper) + 921, // 0xf049 iota (upper) + 977, // 0xf04a theta1 (lower) + 922, // 0xf04b kappa (upper) + 923, // 0xf04c lambda (upper) + 924, // 0xf04d mu (upper) + 925, // 0xf04e nu (upper) + 927, // 0xf04f omicron (upper) + 928, // 0xf050 pi (upper) + 920, // 0xf051 theta (upper) + 929, // 0xf052 rho (upper) + 931, // 0xf053 sigma (upper) + 932, // 0xf054 tau (upper) + 933, // 0xf055 upsilon (upper) + 962, // 0xf056 simga1 (lower) + 937, // 0xf057 omega (upper) + 926, // 0xf058 xi (upper) + 936, // 0xf059 psi (upper) + 918, // 0xf05a zeta (upper) + '[', // 0xf05b bracketleft + 8765, // 0xf05c therefore + ']', // 0xf05d bracketright + 8869, // 0xf05e perpendicular + '_', // 0xf05f underscore + ' ', // 0xf060 radicalex (doesn't exist in unicode) + 945, // 0xf061 alpha (lower) + 946, // 0xf062 beta (lower) + 967, // 0xf063 chi (lower) + 948, // 0xf064 delta (lower) + 949, // 0xf065 epsilon (lower) + 966, // 0xf066 phi (lower) + 947, // 0xf067 gamma (lower) + 951, // 0xf068 eta (lower) + 953, // 0xf069 iota (lower) + 981, // 0xf06a phi1 (lower) + 954, // 0xf06b kappa (lower) + 955, // 0xf06c lambda (lower) + 956, // 0xf06d mu (lower) + 957, // 0xf06e nu (lower) + 959, // 0xf06f omnicron (lower) + 960, // 0xf070 pi (lower) + 952, // 0xf071 theta (lower) + 961, // 0xf072 rho (lower) + 963, // 0xf073 sigma (lower) + 964, // 0xf074 tau (lower) + 965, // 0xf075 upsilon (lower) + 982, // 0xf076 piv (lower) + 969, // 0xf077 omega (lower) + 958, // 0xf078 xi (lower) + 968, // 0xf079 psi (lower) + 950, // 0xf07a zeta (lower) + '{', // 0xf07b braceleft + '|', // 0xf07c bar + '}', // 0xf07d braceright + 8764, // 0xf07e similar '~' + ' ', // 0xf07f not defined + }; + + private static final int symbolMap_f0a0[] = { + 8364, // 0xf0a0 not defined / euro symbol + 978, // 0xf0a1 upsilon1 (upper) + 8242, // 0xf0a2 minute + 8804, // 0xf0a3 lessequal + 8260, // 0xf0a4 fraction + 8734, // 0xf0a5 infinity + 402, // 0xf0a6 florin + 9827, // 0xf0a7 club + 9830, // 0xf0a8 diamond + 9829, // 0xf0a9 heart + 9824, // 0xf0aa spade + 8596, // 0xf0ab arrowboth + 8591, // 0xf0ac arrowleft + 8593, // 0xf0ad arrowup + 8594, // 0xf0ae arrowright + 8595, // 0xf0af arrowdown + 176, // 0xf0b0 degree + 177, // 0xf0b1 plusminus + 8243, // 0xf0b2 second + 8805, // 0xf0b3 greaterequal + 215, // 0xf0b4 multiply + 181, // 0xf0b5 proportional + 8706, // 0xf0b6 partialdiff + 8729, // 0xf0b7 bullet + 247, // 0xf0b8 divide + 8800, // 0xf0b9 notequal + 8801, // 0xf0ba equivalence + 8776, // 0xf0bb approxequal + 8230, // 0xf0bc ellipsis + 9168, // 0xf0bd arrowvertex + 9135, // 0xf0be arrowhorizex + 8629, // 0xf0bf carriagereturn + 8501, // 0xf0c0 aleph + 8475, // 0xf0c1 Ifraktur + 8476, // 0xf0c2 Rfraktur + 8472, // 0xf0c3 weierstrass + 8855, // 0xf0c4 circlemultiply + 8853, // 0xf0c5 circleplus + 8709, // 0xf0c6 emptyset + 8745, // 0xf0c7 intersection + 8746, // 0xf0c8 union + 8835, // 0xf0c9 propersuperset + 8839, // 0xf0ca reflexsuperset + 8836, // 0xf0cb notsubset + 8834, // 0xf0cc propersubset + 8838, // 0xf0cd reflexsubset + 8712, // 0xf0ce element + 8713, // 0xf0cf notelement + 8736, // 0xf0d0 angle + 8711, // 0xf0d1 gradient + 174, // 0xf0d2 registerserif + 169, // 0xf0d3 copyrightserif + 8482, // 0xf0d4 trademarkserif + 8719, // 0xf0d5 product + 8730, // 0xf0d6 radical + 8901, // 0xf0d7 dotmath + 172, // 0xf0d8 logicalnot + 8743, // 0xf0d9 logicaland + 8744, // 0xf0da logicalor + 8660, // 0xf0db arrowdblboth + 8656, // 0xf0dc arrowdblleft + 8657, // 0xf0dd arrowdblup + 8658, // 0xf0de arrowdblright + 8659, // 0xf0df arrowdbldown + 9674, // 0xf0e0 lozenge + 9001, // 0xf0e1 angleleft + 174, // 0xf0e2 registersans + 169, // 0xf0e3 copyrightsans + 8482, // 0xf0e4 trademarksans + 8721, // 0xf0e5 summation + 9115, // 0xf0e6 parenlefttp + 9116, // 0xf0e7 parenleftex + 9117, // 0xf0e8 parenleftbt + 9121, // 0xf0e9 bracketlefttp + 9122, // 0xf0ea bracketleftex + 9123, // 0xf0eb bracketleftbt + 9127, // 0xf0ec bracelefttp + 9128, // 0xf0ed braceleftmid + 9129, // 0xf0ee braceleftbt + 9130, // 0xf0ef braceex + ' ', // 0xf0f0 not defined + 9002, // 0xf0f1 angleright + 8747, // 0xf0f2 integral + 8992, // 0xf0f3 integraltp + 9134, // 0xf0f4 integralex + 8993, // 0xf0f5 integralbt + 9118, // 0xf0f6 parenrighttp + 9119, // 0xf0f7 parenrightex + 9120, // 0xf0f8 parenrightbt + 9124, // 0xf0f9 bracketrighttp + 9125, // 0xf0fa bracketrightex + 9126, // 0xf0fb bracketrightbt + 9131, // 0xf0fc bracerighttp + 9132, // 0xf0fd bracerightmid + 9133, // 0xf0fe bracerightbt + ' ', // 0xf0ff not defined + }; + + + // Could be replaced with org.apache.commons.lang3.StringUtils#join + @Internal + public static String join(Object[] array, String separator) { + if (array == null || array.length == 0) return ""; + StringBuilder sb = new StringBuilder(); + sb.append(array[0]); + for (int i = 1; i < array.length; i++) { + sb.append(separator).append(array[i]); + } + return sb.toString(); + } + + @Internal + public static String join(Object[] array) { + if (array == null) return ""; + StringBuilder sb = new StringBuilder(); + for (Object o : array) { + sb.append(o); + } + return sb.toString(); + } + + @Internal + public static String join(String separator, Object... array) { + return join(array, separator); + } + + /** + * Count number of occurrences of needle in haystack + * Has same signature as org.apache.commons.lang3.StringUtils#countMatches + * + * @param haystack the CharSequence to check, may be null + * @param needle the character to count the quantity of + * @return the number of occurrences, 0 if the CharSequence is null + */ + public static int countMatches(CharSequence haystack, char needle) { + if (haystack == null) return 0; + int count = 0; + final int length = haystack.length(); + for (int i = 0; i < length; i++) { + if (haystack.charAt(i) == needle) { + count++; + } + } + return count; + } } |