summary | refs | log | tree | commit | diff | stats
path: root/src/com/gitblit/utils/StringUtils.java
diff options
context:
space:
mode:
author James Moger <james.moger@gitblit.com> 2012-06-07 17:30:18 -0400
committer James Moger <james.moger@gitblit.com> 2012-06-07 17:30:18 -0400
commit ae9e157ef4e6a3708489725d4436cc15d273308f (patch)
tree 5d93ddebf97927205789d702ae8eebc5e613ff08 /src/com/gitblit/utils/StringUtils.java
parent 47867891efc2aa996fa78f7c224e46d65dc04457 (diff)
download gitblit-ae9e157ef4e6a3708489725d4436cc15d273308f.tar.gz
gitblit-ae9e157ef4e6a3708489725d4436cc15d273308f.zip
Try multiple encodings when working with string blobs (issue 97)
Diffstat (limited to 'src/com/gitblit/utils/StringUtils.java')
-rw-r--r-- src/com/gitblit/utils/StringUtils.java 42
1 files changed, 42 insertions, 0 deletions
diff --git a/src/com/gitblit/utils/StringUtils.java b/src/com/gitblit/utils/StringUtils.java
index 2c357241..baed5f0c 100644
--- a/src/com/gitblit/utils/StringUtils.java
+++ b/src/com/gitblit/utils/StringUtils.java
@@ -16,13 +16,23 @@
package com.gitblit.utils;
import java.io.UnsupportedEncodingException;
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.charset.CharacterCodingException;
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.IllegalCharsetNameException;
+import java.nio.charset.UnsupportedCharsetException;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.ArrayList;
+import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
+import java.util.LinkedHashSet;
import java.util.List;
+import java.util.Set;
import java.util.regex.PatternSyntaxException;
/**
@@ -550,4 +560,36 @@ public class StringUtils {
// remember to append any characters to the right of a match
return sb.toString();
}
+
+ /**
+ * Decodes a string by trying several charsets until one does not throw a
+ * coding exception. Last resort is to interpret as UTF-8 with illegal
+ * character substitution.
+ *
+ * @param content
+ * @param charsets optional
+ * @return a string
+ */
+ public static String decodeString(byte [] content, String... charsets) {
+ Set<String> sets = new LinkedHashSet<String>();
+ if (!ArrayUtils.isEmpty(charsets)) {
+ sets.addAll(Arrays.asList(charsets));
+ }
+ sets.addAll(Arrays.asList("UTF-8", "ISO-8859-1", Charset.defaultCharset().name()));
+ for (String charset : sets) {
+ try {
+ Charset cs = Charset.forName(charset);
+ CharsetDecoder decoder = cs.newDecoder();
+ CharBuffer buffer = decoder.decode(ByteBuffer.wrap(content));
+ return buffer.toString();
+ } catch (CharacterCodingException e) {
+ // ignore and advance to the next charset
+ } catch (IllegalCharsetNameException e) {
+ // ignore illegal charset names
+ } catch (UnsupportedCharsetException e) {
+ // ignore unsupported charsets
+ }
+ }
+ return new String(content, Charset.forName("UTF-8"));
+ }
} \ No newline at end of file