diff options
author | James Moger <james.moger@gitblit.com> | 2012-06-07 17:30:18 -0400 |
---|---|---|
committer | James Moger <james.moger@gitblit.com> | 2012-06-07 17:30:18 -0400 |
commit | ae9e157ef4e6a3708489725d4436cc15d273308f (patch) | |
tree | 5d93ddebf97927205789d702ae8eebc5e613ff08 /src/com/gitblit/utils/StringUtils.java | |
parent | 47867891efc2aa996fa78f7c224e46d65dc04457 (diff) | |
download | gitblit-ae9e157ef4e6a3708489725d4436cc15d273308f.tar.gz gitblit-ae9e157ef4e6a3708489725d4436cc15d273308f.zip |
Try multiple encodings when working with string blobs (issue 97)
Diffstat (limited to 'src/com/gitblit/utils/StringUtils.java')
-rw-r--r-- | src/com/gitblit/utils/StringUtils.java | 42 |
1 files changed, 42 insertions, 0 deletions
diff --git a/src/com/gitblit/utils/StringUtils.java b/src/com/gitblit/utils/StringUtils.java index 2c357241..baed5f0c 100644 --- a/src/com/gitblit/utils/StringUtils.java +++ b/src/com/gitblit/utils/StringUtils.java @@ -16,13 +16,23 @@ package com.gitblit.utils;
import java.io.UnsupportedEncodingException;
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.charset.CharacterCodingException;
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.IllegalCharsetNameException;
+import java.nio.charset.UnsupportedCharsetException;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.ArrayList;
+import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
+import java.util.LinkedHashSet;
import java.util.List;
+import java.util.Set;
import java.util.regex.PatternSyntaxException;
/**
@@ -550,4 +560,36 @@ public class StringUtils { // remember to append any characters to the right of a match
return sb.toString();
}
+
+ /**
+ * Decodes a string by trying several charsets until one does not throw a
+ * coding exception. Last resort is to interpret as UTF-8 with illegal
+ * character substitution.
+ * <p>
+ * Candidates are tried in insertion order with exact-duplicate names
+ * dropped: first any caller-supplied names, then "UTF-8", "ISO-8859-1" and
+ * the name of the platform default charset. A candidate whose name is
+ * illegal or unsupported is skipped, as is a candidate whose decoder
+ * reports a coding exception for the given bytes.
+ * <p>
+ * NOTE(review): ISO-8859-1 looks like it can decode any byte sequence; if
+ * so, the platform default candidate and the final UTF-8 fallback are
+ * never reached - confirm against the Charset documentation.
+ * NOTE(review): a null entry in charsets presumably makes Charset.forName
+ * throw an IllegalArgumentException that none of the catch clauses below
+ * handle - confirm, and verify that callers never pass null names.
+ * NOTE(review): content is presumably required to be non-null - confirm.
+ *
+ * @param content the bytes to decode
+ * @param charsets optional charset names to try before the built-in ones
+ * @return a string
+ */
+ public static String decodeString(byte [] content, String... charsets) {
+ Set<String> sets = new LinkedHashSet<String>(); // keeps insertion order, so candidates are tried in the order added
+ if (!ArrayUtils.isEmpty(charsets)) {
+ sets.addAll(Arrays.asList(charsets)); // caller-supplied names are tried first
+ }
+ sets.addAll(Arrays.asList("UTF-8", "ISO-8859-1", Charset.defaultCharset().name())); // built-in candidates follow
+ for (String charset : sets) {
+ try {
+ Charset cs = Charset.forName(charset);
+ CharsetDecoder decoder = cs.newDecoder();
+ CharBuffer buffer = decoder.decode(ByteBuffer.wrap(content));
+ return buffer.toString(); // first candidate that decodes without an exception wins
+ } catch (CharacterCodingException e) {
+ // ignore and advance to the next charset: the bytes could not be decoded with this one
+ } catch (IllegalCharsetNameException e) {
+ // ignore illegal charset names and advance to the next candidate
+ } catch (UnsupportedCharsetException e) {
+ // ignore unsupported charsets and advance to the next candidate
+ }
+ }
+ return new String(content, Charset.forName("UTF-8")); // reached only when no candidate returned a result
+ }
}
\ No newline at end of file |