source.dussan.org Git - gitblit.git/commitdiff
Strip hidden UTF-8 BOM from string content
author: James Moger <james.moger@gitblit.com>
Fri, 10 Aug 2012 21:34:47 +0000 (17:34 -0400)
committer: James Moger <james.moger@gitblit.com>
Fri, 10 Aug 2012 21:34:47 +0000 (17:34 -0400)
src/com/gitblit/utils/StringUtils.java

index baed5f0c5912f175d705d935e0cf264b002e8ef0..412a920fe54db492124d45300d774eff19ede060 100644 (file)
@@ -575,13 +575,15 @@ public class StringUtils {
                if (!ArrayUtils.isEmpty(charsets)) {\r
                        sets.addAll(Arrays.asList(charsets));\r
                }\r
+               String value = null;\r
                sets.addAll(Arrays.asList("UTF-8", "ISO-8859-1", Charset.defaultCharset().name()));\r
                for (String charset : sets) {\r
                        try {\r
                                Charset cs = Charset.forName(charset);\r
                                CharsetDecoder decoder = cs.newDecoder();\r
                                CharBuffer buffer = decoder.decode(ByteBuffer.wrap(content));\r
-                               return buffer.toString();\r
+                               value = buffer.toString();\r
+                               break;\r
                        } catch (CharacterCodingException e) {\r
                                // ignore and advance to the next charset\r
                        } catch (IllegalCharsetNameException e) {\r
@@ -590,6 +592,11 @@ public class StringUtils {
                                // ignore unsupported charsets\r
                        }\r
                }\r
-               return new String(content, Charset.forName("UTF-8"));\r
+               value = new String(content, Charset.forName("UTF-8"));\r
+               if (value.startsWith("\uFEFF")) {\r
+                       // strip UTF-8 BOM\r
+            return value.substring(1);\r
+        }\r
+               return value;\r
        }\r
 }
\ No newline at end of file