From: James Moger
Date: Fri, 10 Aug 2012 21:34:47 +0000 (-0400)
Subject: Strip hidden UTF-8 BOM from string content
X-Git-Tag: v1.1.0~25
X-Git-Url: https://source.dussan.org/?a=commitdiff_plain;h=086c0447227a4075b66b976088542fee113b0d4f;p=gitblit.git

Strip hidden UTF-8 BOM from string content
---

diff --git a/src/com/gitblit/utils/StringUtils.java b/src/com/gitblit/utils/StringUtils.java
index baed5f0c..412a920f 100644
--- a/src/com/gitblit/utils/StringUtils.java
+++ b/src/com/gitblit/utils/StringUtils.java
@@ -575,13 +575,15 @@ public class StringUtils {
         if (!ArrayUtils.isEmpty(charsets)) {
             sets.addAll(Arrays.asList(charsets));
         }
+        String value = null;
         sets.addAll(Arrays.asList("UTF-8", "ISO-8859-1", Charset.defaultCharset().name()));
         for (String charset : sets) {
             try {
                 Charset cs = Charset.forName(charset);
                 CharsetDecoder decoder = cs.newDecoder();
                 CharBuffer buffer = decoder.decode(ByteBuffer.wrap(content));
-                return buffer.toString();
+                value = buffer.toString();
+                break;
             } catch (CharacterCodingException e) {
                 // ignore and advance to the next charset
             } catch (IllegalCharsetNameException e) {
@@ -590,6 +592,11 @@ public class StringUtils {
                 // ignore unsupported charsets
             }
         }
-        return new String(content, Charset.forName("UTF-8"));
+        value = new String(content, Charset.forName("UTF-8"));
+        if (value.startsWith("\uFEFF")) {
+            // strip UTF-8 BOM
+            return value.substring(1);
+        }
+        return value;
     }
 }
\ No newline at end of file