From: Julien HENRY
Date: Fri, 4 Jan 2013 15:15:54 +0000 (+0100)
Subject: SONARPLUGINS-2560 Set encoding properly when reading content from Sonar server
X-Git-Tag: 2.5-rc1~233
X-Git-Url: https://source.dussan.org/?a=commitdiff_plain;h=48138df1a8a5ff33a1ef71e2fe5ce413b20b95b2;p=sonar-scanner-cli.git

SONARPLUGINS-2560 Set encoding properly when reading content from Sonar server
---

Review note: IOUtils.getCharsetFromContentType now upper-cases the charset with
Locale.ENGLISH instead of the JVM default locale. With a Turkish default locale
"iso-8859-1" would otherwise become "İSO-8859-1", which InputStreamReader
rejects with UnsupportedEncodingException. The index hashes below are those of
the original commit and do not reflect this amendment.

diff --git a/src/main/java/org/sonar/runner/Bootstrapper.java b/src/main/java/org/sonar/runner/Bootstrapper.java
index 47c7452..00d3d8c 100644
--- a/src/main/java/org/sonar/runner/Bootstrapper.java
+++ b/src/main/java/org/sonar/runner/Bootstrapper.java
@@ -19,7 +19,12 @@
   */
 package org.sonar.runner;
 
-import java.io.*;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.Reader;
 import java.net.HttpURLConnection;
 import java.net.MalformedURLException;
 import java.net.URL;
@@ -122,7 +127,11 @@ class Bootstrapper {
   String remoteContent(String path) throws IOException {
     String fullUrl = serverUrl + path;
     HttpURLConnection conn = newHttpConnection(new URL(fullUrl));
-    Reader reader = new InputStreamReader((InputStream) conn.getContent());
+    String charset = IOUtils.getCharsetFromContentType(conn.getContentType());
+    if (charset == null || "".equals(charset)) {
+      charset = "UTF-8";
+    }
+    Reader reader = new InputStreamReader((InputStream) conn.getContent(), charset);
     try {
       int statusCode = conn.getResponseCode();
       if (statusCode != HttpURLConnection.HTTP_OK) {
diff --git a/src/main/java/org/sonar/runner/IOUtils.java b/src/main/java/org/sonar/runner/IOUtils.java
index 90448ef..9109f3f 100644
--- a/src/main/java/org/sonar/runner/IOUtils.java
+++ b/src/main/java/org/sonar/runner/IOUtils.java
@@ -19,7 +19,16 @@
   */
 package org.sonar.runner;
 
-import java.io.*;
+import java.io.Closeable;
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.io.Reader;
+import java.io.StringWriter;
+import java.io.Writer;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
 
 /**
  * Internal class used only by the Runner as we don't want it to depend on third-party libs.
@@ -36,6 +45,8 @@ final class IOUtils {
    */
   private static final int DEFAULT_BUFFER_SIZE = 1024 * 4;
 
+  private static final Pattern CHARSET_PATTERN = Pattern.compile("(?i)\\bcharset=\\s*\"?([^\\s;\"]*)");
+
   /**
    * Unconditionally close a Closeable.
    */
@@ -99,4 +110,22 @@ final class IOUtils {
     }
   }
 
+  /**
+   * Parse out a charset from a content type header.
+   *
+   * @param contentType e.g. "text/html; charset=EUC-JP"
+   * @return "EUC-JP", or null if no charset parameter is found ("" if its value is empty). Trimmed and uppercased independently of the default locale.
+   */
+  static String getCharsetFromContentType(String contentType) {
+    if (contentType == null) {
+      return null;
+    }
+
+    Matcher m = CHARSET_PATTERN.matcher(contentType);
+    if (m.find()) {
+      return m.group(1).trim().toUpperCase(java.util.Locale.ENGLISH);
+    }
+    return null;
+  }
+
 }