*/
package org.sonar.scanner.scan.filesystem;
+import com.google.common.annotations.VisibleForTesting;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.sonar.api.CoreProperties;
import javax.annotation.CheckForNull;
import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.nio.charset.Charset;
+import java.nio.charset.StandardCharsets;
class InputFileBuilder {
DefaultInputFile completeAndComputeMetadata(DefaultInputFile inputFile, InputFile.Type type) {
inputFile.setType(type);
inputFile.setModuleBaseDir(fs.baseDir().toPath());
- inputFile.setCharset(fs.encoding());
String lang = langDetection.language(inputFile);
if (lang == null && !settings.getBoolean(CoreProperties.IMPORT_UNKNOWN_FILES_KEY)) {
}
inputFile.setLanguage(lang);
- inputFile.initMetadata(fileMetadata.readMetadata(inputFile.file(), fs.encoding()));
+ Charset charset = detectCharset(inputFile.file(), fs.encoding()); // charset from the file's BOM, or fs.encoding() when no known BOM is found
+ inputFile.setCharset(charset);
+ // Read the metadata with the same charset that was just set on the input file
+ inputFile.initMetadata(fileMetadata.readMetadata(inputFile.file(), charset));
inputFile.setStatus(statusDetection.status(inputFile.moduleKey(), inputFile.relativePath(), inputFile.hash()));
return inputFile;
}
+ /**
+  * Detects the charset of the given file from its byte order mark (BOM).
+  * <p>
+  * The BOMs of UTF-8, UTF-16BE, UTF-16LE, UTF-32BE and UTF-32LE are recognized.
+  * At most the first four bytes of the file are read.
+  *
+  * @param file file whose leading bytes are inspected
+  * @param defaultCharset charset returned when the file does not start with a known BOM
+  * @return charset detected from BOM in given file or given defaultCharset
+  * @throws IllegalStateException if an I/O error occurs
+  */
+ private static Charset detectCharset(File file, Charset defaultCharset) {
+   byte[] bom = new byte[4];
+   int n = 0;
+   try (FileInputStream inputStream = new FileInputStream(file)) {
+     // A single read() is allowed to return fewer bytes than requested even before
+     // end of file, so keep reading until the buffer is full or the stream is exhausted.
+     while (n < bom.length) {
+       int count = inputStream.read(bom, n, bom.length - n);
+       if (count < 0) {
+         break;
+       }
+       n += count;
+     }
+   } catch (IOException e) {
+     throw new IllegalStateException("Unable to read file " + file.getAbsolutePath(), e);
+   }
+
+   if ((n >= 3) && (bom[0] == (byte) 0xEF) && (bom[1] == (byte) 0xBB) && (bom[2] == (byte) 0xBF)) {
+     return StandardCharsets.UTF_8;
+   }
+   // The 4-byte BOMs are tested before the 2-byte ones: the UTF-32LE BOM (FF FE 00 00)
+   // starts with the UTF-16LE BOM (FF FE) and would otherwise be reported as UTF-16LE.
+   if ((n >= 4) && (bom[0] == (byte) 0x00) && (bom[1] == (byte) 0x00) && (bom[2] == (byte) 0xFE) && (bom[3] == (byte) 0xFF)) {
+     return UTF_32BE;
+   }
+   if ((n >= 4) && (bom[0] == (byte) 0xFF) && (bom[1] == (byte) 0xFE) && (bom[2] == (byte) 0x00) && (bom[3] == (byte) 0x00)) {
+     return UTF_32LE;
+   }
+   if ((n >= 2) && (bom[0] == (byte) 0xFE) && (bom[1] == (byte) 0xFF)) {
+     return StandardCharsets.UTF_16BE;
+   }
+   if ((n >= 2) && (bom[0] == (byte) 0xFF) && (bom[1] == (byte) 0xFE)) {
+     return StandardCharsets.UTF_16LE;
+   }
+   return defaultCharset;
+ }
+ // Charsets returned by detectCharset for the 4-byte BOMs; looked up by name and exposed to tests
+ @VisibleForTesting
+ static final Charset UTF_32BE = Charset.forName("UTF-32BE"); // returned for BOM 00 00 FE FF
+ // Little-endian counterpart of UTF_32BE
+ @VisibleForTesting
+ static final Charset UTF_32LE = Charset.forName("UTF-32LE"); // returned for BOM FF FE 00 00
+
}
package org.sonar.scanner.scan.filesystem;
import java.io.File;
+import java.io.FileNotFoundException;
import java.nio.charset.StandardCharsets;
import org.apache.commons.io.FileUtils;
+import org.junit.Assert;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.TemporaryFolder;
StatusDetection statusDetection = mock(StatusDetection.class);
DefaultModuleFileSystem fs = mock(DefaultModuleFileSystem.class);
+ @Test
+ public void should_detect_charset_from_BOM() {
+   // The mocked file system reports US-ASCII as its encoding, so a fallback to the
+   // default can be told apart from every charset the BOM detection may return.
+   File resourcesDir = new File("src/test/resources/org/sonar/scanner/scan/filesystem/");
+   when(fs.baseDir()).thenReturn(resourcesDir);
+   when(fs.encoding()).thenReturn(StandardCharsets.US_ASCII);
+   when(langDetection.language(any(InputFile.class))).thenReturn("java");
+   InputFileBuilder underTest = new InputFileBuilder("moduleKey", new PathResolver(), langDetection, statusDetection, fs, new MapSettings(), new FileMetadata());
+
+   // Each fixture is expected to resolve to the charset named by its file
+   DefaultInputFile withoutBom = createAndComplete(underTest, new File(resourcesDir, "without_BOM.txt"));
+   assertThat(withoutBom.charset()).isEqualTo(StandardCharsets.US_ASCII);
+
+   DefaultInputFile utf8 = createAndComplete(underTest, new File(resourcesDir, "UTF-8.txt"));
+   assertThat(utf8.charset()).isEqualTo(StandardCharsets.UTF_8);
+
+   DefaultInputFile utf16be = createAndComplete(underTest, new File(resourcesDir, "UTF-16BE.txt"));
+   assertThat(utf16be.charset()).isEqualTo(StandardCharsets.UTF_16BE);
+
+   DefaultInputFile utf16le = createAndComplete(underTest, new File(resourcesDir, "UTF-16LE.txt"));
+   assertThat(utf16le.charset()).isEqualTo(StandardCharsets.UTF_16LE);
+
+   DefaultInputFile utf32be = createAndComplete(underTest, new File(resourcesDir, "UTF-32BE.txt"));
+   assertThat(utf32be.charset()).isEqualTo(InputFileBuilder.UTF_32BE);
+
+   DefaultInputFile utf32le = createAndComplete(underTest, new File(resourcesDir, "UTF-32LE.txt"));
+   assertThat(utf32le.charset()).isEqualTo(InputFileBuilder.UTF_32LE);
+
+   // A file that does not exist must surface as an IllegalStateException wrapping the I/O failure
+   File missingFile = new File(resourcesDir, "non_existing");
+   try {
+     createAndComplete(underTest, missingFile);
+     Assert.fail();
+   } catch (IllegalStateException e) {
+     assertThat(e.getMessage()).isEqualTo("Unable to read file " + missingFile.getAbsolutePath());
+     assertThat(e.getCause()).isInstanceOf(FileNotFoundException.class);
+   }
+ }
+
+ private static DefaultInputFile createAndComplete(InputFileBuilder builder, File file) {
+ DefaultInputFile inputFile = builder.create(file);
+ builder.completeAndComputeMetadata(inputFile, InputFile.Type.MAIN);
+ return inputFile;
+ }
+
@Test
public void complete_input_file() throws Exception {
// file system