author    Julien HENRY <julien.henry@sonarsource.com>  2015-02-12 14:31:52 +0100
committer Julien HENRY <julien.henry@sonarsource.com>  2015-02-12 15:07:46 +0100
commit    e14a07c4fa5ae1c4a1e407f2a5407a8eb0b3428f (patch)
tree      84bd47ab7b2c622184041f110eea27c09ddda3f7 /sonar-batch
parent    1eaa3b9a440cf530700e1c70c3872291bbedd847 (diff)
SONAR-6182 Log a warning when unmappable character is detected in a file
Diffstat (limited to 'sonar-batch')
-rw-r--r--  sonar-batch/src/main/java/org/sonar/batch/index/SourcePersister.java                    | 39
-rw-r--r--  sonar-batch/src/main/java/org/sonar/batch/issue/tracking/FileHashes.java                | 36
-rw-r--r--  sonar-batch/src/main/java/org/sonar/batch/scan/filesystem/FileMetadata.java             | 96
-rw-r--r--  sonar-batch/src/test/java/org/sonar/batch/issue/tracking/IssueTrackingDecoratorTest.java |  4
-rw-r--r--  sonar-batch/src/test/java/org/sonar/batch/issue/tracking/IssueTrackingTest.java         |  4
-rw-r--r--  sonar-batch/src/test/java/org/sonar/batch/scan/filesystem/FileMetadataTest.java         | 12
6 files changed, 124 insertions(+), 67 deletions(-)
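
The heart of the change is in FileMetadata's LineCounter: while streaming a file it now watches for the Unicode replacement character (U+FFFD), which Java's charset decoder substitutes for byte sequences it cannot map in the configured encoding, and logs one warning per file pointing at the source-encoding property. A minimal standalone sketch of that detection idea, reading char by char for clarity; the class name, the args[0] file path and the warning wording are illustrative, not part of the commit:

import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;

public class ReplacementCharDetector {

  // U+FFFD is what a decoder in REPLACE mode emits for unmappable input
  private static final char REPLACEMENT_CHAR = '\ufffd';

  public static void main(String[] args) throws IOException {
    Charset encoding = StandardCharsets.UTF_8;
    try (BufferedReader reader = new BufferedReader(
      new InputStreamReader(new FileInputStream(args[0]), encoding))) {
      int line = 1;
      boolean warned = false;
      int c;
      while ((c = reader.read()) != -1) {
        if (c == '\n') {
          line++;
        } else if (!warned && c == REPLACEMENT_CHAR) {
          // Mirrors the warning added in LineCounter: log once per file
          System.err.println("Invalid character in " + args[0] + " at line " + line
            + " for encoding " + encoding + "; check the configured source encoding");
          warned = true;
        }
      }
    }
  }
}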
diff --git a/sonar-batch/src/main/java/org/sonar/batch/index/SourcePersister.java b/sonar-batch/src/main/java/org/sonar/batch/index/SourcePersister.java
index 14cd85e371a..cf1fae2da06 100644
--- a/sonar-batch/src/main/java/org/sonar/batch/index/SourcePersister.java
+++ b/sonar-batch/src/main/java/org/sonar/batch/index/SourcePersister.java
@@ -19,6 +19,7 @@
*/
package org.sonar.batch.index;
+import org.apache.commons.codec.binary.Hex;
import org.apache.commons.codec.digest.DigestUtils;
import org.apache.ibatis.session.ResultContext;
import org.apache.ibatis.session.ResultHandler;
@@ -26,6 +27,8 @@ import org.sonar.api.batch.fs.InputFile;
import org.sonar.api.batch.fs.internal.DefaultInputFile;
import org.sonar.api.utils.System2;
import org.sonar.batch.ProjectTree;
+import org.sonar.batch.scan.filesystem.FileMetadata;
+import org.sonar.batch.scan.filesystem.FileMetadata.LineHashConsumer;
import org.sonar.batch.scan.filesystem.InputFileMetadata;
import org.sonar.batch.scan.filesystem.InputPathCache;
import org.sonar.core.persistence.DbSession;
@@ -34,10 +37,9 @@ import org.sonar.core.source.db.FileSourceDto;
import org.sonar.core.source.db.FileSourceMapper;
import javax.annotation.CheckForNull;
+import javax.annotation.Nullable;
-import java.io.BufferedReader;
import java.io.IOException;
-import java.nio.file.Files;
import java.util.HashMap;
import java.util.Map;
@@ -98,7 +100,7 @@ public class SourcePersister implements ScanPersister {
.setBinaryData(data)
.setDataHash(dataHash)
.setSrcHash(metadata.hash())
- .setLineHashes(lineHashesAsMd5Hex(inputFile, metadata))
+ .setLineHashes(lineHashesAsMd5Hex(inputFile))
.setCreatedAt(system2.now())
.setUpdatedAt(system2.now());
mapper.insert(dto);
@@ -110,7 +112,7 @@ public class SourcePersister implements ScanPersister {
.setBinaryData(data)
.setDataHash(dataHash)
.setSrcHash(metadata.hash())
- .setLineHashes(lineHashesAsMd5Hex(inputFile, metadata))
+ .setLineHashes(lineHashesAsMd5Hex(inputFile))
.setUpdatedAt(system2.now());
mapper.update(previousDto);
session.commit();
@@ -119,34 +121,23 @@ public class SourcePersister implements ScanPersister {
}
@CheckForNull
- private String lineHashesAsMd5Hex(DefaultInputFile f, InputFileMetadata metadata) {
+ private String lineHashesAsMd5Hex(DefaultInputFile f) {
if (f.lines() == 0) {
return null;
}
// A md5 string is 32 char long + '\n' = 33
- StringBuilder result = new StringBuilder(f.lines() * (32 + 1));
+ final StringBuilder result = new StringBuilder(f.lines() * (32 + 1));
- try {
- BufferedReader reader = Files.newBufferedReader(f.path(), f.charset());
- StringBuilder sb = new StringBuilder();
- for (int i = 0; i < f.lines(); i++) {
- String lineStr = reader.readLine();
- lineStr = lineStr == null ? "" : lineStr;
- for (int j = 0; j < lineStr.length(); j++) {
- char c = lineStr.charAt(j);
- if (!Character.isWhitespace(c)) {
- sb.append(c);
- }
- }
- if (i > 0) {
+ FileMetadata.computeLineHashesForIssueTracking(f, new LineHashConsumer() {
+
+ @Override
+ public void consume(int lineIdx, @Nullable byte[] hash) {
+ if (lineIdx > 0) {
result.append("\n");
}
- result.append(sb.length() > 0 ? DigestUtils.md5Hex(sb.toString()) : "");
- sb.setLength(0);
+ result.append(hash != null ? Hex.encodeHexString(hash) : "");
}
- } catch (Exception e) {
- throw new IllegalStateException("Unable to compute line hashes of file " + f, e);
- }
+ });
return result.toString();
}
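
After this refactoring, SourcePersister no longer re-reads the file itself: FileMetadata.computeLineHashesForIssueTracking streams it once, and lineHashesAsMd5Hex only concatenates the hex-encoded hashes, one 32-character md5 per line, separated by '\n', with an empty slot for whitespace-only lines. A rough sketch of the value it ends up building, using a small helper that is not part of the commit:

import org.apache.commons.codec.digest.DigestUtils;

public class LineHashJoiner {

  // Roughly mirrors lineHashesAsMd5Hex(): strip whitespace per line,
  // md5-hex what remains, leave blank lines empty, join with '\n'.
  static String join(String... lines) {
    StringBuilder result = new StringBuilder(lines.length * (32 + 1));
    for (int i = 0; i < lines.length; i++) {
      if (i > 0) {
        result.append("\n");
      }
      String stripped = lines[i].replaceAll("\\s", "");
      result.append(stripped.isEmpty() ? "" : DigestUtils.md5Hex(stripped));
    }
    return result.toString();
  }

  public static void main(String[] args) {
    // The middle line is whitespace-only, so its slot stays empty.
    System.out.println(join("int a = 1;", "   ", "return a;"));
  }
}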
diff --git a/sonar-batch/src/main/java/org/sonar/batch/issue/tracking/FileHashes.java b/sonar-batch/src/main/java/org/sonar/batch/issue/tracking/FileHashes.java
index fbc293e8004..996351c26f6 100644
--- a/sonar-batch/src/main/java/org/sonar/batch/issue/tracking/FileHashes.java
+++ b/sonar-batch/src/main/java/org/sonar/batch/issue/tracking/FileHashes.java
@@ -19,17 +19,16 @@
*/
package org.sonar.batch.issue.tracking;
-import com.google.common.base.Charsets;
import com.google.common.collect.LinkedHashMultimap;
import com.google.common.collect.Multimap;
import org.apache.commons.codec.binary.Hex;
-import org.apache.commons.codec.digest.DigestUtils;
import org.apache.commons.lang.ObjectUtils;
import org.sonar.api.batch.fs.internal.DefaultInputFile;
+import org.sonar.batch.scan.filesystem.FileMetadata;
+import org.sonar.batch.scan.filesystem.FileMetadata.LineHashConsumer;
+
+import javax.annotation.Nullable;
-import java.io.BufferedReader;
-import java.nio.file.Files;
-import java.security.MessageDigest;
import java.util.Collection;
/**
@@ -56,27 +55,14 @@ public final class FileHashes {
}
public static FileHashes create(DefaultInputFile f) {
- byte[][] hashes = new byte[f.lines()][];
- try {
- BufferedReader reader = Files.newBufferedReader(f.path(), f.charset());
- MessageDigest lineMd5Digest = DigestUtils.getMd5Digest();
- StringBuilder sb = new StringBuilder();
- for (int i = 0; i < f.lines(); i++) {
- String lineStr = reader.readLine();
- if (lineStr != null) {
- for (int j = 0; j < lineStr.length(); j++) {
- char c = lineStr.charAt(j);
- if (!Character.isWhitespace(c)) {
- sb.append(c);
- }
- }
- }
- hashes[i] = sb.length() > 0 ? lineMd5Digest.digest(sb.toString().getBytes(Charsets.UTF_8)) : null;
- sb.setLength(0);
+ final byte[][] hashes = new byte[f.lines()][];
+ FileMetadata.computeLineHashesForIssueTracking(f, new LineHashConsumer() {
+
+ @Override
+ public void consume(int lineIdx, @Nullable byte[] hash) {
+ hashes[lineIdx - 1] = hash;
}
- } catch (Exception e) {
- throw new IllegalStateException("Unable to compute line hashes of file " + f, e);
- }
+ });
int size = hashes.length;
Multimap<String, Integer> linesByHash = LinkedHashMultimap.create();
diff --git a/sonar-batch/src/main/java/org/sonar/batch/scan/filesystem/FileMetadata.java b/sonar-batch/src/main/java/org/sonar/batch/scan/filesystem/FileMetadata.java
index 0f91e4ae3cc..6e3031a9059 100644
--- a/sonar-batch/src/main/java/org/sonar/batch/scan/filesystem/FileMetadata.java
+++ b/sonar-batch/src/main/java/org/sonar/batch/scan/filesystem/FileMetadata.java
@@ -25,10 +25,15 @@ import org.apache.commons.codec.binary.Hex;
import org.apache.commons.codec.digest.DigestUtils;
import org.apache.commons.io.ByteOrderMark;
import org.apache.commons.io.input.BOMInputStream;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
import org.sonar.api.BatchComponent;
+import org.sonar.api.CoreProperties;
import org.sonar.api.batch.AnalysisMode;
+import org.sonar.api.batch.fs.internal.DefaultInputFile;
import javax.annotation.CheckForNull;
+import javax.annotation.Nullable;
import java.io.BufferedReader;
import java.io.File;
@@ -47,6 +52,8 @@ import java.util.List;
*/
public class FileMetadata implements BatchComponent {
+ private static final Logger LOG = LoggerFactory.getLogger(FileMetadata.class);
+
private static final char LINE_FEED = '\n';
private static final char CARRIAGE_RETURN = '\r';
private final AnalysisMode analysisMode;
@@ -55,7 +62,7 @@ public class FileMetadata implements BatchComponent {
this.analysisMode = analysisMode;
}
- private abstract class CharHandler {
+ private static abstract class CharHandler {
void handleAll(char c) {
}
@@ -70,15 +77,28 @@ public class FileMetadata implements BatchComponent {
}
}
- private class LineCounter extends CharHandler {
+ private static class LineCounter extends CharHandler {
private boolean empty = true;
private int lines = 1;
private int nonBlankLines = 0;
private boolean blankLine = true;
+ boolean alreadyLoggedInvalidCharacter = false;
+ private final File file;
+ private final Charset encoding;
+
+ LineCounter(File file, Charset encoding) {
+ this.file = file;
+ this.encoding = encoding;
+ }
@Override
void handleAll(char c) {
this.empty = false;
+ if (!alreadyLoggedInvalidCharacter && c == '\ufffd') {
+ LOG.warn("Invalid character encountered in file " + file + " at line " + lines
+ + " for encoding " + encoding + ". Please fix file content or configure the encoding to be used using property '" + CoreProperties.ENCODING_PROPERTY + "'.");
+ alreadyLoggedInvalidCharacter = true;
+ }
}
@Override
@@ -117,10 +137,9 @@ public class FileMetadata implements BatchComponent {
}
}
- private class FileHashComputer extends CharHandler {
+ private static class FileHashComputer extends CharHandler {
private MessageDigest globalMd5Digest = DigestUtils.getMd5Digest();
-
- StringBuffer sb = new StringBuffer();
+ private StringBuffer sb = new StringBuffer();
@Override
void handleIgnoreEoL(char c) {
@@ -147,7 +166,38 @@ public class FileMetadata implements BatchComponent {
}
}
- private class LineOffsetCounter extends CharHandler {
+ private static class LineHashComputer extends CharHandler {
+ private final MessageDigest lineMd5Digest = DigestUtils.getMd5Digest();
+ private final StringBuffer sb = new StringBuffer();
+ private final LineHashConsumer consumer;
+ private int line = 1;
+
+ public LineHashComputer(LineHashConsumer consumer) {
+ this.consumer = consumer;
+ }
+
+ @Override
+ void handleIgnoreEoL(char c) {
+ if (!Character.isWhitespace(c)) {
+ sb.append(c);
+ }
+ }
+
+ @Override
+ void newLine() {
+ consumer.consume(line, sb.length() > 0 ? lineMd5Digest.digest(sb.toString().getBytes(Charsets.UTF_8)) : null);
+ sb.setLength(0);
+ line++;
+ }
+
+ @Override
+ void eof() {
+ consumer.consume(line, sb.length() > 0 ? lineMd5Digest.digest(sb.toString().getBytes(Charsets.UTF_8)) : null);
+ }
+
+ }
+
+ private static class LineOffsetCounter extends CharHandler {
private int currentOriginalOffset = 0;
private List<Integer> originalLineOffsets = new ArrayList<Integer>();
@@ -176,17 +226,21 @@ public class FileMetadata implements BatchComponent {
* Maximum performance is needed.
*/
Metadata read(File file, Charset encoding) {
- char c = (char) 0;
- LineCounter lineCounter = new LineCounter();
+ LineCounter lineCounter = new LineCounter(file, encoding);
FileHashComputer fileHashComputer = new FileHashComputer();
LineOffsetCounter lineOffsetCounter = new LineOffsetCounter();
- CharHandler[] handlers;
- if (analysisMode.isPreview()) {
- // No need to compute line offsets in preview mode since there is no syntax highlighting
- handlers = new CharHandler[] {lineCounter, fileHashComputer};
+ if (!analysisMode.isPreview()) {
+ scanFile(file, encoding, lineCounter, fileHashComputer, lineOffsetCounter);
} else {
- handlers = new CharHandler[] {lineCounter, fileHashComputer, lineOffsetCounter};
+ // No need to compute line offsets in preview mode since there is no syntax highlighting
+ scanFile(file, encoding, lineCounter, fileHashComputer);
}
+ return new Metadata(lineCounter.lines(), lineCounter.nonBlankLines(), fileHashComputer.getHash(), lineOffsetCounter.getOriginalLineOffsets(),
+ lineCounter.isEmpty());
+ }
+
+ private static void scanFile(File file, Charset encoding, CharHandler... handlers) {
+ char c = (char) 0;
try (BOMInputStream bomIn = new BOMInputStream(new FileInputStream(file),
ByteOrderMark.UTF_8, ByteOrderMark.UTF_16LE, ByteOrderMark.UTF_16BE, ByteOrderMark.UTF_32LE, ByteOrderMark.UTF_32BE);
Reader reader = new BufferedReader(new InputStreamReader(bomIn, encoding))) {
@@ -224,9 +278,6 @@ public class FileMetadata implements BatchComponent {
for (CharHandler handler : handlers) {
handler.eof();
}
- return new Metadata(lineCounter.lines(), lineCounter.nonBlankLines(), fileHashComputer.getHash(), lineOffsetCounter.getOriginalLineOffsets(),
- lineCounter.isEmpty());
-
} catch (IOException e) {
throw new IllegalStateException(String.format("Fail to read file '%s' with encoding '%s'", file.getAbsolutePath(), encoding), e);
}
@@ -247,4 +298,17 @@ public class FileMetadata implements BatchComponent {
this.originalLineOffsets = Ints.toArray(originalLineOffsets);
}
}
+
+ public static interface LineHashConsumer {
+
+ void consume(int lineIdx, @Nullable byte[] hash);
+
+ }
+
+ /**
+ * Compute a MD5 hash of each line of the file after removing of all blank chars
+ */
+ public static void computeLineHashesForIssueTracking(DefaultInputFile f, LineHashConsumer consumer) {
+ scanFile(f.file(), f.charset(), new LineHashComputer(consumer));
+ }
}
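
The new public entry point, FileMetadata.computeLineHashesForIssueTracking, pushes a whitespace-stripped md5 per line (or null for blank lines) into a LineHashConsumer; line indices are 1-based, which is why FileHashes writes into hashes[lineIdx - 1]. A hedged usage sketch against that interface; the dump helper and the way the DefaultInputFile is obtained are assumptions, not shown in the diff:

import org.apache.commons.codec.binary.Hex;
import org.sonar.api.batch.fs.internal.DefaultInputFile;
import org.sonar.batch.scan.filesystem.FileMetadata;
import org.sonar.batch.scan.filesystem.FileMetadata.LineHashConsumer;

import javax.annotation.Nullable;

public class LineHashDump {

  // Prints "<line>: <md5-hex>" for each line of the given input file.
  static void dump(DefaultInputFile inputFile) {
    FileMetadata.computeLineHashesForIssueTracking(inputFile, new LineHashConsumer() {
      @Override
      public void consume(int lineIdx, @Nullable byte[] hash) {
        // hash is null when the line contains only whitespace
        System.out.println(lineIdx + ": " + (hash != null ? Hex.encodeHexString(hash) : "<blank>"));
      }
    });
  }
}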
diff --git a/sonar-batch/src/test/java/org/sonar/batch/issue/tracking/IssueTrackingDecoratorTest.java b/sonar-batch/src/test/java/org/sonar/batch/issue/tracking/IssueTrackingDecoratorTest.java
index 1c5e080cd06..439457caa78 100644
--- a/sonar-batch/src/test/java/org/sonar/batch/issue/tracking/IssueTrackingDecoratorTest.java
+++ b/sonar-batch/src/test/java/org/sonar/batch/issue/tracking/IssueTrackingDecoratorTest.java
@@ -22,6 +22,7 @@ package org.sonar.batch.issue.tracking;
import com.google.common.base.Charsets;
import org.apache.commons.codec.digest.DigestUtils;
import org.apache.commons.io.FileUtils;
+import org.apache.commons.lang.StringUtils;
import org.junit.Before;
import org.junit.Test;
import org.junit.rules.TemporaryFolder;
@@ -224,8 +225,9 @@ public class IssueTrackingDecoratorTest {
DefaultInputFile inputFile = mock(DefaultInputFile.class);
java.io.File f = temp.newFile();
when(inputFile.path()).thenReturn(f.toPath());
+ when(inputFile.file()).thenReturn(f);
when(inputFile.charset()).thenReturn(Charsets.UTF_8);
- when(inputFile.lines()).thenReturn(newSource.split("\n").length);
+ when(inputFile.lines()).thenReturn(StringUtils.countMatches(newSource, "\n") + 1);
FileUtils.write(f, newSource, Charsets.UTF_8);
when(inputFile.key()).thenReturn("foo:Action.java");
when(inputPathCache.getFile("foo", "Action.java")).thenReturn(inputFile);
diff --git a/sonar-batch/src/test/java/org/sonar/batch/issue/tracking/IssueTrackingTest.java b/sonar-batch/src/test/java/org/sonar/batch/issue/tracking/IssueTrackingTest.java
index bcc25ac598c..095429f3cbf 100644
--- a/sonar-batch/src/test/java/org/sonar/batch/issue/tracking/IssueTrackingTest.java
+++ b/sonar-batch/src/test/java/org/sonar/batch/issue/tracking/IssueTrackingTest.java
@@ -25,6 +25,7 @@ import com.google.common.collect.Lists;
import com.google.common.io.Resources;
import org.apache.commons.codec.digest.DigestUtils;
import org.apache.commons.io.FileUtils;
+import org.apache.commons.lang.StringUtils;
import org.junit.Before;
import org.junit.Rule;
import org.junit.Test;
@@ -354,9 +355,10 @@ public class IssueTrackingTest {
DefaultInputFile inputFile = mock(DefaultInputFile.class);
File f = temp.newFile();
when(inputFile.path()).thenReturn(f.toPath());
+ when(inputFile.file()).thenReturn(f);
when(inputFile.charset()).thenReturn(Charsets.UTF_8);
String data = load(newSource);
- when(inputFile.lines()).thenReturn(data.split("\n").length);
+ when(inputFile.lines()).thenReturn(StringUtils.countMatches(data, "\n") + 1);
FileUtils.write(f, data, Charsets.UTF_8);
when(inputFile.key()).thenReturn("foo:Action.java");
when(lastSnapshots.getLineHashes("foo:Action.java")).thenReturn(computeHexHashes(load(reference)));
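
Both test tweaks replace source.split("\n").length with StringUtils.countMatches(source, "\n") + 1 because String.split drops trailing empty tokens: for content ending in a newline it under-counts by one, whereas FileMetadata (and therefore the LineHashComputer callback) reports that final empty line, and the mocked inputFile.lines() must agree with it or hashes[lineIdx - 1] would overflow. A small illustration of the difference; the demo class is hypothetical:

import org.apache.commons.lang.StringUtils;

public class LineCountDemo {
  public static void main(String[] args) {
    String data = "foo\nbar\n";
    // split() drops the trailing empty token, so this prints 2
    System.out.println(data.split("\n").length);
    // countMatches + 1 counts the empty last line, so this prints 3
    System.out.println(StringUtils.countMatches(data, "\n") + 1);
  }
}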
diff --git a/sonar-batch/src/test/java/org/sonar/batch/scan/filesystem/FileMetadataTest.java b/sonar-batch/src/test/java/org/sonar/batch/scan/filesystem/FileMetadataTest.java
index b34c5d50271..b05c9cc414c 100644
--- a/sonar-batch/src/test/java/org/sonar/batch/scan/filesystem/FileMetadataTest.java
+++ b/sonar-batch/src/test/java/org/sonar/batch/scan/filesystem/FileMetadataTest.java
@@ -28,6 +28,7 @@ import org.junit.rules.TemporaryFolder;
import org.sonar.api.batch.AnalysisMode;
import java.io.File;
+import java.nio.charset.Charset;
import static org.apache.commons.codec.digest.DigestUtils.md5Hex;
import static org.assertj.core.api.Assertions.assertThat;
@@ -70,6 +71,17 @@ public class FileMetadataTest {
}
@Test
+ public void read_with_wrong_encoding() throws Exception {
+ File tempFile = temp.newFile();
+ FileUtils.write(tempFile, "marker´s\n", Charset.forName("cp1252"));
+
+ FileMetadata.Metadata metadata = new FileMetadata(mode).read(tempFile, Charsets.UTF_8);
+ assertThat(metadata.lines).isEqualTo(2);
+ assertThat(metadata.hash).isEqualTo(md5Hex("marker\ufffds\n"));
+ assertThat(metadata.originalLineOffsets).containsOnly(0, 9);
+ }
+
+ @Test
public void non_ascii_utf_8() throws Exception {
File tempFile = temp.newFile();
FileUtils.write(tempFile, "föo\r\nbàr\r\n\u1D11Ebaßz\r\n", Charsets.UTF_8, true);