author     Duarte Meneses <duarte.meneses@sonarsource.com>    2015-09-01 15:49:04 +0200
committer  Duarte Meneses <duarte.meneses@sonarsource.com>    2015-09-04 10:33:16 +0200
commit     4cb2a976809bcd5c999aa2342480cb20c59df9e3
tree       272ec1d9b4b10bfe0f01c5fbe271082cceb10c56 /sonar-plugin-api
parent     ae4728b46e5b88b084bd0ae939b3c6bf5f81ed01
Improve file hashing performance
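
In short: both FileHashComputer and LineHashComputer used to call sb.toString().getBytes(StandardCharsets.UTF_8) for every line, allocating a fresh String and byte[] each time. The patch gives each computer one reusable UTF-8 CharsetEncoder, configured with CodingErrorAction.REPLACE to keep the replacement behaviour of getBytes(), and feeds the encoded ByteBuffer's backing array straight into the MessageDigest. The File is now threaded into the constructors only so the CharacterCodingException message can name the offending file. The pattern in isolation looks roughly like the sketch below (the class and method names are illustrative, not part of the patch):

    // Minimal sketch of the encoder-reuse pattern; LineDigester is a made-up name.
    import java.nio.ByteBuffer;
    import java.nio.CharBuffer;
    import java.nio.charset.CharacterCodingException;
    import java.nio.charset.CharsetEncoder;
    import java.nio.charset.CodingErrorAction;
    import java.nio.charset.StandardCharsets;
    import java.security.MessageDigest;
    import java.security.NoSuchAlgorithmException;

    class LineDigester {
      // One encoder per file, replacing malformed/unmappable input just like String.getBytes(UTF_8) does.
      private final CharsetEncoder encoder = StandardCharsets.UTF_8.newEncoder()
        .onMalformedInput(CodingErrorAction.REPLACE)
        .onUnmappableCharacter(CodingErrorAction.REPLACE);
      private final MessageDigest md5;

      LineDigester() throws NoSuchAlgorithmException {
        this.md5 = MessageDigest.getInstance("MD5");
      }

      void addLine(StringBuilder line) throws CharacterCodingException {
        if (line.length() > 0) {
          // Encode the builder directly; avoids the intermediate String created by sb.toString().
          ByteBuffer encoded = encoder.encode(CharBuffer.wrap(line));
          md5.update(encoded.array(), 0, encoded.limit());
        }
      }

      byte[] digest() {
        return md5.digest();
      }
    }
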
Diffstat (limited to 'sonar-plugin-api')
-rw-r--r--  sonar-plugin-api/src/main/java/org/sonar/api/batch/fs/internal/FileMetadata.java      | 58
-rw-r--r--  sonar-plugin-api/src/test/java/org/sonar/api/batch/fs/internal/FileMetadataTest.java  |  3
2 files changed, 51 insertions(+), 10 deletions(-)
diff --git a/sonar-plugin-api/src/main/java/org/sonar/api/batch/fs/internal/FileMetadata.java b/sonar-plugin-api/src/main/java/org/sonar/api/batch/fs/internal/FileMetadata.java
index b368d95738e..8d449f4ab4c 100644
--- a/sonar-plugin-api/src/main/java/org/sonar/api/batch/fs/internal/FileMetadata.java
+++ b/sonar-plugin-api/src/main/java/org/sonar/api/batch/fs/internal/FileMetadata.java
@@ -38,7 +38,12 @@ import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.charset.CharacterCodingException;
import java.nio.charset.Charset;
+import java.nio.charset.CharsetEncoder;
+import java.nio.charset.CodingErrorAction;
import java.nio.charset.StandardCharsets;
import java.security.MessageDigest;
import java.util.ArrayList;
@@ -129,6 +134,15 @@ public class FileMetadata {
private static class FileHashComputer extends CharHandler {
private MessageDigest globalMd5Digest = DigestUtils.getMd5Digest();
private StringBuilder sb = new StringBuilder();
+ private final CharsetEncoder encoder;
+ private final File file;
+
+ public FileHashComputer(File f) {
+ encoder = StandardCharsets.UTF_8.newEncoder()
+ .onMalformedInput(CodingErrorAction.REPLACE)
+ .onUnmappableCharacter(CodingErrorAction.REPLACE);
+ file = f;
+ }
@Override
protected void handleIgnoreEoL(char c) {
@@ -138,14 +152,25 @@ public class FileMetadata {
@Override
protected void newLine() {
sb.append(LINE_FEED);
- globalMd5Digest.update(sb.toString().getBytes(StandardCharsets.UTF_8));
+ processBuffer();
sb.setLength(0);
}
@Override
protected void eof() {
if (sb.length() > 0) {
- globalMd5Digest.update(sb.toString().getBytes(StandardCharsets.UTF_8));
+ processBuffer();
+ }
+ }
+
+ private void processBuffer() {
+ try {
+ if (sb.length() > 0) {
+ ByteBuffer encoded = encoder.encode(CharBuffer.wrap(sb));
+ globalMd5Digest.update(encoded.array(), 0, encoded.limit());
+ }
+ } catch (CharacterCodingException e) {
+ throw new IllegalStateException("Error encoding line hash in file: " + file.getAbsolutePath(), e);
}
}
@@ -157,12 +182,18 @@ public class FileMetadata {
private static class LineHashComputer extends CharHandler {
private final MessageDigest lineMd5Digest = DigestUtils.getMd5Digest();
+ private final CharsetEncoder encoder;
private final StringBuilder sb = new StringBuilder();
private final LineHashConsumer consumer;
+ private final File file;
private int line = 1;
- public LineHashComputer(LineHashConsumer consumer) {
+ public LineHashComputer(LineHashConsumer consumer, File f) {
this.consumer = consumer;
+ this.file = f;
+ this.encoder = StandardCharsets.UTF_8.newEncoder()
+ .onMalformedInput(CodingErrorAction.REPLACE)
+ .onUnmappableCharacter(CodingErrorAction.REPLACE);
}
@Override
@@ -174,7 +205,7 @@ public class FileMetadata {
@Override
protected void newLine() {
- consumer.consume(line, sb.length() > 0 ? lineMd5Digest.digest(sb.toString().getBytes(StandardCharsets.UTF_8)) : null);
+ processBuffer();
sb.setLength(0);
line++;
}
@@ -182,10 +213,21 @@ public class FileMetadata {
@Override
protected void eof() {
if (this.line > 0) {
- consumer.consume(line, sb.length() > 0 ? lineMd5Digest.digest(sb.toString().getBytes(StandardCharsets.UTF_8)) : null);
+ processBuffer();
}
}
+ private void processBuffer() {
+ try {
+ if (sb.length() > 0) {
+ ByteBuffer encoded = encoder.encode(CharBuffer.wrap(sb));
+ lineMd5Digest.update(encoded.array(), 0, encoded.limit());
+ consumer.consume(line, lineMd5Digest.digest());
+ }
+ } catch (CharacterCodingException e) {
+ throw new IllegalStateException("Error encoding line hash in file: " + file.getAbsolutePath(), e);
+ }
+ }
}
private static class LineOffsetCounter extends CharHandler {
@@ -228,7 +270,7 @@ public class FileMetadata {
*/
public Metadata readMetadata(File file, Charset encoding) {
LineCounter lineCounter = new LineCounter(file, encoding);
- FileHashComputer fileHashComputer = new FileHashComputer();
+ FileHashComputer fileHashComputer = new FileHashComputer(file);
LineOffsetCounter lineOffsetCounter = new LineOffsetCounter();
readFile(file, encoding, lineCounter, fileHashComputer, lineOffsetCounter);
return new Metadata(lineCounter.lines(), lineCounter.nonBlankLines(), fileHashComputer.getHash(), lineOffsetCounter.getOriginalLineOffsets(),
@@ -240,7 +282,7 @@ public class FileMetadata {
*/
public Metadata readMetadata(Reader reader) {
LineCounter lineCounter = new LineCounter(new File("fromString"), StandardCharsets.UTF_16);
- FileHashComputer fileHashComputer = new FileHashComputer();
+ FileHashComputer fileHashComputer = new FileHashComputer(new File("fromString"));
LineOffsetCounter lineOffsetCounter = new LineOffsetCounter();
try {
read(reader, lineCounter, fileHashComputer, lineOffsetCounter);
@@ -325,6 +367,6 @@ public class FileMetadata {
* Compute a MD5 hash of each line of the file after removing of all blank chars
*/
public static void computeLineHashesForIssueTracking(DefaultInputFile f, LineHashConsumer consumer) {
- readFile(f.file(), f.charset(), new LineHashComputer(consumer));
+ readFile(f.file(), f.charset(), new LineHashComputer(consumer, f.file()));
}
}
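
For context, the public entry point touched by the last hunks of this file is used as in the sketch below (the file path is illustrative; the call shape mirrors the test further down):

    // Hypothetical caller of FileMetadata.readMetadata; only the input path is made up.
    import java.io.File;
    import java.nio.charset.StandardCharsets;
    import org.sonar.api.batch.fs.internal.FileMetadata;

    class ReadMetadataExample {
      public static void main(String[] args) {
        File source = new File("src/main/java/Example.java"); // illustrative input file
        FileMetadata.Metadata metadata = new FileMetadata().readMetadata(source, StandardCharsets.UTF_8);
        // The whole-file MD5 is now computed through the reusable UTF-8 encoder in FileHashComputer.
        System.out.println(metadata.hash);
        System.out.println(metadata.lines + " lines, " + metadata.nonBlankLines + " non-blank");
      }
    }
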
diff --git a/sonar-plugin-api/src/test/java/org/sonar/api/batch/fs/internal/FileMetadataTest.java b/sonar-plugin-api/src/test/java/org/sonar/api/batch/fs/internal/FileMetadataTest.java
index 0d00ba28793..9dbd8442527 100644
--- a/sonar-plugin-api/src/test/java/org/sonar/api/batch/fs/internal/FileMetadataTest.java
+++ b/sonar-plugin-api/src/test/java/org/sonar/api/batch/fs/internal/FileMetadataTest.java
@@ -103,11 +103,10 @@ public class FileMetadataTest {
public void non_ascii_utf_16() throws Exception {
File tempFile = temp.newFile();
FileUtils.write(tempFile, "föo\r\nbàr\r\n\u1D11Ebaßz\r\n", StandardCharsets.UTF_16, true);
-
FileMetadata.Metadata metadata = new FileMetadata().readMetadata(tempFile, StandardCharsets.UTF_16);
assertThat(metadata.lines).isEqualTo(4);
assertThat(metadata.nonBlankLines).isEqualTo(3);
- assertThat(metadata.hash).isEqualTo(md5Hex("föo\nbàr\n\u1D11Ebaßz\n"));
+ assertThat(metadata.hash).isEqualTo(md5Hex("föo\nbàr\n\u1D11Ebaßz\n".getBytes(StandardCharsets.UTF_8)));
assertThat(metadata.originalLineOffsets).containsOnly(0, 5, 10, 18);
}
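
The test change only makes the encoding of the expected value explicit: the expected hash is computed over the UTF-8 bytes of the string instead of leaving the conversion to md5Hex(String). Since the new encoder uses CodingErrorAction.REPLACE, it should produce the same bytes as String.getBytes(StandardCharsets.UTF_8), so existing hashes are expected to be unchanged. A quick stand-alone check of that assumption (a sketch, not part of the test suite):

    // Sketch: verify the reusable encoder and String.getBytes(UTF_8) agree on the bytes being hashed.
    import java.nio.ByteBuffer;
    import java.nio.CharBuffer;
    import java.nio.charset.CharacterCodingException;
    import java.nio.charset.CharsetEncoder;
    import java.nio.charset.CodingErrorAction;
    import java.nio.charset.StandardCharsets;
    import java.util.Arrays;

    class EncodingEquivalenceCheck {
      public static void main(String[] args) throws CharacterCodingException {
        // Sample text with non-ASCII chars and a supplementary character (U+1D11E as a surrogate pair).
        String line = "föo bàr baßz \uD834\uDD1E";
        CharsetEncoder encoder = StandardCharsets.UTF_8.newEncoder()
          .onMalformedInput(CodingErrorAction.REPLACE)
          .onUnmappableCharacter(CodingErrorAction.REPLACE);
        ByteBuffer encoded = encoder.encode(CharBuffer.wrap(line));
        byte[] viaEncoder = Arrays.copyOfRange(encoded.array(), 0, encoded.limit());
        byte[] viaGetBytes = line.getBytes(StandardCharsets.UTF_8);
        System.out.println(Arrays.equals(viaEncoder, viaGetBytes)); // expected: true
      }
    }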