diff options
author | Duarte Meneses <duarte.meneses@sonarsource.com> | 2015-09-01 15:49:04 +0200 |
---|---|---|
committer | Duarte Meneses <duarte.meneses@sonarsource.com> | 2015-09-04 10:33:16 +0200 |
commit | 4cb2a976809bcd5c999aa2342480cb20c59df9e3 (patch) | |
tree | 272ec1d9b4b10bfe0f01c5fbe271082cceb10c56 /sonar-plugin-api | |
parent | ae4728b46e5b88b084bd0ae939b3c6bf5f81ed01 (diff) | |
download | sonarqube-4cb2a976809bcd5c999aa2342480cb20c59df9e3.tar.gz sonarqube-4cb2a976809bcd5c999aa2342480cb20c59df9e3.zip |
Improve file hashing performance
Diffstat (limited to 'sonar-plugin-api')
-rw-r--r-- | sonar-plugin-api/src/main/java/org/sonar/api/batch/fs/internal/FileMetadata.java | 58 | ||||
-rw-r--r-- | sonar-plugin-api/src/test/java/org/sonar/api/batch/fs/internal/FileMetadataTest.java | 3 |
2 files changed, 51 insertions, 10 deletions
diff --git a/sonar-plugin-api/src/main/java/org/sonar/api/batch/fs/internal/FileMetadata.java b/sonar-plugin-api/src/main/java/org/sonar/api/batch/fs/internal/FileMetadata.java index b368d95738e..8d449f4ab4c 100644 --- a/sonar-plugin-api/src/main/java/org/sonar/api/batch/fs/internal/FileMetadata.java +++ b/sonar-plugin-api/src/main/java/org/sonar/api/batch/fs/internal/FileMetadata.java @@ -38,7 +38,12 @@ import java.io.FileInputStream; import java.io.IOException; import java.io.InputStreamReader; import java.io.Reader; +import java.nio.ByteBuffer; +import java.nio.CharBuffer; +import java.nio.charset.CharacterCodingException; import java.nio.charset.Charset; +import java.nio.charset.CharsetEncoder; +import java.nio.charset.CodingErrorAction; import java.nio.charset.StandardCharsets; import java.security.MessageDigest; import java.util.ArrayList; @@ -129,6 +134,15 @@ public class FileMetadata { private static class FileHashComputer extends CharHandler { private MessageDigest globalMd5Digest = DigestUtils.getMd5Digest(); private StringBuilder sb = new StringBuilder(); + private final CharsetEncoder encoder; + private final File file; + + public FileHashComputer(File f) { + encoder = StandardCharsets.UTF_8.newEncoder() + .onMalformedInput(CodingErrorAction.REPLACE) + .onUnmappableCharacter(CodingErrorAction.REPLACE); + file = f; + } @Override protected void handleIgnoreEoL(char c) { @@ -138,14 +152,25 @@ public class FileMetadata { @Override protected void newLine() { sb.append(LINE_FEED); - globalMd5Digest.update(sb.toString().getBytes(StandardCharsets.UTF_8)); + processBuffer(); sb.setLength(0); } @Override protected void eof() { if (sb.length() > 0) { - globalMd5Digest.update(sb.toString().getBytes(StandardCharsets.UTF_8)); + processBuffer(); + } + } + + private void processBuffer() { + try { + if (sb.length() > 0) { + ByteBuffer encoded = encoder.encode(CharBuffer.wrap(sb)); + globalMd5Digest.update(encoded.array(), 0, encoded.limit()); + } + } catch (CharacterCodingException e) { + throw new IllegalStateException("Error encoding line hash in file: " + file.getAbsolutePath(), e); } } @@ -157,12 +182,18 @@ public class FileMetadata { private static class LineHashComputer extends CharHandler { private final MessageDigest lineMd5Digest = DigestUtils.getMd5Digest(); + private final CharsetEncoder encoder; private final StringBuilder sb = new StringBuilder(); private final LineHashConsumer consumer; + private final File file; private int line = 1; - public LineHashComputer(LineHashConsumer consumer) { + public LineHashComputer(LineHashConsumer consumer, File f) { this.consumer = consumer; + this.file = f; + this.encoder = StandardCharsets.UTF_8.newEncoder() + .onMalformedInput(CodingErrorAction.REPLACE) + .onUnmappableCharacter(CodingErrorAction.REPLACE); } @Override @@ -174,7 +205,7 @@ public class FileMetadata { @Override protected void newLine() { - consumer.consume(line, sb.length() > 0 ? lineMd5Digest.digest(sb.toString().getBytes(StandardCharsets.UTF_8)) : null); + processBuffer(); sb.setLength(0); line++; } @@ -182,10 +213,21 @@ public class FileMetadata { @Override protected void eof() { if (this.line > 0) { - consumer.consume(line, sb.length() > 0 ? lineMd5Digest.digest(sb.toString().getBytes(StandardCharsets.UTF_8)) : null); + processBuffer(); } } + private void processBuffer() { + try { + if (sb.length() > 0) { + ByteBuffer encoded = encoder.encode(CharBuffer.wrap(sb)); + lineMd5Digest.update(encoded.array(), 0, encoded.limit()); + consumer.consume(line, lineMd5Digest.digest()); + } + } catch (CharacterCodingException e) { + throw new IllegalStateException("Error encoding line hash in file: " + file.getAbsolutePath(), e); + } + } } private static class LineOffsetCounter extends CharHandler { @@ -228,7 +270,7 @@ public class FileMetadata { */ public Metadata readMetadata(File file, Charset encoding) { LineCounter lineCounter = new LineCounter(file, encoding); - FileHashComputer fileHashComputer = new FileHashComputer(); + FileHashComputer fileHashComputer = new FileHashComputer(file); LineOffsetCounter lineOffsetCounter = new LineOffsetCounter(); readFile(file, encoding, lineCounter, fileHashComputer, lineOffsetCounter); return new Metadata(lineCounter.lines(), lineCounter.nonBlankLines(), fileHashComputer.getHash(), lineOffsetCounter.getOriginalLineOffsets(), @@ -240,7 +282,7 @@ public class FileMetadata { */ public Metadata readMetadata(Reader reader) { LineCounter lineCounter = new LineCounter(new File("fromString"), StandardCharsets.UTF_16); - FileHashComputer fileHashComputer = new FileHashComputer(); + FileHashComputer fileHashComputer = new FileHashComputer(new File("fromString")); LineOffsetCounter lineOffsetCounter = new LineOffsetCounter(); try { read(reader, lineCounter, fileHashComputer, lineOffsetCounter); @@ -325,6 +367,6 @@ public class FileMetadata { * Compute a MD5 hash of each line of the file after removing of all blank chars */ public static void computeLineHashesForIssueTracking(DefaultInputFile f, LineHashConsumer consumer) { - readFile(f.file(), f.charset(), new LineHashComputer(consumer)); + readFile(f.file(), f.charset(), new LineHashComputer(consumer, f.file())); } } diff --git a/sonar-plugin-api/src/test/java/org/sonar/api/batch/fs/internal/FileMetadataTest.java b/sonar-plugin-api/src/test/java/org/sonar/api/batch/fs/internal/FileMetadataTest.java index 0d00ba28793..9dbd8442527 100644 --- a/sonar-plugin-api/src/test/java/org/sonar/api/batch/fs/internal/FileMetadataTest.java +++ b/sonar-plugin-api/src/test/java/org/sonar/api/batch/fs/internal/FileMetadataTest.java @@ -103,11 +103,10 @@ public class FileMetadataTest { public void non_ascii_utf_16() throws Exception { File tempFile = temp.newFile(); FileUtils.write(tempFile, "föo\r\nbàr\r\n\u1D11Ebaßz\r\n", StandardCharsets.UTF_16, true); - FileMetadata.Metadata metadata = new FileMetadata().readMetadata(tempFile, StandardCharsets.UTF_16); assertThat(metadata.lines).isEqualTo(4); assertThat(metadata.nonBlankLines).isEqualTo(3); - assertThat(metadata.hash).isEqualTo(md5Hex("föo\nbàr\n\u1D11Ebaßz\n")); + assertThat(metadata.hash).isEqualTo(md5Hex("föo\nbàr\n\u1D11Ebaßz\n".getBytes(StandardCharsets.UTF_8))); assertThat(metadata.originalLineOffsets).containsOnly(0, 5, 10, 18); } |