diff options
author | Julien HENRY <julien.henry@sonarsource.com> | 2014-11-25 14:49:07 +0100 |
---|---|---|
committer | Julien HENRY <julien.henry@sonarsource.com> | 2014-11-25 14:57:48 +0100 |
commit | e0c4341134b434b0766a6aa97310c53b65acdfc1 (patch) | |
tree | bc2c3a326619c1634b267f0b988035bbb856e506 /sonar-batch | |
parent | 6e5b50ef39f0eaaef1f97a23b0454bcad311ed40 (diff) | |
download | sonarqube-e0c4341134b434b0766a6aa97310c53b65acdfc1.tar.gz sonarqube-e0c4341134b434b0766a6aa97310c53b65acdfc1.zip |
SONAR-5868 Try to reduce memory consumption of line hashes
Diffstat (limited to 'sonar-batch')
6 files changed, 94 insertions, 35 deletions
diff --git a/sonar-batch/src/main/java/org/sonar/batch/index/SourcePersister.java b/sonar-batch/src/main/java/org/sonar/batch/index/SourcePersister.java index 13b644577f8..a45b7a462ac 100644 --- a/sonar-batch/src/main/java/org/sonar/batch/index/SourcePersister.java +++ b/sonar-batch/src/main/java/org/sonar/batch/index/SourcePersister.java @@ -20,7 +20,7 @@ package org.sonar.batch.index; import com.google.common.base.CharMatcher; -import com.google.common.base.Joiner; +import org.apache.commons.codec.binary.Hex; import org.apache.commons.codec.digest.DigestUtils; import org.apache.commons.io.FileUtils; import org.apache.commons.lang.StringUtils; @@ -158,7 +158,7 @@ public class SourcePersister implements ScanPersister { .setFileUuid(fileUuid) .setData(newData) .setDataHash(newDataHash) - .setLineHashes(StringUtils.defaultIfEmpty(Joiner.on('\n').join(inputFile.lineHashes()), null)) + .setLineHashes(lineHashesAsMd5Hex(inputFile)) .setCreatedAt(now.getTime()) .setUpdatedAt(now.getTime()); mapper.insert(newFileSource); @@ -167,7 +167,7 @@ public class SourcePersister implements ScanPersister { if (!newDataHash.equals(previous.getDataHash())) { previous .setData(newData) - .setLineHashes(StringUtils.defaultIfEmpty(Joiner.on('\n').join(inputFile.lineHashes()), null)) + .setLineHashes(lineHashesAsMd5Hex(inputFile)) .setDataHash(newDataHash) .setUpdatedAt(now.getTime()); mapper.update(previous); @@ -177,6 +177,22 @@ public class SourcePersister implements ScanPersister { } @CheckForNull + private String lineHashesAsMd5Hex(DefaultInputFile inputFile) { + if (inputFile.lines() == 0) { + return null; + } + // A md5 string is 32 char long + '\n' = 33 + StringBuilder result = new StringBuilder(inputFile.lines() * (32 + 1)); + for (byte[] lineHash : inputFile.lineHashes()) { + if (result.length() > 0) { + result.append("\n"); + } + result.append(lineHash != null ? Hex.encodeHexString(lineHash) : ""); + } + return result.toString(); + } + + @CheckForNull String getSourceData(DefaultInputFile file) { if (file.lines() == 0) { return null; diff --git a/sonar-batch/src/main/java/org/sonar/batch/scan/filesystem/DefaultInputFileValueCoder.java b/sonar-batch/src/main/java/org/sonar/batch/scan/filesystem/DefaultInputFileValueCoder.java index e216534167f..3ce3c5872c7 100644 --- a/sonar-batch/src/main/java/org/sonar/batch/scan/filesystem/DefaultInputFileValueCoder.java +++ b/sonar-batch/src/main/java/org/sonar/batch/scan/filesystem/DefaultInputFileValueCoder.java @@ -48,7 +48,9 @@ class DefaultInputFileValueCoder implements ValueCoder { value.put(f.lines()); putUTFOrNull(value, f.encoding()); value.putLongArray(f.originalLineOffsets()); - value.putStringArray(f.lineHashes()); + for (int i = 0; i < f.lines(); i++) { + value.putByteArray(f.lineHashes()[i]); + } } private void putUTFOrNull(Value value, @Nullable String utfOrNull) { @@ -75,7 +77,11 @@ class DefaultInputFileValueCoder implements ValueCoder { file.setLines(value.getInt()); file.setEncoding(value.getString()); file.setOriginalLineOffsets(value.getLongArray()); - file.setLineHashes(value.getStringArray()); + byte[][] lineHashes = new byte[file.lines()][]; + for (int i = 0; i < file.lines(); i++) { + lineHashes[i] = value.getByteArray(); + } + file.setLineHashes(lineHashes); return file; } diff --git a/sonar-batch/src/main/java/org/sonar/batch/scan/filesystem/FileMetadata.java b/sonar-batch/src/main/java/org/sonar/batch/scan/filesystem/FileMetadata.java index 95089291ef6..7fdc0f90033 100644 --- a/sonar-batch/src/main/java/org/sonar/batch/scan/filesystem/FileMetadata.java +++ b/sonar-batch/src/main/java/org/sonar/batch/scan/filesystem/FileMetadata.java @@ -62,7 +62,7 @@ class FileMetadata { Reader reader = null; long currentOriginalOffset = 0; List<Long> originalLineOffsets = new ArrayList<Long>(); - List<String> lineHashes = new ArrayList<String>(); + List<Object> lineHashes = new ArrayList<Object>(); StringBuilder currentLineStr = new StringBuilder(); int lines = 0; char c = (char) -1; @@ -112,7 +112,7 @@ class FileMetadata { lineHashes.add(md5IgnoreWhitespace(currentLineStr)); } String filehash = Hex.encodeHexString(globalMd5Digest.digest()); - return new Metadata(lines, filehash, originalLineOffsets, lineHashes.toArray(new String[0])); + return new Metadata(lines, filehash, originalLineOffsets, lineHashes.toArray(new byte[0][])); } catch (IOException e) { throw new IllegalStateException(String.format("Fail to read file '%s' with encoding '%s'", file.getAbsolutePath(), encoding), e); @@ -121,12 +121,12 @@ class FileMetadata { } } - private String md5IgnoreWhitespace(StringBuilder currentLineStr) { + private byte[] md5IgnoreWhitespace(StringBuilder currentLineStr) { String reducedLine = StringUtils.replaceChars(currentLineStr.toString(), SPACE_CHARS, ""); if (reducedLine.isEmpty()) { - return ""; + return null; } - return DigestUtils.md5Hex(reducedLine); + return DigestUtils.md5(reducedLine); } private byte[] charToBytesUTF(char c) { @@ -144,9 +144,9 @@ class FileMetadata { final int lines; final String hash; final long[] originalLineOffsets; - final String[] lineHashes; + final byte[][] lineHashes; - private Metadata(int lines, String hash, List<Long> originalLineOffsets, String[] lineHashes) { + private Metadata(int lines, String hash, List<Long> originalLineOffsets, byte[][] lineHashes) { this.lines = lines; this.hash = hash; this.originalLineOffsets = Longs.toArray(originalLineOffsets); diff --git a/sonar-batch/src/test/java/org/sonar/batch/index/SourcePersisterTest.java b/sonar-batch/src/test/java/org/sonar/batch/index/SourcePersisterTest.java index b2fde847b13..88b3da63e38 100644 --- a/sonar-batch/src/test/java/org/sonar/batch/index/SourcePersisterTest.java +++ b/sonar-batch/src/test/java/org/sonar/batch/index/SourcePersisterTest.java @@ -19,6 +19,7 @@ */ package org.sonar.batch.index; +import org.apache.commons.codec.digest.DigestUtils; import org.apache.commons.io.FileUtils; import org.junit.Before; import org.junit.Rule; @@ -113,7 +114,7 @@ public class SourcePersisterTest extends AbstractDaoTestCase { java.io.File sameFile = new java.io.File(basedir, relativePathSame); FileUtils.write(sameFile, "unchanged\ncontent"); DefaultInputFile inputFileNew = new DefaultInputFile(PROJECT_KEY, relativePathSame).setLines(2).setAbsolutePath(sameFile.getAbsolutePath()) - .setLineHashes(new String[] {"foo", "bar"}); + .setLineHashes(new byte[][] {md5("unchanged"), md5("ncontent")}); when(inputPathCache.all()).thenReturn(Arrays.<InputPath>asList(inputFileNew)); mockResourceCache(relativePathSame, PROJECT_KEY, "uuidsame"); @@ -133,7 +134,7 @@ public class SourcePersisterTest extends AbstractDaoTestCase { FileUtils.write(sameFile, "changed\ncontent"); DefaultInputFile inputFileNew = new DefaultInputFile(PROJECT_KEY, relativePathSame).setLines(2) .setAbsolutePath(sameFile.getAbsolutePath()) - .setLineHashes(new String[] {"foo", "bar"}); + .setLineHashes(new byte[][] {md5("changed"), md5("content")}); when(inputPathCache.all()).thenReturn(Arrays.<InputPath>asList(inputFileNew)); mockResourceCache(relativePathSame, PROJECT_KEY, "uuidsame"); @@ -145,7 +146,7 @@ public class SourcePersisterTest extends AbstractDaoTestCase { assertThat(fileSourceDto.getUpdatedAt()).isEqualTo(now.getTime()); assertThat(fileSourceDto.getData()).isEqualTo( ",,,,,,,changed\r\n,,,,,,,content\r\n"); - assertThat(fileSourceDto.getLineHashes()).isEqualTo("foo\nbar"); + assertThat(fileSourceDto.getLineHashes()).isEqualTo(md5Hex("changed") + "\n" + md5Hex("content")); assertThat(fileSourceDto.getDataHash()).isEqualTo("54f7fa51128a7ee577a476974c56568c"); } @@ -157,7 +158,7 @@ public class SourcePersisterTest extends AbstractDaoTestCase { String relativePathEmpty = "src/empty.java"; DefaultInputFile inputFileEmpty = new DefaultInputFile(PROJECT_KEY, relativePathEmpty) .setLines(0) - .setLineHashes(new String[] {}); + .setLineHashes(new byte[][] {}); when(inputPathCache.all()).thenReturn(Arrays.<InputPath>asList(inputFileEmpty)); mockResourceCache(relativePathEmpty, PROJECT_KEY, "uuidempty"); @@ -178,7 +179,7 @@ public class SourcePersisterTest extends AbstractDaoTestCase { DefaultInputFile inputFileNew = new DefaultInputFile(PROJECT_KEY, relativePathNew) .setLines(3) .setAbsolutePath(newFile.getAbsolutePath()) - .setLineHashes(new String[] {"foo", "bar", "bee"}); + .setLineHashes(new byte[][] {md5("foo"), md5("bar"), md5("biz")}); when(inputPathCache.all()).thenReturn(Arrays.<InputPath>asList(inputFileNew)); mockResourceCache(relativePathNew, PROJECT_KEY, "uuidnew"); @@ -189,7 +190,7 @@ public class SourcePersisterTest extends AbstractDaoTestCase { assertThat(fileSourceDto.getUpdatedAt()).isEqualTo(now.getTime()); assertThat(fileSourceDto.getData()).isEqualTo( ",,,,,,,foo\r\n,,,,,,,bar\r\n,,,,,,,biz\r\n"); - assertThat(fileSourceDto.getLineHashes()).isEqualTo("foo\nbar\nbee"); + assertThat(fileSourceDto.getLineHashes()).isEqualTo(md5Hex("foo") + "\n" + md5Hex("bar") + "\n" + md5Hex("biz")); assertThat(fileSourceDto.getDataHash()).isEqualTo("419c2b162018f6bbeb04fc0500d7852d"); } @@ -207,7 +208,7 @@ public class SourcePersisterTest extends AbstractDaoTestCase { .setLines(3) .setAbsolutePath(newFile.getAbsolutePath()) .setOriginalLineOffsets(new long[] {0, 4, 7}) - .setLineHashes(new String[] {"foo", "bar", "bee"}); + .setLineHashes(new byte[][] {md5("foo"), md5("bar"), md5("biz")}); when(inputPathCache.all()).thenReturn(Arrays.<InputPath>asList(inputFileNew)); mockResourceCache(relativePathNew, PROJECT_KEY, "uuidnew"); @@ -238,7 +239,7 @@ public class SourcePersisterTest extends AbstractDaoTestCase { FileSourceDto fileSourceDto = new FileSourceDao(getMyBatis()).select("uuidnew"); assertThat(fileSourceDto.getCreatedAt()).isEqualTo(now.getTime()); assertThat(fileSourceDto.getUpdatedAt()).isEqualTo(now.getTime()); - assertThat(fileSourceDto.getLineHashes()).isEqualTo("foo\nbar\nbee"); + assertThat(fileSourceDto.getLineHashes()).isEqualTo(md5Hex("foo") + "\n" + md5Hex("bar") + "\n" + md5Hex("biz")); assertThat(fileSourceDto.getData()).isEqualTo( "123,julien,2014-10-11T16:44:02+0100,1,4,2,\"0,3,a\",foo\r\n" + "234,simon,2014-10-12T16:44:02+0100,,,,\"0,1,cd\",bar\r\n" @@ -321,4 +322,13 @@ public class SourcePersisterTest extends AbstractDaoTestCase { sonarFile.setUuid(uuid); when(resourceCache.get(projectKey + ":" + relativePathEmpty)).thenReturn(sonarFile); } + + private byte[] md5(String string) { + return DigestUtils.md5(string); + } + + private String md5Hex(String string) { + return DigestUtils.md5Hex(string); + } + } diff --git a/sonar-batch/src/test/java/org/sonar/batch/scan/filesystem/FileMetadataTest.java b/sonar-batch/src/test/java/org/sonar/batch/scan/filesystem/FileMetadataTest.java index 6814f6888ed..72a415020f1 100644 --- a/sonar-batch/src/test/java/org/sonar/batch/scan/filesystem/FileMetadataTest.java +++ b/sonar-batch/src/test/java/org/sonar/batch/scan/filesystem/FileMetadataTest.java @@ -65,7 +65,9 @@ public class FileMetadataTest { assertThat(metadata.lines).isEqualTo(3); assertThat(metadata.hash).isEqualTo(EXPECTED_HASH_WITHOUT_LATEST_EOL); assertThat(metadata.originalLineOffsets).containsOnly(0, 5, 10); - assertThat(metadata.lineHashes).containsOnly(md5("foo"), md5("bar"), md5("baz")); + assertThat(metadata.lineHashes[0]).containsOnly(md5("foo")); + assertThat(metadata.lineHashes[1]).containsOnly(md5("bar")); + assertThat(metadata.lineHashes[2]).containsOnly(md5("baz")); } @Test @@ -77,7 +79,10 @@ public class FileMetadataTest { assertThat(metadata.lines).isEqualTo(4); assertThat(metadata.hash).isEqualTo(NON_ASCII); assertThat(metadata.originalLineOffsets).containsOnly(0, 5, 10, 18); - assertThat(metadata.lineHashes).containsOnly(md5("föo"), md5("bàr"), md5("\u1D11Ebaßz"), ""); + assertThat(metadata.lineHashes[0]).containsOnly(md5("föo")); + assertThat(metadata.lineHashes[1]).containsOnly(md5("bàr")); + assertThat(metadata.lineHashes[2]).containsOnly(md5("\u1D11Ebaßz")); + assertThat(metadata.lineHashes[3]).isNull(); } @Test @@ -89,7 +94,10 @@ public class FileMetadataTest { assertThat(metadata.lines).isEqualTo(4); assertThat(metadata.hash).isEqualTo(NON_ASCII); assertThat(metadata.originalLineOffsets).containsOnly(0, 5, 10, 18); - assertThat(metadata.lineHashes).containsOnly(md5("föo"), md5("bàr"), md5("\u1D11Ebaßz"), ""); + assertThat(metadata.lineHashes[0]).containsOnly(md5("föo")); + assertThat(metadata.lineHashes[1]).containsOnly(md5("bàr")); + assertThat(metadata.lineHashes[2]).containsOnly(md5("\u1D11Ebaßz")); + assertThat(metadata.lineHashes[3]).isNull(); } @Test @@ -101,7 +109,9 @@ public class FileMetadataTest { assertThat(metadata.lines).isEqualTo(3); assertThat(metadata.hash).isEqualTo(EXPECTED_HASH_WITHOUT_LATEST_EOL); assertThat(metadata.originalLineOffsets).containsOnly(0, 4, 8); - assertThat(metadata.lineHashes).containsOnly(md5("foo"), md5("bar"), md5("baz")); + assertThat(metadata.lineHashes[0]).containsOnly(md5("foo")); + assertThat(metadata.lineHashes[1]).containsOnly(md5("bar")); + assertThat(metadata.lineHashes[2]).containsOnly(md5("baz")); } @Test @@ -113,7 +123,10 @@ public class FileMetadataTest { assertThat(metadata.lines).isEqualTo(4); assertThat(metadata.hash).isEqualTo(EXPECTED_HASH_WITH_LATEST_EOL); assertThat(metadata.originalLineOffsets).containsOnly(0, 4, 8, 12); - assertThat(metadata.lineHashes).containsOnly(md5("foo"), md5("bar"), md5("baz"), ""); + assertThat(metadata.lineHashes[0]).containsOnly(md5("foo")); + assertThat(metadata.lineHashes[1]).containsOnly(md5("bar")); + assertThat(metadata.lineHashes[2]).containsOnly(md5("baz")); + assertThat(metadata.lineHashes[3]).isNull(); } @Test @@ -125,7 +138,10 @@ public class FileMetadataTest { assertThat(metadata.lines).isEqualTo(4); assertThat(metadata.hash).isEqualTo(EXPECTED_HASH_WITH_LATEST_EOL); assertThat(metadata.originalLineOffsets).containsOnly(0, 4, 9, 13); - assertThat(metadata.lineHashes).containsOnly(md5("foo"), md5("bar"), md5("baz"), ""); + assertThat(metadata.lineHashes[0]).containsOnly(md5("foo")); + assertThat(metadata.lineHashes[1]).containsOnly(md5("bar")); + assertThat(metadata.lineHashes[2]).containsOnly(md5("baz")); + assertThat(metadata.lineHashes[3]).isNull(); } @Test @@ -137,7 +153,9 @@ public class FileMetadataTest { assertThat(metadata.lines).isEqualTo(3); assertThat(metadata.hash).isEqualTo(EXPECTED_HASH_WITHOUT_LATEST_EOL); assertThat(metadata.originalLineOffsets).containsOnly(0, 4, 9); - assertThat(metadata.lineHashes).containsOnly(md5("foo"), md5("bar"), md5("baz")); + assertThat(metadata.lineHashes[0]).containsOnly(md5("foo")); + assertThat(metadata.lineHashes[1]).containsOnly(md5("bar")); + assertThat(metadata.lineHashes[2]).containsOnly(md5("baz")); } @Test @@ -149,7 +167,10 @@ public class FileMetadataTest { assertThat(metadata.lines).isEqualTo(4); assertThat(metadata.hash).isEqualTo(EXPECTED_HASH_NEW_LINE_FIRST); assertThat(metadata.originalLineOffsets).containsOnly(0, 1, 5, 10); - assertThat(metadata.lineHashes).containsOnly("", md5("foo"), md5("bar"), md5("baz")); + assertThat(metadata.lineHashes[0]).isNull(); + assertThat(metadata.lineHashes[1]).containsOnly(md5("foo")); + assertThat(metadata.lineHashes[2]).containsOnly(md5("bar")); + assertThat(metadata.lineHashes[3]).containsOnly(md5("baz")); } @Test @@ -161,7 +182,9 @@ public class FileMetadataTest { assertThat(metadata.lines).isEqualTo(3); assertThat(metadata.hash).isEqualTo(EXPECTED_HASH_WITHOUT_LATEST_EOL); assertThat(metadata.originalLineOffsets).containsOnly(0, 4, 9); - assertThat(metadata.lineHashes).containsOnly(md5("foo"), md5("bar"), md5("baz")); + assertThat(metadata.lineHashes[0]).containsOnly(md5("foo")); + assertThat(metadata.lineHashes[1]).containsOnly(md5("bar")); + assertThat(metadata.lineHashes[2]).containsOnly(md5("baz")); } @Test @@ -171,7 +194,9 @@ public class FileMetadataTest { FileMetadata.Metadata metadata = FileMetadata.INSTANCE.read(tempFile, Charsets.UTF_8); assertThat(metadata.lines).isEqualTo(3); - assertThat(metadata.lineHashes).containsOnly(md5("foo"), md5("bar"), md5("baz")); + assertThat(metadata.lineHashes[0]).containsOnly(md5("foo")); + assertThat(metadata.lineHashes[1]).containsOnly(md5("bar")); + assertThat(metadata.lineHashes[2]).containsOnly(md5("baz")); } @Test @@ -204,7 +229,7 @@ public class FileMetadataTest { assertThat(hash1).isNotEqualTo(hash2); } - private static String md5(String input) { - return DigestUtils.md5Hex(input); + private static byte[] md5(String input) { + return DigestUtils.md5(input); } } diff --git a/sonar-batch/src/test/java/org/sonar/batch/scan/filesystem/InputPathCacheTest.java b/sonar-batch/src/test/java/org/sonar/batch/scan/filesystem/InputPathCacheTest.java index bbb63290841..521230b5826 100644 --- a/sonar-batch/src/test/java/org/sonar/batch/scan/filesystem/InputPathCacheTest.java +++ b/sonar-batch/src/test/java/org/sonar/batch/scan/filesystem/InputPathCacheTest.java @@ -19,6 +19,7 @@ */ package org.sonar.batch.scan.filesystem; +import org.apache.commons.codec.digest.DigestUtils; import org.junit.After; import org.junit.Before; import org.junit.Rule; @@ -66,17 +67,18 @@ public class InputPathCacheTest { .setType(Type.MAIN) .setStatus(Status.ADDED) .setHash("xyz") - .setLines(1) + .setLines(2) .setEncoding("UTF-8") .setOriginalLineOffsets(new long[] {0, 4}) - .setLineHashes(new String[] {"foo", "bar"}) + .setLineHashes(new byte[][] {DigestUtils.md5("foo"), DigestUtils.md5("bar")}) .setFile(temp.newFile("Bar.java"))); DefaultInputFile loadedFile = (DefaultInputFile) cache.getFile("struts-core", "src/main/java/Bar.java"); assertThat(loadedFile.relativePath()).isEqualTo("src/main/java/Bar.java"); assertThat(loadedFile.encoding()).isEqualTo("UTF-8"); assertThat(loadedFile.originalLineOffsets()).containsOnly(0, 4); - assertThat(loadedFile.lineHashes()).containsOnly("foo", "bar"); + assertThat(loadedFile.lineHashes()[0]).containsOnly(DigestUtils.md5("foo")); + assertThat(loadedFile.lineHashes()[1]).containsOnly(DigestUtils.md5("bar")); assertThat(cache.filesByModule("struts")).hasSize(1); assertThat(cache.filesByModule("struts-core")).hasSize(1); |