aboutsummaryrefslogtreecommitdiffstats
path: root/sonar-batch
diff options
context:
space:
mode:
author: Julien HENRY <julien.henry@sonarsource.com> 2014-11-25 14:49:07 +0100
committer: Julien HENRY <julien.henry@sonarsource.com> 2014-11-25 14:57:48 +0100
commit: e0c4341134b434b0766a6aa97310c53b65acdfc1 (patch)
tree: bc2c3a326619c1634b267f0b988035bbb856e506 /sonar-batch
parent: 6e5b50ef39f0eaaef1f97a23b0454bcad311ed40 (diff)
download: sonarqube-e0c4341134b434b0766a6aa97310c53b65acdfc1.tar.gz
sonarqube-e0c4341134b434b0766a6aa97310c53b65acdfc1.zip
SONAR-5868 Try to reduce memory consumption of line hashes
Diffstat (limited to 'sonar-batch')
-rw-r--r-- sonar-batch/src/main/java/org/sonar/batch/index/SourcePersister.java 22
-rw-r--r-- sonar-batch/src/main/java/org/sonar/batch/scan/filesystem/DefaultInputFileValueCoder.java 10
-rw-r--r-- sonar-batch/src/main/java/org/sonar/batch/scan/filesystem/FileMetadata.java 14
-rw-r--r-- sonar-batch/src/test/java/org/sonar/batch/index/SourcePersisterTest.java 26
-rw-r--r-- sonar-batch/src/test/java/org/sonar/batch/scan/filesystem/FileMetadataTest.java 49
-rw-r--r-- sonar-batch/src/test/java/org/sonar/batch/scan/filesystem/InputPathCacheTest.java 8
6 files changed, 94 insertions, 35 deletions
diff --git a/sonar-batch/src/main/java/org/sonar/batch/index/SourcePersister.java b/sonar-batch/src/main/java/org/sonar/batch/index/SourcePersister.java
index 13b644577f8..a45b7a462ac 100644
--- a/sonar-batch/src/main/java/org/sonar/batch/index/SourcePersister.java
+++ b/sonar-batch/src/main/java/org/sonar/batch/index/SourcePersister.java
@@ -20,7 +20,7 @@
package org.sonar.batch.index;
import com.google.common.base.CharMatcher;
-import com.google.common.base.Joiner;
+import org.apache.commons.codec.binary.Hex;
import org.apache.commons.codec.digest.DigestUtils;
import org.apache.commons.io.FileUtils;
import org.apache.commons.lang.StringUtils;
@@ -158,7 +158,7 @@ public class SourcePersister implements ScanPersister {
.setFileUuid(fileUuid)
.setData(newData)
.setDataHash(newDataHash)
- .setLineHashes(StringUtils.defaultIfEmpty(Joiner.on('\n').join(inputFile.lineHashes()), null))
+ .setLineHashes(lineHashesAsMd5Hex(inputFile))
.setCreatedAt(now.getTime())
.setUpdatedAt(now.getTime());
mapper.insert(newFileSource);
@@ -167,7 +167,7 @@ public class SourcePersister implements ScanPersister {
if (!newDataHash.equals(previous.getDataHash())) {
previous
.setData(newData)
- .setLineHashes(StringUtils.defaultIfEmpty(Joiner.on('\n').join(inputFile.lineHashes()), null))
+ .setLineHashes(lineHashesAsMd5Hex(inputFile))
.setDataHash(newDataHash)
.setUpdatedAt(now.getTime());
mapper.update(previous);
@@ -177,6 +177,22 @@ public class SourcePersister implements ScanPersister {
}
@CheckForNull
+  // Serializes the per-line MD5 digests of inputFile as hex strings joined by '\n'.
+  // A line without a digest (null entry) is written as an empty string, so the N-th
+  // segment of the result always corresponds to the N-th entry of lineHashes().
+  // Returns null when the file reports zero lines.
+  private String lineHashesAsMd5Hex(DefaultInputFile inputFile) {
+    if (inputFile.lines() == 0) {
+      return null;
+    }
+    // An MD5 hex string is 32 chars long + '\n' = 33
+    StringBuilder result = new StringBuilder(inputFile.lines() * (32 + 1));
+    // Track the position with an explicit flag instead of result.length():
+    // a null hash appends "", so after leading blank lines the length is still 0
+    // and the separator would be dropped, shifting every following hash.
+    boolean first = true;
+    for (byte[] lineHash : inputFile.lineHashes()) {
+      if (!first) {
+        result.append('\n');
+      }
+      first = false;
+      result.append(lineHash != null ? Hex.encodeHexString(lineHash) : "");
+    }
+    return result.toString();
+  }
+
+ @CheckForNull
String getSourceData(DefaultInputFile file) {
if (file.lines() == 0) {
return null;
diff --git a/sonar-batch/src/main/java/org/sonar/batch/scan/filesystem/DefaultInputFileValueCoder.java b/sonar-batch/src/main/java/org/sonar/batch/scan/filesystem/DefaultInputFileValueCoder.java
index e216534167f..3ce3c5872c7 100644
--- a/sonar-batch/src/main/java/org/sonar/batch/scan/filesystem/DefaultInputFileValueCoder.java
+++ b/sonar-batch/src/main/java/org/sonar/batch/scan/filesystem/DefaultInputFileValueCoder.java
@@ -48,7 +48,9 @@ class DefaultInputFileValueCoder implements ValueCoder {
value.put(f.lines());
putUTFOrNull(value, f.encoding());
value.putLongArray(f.originalLineOffsets());
- value.putStringArray(f.lineHashes());
+ for (int i = 0; i < f.lines(); i++) {
+ value.putByteArray(f.lineHashes()[i]);
+ }
}
private void putUTFOrNull(Value value, @Nullable String utfOrNull) {
@@ -75,7 +77,11 @@ class DefaultInputFileValueCoder implements ValueCoder {
file.setLines(value.getInt());
file.setEncoding(value.getString());
file.setOriginalLineOffsets(value.getLongArray());
- file.setLineHashes(value.getStringArray());
+ byte[][] lineHashes = new byte[file.lines()][];
+ for (int i = 0; i < file.lines(); i++) {
+ lineHashes[i] = value.getByteArray();
+ }
+ file.setLineHashes(lineHashes);
return file;
}
diff --git a/sonar-batch/src/main/java/org/sonar/batch/scan/filesystem/FileMetadata.java b/sonar-batch/src/main/java/org/sonar/batch/scan/filesystem/FileMetadata.java
index 95089291ef6..7fdc0f90033 100644
--- a/sonar-batch/src/main/java/org/sonar/batch/scan/filesystem/FileMetadata.java
+++ b/sonar-batch/src/main/java/org/sonar/batch/scan/filesystem/FileMetadata.java
@@ -62,7 +62,7 @@ class FileMetadata {
Reader reader = null;
long currentOriginalOffset = 0;
List<Long> originalLineOffsets = new ArrayList<Long>();
- List<String> lineHashes = new ArrayList<String>();
+ List<Object> lineHashes = new ArrayList<Object>();
StringBuilder currentLineStr = new StringBuilder();
int lines = 0;
char c = (char) -1;
@@ -112,7 +112,7 @@ class FileMetadata {
lineHashes.add(md5IgnoreWhitespace(currentLineStr));
}
String filehash = Hex.encodeHexString(globalMd5Digest.digest());
- return new Metadata(lines, filehash, originalLineOffsets, lineHashes.toArray(new String[0]));
+ return new Metadata(lines, filehash, originalLineOffsets, lineHashes.toArray(new byte[0][]));
} catch (IOException e) {
throw new IllegalStateException(String.format("Fail to read file '%s' with encoding '%s'", file.getAbsolutePath(), encoding), e);
@@ -121,12 +121,12 @@ class FileMetadata {
}
}
- private String md5IgnoreWhitespace(StringBuilder currentLineStr) {
+ private byte[] md5IgnoreWhitespace(StringBuilder currentLineStr) {
String reducedLine = StringUtils.replaceChars(currentLineStr.toString(), SPACE_CHARS, "");
if (reducedLine.isEmpty()) {
- return "";
+ return null;
}
- return DigestUtils.md5Hex(reducedLine);
+ return DigestUtils.md5(reducedLine);
}
private byte[] charToBytesUTF(char c) {
@@ -144,9 +144,9 @@ class FileMetadata {
final int lines;
final String hash;
final long[] originalLineOffsets;
- final String[] lineHashes;
+ final byte[][] lineHashes;
- private Metadata(int lines, String hash, List<Long> originalLineOffsets, String[] lineHashes) {
+ private Metadata(int lines, String hash, List<Long> originalLineOffsets, byte[][] lineHashes) {
this.lines = lines;
this.hash = hash;
this.originalLineOffsets = Longs.toArray(originalLineOffsets);
diff --git a/sonar-batch/src/test/java/org/sonar/batch/index/SourcePersisterTest.java b/sonar-batch/src/test/java/org/sonar/batch/index/SourcePersisterTest.java
index b2fde847b13..88b3da63e38 100644
--- a/sonar-batch/src/test/java/org/sonar/batch/index/SourcePersisterTest.java
+++ b/sonar-batch/src/test/java/org/sonar/batch/index/SourcePersisterTest.java
@@ -19,6 +19,7 @@
*/
package org.sonar.batch.index;
+import org.apache.commons.codec.digest.DigestUtils;
import org.apache.commons.io.FileUtils;
import org.junit.Before;
import org.junit.Rule;
@@ -113,7 +114,7 @@ public class SourcePersisterTest extends AbstractDaoTestCase {
java.io.File sameFile = new java.io.File(basedir, relativePathSame);
FileUtils.write(sameFile, "unchanged\ncontent");
DefaultInputFile inputFileNew = new DefaultInputFile(PROJECT_KEY, relativePathSame).setLines(2).setAbsolutePath(sameFile.getAbsolutePath())
- .setLineHashes(new String[] {"foo", "bar"});
+ .setLineHashes(new byte[][] {md5("unchanged"), md5("ncontent")});
when(inputPathCache.all()).thenReturn(Arrays.<InputPath>asList(inputFileNew));
mockResourceCache(relativePathSame, PROJECT_KEY, "uuidsame");
@@ -133,7 +134,7 @@ public class SourcePersisterTest extends AbstractDaoTestCase {
FileUtils.write(sameFile, "changed\ncontent");
DefaultInputFile inputFileNew = new DefaultInputFile(PROJECT_KEY, relativePathSame).setLines(2)
.setAbsolutePath(sameFile.getAbsolutePath())
- .setLineHashes(new String[] {"foo", "bar"});
+ .setLineHashes(new byte[][] {md5("changed"), md5("content")});
when(inputPathCache.all()).thenReturn(Arrays.<InputPath>asList(inputFileNew));
mockResourceCache(relativePathSame, PROJECT_KEY, "uuidsame");
@@ -145,7 +146,7 @@ public class SourcePersisterTest extends AbstractDaoTestCase {
assertThat(fileSourceDto.getUpdatedAt()).isEqualTo(now.getTime());
assertThat(fileSourceDto.getData()).isEqualTo(
",,,,,,,changed\r\n,,,,,,,content\r\n");
- assertThat(fileSourceDto.getLineHashes()).isEqualTo("foo\nbar");
+ assertThat(fileSourceDto.getLineHashes()).isEqualTo(md5Hex("changed") + "\n" + md5Hex("content"));
assertThat(fileSourceDto.getDataHash()).isEqualTo("54f7fa51128a7ee577a476974c56568c");
}
@@ -157,7 +158,7 @@ public class SourcePersisterTest extends AbstractDaoTestCase {
String relativePathEmpty = "src/empty.java";
DefaultInputFile inputFileEmpty = new DefaultInputFile(PROJECT_KEY, relativePathEmpty)
.setLines(0)
- .setLineHashes(new String[] {});
+ .setLineHashes(new byte[][] {});
when(inputPathCache.all()).thenReturn(Arrays.<InputPath>asList(inputFileEmpty));
mockResourceCache(relativePathEmpty, PROJECT_KEY, "uuidempty");
@@ -178,7 +179,7 @@ public class SourcePersisterTest extends AbstractDaoTestCase {
DefaultInputFile inputFileNew = new DefaultInputFile(PROJECT_KEY, relativePathNew)
.setLines(3)
.setAbsolutePath(newFile.getAbsolutePath())
- .setLineHashes(new String[] {"foo", "bar", "bee"});
+ .setLineHashes(new byte[][] {md5("foo"), md5("bar"), md5("biz")});
when(inputPathCache.all()).thenReturn(Arrays.<InputPath>asList(inputFileNew));
mockResourceCache(relativePathNew, PROJECT_KEY, "uuidnew");
@@ -189,7 +190,7 @@ public class SourcePersisterTest extends AbstractDaoTestCase {
assertThat(fileSourceDto.getUpdatedAt()).isEqualTo(now.getTime());
assertThat(fileSourceDto.getData()).isEqualTo(
",,,,,,,foo\r\n,,,,,,,bar\r\n,,,,,,,biz\r\n");
- assertThat(fileSourceDto.getLineHashes()).isEqualTo("foo\nbar\nbee");
+ assertThat(fileSourceDto.getLineHashes()).isEqualTo(md5Hex("foo") + "\n" + md5Hex("bar") + "\n" + md5Hex("biz"));
assertThat(fileSourceDto.getDataHash()).isEqualTo("419c2b162018f6bbeb04fc0500d7852d");
}
@@ -207,7 +208,7 @@ public class SourcePersisterTest extends AbstractDaoTestCase {
.setLines(3)
.setAbsolutePath(newFile.getAbsolutePath())
.setOriginalLineOffsets(new long[] {0, 4, 7})
- .setLineHashes(new String[] {"foo", "bar", "bee"});
+ .setLineHashes(new byte[][] {md5("foo"), md5("bar"), md5("biz")});
when(inputPathCache.all()).thenReturn(Arrays.<InputPath>asList(inputFileNew));
mockResourceCache(relativePathNew, PROJECT_KEY, "uuidnew");
@@ -238,7 +239,7 @@ public class SourcePersisterTest extends AbstractDaoTestCase {
FileSourceDto fileSourceDto = new FileSourceDao(getMyBatis()).select("uuidnew");
assertThat(fileSourceDto.getCreatedAt()).isEqualTo(now.getTime());
assertThat(fileSourceDto.getUpdatedAt()).isEqualTo(now.getTime());
- assertThat(fileSourceDto.getLineHashes()).isEqualTo("foo\nbar\nbee");
+ assertThat(fileSourceDto.getLineHashes()).isEqualTo(md5Hex("foo") + "\n" + md5Hex("bar") + "\n" + md5Hex("biz"));
assertThat(fileSourceDto.getData()).isEqualTo(
"123,julien,2014-10-11T16:44:02+0100,1,4,2,\"0,3,a\",foo\r\n"
+ "234,simon,2014-10-12T16:44:02+0100,,,,\"0,1,cd\",bar\r\n"
@@ -321,4 +322,13 @@ public class SourcePersisterTest extends AbstractDaoTestCase {
sonarFile.setUuid(uuid);
when(resourceCache.get(projectKey + ":" + relativePathEmpty)).thenReturn(sonarFile);
}
+
+ private byte[] md5(String string) {
+ return DigestUtils.md5(string);
+ }
+
+ private String md5Hex(String string) {
+ return DigestUtils.md5Hex(string);
+ }
+
}
diff --git a/sonar-batch/src/test/java/org/sonar/batch/scan/filesystem/FileMetadataTest.java b/sonar-batch/src/test/java/org/sonar/batch/scan/filesystem/FileMetadataTest.java
index 6814f6888ed..72a415020f1 100644
--- a/sonar-batch/src/test/java/org/sonar/batch/scan/filesystem/FileMetadataTest.java
+++ b/sonar-batch/src/test/java/org/sonar/batch/scan/filesystem/FileMetadataTest.java
@@ -65,7 +65,9 @@ public class FileMetadataTest {
assertThat(metadata.lines).isEqualTo(3);
assertThat(metadata.hash).isEqualTo(EXPECTED_HASH_WITHOUT_LATEST_EOL);
assertThat(metadata.originalLineOffsets).containsOnly(0, 5, 10);
- assertThat(metadata.lineHashes).containsOnly(md5("foo"), md5("bar"), md5("baz"));
+ assertThat(metadata.lineHashes[0]).containsOnly(md5("foo"));
+ assertThat(metadata.lineHashes[1]).containsOnly(md5("bar"));
+ assertThat(metadata.lineHashes[2]).containsOnly(md5("baz"));
}
@Test
@@ -77,7 +79,10 @@ public class FileMetadataTest {
assertThat(metadata.lines).isEqualTo(4);
assertThat(metadata.hash).isEqualTo(NON_ASCII);
assertThat(metadata.originalLineOffsets).containsOnly(0, 5, 10, 18);
- assertThat(metadata.lineHashes).containsOnly(md5("föo"), md5("bàr"), md5("\u1D11Ebaßz"), "");
+ assertThat(metadata.lineHashes[0]).containsOnly(md5("föo"));
+ assertThat(metadata.lineHashes[1]).containsOnly(md5("bàr"));
+ assertThat(metadata.lineHashes[2]).containsOnly(md5("\u1D11Ebaßz"));
+ assertThat(metadata.lineHashes[3]).isNull();
}
@Test
@@ -89,7 +94,10 @@ public class FileMetadataTest {
assertThat(metadata.lines).isEqualTo(4);
assertThat(metadata.hash).isEqualTo(NON_ASCII);
assertThat(metadata.originalLineOffsets).containsOnly(0, 5, 10, 18);
- assertThat(metadata.lineHashes).containsOnly(md5("föo"), md5("bàr"), md5("\u1D11Ebaßz"), "");
+ assertThat(metadata.lineHashes[0]).containsOnly(md5("föo"));
+ assertThat(metadata.lineHashes[1]).containsOnly(md5("bàr"));
+ assertThat(metadata.lineHashes[2]).containsOnly(md5("\u1D11Ebaßz"));
+ assertThat(metadata.lineHashes[3]).isNull();
}
@Test
@@ -101,7 +109,9 @@ public class FileMetadataTest {
assertThat(metadata.lines).isEqualTo(3);
assertThat(metadata.hash).isEqualTo(EXPECTED_HASH_WITHOUT_LATEST_EOL);
assertThat(metadata.originalLineOffsets).containsOnly(0, 4, 8);
- assertThat(metadata.lineHashes).containsOnly(md5("foo"), md5("bar"), md5("baz"));
+ assertThat(metadata.lineHashes[0]).containsOnly(md5("foo"));
+ assertThat(metadata.lineHashes[1]).containsOnly(md5("bar"));
+ assertThat(metadata.lineHashes[2]).containsOnly(md5("baz"));
}
@Test
@@ -113,7 +123,10 @@ public class FileMetadataTest {
assertThat(metadata.lines).isEqualTo(4);
assertThat(metadata.hash).isEqualTo(EXPECTED_HASH_WITH_LATEST_EOL);
assertThat(metadata.originalLineOffsets).containsOnly(0, 4, 8, 12);
- assertThat(metadata.lineHashes).containsOnly(md5("foo"), md5("bar"), md5("baz"), "");
+ assertThat(metadata.lineHashes[0]).containsOnly(md5("foo"));
+ assertThat(metadata.lineHashes[1]).containsOnly(md5("bar"));
+ assertThat(metadata.lineHashes[2]).containsOnly(md5("baz"));
+ assertThat(metadata.lineHashes[3]).isNull();
}
@Test
@@ -125,7 +138,10 @@ public class FileMetadataTest {
assertThat(metadata.lines).isEqualTo(4);
assertThat(metadata.hash).isEqualTo(EXPECTED_HASH_WITH_LATEST_EOL);
assertThat(metadata.originalLineOffsets).containsOnly(0, 4, 9, 13);
- assertThat(metadata.lineHashes).containsOnly(md5("foo"), md5("bar"), md5("baz"), "");
+ assertThat(metadata.lineHashes[0]).containsOnly(md5("foo"));
+ assertThat(metadata.lineHashes[1]).containsOnly(md5("bar"));
+ assertThat(metadata.lineHashes[2]).containsOnly(md5("baz"));
+ assertThat(metadata.lineHashes[3]).isNull();
}
@Test
@@ -137,7 +153,9 @@ public class FileMetadataTest {
assertThat(metadata.lines).isEqualTo(3);
assertThat(metadata.hash).isEqualTo(EXPECTED_HASH_WITHOUT_LATEST_EOL);
assertThat(metadata.originalLineOffsets).containsOnly(0, 4, 9);
- assertThat(metadata.lineHashes).containsOnly(md5("foo"), md5("bar"), md5("baz"));
+ assertThat(metadata.lineHashes[0]).containsOnly(md5("foo"));
+ assertThat(metadata.lineHashes[1]).containsOnly(md5("bar"));
+ assertThat(metadata.lineHashes[2]).containsOnly(md5("baz"));
}
@Test
@@ -149,7 +167,10 @@ public class FileMetadataTest {
assertThat(metadata.lines).isEqualTo(4);
assertThat(metadata.hash).isEqualTo(EXPECTED_HASH_NEW_LINE_FIRST);
assertThat(metadata.originalLineOffsets).containsOnly(0, 1, 5, 10);
- assertThat(metadata.lineHashes).containsOnly("", md5("foo"), md5("bar"), md5("baz"));
+ assertThat(metadata.lineHashes[0]).isNull();
+ assertThat(metadata.lineHashes[1]).containsOnly(md5("foo"));
+ assertThat(metadata.lineHashes[2]).containsOnly(md5("bar"));
+ assertThat(metadata.lineHashes[3]).containsOnly(md5("baz"));
}
@Test
@@ -161,7 +182,9 @@ public class FileMetadataTest {
assertThat(metadata.lines).isEqualTo(3);
assertThat(metadata.hash).isEqualTo(EXPECTED_HASH_WITHOUT_LATEST_EOL);
assertThat(metadata.originalLineOffsets).containsOnly(0, 4, 9);
- assertThat(metadata.lineHashes).containsOnly(md5("foo"), md5("bar"), md5("baz"));
+ assertThat(metadata.lineHashes[0]).containsOnly(md5("foo"));
+ assertThat(metadata.lineHashes[1]).containsOnly(md5("bar"));
+ assertThat(metadata.lineHashes[2]).containsOnly(md5("baz"));
}
@Test
@@ -171,7 +194,9 @@ public class FileMetadataTest {
FileMetadata.Metadata metadata = FileMetadata.INSTANCE.read(tempFile, Charsets.UTF_8);
assertThat(metadata.lines).isEqualTo(3);
- assertThat(metadata.lineHashes).containsOnly(md5("foo"), md5("bar"), md5("baz"));
+ assertThat(metadata.lineHashes[0]).containsOnly(md5("foo"));
+ assertThat(metadata.lineHashes[1]).containsOnly(md5("bar"));
+ assertThat(metadata.lineHashes[2]).containsOnly(md5("baz"));
}
@Test
@@ -204,7 +229,7 @@ public class FileMetadataTest {
assertThat(hash1).isNotEqualTo(hash2);
}
- private static String md5(String input) {
- return DigestUtils.md5Hex(input);
+ private static byte[] md5(String input) {
+ return DigestUtils.md5(input);
}
}
diff --git a/sonar-batch/src/test/java/org/sonar/batch/scan/filesystem/InputPathCacheTest.java b/sonar-batch/src/test/java/org/sonar/batch/scan/filesystem/InputPathCacheTest.java
index bbb63290841..521230b5826 100644
--- a/sonar-batch/src/test/java/org/sonar/batch/scan/filesystem/InputPathCacheTest.java
+++ b/sonar-batch/src/test/java/org/sonar/batch/scan/filesystem/InputPathCacheTest.java
@@ -19,6 +19,7 @@
*/
package org.sonar.batch.scan.filesystem;
+import org.apache.commons.codec.digest.DigestUtils;
import org.junit.After;
import org.junit.Before;
import org.junit.Rule;
@@ -66,17 +67,18 @@ public class InputPathCacheTest {
.setType(Type.MAIN)
.setStatus(Status.ADDED)
.setHash("xyz")
- .setLines(1)
+ .setLines(2)
.setEncoding("UTF-8")
.setOriginalLineOffsets(new long[] {0, 4})
- .setLineHashes(new String[] {"foo", "bar"})
+ .setLineHashes(new byte[][] {DigestUtils.md5("foo"), DigestUtils.md5("bar")})
.setFile(temp.newFile("Bar.java")));
DefaultInputFile loadedFile = (DefaultInputFile) cache.getFile("struts-core", "src/main/java/Bar.java");
assertThat(loadedFile.relativePath()).isEqualTo("src/main/java/Bar.java");
assertThat(loadedFile.encoding()).isEqualTo("UTF-8");
assertThat(loadedFile.originalLineOffsets()).containsOnly(0, 4);
- assertThat(loadedFile.lineHashes()).containsOnly("foo", "bar");
+ assertThat(loadedFile.lineHashes()[0]).containsOnly(DigestUtils.md5("foo"));
+ assertThat(loadedFile.lineHashes()[1]).containsOnly(DigestUtils.md5("bar"));
assertThat(cache.filesByModule("struts")).hasSize(1);
assertThat(cache.filesByModule("struts-core")).hasSize(1);