From d63887127e20c0a70c53c48a9aa5ffbdb1cf8873 Mon Sep 17 00:00:00 2001 From: "Shawn O. Pearce" Date: Thu, 11 Nov 2010 14:10:32 -0800 Subject: SimilarityIndex: Accept files larger than 8 MB Files bigger than 8 MB (2^23 bytes) tended to overflow the internal hashtable, as the table was capped in size to 2^17 records. If a file contained 2^17 unique data blocks/lines, the table insertion got stuck in an infinite loop as the table couldn't grow, and there was no open slot for the new item. Remove the artificial 2^17 table limit and instead allow the table to grow to be as big as 2^30. With a 64 byte block size, this permits hashing inputs as large as 64 GB. If the table reaches 2^30 (or cannot be allocated) hashing is aborted. RenameDetector no longer tries to break a modify file pair, and it does not try to match the file for rename or copy detection. Change-Id: Ibb4d756844f4667e181e24a34a468dc3655863ac Signed-off-by: Shawn O. Pearce --- .../org/eclipse/jgit/diff/SimilarityIndexTest.java | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) (limited to 'org.eclipse.jgit.test') diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/SimilarityIndexTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/SimilarityIndexTest.java index 7e42e53586..1da5828b34 100644 --- a/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/SimilarityIndexTest.java +++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/SimilarityIndexTest.java @@ -48,10 +48,11 @@ import java.io.IOException; import junit.framework.TestCase; +import org.eclipse.jgit.diff.SimilarityIndex.TableFullException; import org.eclipse.jgit.lib.Constants; public class SimilarityIndexTest extends TestCase { - public void testIndexingSmallObject() { + public void testIndexingSmallObject() throws TableFullException { SimilarityIndex si = hash("" // + "A\n" // + "B\n" // @@ -70,7 +71,8 @@ public class SimilarityIndexTest extends TestCase { assertEquals(2, si.count(si.findIndex(key_D))); } - public 
void testIndexingLargeObject() throws IOException { + public void testIndexingLargeObject() throws IOException, + TableFullException { byte[] in = ("" // + "A\n" // + "B\n" // @@ -81,7 +83,7 @@ public class SimilarityIndexTest extends TestCase { assertEquals(2, si.size()); } - public void testCommonScore_SameFiles() { + public void testCommonScore_SameFiles() throws TableFullException { String text = "" // + "A\n" // + "B\n" // @@ -96,21 +98,22 @@ public class SimilarityIndexTest extends TestCase { assertEquals(100, dst.score(src, 100)); } - public void testCommonScore_EmptyFiles() { + public void testCommonScore_EmptyFiles() throws TableFullException { SimilarityIndex src = hash(""); SimilarityIndex dst = hash(""); assertEquals(0, src.common(dst)); assertEquals(0, dst.common(src)); } - public void testCommonScore_TotallyDifferentFiles() { + public void testCommonScore_TotallyDifferentFiles() + throws TableFullException { SimilarityIndex src = hash("A\n"); SimilarityIndex dst = hash("D\n"); assertEquals(0, src.common(dst)); assertEquals(0, dst.common(src)); } - public void testCommonScore_SimiliarBy75() { + public void testCommonScore_SimiliarBy75() throws TableFullException { SimilarityIndex src = hash("A\nB\nC\nD\n"); SimilarityIndex dst = hash("A\nB\nC\nQ\n"); assertEquals(6, src.common(dst)); @@ -120,10 +123,11 @@ public class SimilarityIndexTest extends TestCase { assertEquals(75, dst.score(src, 100)); } - private static SimilarityIndex hash(String text) { + private static SimilarityIndex hash(String text) throws TableFullException { SimilarityIndex src = new SimilarityIndex() { @Override - void hash(byte[] raw, int ptr, final int end) { + void hash(byte[] raw, int ptr, final int end) + throws TableFullException { while (ptr < end) { int hash = raw[ptr] & 0xff; int start = ptr; @@ -143,7 +147,7 @@ public class SimilarityIndexTest extends TestCase { return src; } - private static int keyFor(String line) { + private static int keyFor(String line) throws 
TableFullException { SimilarityIndex si = hash(line); assertEquals("single line scored", 1, si.size()); return si.key(0); -- cgit v1.2.3