import junit.framework.TestCase;
+import org.eclipse.jgit.diff.SimilarityIndex.TableFullException;
import org.eclipse.jgit.lib.Constants;
public class SimilarityIndexTest extends TestCase {
- public void testIndexingSmallObject() {
+ public void testIndexingSmallObject() throws TableFullException {
SimilarityIndex si = hash("" //
+ "A\n" //
+ "B\n" //
assertEquals(2, si.count(si.findIndex(key_D)));
}
- public void testIndexingLargeObject() throws IOException {
+ public void testIndexingLargeObject() throws IOException,
+ TableFullException {
byte[] in = ("" //
+ "A\n" //
+ "B\n" //
assertEquals(2, si.size());
}
- public void testCommonScore_SameFiles() {
+ public void testCommonScore_SameFiles() throws TableFullException {
String text = "" //
+ "A\n" //
+ "B\n" //
assertEquals(100, dst.score(src, 100));
}
// Hashing two empty strings must yield common() == 0, checked in both
// directions (src against dst and dst against src).
- public void testCommonScore_EmptyFiles() {
+ public void testCommonScore_EmptyFiles() throws TableFullException {
SimilarityIndex src = hash("");
SimilarityIndex dst = hash("");
assertEquals(0, src.common(dst));
assertEquals(0, dst.common(src));
}
// Two single-line inputs with different content ("A\n" vs "D\n") must
// yield common() == 0, checked in both directions.
- public void testCommonScore_TotallyDifferentFiles() {
+ public void testCommonScore_TotallyDifferentFiles()
+ throws TableFullException {
SimilarityIndex src = hash("A\n");
SimilarityIndex dst = hash("D\n");
assertEquals(0, src.common(dst));
assertEquals(0, dst.common(src));
}
// The inputs share three of their four lines ("A\n", "B\n", "C\n") and
// differ only in the last one.
// NOTE(review): the expected 6 presumably counts the bytes of the three
// shared two-byte lines, and 75 is presumably 6 of 8 bytes scaled to the
// maximum of 100 passed to score() -- confirm against common()/score().
- public void testCommonScore_SimiliarBy75() {
+ public void testCommonScore_SimiliarBy75() throws TableFullException {
SimilarityIndex src = hash("A\nB\nC\nD\n");
SimilarityIndex dst = hash("A\nB\nC\nQ\n");
assertEquals(6, src.common(dst));
assertEquals(75, dst.score(src, 100));
}
- private static SimilarityIndex hash(String text) {
+ private static SimilarityIndex hash(String text) throws TableFullException {
SimilarityIndex src = new SimilarityIndex() {
@Override
- void hash(byte[] raw, int ptr, final int end) {
+ void hash(byte[] raw, int ptr, final int end)
+ throws TableFullException {
while (ptr < end) {
int hash = raw[ptr] & 0xff;
int start = ptr;
return src;
}
- private static int keyFor(String line) {
+ private static int keyFor(String line) throws TableFullException {
SimilarityIndex si = hash(line);
assertEquals("single line scored", 1, si.size());
return si.key(0);
import org.eclipse.jgit.JGitText;
import org.eclipse.jgit.diff.DiffEntry.ChangeType;
+import org.eclipse.jgit.diff.SimilarityIndex.TableFullException;
import org.eclipse.jgit.lib.AbbreviatedObjectId;
import org.eclipse.jgit.lib.FileMode;
import org.eclipse.jgit.lib.NullProgressMonitor;
/**
 * Score the OLD side of {@code d} against its NEW side.
 * <p>
 * A {@link SimilarityIndex} is built, hashed and sorted for each side, and
 * the result of {@code src.score(dst, 100)} is returned. If building either
 * index throws {@link TableFullException}, {@code overRenameLimit} is set
 * and {@code breakScore + 1} is returned instead.
 *
 * @param reader
 *            source used to open the OLD and NEW content of the entry.
 * @param d
 *            the entry whose two sides are compared.
 * @return the score from {@code src.score(dst, 100)}, or
 *         {@code breakScore + 1} when a table overflowed.
 * @throws IOException
 *             propagated from opening or hashing the content.
 */
private int calculateModifyScore(ContentSource.Pair reader, DiffEntry d)
throws IOException {
- SimilarityIndex src = new SimilarityIndex();
- src.hash(reader.open(OLD, d));
- src.sort();
-
- SimilarityIndex dst = new SimilarityIndex();
- dst.hash(reader.open(NEW, d));
- dst.sort();
- return src.score(dst, 100);
+ try {
+ SimilarityIndex src = new SimilarityIndex();
+ src.hash(reader.open(OLD, d));
+ src.sort();
+
+ SimilarityIndex dst = new SimilarityIndex();
+ dst.hash(reader.open(NEW, d));
+ dst.sort();
+ return src.score(dst, 100);
+ } catch (TableFullException tableFull) {
+ // If either table overflowed while being constructed, don't allow
+ // the pair to be broken. Returning 1 higher than breakScore will
+ // ensure it's not similar, but not quite dissimilar enough to break.
+ //
+ overRenameLimit = true;
+ return breakScore + 1;
+ }
}
private void findContentRenames(ContentSource.Pair reader,
d = new SimilarityRenameDetector(reader, deleted, added);
d.setRenameScore(getRenameScore());
d.compute(pm);
+ overRenameLimit |= d.isTableOverflow();
deleted = d.getLeftOverSources();
added = d.getLeftOverDestinations();
entries.addAll(d.getMatches());
* file are discovered.
*/
class SimilarityIndex {
- /** The {@link #idHash} table stops growing at {@code 1 << MAX_HASH_BITS}. */
- private static final int MAX_HASH_BITS = 17;
+ /** A special {@link TableFullException} used in place of OutOfMemoryError. */
+ private static final TableFullException TABLE_FULL_OUT_OF_MEMORY = new TableFullException();
/**
* Shift to apply before storing a key.
/** Number of non-zero entries in {@link #idHash}. */
private int idSize;
+ /** {@link #idSize} that triggers {@link #idHash} to double in size. */
+ private int idGrowAt;
+
/**
* Pairings of content keys and counters.
* <p>
* Slots in the table are actually two ints wedged into a single long. The
- * upper {@link #MAX_HASH_BITS} bits stores the content key, and the
- * remaining lower bits stores the number of bytes associated with that key.
- * Empty slots are denoted by 0, which cannot occur because the count cannot
- * be 0. Values can only be positive, which we enforce during key addition.
+ * upper 32 bits store the content key, and the remaining lower bits store
+ * the number of bytes associated with that key. Empty slots are denoted by
+ * 0, which cannot occur because the count cannot be 0. Values can only be
+ * positive, which we enforce during key addition.
*/
private long[] idHash;
/**
 * Create an empty index whose hash table starts with {@code 1 << 8} slots.
 */
SimilarityIndex() {
idHashBits = 8;
idHash = new long[1 << idHashBits];
+ idGrowAt = growAt(idHashBits); // entry count at which the table must grow
}
long getFileSize() {
fileSize = size;
}
- void hash(ObjectLoader obj) throws MissingObjectException, IOException {
+ void hash(ObjectLoader obj) throws MissingObjectException, IOException,
+ TableFullException {
if (obj.isLarge()) {
ObjectStream in = obj.openStream();
try {
}
}
- void hash(byte[] raw, int ptr, final int end) {
+ void hash(byte[] raw, int ptr, final int end) throws TableFullException {
while (ptr < end) {
int hash = 5381;
int start = ptr;
}
}
- void hash(InputStream in, long remaining) throws IOException {
+ void hash(InputStream in, long remaining) throws IOException,
+ TableFullException {
byte[] buf = new byte[4096];
int ptr = 0;
int cnt = 0;
return (idHash.length - idSize) + idx;
}
- void add(int key, int cnt) {
+ void add(int key, int cnt) throws TableFullException {
key = (key * 0x9e370001) >>> 1; // Mix bits and ensure not negative.
int j = slot(key);
long v = idHash[j];
if (v == 0) {
// Empty slot in the table, store here.
- if (shouldGrow()) {
+ if (idGrowAt <= idSize) {
grow();
j = slot(key);
continue;
return key >>> (31 - idHashBits);
}
- private boolean shouldGrow() {
- return idHashBits < MAX_HASH_BITS && idHash.length <= idSize * 2;
/**
 * Compute the entry count at which a table of {@code 1 << idHashBits}
 * slots must be enlarged, namely
 * {@code (1 << idHashBits) * (idHashBits - 3) / idHashBits}.
 * <p>
 * The product is evaluated as a {@code long}. In {@code int} arithmetic it
 * overflows once {@code idHashBits} reaches 27, although the table is
 * allowed to grow until {@code idHashBits} is 30. An overflowed threshold
 * is negative or far too small, which forces needless growth and a
 * premature table-full failure. The final quotient always fits in an
 * {@code int} for {@code idHashBits <= 30}.
 *
 * @param idHashBits
 *            log2 of the table length; must not be 0.
 * @return number of stored entries that triggers the next growth.
 */
+ private static int growAt(int idHashBits) {
+ return (int) ((1L << idHashBits) * (idHashBits - 3) / idHashBits);
}
- private void grow() {
+ private void grow() throws TableFullException {
+ if (idHashBits == 30)
+ throw new TableFullException();
+
long[] oldHash = idHash;
int oldSize = idHash.length;
idHashBits++;
- idHash = new long[1 << idHashBits];
+ idGrowAt = growAt(idHashBits);
+
+ try {
+ idHash = new long[1 << idHashBits];
+ } catch (OutOfMemoryError noMemory) {
+ throw TABLE_FULL_OUT_OF_MEMORY;
+ }
+
for (int i = 0; i < oldSize; i++) {
long v = oldHash[i];
if (v != 0) {
/**
 * Extract the counter half of a packed table slot.
 *
 * @param entry
 *            a packed slot value.
 * @return the low 32 bits of {@code entry}, as an int.
 */
private static int countOf(long entry) {
final int count = (int) entry; // narrowing keeps only the low 32 bits
return count;
}
+
/**
 * Checked exception raised when the hash table cannot be enlarged: either
 * {@code idHashBits} has already reached 30, or allocating the larger
 * table failed with an {@link OutOfMemoryError}.
 */
+ static class TableFullException extends Exception {
+ private static final long serialVersionUID = 1L;
+ }
}
import org.eclipse.jgit.JGitText;
import org.eclipse.jgit.diff.DiffEntry.ChangeType;
+import org.eclipse.jgit.diff.SimilarityIndex.TableFullException;
import org.eclipse.jgit.lib.FileMode;
import org.eclipse.jgit.lib.NullProgressMonitor;
import org.eclipse.jgit.lib.ProgressMonitor;
/** Score a pair must exceed to be considered a rename. */
private int renameScore = 60;
+ /** Set if any {@link SimilarityIndex.TableFullException} occurs. */
+ private boolean tableOverflow;
+
private List<DiffEntry> out;
SimilarityRenameDetector(ContentSource.Pair reader, List<DiffEntry> srcs,
return dsts;
}
/**
 * @return true if a {@link TableFullException} was caught while hashing a
 *         source or destination entry.
 */
+ boolean isTableOverflow() {
+ return tableOverflow;
+ }
+
private static List<DiffEntry> compactSrcList(List<DiffEntry> in) {
ArrayList<DiffEntry> r = new ArrayList<DiffEntry>(in.size());
for (DiffEntry e : in) {
continue;
}
- SimilarityIndex s = hash(OLD, srcEnt);
+ SimilarityIndex s;
+ try {
+ s = hash(OLD, srcEnt);
+ } catch (TableFullException tableFull) {
+ tableOverflow = true;
+ continue;
+ }
+
for (int dstIdx = 0; dstIdx < dsts.size(); dstIdx++) {
DiffEntry dstEnt = dsts.get(dstIdx);
continue;
}
- SimilarityIndex d = hash(NEW, dstEnt);
+ SimilarityIndex d;
+ try {
+ d = hash(NEW, dstEnt);
+ } catch (TableFullException tableFull) {
+ tableOverflow = true;
+ pm.update(1);
+ continue;
+ }
+
int contentScore = s.score(d, 10000);
// nameScore returns a value between 0 and 100, but we want it
}
private SimilarityIndex hash(DiffEntry.Side side, DiffEntry ent)
- throws IOException {
+ throws IOException, TableFullException {
SimilarityIndex r = new SimilarityIndex();
r.hash(reader.open(side, ent));
r.sort();