source.dussan.org Git - jgit.git/commitdiff
Fixed bug in scoring mechanism for rename detection 32/1232/2
author Jeff Schumacher <jeffschu@google.com>
Tue, 3 Aug 2010 23:59:30 +0000 (16:59 -0700)
committer Shawn O. Pearce <spearce@spearce.org>
Wed, 4 Aug 2010 17:56:19 +0000 (10:56 -0700)
A bug in rename detection would cause file scores to be wrong. The
bug was due to the way rename detection would judge the similarity
between files. If file A has three lines containing 'foo', and file
B has five lines containing 'foo', the rename detection phase should
record that A and B have three lines in common (the minimum of the
number of times that line appears in both files). Instead, it would
choose the number of times the line appeared in the destination
file, in this case file B. I fixed the bug by having the
SimilarityIndex instead choose the minimum number, as it should. I
also added a test case to verify that the bug had been fixed.

Change-Id: Ic75272a2d6e512a361f88eec91e1b8a7c2298d6b

org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/RenameDetectorTest.java
org.eclipse.jgit/src/org/eclipse/jgit/diff/SimilarityIndex.java

index bfc51b68f9382871744f3556456c6afdfba227cc..6024c7699896da978e0dd3371e8c8e2f0bc4ff23 100644 (file)
@@ -275,6 +275,21 @@ public class RenameDetectorTest extends RepositoryTestCase {
                assertRename(b, a, 74, entries.get(0));
        }
 
+       public void testInexactRename_SameContentMultipleTimes() throws Exception {
+               ObjectId aId = blob("a\na\na\na\n");
+               ObjectId bId = blob("a\na\na\n");
+
+               DiffEntry a = DiffEntry.add(PATH_A, aId);
+               DiffEntry b = DiffEntry.delete(PATH_Q, bId);
+
+               rd.add(a);
+               rd.add(b);
+
+               List<DiffEntry> entries = rd.compute();
+               assertEquals(1, entries.size());
+               assertRename(b, a, 74, entries.get(0));
+       }
+
        public void testInexactRenames_OnePair2() throws Exception {
                ObjectId aId = blob("ab\nab\nab\nac\nad\nae\n");
                ObjectId bId = blob("ac\nab\nab\nab\naa\na0\na1\n");
index d5a31d6044a30589cb8660bea7f6c1c6f6a60572..b460d498cb29000dbcfbc72c4b48cca0322101de 100644 (file)
@@ -172,7 +172,8 @@ class SimilarityIndex {
 
                for (;;) {
                        if (srcKey == dstKey) {
-                               common += countOf(dstHash[dstIdx]);
+                               common += Math.min(countOf(srcHash[srcIdx]),
+                                               countOf(dstHash[dstIdx]));
 
                                if (++srcIdx == srcHash.length)
                                        break;