]> source.dussan.org Git - jgit.git/commitdiff
Implemented file path based tie breaking to exact rename detection 30/1130/2
authorJeff Schumacher <jeffschu@google.com>
Tue, 13 Jul 2010 19:21:12 +0000 (12:21 -0700)
committerJeff Schumacher <jeffschu@google.com>
Fri, 16 Jul 2010 16:56:42 +0000 (09:56 -0700)
During the exact rename detection phase in RenameDetector, ties were
resolved on a first-found basis. I added support for file path based
tie breaking during that phase. Basically, there are four situations
that have to be handled:

One add matching one delete:
In this simple case, we pair them as a rename.

One add matching many deletes:
Find the delete whos path matches the add the closest, and
pair them as a rename.

Many adds matching one delete:
Similar to the above case, we find the add that matches the
delete the closest, and pair them as a rename. The other adds
are marked as copies of the delete.

Many adds matching many deletes:
Build a scoring matrix similar to the one used for content-
based matching, scoring instead by file path. Some of the
utility functions in SimilarityRenameDetector are used in
this case, as we use the same encoding scheme. Once the
matrix is built, scan it for the best matches, marking them
as renames. The rest are marked as copies.

I don't particularly like the idea of using utility functions right
out of SimilarityRenameDetector, but it works for the moment. A later
commit will likely refactor this into a common utility class, as well
as bringing exact rename detection out of RenameDetector and into a
separate class, much like SimilarityRenameDetector.

Change-Id: I1fb08390aebdcbf20d049aecf402a36506e55611

org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/RenameDetectorTest.java
org.eclipse.jgit/src/org/eclipse/jgit/diff/DiffEntry.java
org.eclipse.jgit/src/org/eclipse/jgit/diff/RenameDetector.java
org.eclipse.jgit/src/org/eclipse/jgit/diff/SimilarityRenameDetector.java

index 86b40e2f4d9a22c33f4f54ef74fc010d1a2c081a..c32c78668fa1531fb179103d7941782fb64f5a85 100644 (file)
@@ -146,6 +146,48 @@ public class RenameDetectorTest extends RepositoryTestCase {
                assertRename(a, d, 100, entries.get(2));
        }
 
+       public void testExactRename_PathBreaksTie() throws Exception {
+               ObjectId foo = blob("foo");
+
+               DiffEntry a = DiffEntry.add("src/com/foo/a.java", foo);
+               DiffEntry b = DiffEntry.delete("src/com/foo/b.java", foo);
+
+               DiffEntry c = DiffEntry.add("c.txt", foo);
+               DiffEntry d = DiffEntry.delete("d.txt", foo);
+
+               // Add out of order to avoid first-match succeeding
+               rd.add(a);
+               rd.add(d);
+               rd.add(b);
+               rd.add(c);
+
+               List<DiffEntry> entries = rd.compute();
+               assertEquals(2, entries.size());
+               assertRename(d, c, 100, entries.get(0));
+               assertRename(b, a, 100, entries.get(1));
+       }
+
+       public void testExactRename_OneDeleteManyAdds() throws Exception {
+               ObjectId foo = blob("foo");
+
+               DiffEntry a = DiffEntry.add("src/com/foo/a.java", foo);
+               DiffEntry b = DiffEntry.add("src/com/foo/b.java", foo);
+               DiffEntry c = DiffEntry.add("c.txt", foo);
+
+               DiffEntry d = DiffEntry.delete("d.txt", foo);
+
+               rd.add(a);
+               rd.add(b);
+               rd.add(c);
+               rd.add(d);
+
+               List<DiffEntry> entries = rd.compute();
+               assertEquals(3, entries.size());
+               assertRename(d, c, 100, entries.get(0));
+               assertCopy(d, a, 100, entries.get(1));
+               assertCopy(d, b, 100, entries.get(2));
+       }
+
        public void testInexactRename_OnePair() throws Exception {
                ObjectId aId = blob("foo\nbar\nbaz\nblarg\n");
                ObjectId bId = blob("foo\nbar\nbaz\nblah\n");
@@ -385,4 +427,20 @@ public class RenameDetectorTest extends RepositoryTestCase {
 
                assertEquals(score, rename.getScore());
        }
+
+       private static void assertCopy(DiffEntry o, DiffEntry n, int score,
+                       DiffEntry copy) {
+               assertEquals(ChangeType.COPY, copy.getChangeType());
+
+               assertEquals(o.getOldName(), copy.getOldName());
+               assertEquals(n.getNewName(), copy.getNewName());
+
+               assertEquals(o.getOldMode(), copy.getOldMode());
+               assertEquals(n.getNewMode(), copy.getNewMode());
+
+               assertEquals(o.getOldId(), copy.getOldId());
+               assertEquals(n.getNewId(), copy.getNewId());
+
+               assertEquals(score, copy.getScore());
+       }
 }
index 4d25dbd02e244ab9dd07294078c19a55e26e225e..304e4cff39a4d95230753b75804800597a4afb2a 100644 (file)
@@ -315,4 +315,31 @@ public class DiffEntry {
        public AbbreviatedObjectId getNewId() {
                return newId;
        }
+
+       @Override
+       public String toString() {
+               StringBuilder buf = new StringBuilder();
+               buf.append("DiffEntry[");
+               buf.append(changeType);
+               buf.append(" ");
+               switch (changeType) {
+               case ADD:
+                       buf.append(newName);
+                       break;
+               case COPY:
+                       buf.append(oldName + "->" + newName);
+                       break;
+               case DELETE:
+                       buf.append(oldName);
+                       break;
+               case MODIFY:
+                       buf.append(oldName);
+                       break;
+               case RENAME:
+                       buf.append(oldName + "->" + newName);
+                       break;
+               }
+               buf.append("]");
+               return buf.toString();
+       }
 }
\ No newline at end of file
index 5bb63c4dd1066d7f20858674f8ead661a46ed6a2..bc23940535fed508b0ec3a36506d7632815ab375 100644 (file)
@@ -45,6 +45,7 @@ package org.eclipse.jgit.diff;
 
 import java.io.IOException;
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.Collection;
 import java.util.Collections;
 import java.util.Comparator;
@@ -312,70 +313,131 @@ public class RenameDetector {
                        return;
 
                pm.beginTask(JGitText.get().renamesFindingExact, //
-                               added.size() + deleted.size());
+                               added.size() + added.size() + deleted.size()
+                                               + added.size() * deleted.size());
 
-               HashMap<AbbreviatedObjectId, Object> map = new HashMap<AbbreviatedObjectId, Object>();
-               for (DiffEntry del : deleted) {
-                       Object old = map.put(del.oldId, del);
-                       if (old instanceof DiffEntry) {
-                               ArrayList<DiffEntry> list = new ArrayList<DiffEntry>(2);
-                               list.add((DiffEntry) old);
-                               list.add(del);
-                               map.put(del.oldId, list);
+               HashMap<AbbreviatedObjectId, Object> deletedMap = populateMap(deleted, pm);
+               HashMap<AbbreviatedObjectId, Object> addedMap = populateMap(added, pm);
 
-                       } else if (old != null) {
-                               // Must be a list of DiffEntries
-                               ((List) old).add(del);
-                               map.put(del.oldId, old);
-                       }
-                       pm.update(1);
+               ArrayList<DiffEntry> uniqueAdds = new ArrayList<DiffEntry>(added.size());
+               ArrayList<List<DiffEntry>> nonUniqueAdds = new ArrayList<List<DiffEntry>>();
+
+               for (Object o : addedMap.values()) {
+                       if (o instanceof DiffEntry)
+                               uniqueAdds.add((DiffEntry) o);
+                       else
+                               nonUniqueAdds.add((List<DiffEntry>) o);
                }
 
                ArrayList<DiffEntry> left = new ArrayList<DiffEntry>(added.size());
-               for (DiffEntry dst : added) {
-                       Object del = map.get(dst.newId);
+
+               for (DiffEntry a : uniqueAdds) {
+                       Object del = deletedMap.get(a.newId);
                        if (del instanceof DiffEntry) {
+                               // We have one add to one delete: pair them if they are the same
+                               // type
                                DiffEntry e = (DiffEntry) del;
-                               if (sameType(e.oldMode, dst.newMode)) {
-                                       if (e.changeType == ChangeType.DELETE) {
-                                               e.changeType = ChangeType.RENAME;
-                                               entries.add(exactRename(e, dst));
-                                       } else {
-                                               entries.add(exactCopy(e, dst));
-                                       }
+                               if (sameType(e.oldMode, a.newMode)) {
+                                       e.changeType = ChangeType.RENAME;
+                                       entries.add(exactRename(e, a));
                                } else {
-                                       left.add(dst);
+                                       left.add(a);
                                }
-
                        } else if (del != null) {
+                               // We have one add to many deletes: find the delete with the
+                               // same type and closest name to the add, then pair them
                                List<DiffEntry> list = (List<DiffEntry>) del;
-                               DiffEntry best = null;
-                               for (DiffEntry e : list) {
-                                       if (sameType(e.oldMode, dst.newMode)) {
-                                               best = e;
-                                               break;
-                                       }
+                               DiffEntry best = bestPathMatch(a, list);
+                               if (best != null) {
+                                       best.changeType = ChangeType.RENAME;
+                                       entries.add(exactRename(best, a));
+                               } else {
+                                       left.add(a);
                                }
+                       } else {
+                               left.add(a);
+                       }
+                       pm.update(1);
+               }
+
+               for (List<DiffEntry> adds : nonUniqueAdds) {
+                       Object o = deletedMap.get(adds.get(0).newId);
+                       if (o instanceof DiffEntry) {
+                               // We have many adds to one delete: find the add with the same
+                               // type and closest name to the delete, then pair them. Mark the
+                               // rest as copies of the delete.
+                               DiffEntry d = (DiffEntry) o;
+                               DiffEntry best = bestPathMatch(d, adds);
                                if (best != null) {
-                                       if (best.changeType == ChangeType.DELETE) {
-                                               best.changeType = ChangeType.RENAME;
-                                               entries.add(exactRename(best, dst));
-                                       } else {
-                                               entries.add(exactCopy(best, dst));
+                                       d.changeType = ChangeType.RENAME;
+                                       entries.add(exactRename(d, best));
+                                       for (DiffEntry a : adds) {
+                                               if (a != best) {
+                                                       if (sameType(d.oldMode, a.newMode)) {
+                                                               entries.add(exactCopy(d, a));
+                                                       } else {
+                                                               left.add(a);
+                                                       }
+                                               }
                                        }
                                } else {
-                                       left.add(dst);
+                                       left.addAll(adds);
                                }
-
                        } else {
-                               left.add(dst);
+                               // We have many adds to many deletes: score all the adds against
+                               // all the deletes by path name, take the best matches, pair
+                               // them as renames, then call the rest copies
+                               List<DiffEntry> dels = (List<DiffEntry>) o;
+                               long[] matrix = new long[dels.size() * adds.size()];
+                               int mNext = 0;
+                               for (int addIdx = 0; addIdx < adds.size(); addIdx++) {
+                                       String addedName = adds.get(addIdx).newName;
+
+                                       for (int delIdx = 0; delIdx < dels.size(); delIdx++) {
+                                               String deletedName = dels.get(delIdx).oldName;
+
+                                               int score = SimilarityRenameDetector.nameScore(addedName, deletedName);
+                                               matrix[mNext] = SimilarityRenameDetector.encode(score, addIdx, delIdx);
+                                               mNext++;
+                                       }
+                               }
+
+                               Arrays.sort(matrix);
+
+                               for (--mNext; mNext >= 0; mNext--) {
+                                       long ent = matrix[mNext];
+                                       int delIdx = SimilarityRenameDetector.srcFile(ent);
+                                       int addIdx = SimilarityRenameDetector.dstFile(ent);
+                                       DiffEntry d = dels.get(delIdx);
+                                       DiffEntry a = adds.get(addIdx);
+
+                                       if (a == null) {
+                                               pm.update(1);
+                                               continue; // was already matched earlier
+                                       }
+
+                                       ChangeType type;
+                                       if (d.changeType == ChangeType.DELETE) {
+                                               // First use of this source file. Tag it as a rename so we
+                                               // later know it is already been used as a rename, other
+                                               // matches (if any) will claim themselves as copies instead.
+                                               //
+                                               d.changeType = ChangeType.RENAME;
+                                               type = ChangeType.RENAME;
+                                       } else {
+                                               type = ChangeType.COPY;
+                                       }
+
+                                       entries.add(DiffEntry.pair(type, d, a, 100));
+                                       adds.set(addIdx, null); // Claim the destination was matched.
+                                       pm.update(1);
+                               }
                        }
-                       pm.update(1);
                }
                added = left;
 
-               deleted = new ArrayList<DiffEntry>(map.size());
-               for (Object o : map.values()) {
+               deleted = new ArrayList<DiffEntry>(deletedMap.size());
+               for (Object o : deletedMap.values()) {
                        if (o instanceof DiffEntry) {
                                DiffEntry e = (DiffEntry) o;
                                if (e.changeType == ChangeType.DELETE)
@@ -391,6 +453,69 @@ public class RenameDetector {
                pm.endTask();
        }
 
+       /**
+        * Find the best match by file path for a given DiffEntry from a list of
+        * DiffEntrys. The returned DiffEntry will be of the same type as <src>. If
+        * no DiffEntry can be found that has the same type, this method will return
+        * null.
+        *
+        * @param src
+        *            the DiffEntry to try to find a match for
+        * @param list
+        *            a list of DiffEntrys to search through
+        * @return the DiffEntry from <list> who's file path best matches <src>
+        */
+       private static DiffEntry bestPathMatch(DiffEntry src, List<DiffEntry> list) {
+               DiffEntry best = null;
+               int score = -1;
+
+               for (DiffEntry d : list) {
+                       if (sameType(mode(d), mode(src))) {
+                               int tmp = SimilarityRenameDetector
+                                               .nameScore(path(d), path(src));
+                               if (tmp > score) {
+                                       best = d;
+                                       score = tmp;
+                               }
+                       }
+               }
+
+               return best;
+       }
+
+       @SuppressWarnings("unchecked")
+       private HashMap<AbbreviatedObjectId, Object> populateMap(
+                       List<DiffEntry> diffEntries, ProgressMonitor pm) {
+               HashMap<AbbreviatedObjectId, Object> map = new HashMap<AbbreviatedObjectId, Object>();
+               for (DiffEntry de : diffEntries) {
+                       Object old = map.put(id(de), de);
+                       if (old instanceof DiffEntry) {
+                               ArrayList<DiffEntry> list = new ArrayList<DiffEntry>(2);
+                               list.add((DiffEntry) old);
+                               list.add(de);
+                               map.put(id(de), list);
+                       } else if (old != null) {
+                               // Must be a list of DiffEntries
+                               ((List<DiffEntry>) old).add(de);
+                               map.put(id(de), old);
+                       }
+                       pm.update(1);
+               }
+               return map;
+       }
+
+       private static String path(DiffEntry de) {
+               return de.changeType == ChangeType.DELETE ? de.oldName : de.newName;
+       }
+
+       private static FileMode mode(DiffEntry de) {
+               return de.changeType == ChangeType.DELETE ? de.oldMode : de.newMode;
+       }
+
+       private static AbbreviatedObjectId id(DiffEntry de) {
+               return de.changeType == ChangeType.DELETE ? de.oldId : de.newId;
+       }
+
        static boolean sameType(FileMode a, FileMode b) {
                // Files have to be of the same type in order to rename them.
                // We would never want to rename a file to a gitlink, or a
index 6590f746f3789709f0164bc50adebb17cada1227..e2115f0acc6bbbd8106fc333c63a3567789fbd2d 100644 (file)
@@ -287,7 +287,7 @@ class SimilarityRenameDetector {
                return mNext;
        }
 
-       private int nameScore(String a, String b) {
+       static int nameScore(String a, String b) {
            int aDirLen = a.lastIndexOf("/") + 1;
            int bDirLen = b.lastIndexOf("/") + 1;
 
@@ -349,15 +349,15 @@ class SimilarityRenameDetector {
                return (int) (value >>> SCORE_SHIFT);
        }
 
-       private static int srcFile(long value) {
+       static int srcFile(long value) {
                return decodeFile(((int) (value >>> BITS_PER_INDEX)) & INDEX_MASK);
        }
 
-       private static int dstFile(long value) {
+       static int dstFile(long value) {
                return decodeFile(((int) value) & INDEX_MASK);
        }
 
-       private static long encode(int score, int srcIdx, int dstIdx) {
+       static long encode(int score, int srcIdx, int dstIdx) {
                return (((long) score) << SCORE_SHIFT) //
                                | (encodeFile(srcIdx) << BITS_PER_INDEX) //
                                | encodeFile(dstIdx);