From 1a364c49ec88e5b2e642dddc743df5ebd7445daf Mon Sep 17 00:00:00 2001 From: Simeon Andreev Date: Wed, 15 Jun 2022 22:23:43 +0200 Subject: [PATCH] JGit blame very slow for large merge commits that rename files Adjusted BlameGenerator to filter rename detection with the blame path. This reduces the running time of the blame computation significantly, for repositories with massive commits involving renames. The filtered rename detection is made (internally) available with: org.eclipse.jgit.internal.diff.FilteredRenameDetector Bug: 578900 Change-Id: I6580004e81102d685081b8180da1587a35073d36 Signed-off-by: Simeon Andreev --- org.eclipse.jgit.test/META-INF/MANIFEST.MF | 1 + .../diff/AbstractRenameDetectionTestCase.java | 101 ++++++++++++ .../jgit/diff/FilteredRenameDetectorTest.java | 154 ++++++++++++++++++ .../eclipse/jgit/diff/RenameDetectorTest.java | 71 +------- org.eclipse.jgit/META-INF/MANIFEST.MF | 2 + .../eclipse/jgit/blame/BlameGenerator.java | 8 +- .../internal/diff/FilteredRenameDetector.java | 136 ++++++++++++++++ 7 files changed, 400 insertions(+), 73 deletions(-) create mode 100644 org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/AbstractRenameDetectionTestCase.java create mode 100644 org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/FilteredRenameDetectorTest.java create mode 100644 org.eclipse.jgit/src/org/eclipse/jgit/internal/diff/FilteredRenameDetector.java diff --git a/org.eclipse.jgit.test/META-INF/MANIFEST.MF b/org.eclipse.jgit.test/META-INF/MANIFEST.MF index 76f69a6459..0ba71f982d 100644 --- a/org.eclipse.jgit.test/META-INF/MANIFEST.MF +++ b/org.eclipse.jgit.test/META-INF/MANIFEST.MF @@ -33,6 +33,7 @@ Import-Package: com.googlecode.javaewah;version="[1.1.6,2.0.0)", org.eclipse.jgit.ignore;version="[6.3.0,6.4.0)", org.eclipse.jgit.ignore.internal;version="[6.3.0,6.4.0)", org.eclipse.jgit.internal;version="[6.3.0,6.4.0)", + org.eclipse.jgit.internal.diff;version="[6.3.0,6.4.0)", org.eclipse.jgit.internal.diffmergetool;version="[6.3.0,6.4.0)", org.eclipse.jgit.internal.fsck;version="[6.3.0,6.4.0)", org.eclipse.jgit.internal.revwalk;version="[6.3.0,6.4.0)", diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/AbstractRenameDetectionTestCase.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/AbstractRenameDetectionTestCase.java new file mode 100644 index 0000000000..a8967f27ec --- /dev/null +++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/AbstractRenameDetectionTestCase.java @@ -0,0 +1,101 @@ +/* + * Copyright (C) 2022, Google Inc. and others + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Distribution License v. 1.0 which is available at + * https://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +package org.eclipse.jgit.diff; + +import static org.junit.Assert.assertEquals; + +import org.eclipse.jgit.diff.DiffEntry.ChangeType; +import org.eclipse.jgit.junit.RepositoryTestCase; +import org.eclipse.jgit.junit.TestRepository; +import org.eclipse.jgit.lib.AbbreviatedObjectId; +import org.eclipse.jgit.lib.FileMode; +import org.eclipse.jgit.lib.ObjectId; +import org.eclipse.jgit.lib.Repository; +import org.junit.Before; + +public abstract class AbstractRenameDetectionTestCase + extends RepositoryTestCase { + + protected static final String PATH_A = "src/A"; + + protected static final String PATH_B = "src/B"; + + protected static final String PATH_H = "src/H"; + + protected static final String PATH_Q = "src/Q"; + + protected TestRepository testDb; + + @Override + @Before + public void setUp() throws Exception { + super.setUp(); + testDb = new TestRepository<>(db); + } + + protected ObjectId blob(String content) throws Exception { + return testDb.blob(content).copy(); + } + + protected static void assertRename(DiffEntry o, DiffEntry n, int score, + DiffEntry rename) { + assertEquals(ChangeType.RENAME, rename.getChangeType()); + + assertEquals(o.getOldPath(), rename.getOldPath()); + assertEquals(n.getNewPath(), rename.getNewPath()); + + assertEquals(o.getOldMode(), rename.getOldMode()); + assertEquals(n.getNewMode(), rename.getNewMode()); + + assertEquals(o.getOldId(), rename.getOldId()); + assertEquals(n.getNewId(), rename.getNewId()); + + assertEquals(score, rename.getScore()); + } + + protected static void assertCopy(DiffEntry o, DiffEntry n, int score, + DiffEntry copy) { + assertEquals(ChangeType.COPY, copy.getChangeType()); + + assertEquals(o.getOldPath(), copy.getOldPath()); + assertEquals(n.getNewPath(), copy.getNewPath()); + + assertEquals(o.getOldMode(), copy.getOldMode()); + assertEquals(n.getNewMode(), copy.getNewMode()); + + assertEquals(o.getOldId(), copy.getOldId()); + assertEquals(n.getNewId(), copy.getNewId()); + + assertEquals(score, copy.getScore()); + } + + protected static void assertAdd(String newName, ObjectId newId, + FileMode newMode, DiffEntry add) { + assertEquals(DiffEntry.DEV_NULL, add.oldPath); + assertEquals(DiffEntry.A_ZERO, add.oldId); + assertEquals(FileMode.MISSING, add.oldMode); + assertEquals(ChangeType.ADD, add.changeType); + assertEquals(newName, add.newPath); + assertEquals(AbbreviatedObjectId.fromObjectId(newId), add.newId); + assertEquals(newMode, add.newMode); + } + + protected static void assertDelete(String oldName, ObjectId oldId, + FileMode oldMode, DiffEntry delete) { + assertEquals(DiffEntry.DEV_NULL, delete.newPath); + assertEquals(DiffEntry.A_ZERO, delete.newId); + assertEquals(FileMode.MISSING, delete.newMode); + assertEquals(ChangeType.DELETE, delete.changeType); + assertEquals(oldName, delete.oldPath); + assertEquals(AbbreviatedObjectId.fromObjectId(oldId), delete.oldId); + assertEquals(oldMode, delete.oldMode); + } +} diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/FilteredRenameDetectorTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/FilteredRenameDetectorTest.java new file mode 100644 index 0000000000..bfda36db76 --- /dev/null +++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/FilteredRenameDetectorTest.java @@ -0,0 +1,154 @@ +/* + * Copyright (C) 2022, Simeon Andreev and others + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Distribution License v. 1.0 which is available at + * https://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +package org.eclipse.jgit.diff; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertSame; + +import java.util.Arrays; +import java.util.List; +import org.eclipse.jgit.internal.diff.FilteredRenameDetector; +import org.eclipse.jgit.lib.ObjectId; +import org.eclipse.jgit.treewalk.filter.PathFilter; +import org.junit.Before; +import org.junit.Test; + +public class FilteredRenameDetectorTest extends AbstractRenameDetectionTestCase { + + private FilteredRenameDetector frd; + + @Override + @Before + public void setUp() throws Exception { + super.setUp(); + frd = new FilteredRenameDetector(db); + } + + @Test + public void testExactRename() throws Exception { + ObjectId foo = blob("foo"); + ObjectId bar = blob("bar"); + + DiffEntry a = DiffEntry.add(PATH_A, foo); + DiffEntry b = DiffEntry.delete(PATH_Q, foo); + + DiffEntry c = DiffEntry.add(PATH_H, bar); + DiffEntry d = DiffEntry.delete(PATH_B, bar); + + List changes = Arrays.asList(a, b, c, d); + PathFilter filter = PathFilter.create(PATH_A); + List entries = frd.compute(changes, filter); + assertEquals("Unexpected entries in: " + entries, 1, entries.size()); + assertRename(b, a, 100, entries.get(0)); + } + + @Test + public void testExactRename_multipleFilters() throws Exception { + ObjectId foo = blob("foo"); + ObjectId bar = blob("bar"); + + DiffEntry a = DiffEntry.add(PATH_A, foo); + DiffEntry b = DiffEntry.delete(PATH_Q, foo); + + DiffEntry c = DiffEntry.add(PATH_H, bar); + DiffEntry d = DiffEntry.delete(PATH_B, bar); + + List changes = Arrays.asList(a, b, c, d); + List filters = Arrays.asList(PathFilter.create(PATH_A), + PathFilter.create(PATH_H)); + List entries = frd.compute(changes, filters); + assertEquals("Unexpected entries in: " + entries, 2, entries.size()); + assertRename(b, a, 100, entries.get(0)); + assertRename(d, c, 100, entries.get(1)); + } + + @Test + public void testInexactRename() throws Exception { + ObjectId aId = blob("foo\nbar\nbaz\nblarg\n"); + ObjectId bId = blob("foo\nbar\nbaz\nblah\n"); + DiffEntry a = DiffEntry.add(PATH_A, aId); + DiffEntry b = DiffEntry.delete(PATH_Q, bId); + + ObjectId cId = blob("some\nsort\nof\ntext\n"); + ObjectId dId = blob("completely\nunrelated\ntext\n"); + DiffEntry c = DiffEntry.add(PATH_B, cId); + DiffEntry d = DiffEntry.delete(PATH_H, dId); + + List changes = Arrays.asList(a, b, c, d); + PathFilter filter = PathFilter.create(PATH_A); + List entries = frd.compute(changes, filter); + assertEquals("Unexpected entries: " + entries, 1, entries.size()); + assertRename(b, a, 66, entries.get(0)); + } + + @Test + public void testInexactRename_multipleFilters() throws Exception { + ObjectId aId = blob("foo\nbar\nbaz\nblarg\n"); + ObjectId bId = blob("foo\nbar\nbaz\nblah\n"); + DiffEntry a = DiffEntry.add(PATH_A, aId); + DiffEntry b = DiffEntry.delete(PATH_Q, bId); + + ObjectId cId = blob("some\nsort\nof\ntext\n"); + ObjectId dId = blob("completely\nunrelated\ntext\n"); + DiffEntry c = DiffEntry.add(PATH_B, cId); + DiffEntry d = DiffEntry.delete(PATH_H, dId); + + List changes = Arrays.asList(a, b, c, d); + List filters = Arrays.asList(PathFilter.create(PATH_A), + PathFilter.create(PATH_H)); + List entries = frd.compute(changes, filters); + assertEquals("Unexpected entries: " + entries, 2, entries.size()); + assertRename(b, a, 66, entries.get(0)); + assertSame(d, entries.get(1)); + } + + @Test + public void testNoRenames() throws Exception { + ObjectId aId = blob(""); + ObjectId bId = blob("blah1"); + ObjectId cId = blob(""); + ObjectId dId = blob("blah2"); + + DiffEntry a = DiffEntry.add(PATH_A, aId); + DiffEntry b = DiffEntry.delete(PATH_Q, bId); + + DiffEntry c = DiffEntry.add(PATH_H, cId); + DiffEntry d = DiffEntry.delete(PATH_B, dId); + + List changes = Arrays.asList(a, b, c, d); + PathFilter filter = PathFilter.create(PATH_A); + List entries = frd.compute(changes, filter); + assertEquals("Unexpected entries in: " + entries, 1, entries.size()); + assertSame(a, entries.get(0)); + } + + @Test + public void testNoRenames_multipleFilters() throws Exception { + ObjectId aId = blob(""); + ObjectId bId = blob("blah1"); + ObjectId cId = blob(""); + ObjectId dId = blob("blah2"); + + DiffEntry a = DiffEntry.add(PATH_A, aId); + DiffEntry b = DiffEntry.delete(PATH_Q, bId); + + DiffEntry c = DiffEntry.add(PATH_H, cId); + DiffEntry d = DiffEntry.delete(PATH_B, dId); + + List changes = Arrays.asList(a, b, c, d); + List filters = Arrays.asList(PathFilter.create(PATH_A), + PathFilter.create(PATH_H)); + List entries = frd.compute(changes, filters); + assertEquals("Unexpected entries in: " + entries, 2, entries.size()); + assertSame(a, entries.get(0)); + assertSame(c, entries.get(1)); + } +} diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/RenameDetectorTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/RenameDetectorTest.java index 5edb60ce37..ad560e3b8a 100644 --- a/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/RenameDetectorTest.java +++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/RenameDetectorTest.java @@ -18,31 +18,20 @@ import static org.junit.Assert.fail; import java.util.Arrays; import java.util.List; -import org.eclipse.jgit.diff.DiffEntry.ChangeType; -import org.eclipse.jgit.junit.RepositoryTestCase; -import org.eclipse.jgit.junit.TestRepository; import org.eclipse.jgit.lib.AbbreviatedObjectId; import org.eclipse.jgit.lib.FileMode; import org.eclipse.jgit.lib.ObjectId; -import org.eclipse.jgit.lib.Repository; import org.junit.Before; import org.junit.Test; -public class RenameDetectorTest extends RepositoryTestCase { - private static final String PATH_A = "src/A"; - private static final String PATH_B = "src/B"; - private static final String PATH_H = "src/H"; - private static final String PATH_Q = "src/Q"; +public class RenameDetectorTest extends AbstractRenameDetectionTestCase { private RenameDetector rd; - private TestRepository testDb; - @Override @Before public void setUp() throws Exception { super.setUp(); - testDb = new TestRepository<>(db); rd = new RenameDetector(db); } @@ -675,62 +664,4 @@ public class RenameDetectorTest extends RepositoryTestCase { assertSame(c, entries.get(2)); assertSame(d, entries.get(3)); } - - private ObjectId blob(String content) throws Exception { - return testDb.blob(content).copy(); - } - - private static void assertRename(DiffEntry o, DiffEntry n, int score, - DiffEntry rename) { - assertEquals(ChangeType.RENAME, rename.getChangeType()); - - assertEquals(o.getOldPath(), rename.getOldPath()); - assertEquals(n.getNewPath(), rename.getNewPath()); - - assertEquals(o.getOldMode(), rename.getOldMode()); - assertEquals(n.getNewMode(), rename.getNewMode()); - - assertEquals(o.getOldId(), rename.getOldId()); - assertEquals(n.getNewId(), rename.getNewId()); - - assertEquals(score, rename.getScore()); - } - - private static void assertCopy(DiffEntry o, DiffEntry n, int score, - DiffEntry copy) { - assertEquals(ChangeType.COPY, copy.getChangeType()); - - assertEquals(o.getOldPath(), copy.getOldPath()); - assertEquals(n.getNewPath(), copy.getNewPath()); - - assertEquals(o.getOldMode(), copy.getOldMode()); - assertEquals(n.getNewMode(), copy.getNewMode()); - - assertEquals(o.getOldId(), copy.getOldId()); - assertEquals(n.getNewId(), copy.getNewId()); - - assertEquals(score, copy.getScore()); - } - - private static void assertAdd(String newName, ObjectId newId, - FileMode newMode, DiffEntry add) { - assertEquals(DiffEntry.DEV_NULL, add.oldPath); - assertEquals(DiffEntry.A_ZERO, add.oldId); - assertEquals(FileMode.MISSING, add.oldMode); - assertEquals(ChangeType.ADD, add.changeType); - assertEquals(newName, add.newPath); - assertEquals(AbbreviatedObjectId.fromObjectId(newId), add.newId); - assertEquals(newMode, add.newMode); - } - - private static void assertDelete(String oldName, ObjectId oldId, - FileMode oldMode, DiffEntry delete) { - assertEquals(DiffEntry.DEV_NULL, delete.newPath); - assertEquals(DiffEntry.A_ZERO, delete.newId); - assertEquals(FileMode.MISSING, delete.newMode); - assertEquals(ChangeType.DELETE, delete.changeType); - assertEquals(oldName, delete.oldPath); - assertEquals(AbbreviatedObjectId.fromObjectId(oldId), delete.oldId); - assertEquals(oldMode, delete.oldMode); - } } diff --git a/org.eclipse.jgit/META-INF/MANIFEST.MF b/org.eclipse.jgit/META-INF/MANIFEST.MF index 6d43eba6c5..b919140989 100644 --- a/org.eclipse.jgit/META-INF/MANIFEST.MF +++ b/org.eclipse.jgit/META-INF/MANIFEST.MF @@ -70,6 +70,8 @@ Export-Package: org.eclipse.jgit.annotations;version="6.3.0", org.eclipse.jgit.internal;version="6.3.0"; x-friends:="org.eclipse.jgit.test, org.eclipse.jgit.http.test", + org.eclipse.jgit.internal.diff;version="6.3.0"; + x-friends:="org.eclipse.jgit.test", org.eclipse.jgit.internal.diffmergetool;version="6.3.0"; x-friends:="org.eclipse.jgit.test, org.eclipse.jgit.pgm.test, diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/blame/BlameGenerator.java b/org.eclipse.jgit/src/org/eclipse/jgit/blame/BlameGenerator.java index 10d77528f6..77967df2e5 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/blame/BlameGenerator.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/blame/BlameGenerator.java @@ -41,6 +41,7 @@ import org.eclipse.jgit.dircache.DirCacheEntry; import org.eclipse.jgit.dircache.DirCacheIterator; import org.eclipse.jgit.errors.NoWorkTreeException; import org.eclipse.jgit.internal.JGitText; +import org.eclipse.jgit.internal.diff.FilteredRenameDetector; import org.eclipse.jgit.lib.AnyObjectId; import org.eclipse.jgit.lib.Constants; import org.eclipse.jgit.lib.MutableObjectId; @@ -1109,9 +1110,10 @@ public class BlameGenerator implements AutoCloseable { treeWalk.setFilter(TreeFilter.ANY_DIFF); treeWalk.reset(parent.getTree(), commit.getTree()); - renameDetector.reset(); - renameDetector.addAll(DiffEntry.scan(treeWalk)); - for (DiffEntry ent : renameDetector.compute()) { + List diffs = DiffEntry.scan(treeWalk); + FilteredRenameDetector filteredRenameDetector = new FilteredRenameDetector( + renameDetector); + for (DiffEntry ent : filteredRenameDetector.compute(diffs, path)) { if (isRename(ent) && ent.getNewPath().equals(path.getPath())) return ent; } diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/diff/FilteredRenameDetector.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/diff/FilteredRenameDetector.java new file mode 100644 index 0000000000..d65624fc6a --- /dev/null +++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/diff/FilteredRenameDetector.java @@ -0,0 +1,136 @@ +/* + * Copyright (C) 2022, Simeon Andreev and others + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Distribution License v. 1.0 which is available at + * https://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +package org.eclipse.jgit.internal.diff; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +import org.eclipse.jgit.diff.DiffEntry; +import org.eclipse.jgit.diff.DiffEntry.ChangeType; +import org.eclipse.jgit.diff.RenameDetector; +import org.eclipse.jgit.lib.Repository; +import org.eclipse.jgit.treewalk.filter.PathFilter; + +/** + * Provides rename detection in special cases such as blame, where only a subset + * of the renames detected by {@link RenameDetector} is of interest. + */ +public class FilteredRenameDetector { + + private final RenameDetector renameDetector; + + /** + * @param repository + * The repository in which to check for renames. + */ + public FilteredRenameDetector(Repository repository) { + this(new RenameDetector(repository)); + } + + /** + * @param renameDetector + * The {@link RenameDetector} to use when checking for renames. + */ + public FilteredRenameDetector(RenameDetector renameDetector) { + this.renameDetector = renameDetector; + } + + /** + * @param diffs + * The set of changes to check. + * @param pathFilter + * Filter out changes that didn't affect this path. + * @return The subset of changes that affect only the filtered path. + * @throws IOException + */ + public List compute(List diffs, + PathFilter pathFilter) throws IOException { + return compute(diffs, Arrays.asList(pathFilter)); + } + + /** + * Tries to avoid computation overhead in {@link RenameDetector#compute()} + * by filtering diffs related to the path filters only. + *

+ * Note: current implementation only optimizes added or removed diffs, + * further optimization is possible. + * + * @param changes + * The set of changes to check. + * @param pathFilters + * Filter out changes that didn't affect these paths. + * @return The subset of changes that affect only the filtered paths. + * @throws IOException + * @see RenameDetector#compute() + */ + public List compute(List changes, + List pathFilters) throws IOException { + + if (pathFilters == null) { + throw new IllegalArgumentException("Must specify path filters"); //$NON-NLS-1$ + } + + Set paths = new HashSet<>(pathFilters.size()); + for (PathFilter pathFilter : pathFilters) { + paths.add(pathFilter.getPath()); + } + + List filtered = new ArrayList<>(); + + // For new path: skip ADD's that don't match given paths + for (DiffEntry diff : changes) { + ChangeType changeType = diff.getChangeType(); + if (changeType != ChangeType.ADD + || paths.contains(diff.getNewPath())) { + filtered.add(diff); + } + } + + renameDetector.reset(); + renameDetector.addAll(filtered); + List sourceChanges = renameDetector.compute(); + + filtered.clear(); + + // For old path: skip DELETE's that don't match given paths + for (DiffEntry diff : changes) { + ChangeType changeType = diff.getChangeType(); + if (changeType != ChangeType.DELETE + || paths.contains(diff.getOldPath())) { + filtered.add(diff); + } + } + + renameDetector.reset(); + renameDetector.addAll(filtered); + List targetChanges = renameDetector.compute(); + + List result = new ArrayList<>(); + + for (DiffEntry sourceChange : sourceChanges) { + if (paths.contains(sourceChange.getNewPath())) { + result.add(sourceChange); + } + } + for (DiffEntry targetChange : targetChanges) { + if (paths.contains(targetChange.getOldPath())) { + result.add(targetChange); + } + } + + renameDetector.reset(); + return result; + } +} -- 2.39.5