diff options
author | Jonathan Tan <jonathantanmy@google.com> | 2023-05-02 10:44:16 -0700 |
---|---|---|
committer | Jonathan Tan <jonathantanmy@google.com> | 2023-07-18 14:21:48 -0700 |
commit | d3b40e72acd30b8842da8f450775a5c847ad20ef (patch) | |
tree | c66714363dc0cbf6329574cc088a83353784d07f | |
parent | ff0f7c174f1af853a158321468492324f05cf052 (diff) | |
download | jgit-d3b40e72acd30b8842da8f450775a5c847ad20ef.tar.gz jgit-d3b40e72acd30b8842da8f450775a5c847ad20ef.zip |
RevWalk: use changed path filters
Teach RevWalk, TreeRevFilter, PathFilter, and FollowFilter to use
changed path filters, whenever available, to speed revision walks by
skipping commits that fail the changed path filter.
This work is based on earlier work by Kyle Zhao
(I441be984b609669cff77617ecfc838b080ce0816).
Change-Id: I7396f70241e571c63aabe337f6de1b8b9800f7ed
Signed-off-by: kylezhao <kylezhao@tencent.com>
Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
8 files changed, 211 insertions, 13 deletions
diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/revwalk/RevWalkCommitGraphTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/revwalk/RevWalkCommitGraphTest.java index 3cc0368943..9809d34051 100644 --- a/org.eclipse.jgit.test/tst/org/eclipse/jgit/revwalk/RevWalkCommitGraphTest.java +++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/revwalk/RevWalkCommitGraphTest.java @@ -26,6 +26,7 @@ import java.util.Comparator; import java.util.List; import org.eclipse.jgit.api.Git; +import org.eclipse.jgit.diff.DiffConfig; import org.eclipse.jgit.internal.storage.file.GC; import org.eclipse.jgit.lib.AnyObjectId; import org.eclipse.jgit.lib.ConfigConstants; @@ -167,6 +168,72 @@ public class RevWalkCommitGraphTest extends RevWalkTestCase { } @Test + public void testChangedPathFilter() throws Exception { + RevCommit c1 = commitFile("file1", "1", "master"); + commitFile("file2", "2", "master"); + RevCommit c3 = commitFile("file1", "3", "master"); + RevCommit c4 = commitFile("file2", "4", "master"); + + enableAndWriteCommitGraph(); + + TreeRevFilter trf = new TreeRevFilter(rw, PathFilter.create("file1")); + rw.markStart(rw.lookupCommit(c4)); + rw.setRevFilter(trf); + assertEquals(c3, rw.next()); + assertEquals(c1, rw.next()); + assertNull(rw.next()); + + // 1 commit that has exactly one parent and matches path + assertEquals(1, trf.getChangedPathFilterTruePositive()); + + // No false positives + assertEquals(0, trf.getChangedPathFilterFalsePositive()); + + // 2 commits that have exactly one parent and don't match path + assertEquals(2, trf.getChangedPathFilterNegative()); + } + + @Test + public void testChangedPathFilterWithFollowFilter() throws Exception { + RevCommit c0 = commit(tree()); + RevCommit c1 = commit(tree(file("file", blob("contents"))), c0); + RevCommit c2 = commit(tree(file("file", blob("contents")), + file("unrelated", blob("unrelated change"))), c1); + RevCommit c3 = commit(tree(file("renamed-file", blob("contents")), + file("unrelated", blob("unrelated change"))), c2); + RevCommit c4 = commit( + tree(file("renamed-file", blob("contents")), + file("unrelated", blob("another unrelated change"))), + c3); + branch(c4, "master"); + + enableAndWriteCommitGraph(); + + db.getConfig().setString(ConfigConstants.CONFIG_DIFF_SECTION, null, + ConfigConstants.CONFIG_KEY_RENAMES, "true"); + + TreeRevFilter trf = new TreeRevFilter(rw, + new FollowFilter(PathFilter.create("renamed-file"), + db.getConfig().get(DiffConfig.KEY))); + rw.markStart(rw.lookupCommit(c4)); + rw.setRevFilter(trf); + assertEquals(c3, rw.next()); + assertEquals(c1, rw.next()); + assertNull(rw.next()); + + // Path "renamed-file" is in c3's bloom filter, and another path "file" + // is in c1's bloom filter (we know of "file" because the rev walk + // detected that "renamed-file" is a renaming of "file") + assertEquals(2, trf.getChangedPathFilterTruePositive()); + + // No false positives + assertEquals(0, trf.getChangedPathFilterFalsePositive()); + + // 2 commits that have exactly one parent and don't match path + assertEquals(2, trf.getChangedPathFilterNegative()); + } + + @Test public void testWalkWithCommitMessageFilter() throws Exception { RevCommit a = commit(); RevCommit b = commitBuilder().parent(a) diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/FollowFilter.java b/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/FollowFilter.java index 14a34375cd..35ef51f4fd 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/FollowFilter.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/FollowFilter.java @@ -11,6 +11,8 @@ package org.eclipse.jgit.revwalk; import java.io.IOException; +import java.util.Optional; +import java.util.Set; import org.eclipse.jgit.diff.DiffConfig; import org.eclipse.jgit.errors.IncorrectObjectTypeException; @@ -90,6 +92,12 @@ public class FollowFilter extends TreeFilter { } @Override + public Optional<Set<byte[]>> getPathsBestEffort() { + return path.getPathsBestEffort(); + } + + /** {@inheritDoc} */ + @Override public TreeFilter clone() { return new FollowFilter(path.clone(), cfg); } diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/RevCommit.java b/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/RevCommit.java index e32f28ebbc..316aeda4f4 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/RevCommit.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/RevCommit.java @@ -25,6 +25,7 @@ import java.util.List; import org.eclipse.jgit.annotations.Nullable; import org.eclipse.jgit.errors.IncorrectObjectTypeException; import org.eclipse.jgit.errors.MissingObjectException; +import org.eclipse.jgit.internal.storage.commitgraph.ChangedPathFilter; import org.eclipse.jgit.lib.AnyObjectId; import org.eclipse.jgit.lib.Constants; import org.eclipse.jgit.lib.MutableObjectId; @@ -687,6 +688,20 @@ public class RevCommit extends RevObject { } /** + * Get the changed path filter of the commit. + * <p> + * This is null when there is no commit graph file, the commit is not in the + * commit graph file, or the commit graph file was generated without changed + * path filters. + * + * @return the changed path filter + * @since 6.7 + */ + ChangedPathFilter getChangedPathFilter() { + return null; + } + + /** * Reset this commit to allow another RevWalk with the same instances. * <p> * Subclasses <b>must</b> call <code>super.reset()</code> to ensure the diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/RevCommitCG.java b/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/RevCommitCG.java index 3846ca579a..bfc0c59aad 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/RevCommitCG.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/RevCommitCG.java @@ -14,6 +14,7 @@ import java.io.IOException; import org.eclipse.jgit.errors.IncorrectObjectTypeException; import org.eclipse.jgit.errors.MissingObjectException; +import org.eclipse.jgit.internal.storage.commitgraph.ChangedPathFilter; import org.eclipse.jgit.internal.storage.commitgraph.CommitGraph; import org.eclipse.jgit.lib.AnyObjectId; import org.eclipse.jgit.lib.Constants; @@ -29,6 +30,8 @@ class RevCommitCG extends RevCommit { private final int graphPosition; + private final ChangedPathFilter changedPathFilter; + private int generation = Constants.COMMIT_GENERATION_UNKNOWN; /** @@ -38,10 +41,14 @@ class RevCommitCG extends RevCommit { * object name for the commit. * @param graphPosition * the position in the commit-graph of the object. + * @param changedPathFilter + * the changed path filter if one exists */ - protected RevCommitCG(AnyObjectId id, int graphPosition) { + protected RevCommitCG(AnyObjectId id, int graphPosition, + ChangedPathFilter changedPathFilter) { super(id); this.graphPosition = graphPosition; + this.changedPathFilter = changedPathFilter; } @Override @@ -100,4 +107,10 @@ class RevCommitCG extends RevCommit { int getGeneration() { return generation; } + + /** {@inheritDoc} */ + @Override + ChangedPathFilter getChangedPathFilter() { + return changedPathFilter; + } } diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/RevWalk.java b/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/RevWalk.java index 27a09f4956..f4bf710ed2 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/RevWalk.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/RevWalk.java @@ -1713,7 +1713,8 @@ public class RevWalk implements Iterable<RevCommit>, AutoCloseable { private RevCommit createCommit(AnyObjectId id, int graphPos) { if (graphPos >= 0) { - return new RevCommitCG(id, graphPos); + return new RevCommitCG(id, graphPos, + commitGraph().getChangedPathFilter(graphPos)); } return new RevCommit(id); } diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/TreeRevFilter.java b/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/TreeRevFilter.java index 98bb5884fa..017a825ac6 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/TreeRevFilter.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/TreeRevFilter.java @@ -12,7 +12,10 @@ package org.eclipse.jgit.revwalk; import java.io.IOException; import java.util.List; +import java.util.Optional; +import java.util.Set; +import org.eclipse.jgit.internal.storage.commitgraph.ChangedPathFilter; import org.eclipse.jgit.diff.DiffConfig; import org.eclipse.jgit.diff.DiffEntry; import org.eclipse.jgit.diff.DiffEntry.ChangeType; @@ -47,6 +50,12 @@ public class TreeRevFilter extends RevFilter { private final TreeWalk pathFilter; + private long changedPathFilterTruePositive = 0; + + private long changedPathFilterFalsePositive = 0; + + private long changedPathFilterNegative = 0; + /** * Create a {@link org.eclipse.jgit.revwalk.filter.RevFilter} from a * {@link org.eclipse.jgit.treewalk.filter.TreeFilter}. @@ -122,12 +131,41 @@ public class TreeRevFilter extends RevFilter { // We have exactly one parent. This is a very common case. // int chgs = 0, adds = 0; - while (tw.next()) { - chgs++; - if (tw.getRawMode(0) == 0 && tw.getRawMode(1) != 0) { - adds++; - } else { - break; // no point in looking at this further. + boolean changedPathFilterUsed = false; + boolean mustCalculateChgs = true; + ChangedPathFilter cpf = c.getChangedPathFilter(); + if (cpf != null) { + Optional<Set<byte[]>> paths = pathFilter.getFilter() + .getPathsBestEffort(); + if (paths.isPresent()) { + changedPathFilterUsed = true; + for (byte[] path : paths.get()) { + if (!cpf.maybeContains(path)) { + mustCalculateChgs = false; + break; + } + } + } + } + if (mustCalculateChgs) { + while (tw.next()) { + chgs++; + if (tw.getRawMode(0) == 0 && tw.getRawMode(1) != 0) { + adds++; + } else { + break; // no point in looking at this further. + } + } + if (changedPathFilterUsed) { + if (chgs > 0) { + changedPathFilterTruePositive++; + } else { + changedPathFilterFalsePositive++; + } + } + } else { + if (changedPathFilterUsed) { + changedPathFilterNegative++; } } @@ -244,6 +282,37 @@ public class TreeRevFilter extends RevFilter { return false; } + /** + * Return how many times a changed path filter correctly predicted that a + * path was changed in a commit, for statistics gathering purposes. + * + * @return count of true positives + */ + public long getChangedPathFilterTruePositive() { + return changedPathFilterTruePositive; + } + + /** + * Return how many times a changed path filter wrongly predicted that a path + * was changed in a commit, for statistics gathering purposes. + * + * @return count of false positives + */ + public long getChangedPathFilterFalsePositive() { + return changedPathFilterFalsePositive; + } + + /** + * Return how many times a changed path filter predicted that a path was not + * changed in a commit (allowing that commit to be skipped), for statistics + * gathering purposes. + * + * @return count of negatives + */ + public long getChangedPathFilterNegative() { + return changedPathFilterNegative; + } + private void updateFollowFilter(ObjectId[] trees, DiffConfig cfg) throws MissingObjectException, IncorrectObjectTypeException, CorruptObjectException, IOException { diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/treewalk/filter/PathFilter.java b/org.eclipse.jgit/src/org/eclipse/jgit/treewalk/filter/PathFilter.java index 63e587ab24..66ab8bb5be 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/treewalk/filter/PathFilter.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/treewalk/filter/PathFilter.java @@ -11,6 +11,9 @@ package org.eclipse.jgit.treewalk.filter; +import java.util.Optional; +import java.util.Set; +import java.util.HashSet; import org.eclipse.jgit.internal.JGitText; import org.eclipse.jgit.lib.Constants; import org.eclipse.jgit.treewalk.TreeWalk; @@ -45,7 +48,8 @@ public class PathFilter extends TreeFilter { while (path.endsWith("/")) //$NON-NLS-1$ path = path.substring(0, path.length() - 1); if (path.length() == 0) - throw new IllegalArgumentException(JGitText.get().emptyPathNotPermitted); + throw new IllegalArgumentException( + JGitText.get().emptyPathNotPermitted); return new PathFilter(path); } @@ -86,6 +90,14 @@ public class PathFilter extends TreeFilter { } @Override + public Optional<Set<byte[]>> getPathsBestEffort() { + HashSet<byte[]> s = new HashSet<>(); + s.add(pathRaw); + return Optional.of(s); + } + + /** {@inheritDoc} */ + @Override public PathFilter clone() { return this; } diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/treewalk/filter/TreeFilter.java b/org.eclipse.jgit/src/org/eclipse/jgit/treewalk/filter/TreeFilter.java index 28f2cd9461..22d430bc27 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/treewalk/filter/TreeFilter.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/treewalk/filter/TreeFilter.java @@ -11,6 +11,8 @@ package org.eclipse.jgit.treewalk.filter; import java.io.IOException; +import java.util.Optional; +import java.util.Set; import org.eclipse.jgit.dircache.DirCacheIterator; import org.eclipse.jgit.errors.IncorrectObjectTypeException; @@ -188,10 +190,8 @@ public abstract class TreeFilter { * as thrown by {@link #include(TreeWalk)} * @since 4.7 */ - public int matchFilter(TreeWalk walker) - throws MissingObjectException, IncorrectObjectTypeException, - IOException - { + public int matchFilter(TreeWalk walker) throws MissingObjectException, + IncorrectObjectTypeException, IOException { return include(walker) ? 0 : 1; } @@ -210,6 +210,19 @@ public abstract class TreeFilter { public abstract boolean shouldBeRecursive(); /** + * If this filter checks that a specific set of paths have all been + * modified, returns that set of paths to be checked against a changed path + * filter. Otherwise, returns empty. + * + * @return a set of paths, or empty + * + * @since 6.7 + */ + public Optional<Set<byte[]>> getPathsBestEffort() { + return Optional.empty(); + } + + /** * {@inheritDoc} * * Clone this tree filter, including its parameters. |