diff options
author | Xing Huang <xingkhuang@google.com> | 2025-03-11 17:22:08 -0500 |
---|---|---|
committer | Xing Huang <xingkhuang@google.com> | 2025-03-13 14:18:12 -0500 |
commit | e2ddcc96903ee411e6933495ae07251a7c55e4c9 (patch) | |
tree | bde7e87a4e60d2509730500bca79eea360da7b6c | |
parent | 52dc9fdd0d65fa06d6c98072a16b181a024c533b (diff) | |
download | jgit-e2ddcc96903ee411e6933495ae07251a7c55e4c9.tar.gz jgit-e2ddcc96903ee411e6933495ae07251a7c55e4c9.zip |
TreeRevFilter: enable Bloom Filter usage with ChangedPathTreeFilter
The paths relevant for a treewalk can be defined with hierarchy of tree
filters. TreeRevFilter retrieves these paths from #getPathsBestEffort to
apply them to the ChangePathFilter (bloom filters), however the plain
list of paths cannot represent the And/Or/Not of the tree filter API
(e.g. NOT(/a/b) or AND("/a", "/b")).
Introduce a new TreeFilter method #shouldTreeWalk() to let the filters
decide whether a set of tree entries need to be tree walked or can be
discarded right away.
Create a new ChangePathTreeFilter that can use changed path filters to
determine shouldTreeWalk.
Update TreeRevFilter to use a ChangePathTreeFilter, instead of getting
paths and check the changed tree filters itself.
Signed-off-by: Xing Huang <xingkhuang@google.com>
Change-Id: I8edd0b8423f2bfb85b38d7f997f3cd8dad558bc8
8 files changed, 725 insertions, 60 deletions
diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/revwalk/RevWalkCommitGraphTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/revwalk/RevWalkCommitGraphTest.java index c2f8f10631..e47dd898b0 100644 --- a/org.eclipse.jgit.test/tst/org/eclipse/jgit/revwalk/RevWalkCommitGraphTest.java +++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/revwalk/RevWalkCommitGraphTest.java @@ -37,8 +37,9 @@ import org.eclipse.jgit.revwalk.filter.MessageRevFilter; import org.eclipse.jgit.revwalk.filter.RevFilter; import org.eclipse.jgit.storage.file.FileBasedConfig; import org.eclipse.jgit.treewalk.filter.AndTreeFilter; +import org.eclipse.jgit.treewalk.filter.ChangedPathTreeFilter; +import org.eclipse.jgit.treewalk.filter.OrTreeFilter; import org.eclipse.jgit.treewalk.filter.PathFilter; -import org.eclipse.jgit.treewalk.filter.PathFilterGroup; import org.eclipse.jgit.treewalk.filter.TreeFilter; import org.junit.Test; @@ -172,61 +173,99 @@ public class RevWalkCommitGraphTest extends RevWalkTestCase { } @Test - public void testChangedPathFilter() throws Exception { - RevCommit c1 = commitFile("file1", "1", "master"); - commitFile("file2", "2", "master"); - RevCommit c3 = commitFile("file1", "3", "master"); - RevCommit c4 = commitFile("file2", "4", "master"); + public void testChangedPathFilter_allModify() throws Exception { + RevCommit c1 = commit(tree(file("file1", blob("1")))); + RevCommit c2 = commit(tree(file("file2", blob("2"))), c1); + RevCommit c3 = commit(tree(file("file1", blob("3"))), c2); + RevCommit c4 = commit(tree(file("file2", blob("4"))), c3); + + branch(c4, "master"); enableAndWriteCommitGraph(); - TreeRevFilter trf = new TreeRevFilter(rw, PathFilter.create("file1")); + TreeRevFilter trf = new TreeRevFilter(rw, + ChangedPathTreeFilter.create("file1")); rw.markStart(rw.lookupCommit(c4)); rw.setRevFilter(trf); + assertEquals(c4, rw.next()); assertEquals(c3, rw.next()); + assertEquals(c2, rw.next()); assertEquals(c1, rw.next()); assertNull(rw.next()); - // 1 commit that has exactly one parent and matches path - assertEquals(1, trf.getChangedPathFilterTruePositive()); + // all commits modified file1 but c1 did not have a parent + assertEquals(3, trf.getChangedPathFilterTruePositive()); // No false positives assertEquals(0, trf.getChangedPathFilterFalsePositive()); - // 2 commits that have exactly one parent and don't match path - assertEquals(2, trf.getChangedPathFilterNegative()); + // No negatives because all 4 commits had modified file1 + assertEquals(0, trf.getChangedPathFilterNegative()); } @Test - public void testChangedPathFilterWithMultiPaths() throws Exception { - RevCommit c1 = commitFile("file1", "1", "master"); - RevCommit c2 = commitFile("file1", "2", "master"); - RevCommit c3 = commitFile("file2", "3", "master"); - RevCommit c4 = commitFile("file3", "4", "master"); + public void testChangedPathFilter_someModify() throws Exception { + RevCommit c1 = commit(tree(file("file1", blob("1")))); + RevCommit c2 = commit(tree(file("file1", blob("1"))), c1); + RevCommit c3 = commit(tree(file("file1", blob("2"))), c2); + RevCommit c4 = commit(tree(file("file1", blob("1"))), c3); + + branch(c4, "master"); enableAndWriteCommitGraph(); TreeRevFilter trf = new TreeRevFilter(rw, - PathFilterGroup.createFromStrings(List.of("file1", "file2"))); + ChangedPathTreeFilter.create("file1")); rw.markStart(rw.lookupCommit(c4)); rw.setRevFilter(trf); + assertEquals(c4, rw.next()); assertEquals(c3, rw.next()); - assertEquals(c2, rw.next()); assertEquals(c1, rw.next()); assertNull(rw.next()); - // c2 and c3 has either file1 or file2, c1 did not use ChangedPathFilter - // since it has no parent + // c4 and c3 modified file1. c1 did not have a parent assertEquals(2, trf.getChangedPathFilterTruePositive()); // No false positives assertEquals(0, trf.getChangedPathFilterFalsePositive()); - // c4 does not match either file1 or file2 + // c2 did not modify file1 assertEquals(1, trf.getChangedPathFilterNegative()); } @Test + public void testChangedPathFilterWithMultiPaths() throws Exception { + RevCommit c1 = commit(tree(file("file1", blob("1")))); + RevCommit c2 = commit(tree(file("file1", blob("2"))), c1); + RevCommit c3 = commit(tree(file("file2", blob("3"))), c2); + RevCommit c4 = commit(tree(file("file3", blob("4"))), c3); + + branch(c4, "master"); + + enableAndWriteCommitGraph(); + + TreeRevFilter trf = new TreeRevFilter(rw, + ChangedPathTreeFilter.create("file1", "file2")); + rw.markStart(rw.lookupCommit(c4)); + rw.setRevFilter(trf); + assertEquals(c4, rw.next()); + assertEquals(c3, rw.next()); + assertEquals(c2, rw.next()); + assertEquals(c1, rw.next()); + assertNull(rw.next()); + + // all commits have modified either file1 or file2, c1 did not have a + // parent + assertEquals(3, trf.getChangedPathFilterTruePositive()); + + // No false positives + assertEquals(0, trf.getChangedPathFilterFalsePositive()); + + // No negative + assertEquals(0, trf.getChangedPathFilterNegative()); + } + + @Test public void testChangedPathFilterWithFollowFilter() throws Exception { RevCommit c0 = commit(tree()); RevCommit c1 = commit(tree(file("file", blob("contents"))), c0); @@ -245,9 +284,8 @@ public class RevWalkCommitGraphTest extends RevWalkTestCase { db.getConfig().setString(ConfigConstants.CONFIG_DIFF_SECTION, null, ConfigConstants.CONFIG_KEY_RENAMES, "true"); - TreeRevFilter trf = new TreeRevFilter(rw, - new FollowFilter(PathFilter.create("renamed-file"), - db.getConfig().get(DiffConfig.KEY))); + TreeRevFilter trf = new TreeRevFilter(rw, FollowFilter + .create("renamed-file", db.getConfig().get(DiffConfig.KEY))); rw.markStart(rw.lookupCommit(c4)); rw.setRevFilter(trf); assertEquals(c3, rw.next()); @@ -267,6 +305,296 @@ public class RevWalkCommitGraphTest extends RevWalkTestCase { } @Test + public void testChangedPathFilter_pathFilter_or_pathFilter_binaryOperation() + throws Exception { + RevCommit c1 = commit(tree(file("file1", blob("1")))); + RevCommit c2 = commit( + tree(file("file1", blob("1")), file("file2", blob("2"))), c1); + RevCommit c3 = commit(tree(file("file2", blob("2"))), c2); + RevCommit c4 = commit( + tree(file("file2", blob("2")), file("file3", blob("3"))), c3); + RevCommit c5 = commit( + tree(file("file2", blob("2")), file("file3", blob("3"))), c4); + + branch(c5, "master"); + + enableAndWriteCommitGraph(); + + ChangedPathTreeFilter pf1 = ChangedPathTreeFilter.create("file1"); + ChangedPathTreeFilter pf2 = ChangedPathTreeFilter.create("file2"); + + TreeFilter tf = OrTreeFilter + .create(new ChangedPathTreeFilter[] { pf1, pf2 }); + + TreeRevFilter trf = new TreeRevFilter(rw, tf); + rw.markStart(rw.lookupCommit(c5)); + rw.setRevFilter(trf); + assertEquals(c3, rw.next()); + assertEquals(c2, rw.next()); + assertEquals(c1, rw.next()); + assertNull(rw.next()); + + // c2 and c3 has either file1 or file2, c1 is not counted as + // ChangedPathFilter only applies to commits with 1 parent + assertEquals(2, trf.getChangedPathFilterTruePositive()); + + // No false positives + assertEquals(0, trf.getChangedPathFilterFalsePositive()); + + // c4 and c5 did not modify file1 or file2 + assertEquals(2, trf.getChangedPathFilterNegative()); + } + + @Test + public void testChangedPathFilter_pathFilter_or_pathFilter_or_pathFilter_listOperation() + throws Exception { + RevCommit c1 = commit(tree(file("file1", blob("1")))); + RevCommit c2 = commit( + tree(file("file1", blob("1")), file("file2", blob("2"))), c1); + RevCommit c3 = commit(tree(file("file2", blob("2"))), c2); + RevCommit c4 = commit(tree(file("file3", blob("3"))), c3); + RevCommit c5 = commit(tree(file("file3", blob("3"))), c4); + + branch(c5, "master"); + + enableAndWriteCommitGraph(); + + ChangedPathTreeFilter pf1 = ChangedPathTreeFilter.create("file1"); + ChangedPathTreeFilter pf2 = ChangedPathTreeFilter.create("file2"); + ChangedPathTreeFilter pf3 = ChangedPathTreeFilter.create("file3"); + + TreeFilter tf = OrTreeFilter + .create(new ChangedPathTreeFilter[] { pf1, pf2, pf3 }); + + TreeRevFilter trf = new TreeRevFilter(rw, tf); + rw.markStart(rw.lookupCommit(c5)); + rw.setRevFilter(trf); + assertEquals(c4, rw.next()); + assertEquals(c3, rw.next()); + assertEquals(c2, rw.next()); + assertEquals(c1, rw.next()); + assertNull(rw.next()); + + // c2 and c3 has either modified file1 or file2 or file3, c1 is not + // counted as ChangedPathFilter only applies to commits with 1 parent + assertEquals(3, trf.getChangedPathFilterTruePositive()); + + // No false positives + assertEquals(0, trf.getChangedPathFilterFalsePositive()); + + // c5 does not modify either file1 or file2 or file3 + assertEquals(1, trf.getChangedPathFilterNegative()); + } + + @Test + public void testChangedPathFilter_pathFilter_or_nonPathFilter_binaryOperation() + throws Exception { + RevCommit c1 = commit(tree(file("file1", blob("1")))); + RevCommit c2 = commit(tree(file("file2", blob("2"))), c1); + RevCommit c3 = commit(tree(file("file2", blob("3"))), c2); + RevCommit c4 = commit(tree(file("file2", blob("3"))), c3); + + branch(c4, "master"); + + enableAndWriteCommitGraph(); + + ChangedPathTreeFilter pf = ChangedPathTreeFilter.create("file1"); + TreeFilter npf = TreeFilter.ANY_DIFF; + + TreeFilter tf = OrTreeFilter.create(new TreeFilter[] { pf, npf }); + + TreeRevFilter trf = new TreeRevFilter(rw, tf); + rw.markStart(rw.lookupCommit(c4)); + rw.setRevFilter(trf); + assertEquals(c3, rw.next()); + assertEquals(c2, rw.next()); + assertEquals(c1, rw.next()); + assertNull(rw.next()); + + // c2 modified file1, c3 defaulted positive due to ANY_DIFF, c1 is not + // counted as ChangedPathFilter only applies to commits with 1 parent + assertEquals(2, trf.getChangedPathFilterTruePositive()); + + // c4 defaulted positive due to ANY_DIFF, but didn't no diff with its + // parent c3 + assertEquals(1, trf.getChangedPathFilterFalsePositive()); + + // No negative due to the OrTreeFilter + assertEquals(0, trf.getChangedPathFilterNegative()); + } + + @Test + public void testChangedPathFilter_nonPathFilter_or_nonPathFilter_binaryOperation() + throws Exception { + RevCommit c1 = commitFile("file1", "1", "master"); + RevCommit c2 = commitFile("file2", "2", "master"); + RevCommit c3 = commitFile("file3", "3", "master"); + RevCommit c4 = commitFile("file4", "4", "master"); + + enableAndWriteCommitGraph(); + + TreeFilter npf1 = TreeFilter.ANY_DIFF; + TreeFilter npf2 = TreeFilter.ANY_DIFF; + + TreeFilter tf = OrTreeFilter.create(new TreeFilter[] { npf1, npf2 }); + + TreeRevFilter trf = new TreeRevFilter(rw, tf); + rw.markStart(rw.lookupCommit(c4)); + rw.setRevFilter(trf); + assertEquals(c4, rw.next()); + assertEquals(c3, rw.next()); + assertEquals(c2, rw.next()); + assertEquals(c1, rw.next()); + assertNull(rw.next()); + + // No true positives since there's no pathFilter + assertEquals(0, trf.getChangedPathFilterTruePositive()); + + // No false positives since there's no pathFilter + assertEquals(0, trf.getChangedPathFilterFalsePositive()); + + // No negative since there's no pathFilter + assertEquals(0, trf.getChangedPathFilterNegative()); + } + + @Test + public void testChangedPathFilter_pathFilter_and_pathFilter_binaryOperation() + throws Exception { + RevCommit c1 = commit(tree(file("file1", blob("1")))); + RevCommit c2 = commit(tree(file("file2", blob("2"))), c1); + + branch(c2, "master"); + + enableAndWriteCommitGraph(); + + ChangedPathTreeFilter pf1 = ChangedPathTreeFilter.create("file1"); + ChangedPathTreeFilter pf2 = ChangedPathTreeFilter.create("file2"); + + TreeFilter atf = AndTreeFilter + .create(new ChangedPathTreeFilter[] { pf1, pf2 }); + TreeRevFilter trf = new TreeRevFilter(rw, atf); + + rw.markStart(rw.lookupCommit(c2)); + rw.setRevFilter(trf); + + assertNull(rw.next()); + + // c1 is not counted as ChangedPathFilter only applies to commits with 1 + // parent + assertEquals(0, trf.getChangedPathFilterTruePositive()); + + // c2 has modified both file 1 and file2, + // however nothing is returned from TreeWalk since a TreeHead + // cannot be two paths at once + assertEquals(1, trf.getChangedPathFilterFalsePositive()); + + // No negatives + assertEquals(0, trf.getChangedPathFilterNegative()); + } + + @Test + public void testChangedPathFilter_pathFilter_and_pathFilter_and_pathFilter_listOperation() + throws Exception { + RevCommit c1 = commit(tree(file("file1", blob("1")))); + RevCommit c2 = commit(tree(file("file2", blob("2"))), c1); + RevCommit c3 = commit(tree(file("file3", blob("3"))), c2); + + branch(c3, "master"); + + enableAndWriteCommitGraph(); + + ChangedPathTreeFilter pf1 = ChangedPathTreeFilter.create("file1"); + ChangedPathTreeFilter pf2 = ChangedPathTreeFilter.create("file2"); + ChangedPathTreeFilter pf3 = ChangedPathTreeFilter.create("file3"); + + TreeFilter tf = AndTreeFilter + .create(new ChangedPathTreeFilter[] { pf1, pf2, pf3 }); + + TreeRevFilter trf = new TreeRevFilter(rw, tf); + rw.markStart(rw.lookupCommit(c3)); + rw.setRevFilter(trf); + assertNull(rw.next()); + + // c1 is not counted as ChangedPathFilter only applies to commits with 1 + // parent + assertEquals(0, trf.getChangedPathFilterTruePositive()); + + // No false positives + assertEquals(0, trf.getChangedPathFilterFalsePositive()); + + // c2 and c3 can not possibly have both file1, file2, and file3 as + // treeHead at once + assertEquals(2, trf.getChangedPathFilterNegative()); + } + + @Test + public void testChangedPathFilter_pathFilter_and_nonPathFilter_binaryOperation() + throws Exception { + RevCommit c1 = commit(tree(file("file1", blob("1")))); + RevCommit c2 = commit(tree(file("file1", blob("2"))), c1); + RevCommit c3 = commit(tree(file("file1", blob("2"))), c2); + + branch(c3, "master"); + + enableAndWriteCommitGraph(); + + ChangedPathTreeFilter pf = ChangedPathTreeFilter.create("file1"); + TreeFilter npf = TreeFilter.ANY_DIFF; + + TreeFilter tf = AndTreeFilter.create(new TreeFilter[] { pf, npf }); + + TreeRevFilter trf = new TreeRevFilter(rw, tf); + rw.markStart(rw.lookupCommit(c3)); + rw.setRevFilter(trf); + assertEquals(c2, rw.next()); + assertEquals(c1, rw.next()); + assertNull(rw.next()); + + // c2 modified file1 and c1 is not counted as ChangedPathFilter only + // applies to commits with 1 parent + assertEquals(1, trf.getChangedPathFilterTruePositive()); + + // No false positives + assertEquals(0, trf.getChangedPathFilterFalsePositive()); + + // c3 did not modify file1 + assertEquals(1, trf.getChangedPathFilterNegative()); + } + + @Test + public void testChangedPathFilter_nonPathFilter_and_nonPathFilter_binaryOperation() + throws Exception { + RevCommit c1 = commitFile("file1", "1", "master"); + commitFile("file1", "1", "master"); + RevCommit c3 = commitFile("file3", "3", "master"); + RevCommit c4 = commitFile("file4", "4", "master"); + + enableAndWriteCommitGraph(); + + TreeFilter npf1 = TreeFilter.ANY_DIFF; + TreeFilter npf2 = TreeFilter.ANY_DIFF; + + TreeFilter tf = AndTreeFilter.create(new TreeFilter[] { npf1, npf2 }); + + TreeRevFilter trf = new TreeRevFilter(rw, tf); + rw.markStart(rw.lookupCommit(c4)); + rw.setRevFilter(trf); + assertEquals(c4, rw.next()); + assertEquals(c3, rw.next()); + assertEquals(c1, rw.next()); + assertNull(rw.next()); + + // No true positives since there's no path + assertEquals(0, trf.getChangedPathFilterTruePositive()); + + // No false positives since there's no path + assertEquals(0, trf.getChangedPathFilterFalsePositive()); + + // No negative since there's no path + assertEquals(0, trf.getChangedPathFilterNegative()); + } + + @Test public void testWalkWithCommitMessageFilter() throws Exception { RevCommit a = commit(); RevCommit b = commitBuilder().parent(a) diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/treewalk/filter/ChangedPathTreeFilterTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/treewalk/filter/ChangedPathTreeFilterTest.java new file mode 100644 index 0000000000..88f6b75c6b --- /dev/null +++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/treewalk/filter/ChangedPathTreeFilterTest.java @@ -0,0 +1,122 @@ +/* + * Copyright (C) 2025, Google LLC + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Distribution License v. 1.0 which is available at + * https://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + */ +package org.eclipse.jgit.treewalk.filter; + +import static java.nio.charset.StandardCharsets.UTF_8; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +import java.nio.ByteBuffer; +import java.util.Arrays; +import java.util.stream.Collectors; + +import org.eclipse.jgit.internal.storage.commitgraph.ChangedPathFilter; +import org.eclipse.jgit.lib.ObjectId; +import org.eclipse.jgit.revwalk.RevCommit; +import org.eclipse.jgit.revwalk.RevWalk; +import org.eclipse.jgit.treewalk.filter.TreeFilter.MutableBoolean; +import org.junit.Test; + +public class ChangedPathTreeFilterTest { + + @Test + public void shouldTreeWalk_no_usingCpf() { + ChangedPathTreeFilter f = ChangedPathTreeFilter.create("a/b"); + MutableBoolean cfpUsed = new MutableBoolean(); + + boolean result = f.shouldTreeWalk(FakeRevCommit.withCpfFor("c"), null, + cfpUsed); + + assertFalse(result); + assertTrue(cfpUsed.get()); + } + + @Test + public void shouldTreeWalk_yes_usingCpf() { + ChangedPathTreeFilter f = ChangedPathTreeFilter.create("a/b"); + MutableBoolean cfpUsed = new MutableBoolean(); + + boolean result = f.shouldTreeWalk(FakeRevCommit.withCpfFor("a/b"), null, + cfpUsed); + + assertTrue(result); + assertTrue(cfpUsed.get()); + } + + @Test + public void shouldTreeWalk_yes_noCpf() { + ChangedPathTreeFilter f = ChangedPathTreeFilter.create("a/b"); + MutableBoolean cfpUsed = new MutableBoolean(); + + boolean result = f.shouldTreeWalk(FakeRevCommit.noCpf(), null, + cfpUsed); + + assertTrue(result); + assertFalse(cfpUsed.get()); + } + + @Test + public void shouldTreeWalk_no_usingCpf_noReport() { + ChangedPathTreeFilter f = ChangedPathTreeFilter.create("a/b"); + boolean result = f.shouldTreeWalk(FakeRevCommit.withCpfFor("c"), null, + null); + + assertFalse(result); + } + + @Test + public void shouldTreeWalk_yes_usingCpf_noReport() { + ChangedPathTreeFilter f = ChangedPathTreeFilter.create("a/b"); + boolean result = f.shouldTreeWalk(FakeRevCommit.withCpfFor("a/b"), null, + null); + assertTrue(result); + } + + @Test + public void shouldTreeWalk_yes_noCpf_noReport() { + ChangedPathTreeFilter f = ChangedPathTreeFilter.create("a/b"); + boolean result = f.shouldTreeWalk(FakeRevCommit.noCpf(), null, + null); + + assertTrue(result); + } + + private static class FakeRevCommit extends RevCommit { + + static RevCommit withCpfFor(String... paths) { + return new FakeRevCommit( + ChangedPathFilter.fromPaths(Arrays.stream(paths) + .map(str -> ByteBuffer.wrap(str.getBytes(UTF_8))) + .collect(Collectors.toSet()))); + } + + static RevCommit noCpf() { + return new FakeRevCommit(null); + } + + private final ChangedPathFilter cpf; + + /** + * Create a new commit reference. + * + * @param cpf + * changedPathFilter + */ + protected FakeRevCommit(ChangedPathFilter cpf) { + super(ObjectId.zeroId()); + this.cpf = cpf; + } + + @Override + public ChangedPathFilter getChangedPathFilter(RevWalk rw) { + return cpf; + } + } +}
\ No newline at end of file diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/FollowFilter.java b/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/FollowFilter.java index 35ef51f4fd..12e6c4ea98 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/FollowFilter.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/FollowFilter.java @@ -18,7 +18,7 @@ import org.eclipse.jgit.diff.DiffConfig; import org.eclipse.jgit.errors.IncorrectObjectTypeException; import org.eclipse.jgit.errors.MissingObjectException; import org.eclipse.jgit.treewalk.TreeWalk; -import org.eclipse.jgit.treewalk.filter.PathFilter; +import org.eclipse.jgit.treewalk.filter.ChangedPathTreeFilter; import org.eclipse.jgit.treewalk.filter.TreeFilter; /** @@ -56,39 +56,44 @@ public class FollowFilter extends TreeFilter { * @since 3.0 */ public static FollowFilter create(String path, DiffConfig cfg) { - return new FollowFilter(PathFilter.create(path), cfg); + return new FollowFilter(ChangedPathTreeFilter.create(path), cfg); } - private final PathFilter path; + private final ChangedPathTreeFilter path; final DiffConfig cfg; private RenameCallback renameCallback; - FollowFilter(PathFilter path, DiffConfig cfg) { + FollowFilter(ChangedPathTreeFilter path, DiffConfig cfg) { this.path = path; this.cfg = cfg; } - /** @return the path this filter matches. */ /** * Get the path this filter matches. * * @return the path this filter matches. */ public String getPath() { - return path.getPath(); + return path.getPaths().get(0); } @Override public boolean include(TreeWalk walker) throws MissingObjectException, IncorrectObjectTypeException, IOException { - return path.include(walker) && ANY_DIFF.include(walker); + return path.include(walker); + } + + @Override + public boolean shouldTreeWalk(RevCommit c, RevWalk rw, + MutableBoolean cpfUsed) { + return path.shouldTreeWalk(c, rw, cpfUsed); } @Override public boolean shouldBeRecursive() { - return path.shouldBeRecursive() || ANY_DIFF.shouldBeRecursive(); + return path.shouldBeRecursive(); } @Override @@ -105,9 +110,7 @@ public class FollowFilter extends TreeFilter { @SuppressWarnings("nls") @Override public String toString() { - return "(FOLLOW(" + path.toString() + ")" // - + " AND " // - + ANY_DIFF.toString() + ")"; + return "(FOLLOW(" + path.toString() + "))"; } /** diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/TreeRevFilter.java b/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/TreeRevFilter.java index 99943b78e6..e9a3e72c7f 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/TreeRevFilter.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/TreeRevFilter.java @@ -12,15 +12,11 @@ package org.eclipse.jgit.revwalk; import java.io.IOException; import java.util.List; -import java.util.Optional; -import java.util.Set; -import org.eclipse.jgit.internal.storage.commitgraph.ChangedPathFilter; import org.eclipse.jgit.diff.DiffConfig; import org.eclipse.jgit.diff.DiffEntry; import org.eclipse.jgit.diff.DiffEntry.ChangeType; import org.eclipse.jgit.diff.RenameDetector; -import org.eclipse.jgit.errors.CorruptObjectException; import org.eclipse.jgit.errors.IncorrectObjectTypeException; import org.eclipse.jgit.errors.MissingObjectException; import org.eclipse.jgit.errors.StopWalkException; @@ -28,6 +24,7 @@ import org.eclipse.jgit.lib.ObjectId; import org.eclipse.jgit.revwalk.filter.RevFilter; import org.eclipse.jgit.treewalk.TreeWalk; import org.eclipse.jgit.treewalk.filter.TreeFilter; +import org.eclipse.jgit.treewalk.filter.TreeFilter.MutableBoolean; /** * Filter applying a {@link org.eclipse.jgit.treewalk.filter.TreeFilter} against @@ -50,6 +47,8 @@ public class TreeRevFilter extends RevFilter { private final TreeWalk pathFilter; + private final MutableBoolean changedPathFilterUsed = new MutableBoolean(); + private long changedPathFilterTruePositive = 0; private long changedPathFilterFalsePositive = 0; @@ -126,24 +125,15 @@ public class TreeRevFilter extends RevFilter { } trees[nParents] = c.getTree(); tw.reset(trees); + changedPathFilterUsed.reset(); if (nParents == 1) { // We have exactly one parent. This is a very common case. // int chgs = 0, adds = 0; - boolean changedPathFilterUsed = false; - boolean mustCalculateChgs = true; - ChangedPathFilter cpf = c.getChangedPathFilter(walker); - if (cpf != null) { - Optional<Set<byte[]>> paths = pathFilter.getFilter() - .getPathsBestEffort(); - if (paths.isPresent()) { - changedPathFilterUsed = true; - if (paths.get().stream().noneMatch(cpf::maybeContains)) { - mustCalculateChgs = false; - } - } - } + TreeFilter tf = pathFilter.getFilter(); + boolean mustCalculateChgs = tf.shouldTreeWalk(c, walker, + changedPathFilterUsed); if (mustCalculateChgs) { while (tw.next()) { chgs++; @@ -153,7 +143,7 @@ public class TreeRevFilter extends RevFilter { break; // no point in looking at this further. } } - if (changedPathFilterUsed) { + if (changedPathFilterUsed.get()) { if (chgs > 0) { changedPathFilterTruePositive++; } else { @@ -161,7 +151,7 @@ public class TreeRevFilter extends RevFilter { } } } else { - if (changedPathFilterUsed) { + if (changedPathFilterUsed.get()) { changedPathFilterNegative++; } } @@ -315,9 +305,7 @@ public class TreeRevFilter extends RevFilter { } private void updateFollowFilter(ObjectId[] trees, DiffConfig cfg, - RevCommit commit) - throws MissingObjectException, IncorrectObjectTypeException, - CorruptObjectException, IOException { + RevCommit commit) throws IOException { TreeWalk tw = pathFilter; FollowFilter oldFilter = (FollowFilter) tw.getFilter(); tw.setFilter(TreeFilter.ANY_DIFF); diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/treewalk/filter/AndTreeFilter.java b/org.eclipse.jgit/src/org/eclipse/jgit/treewalk/filter/AndTreeFilter.java index c6804da039..b35dbebd17 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/treewalk/filter/AndTreeFilter.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/treewalk/filter/AndTreeFilter.java @@ -12,11 +12,14 @@ package org.eclipse.jgit.treewalk.filter; import java.io.IOException; +import java.util.Arrays; import java.util.Collection; import org.eclipse.jgit.errors.IncorrectObjectTypeException; import org.eclipse.jgit.errors.MissingObjectException; import org.eclipse.jgit.internal.JGitText; +import org.eclipse.jgit.revwalk.RevCommit; +import org.eclipse.jgit.revwalk.RevWalk; import org.eclipse.jgit.treewalk.TreeWalk; /** @@ -100,6 +103,13 @@ public abstract class AndTreeFilter extends TreeFilter { } @Override + public boolean shouldTreeWalk(RevCommit c, RevWalk rw, + MutableBoolean cpfUsed) { + return a.shouldTreeWalk(c, rw, cpfUsed) + && b.shouldTreeWalk(c, rw, cpfUsed); + } + + @Override public int matchFilter(TreeWalk walker) throws MissingObjectException, IncorrectObjectTypeException, IOException { @@ -174,6 +184,13 @@ public abstract class AndTreeFilter extends TreeFilter { } @Override + public boolean shouldTreeWalk(RevCommit c, RevWalk rw, + MutableBoolean cpfUsed) { + return Arrays.stream(subfilters) + .allMatch(t -> t.shouldTreeWalk(c, rw, cpfUsed)); + } + + @Override public TreeFilter clone() { final TreeFilter[] s = new TreeFilter[subfilters.length]; for (int i = 0; i < s.length; i++) diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/treewalk/filter/ChangedPathTreeFilter.java b/org.eclipse.jgit/src/org/eclipse/jgit/treewalk/filter/ChangedPathTreeFilter.java new file mode 100644 index 0000000000..2400e12240 --- /dev/null +++ b/org.eclipse.jgit/src/org/eclipse/jgit/treewalk/filter/ChangedPathTreeFilter.java @@ -0,0 +1,134 @@ +/* + * Copyright (C) 2025, Google LLC and others + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Distribution License v. 1.0 which is available at + * https://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +package org.eclipse.jgit.treewalk.filter; + +import org.eclipse.jgit.internal.JGitText; +import org.eclipse.jgit.internal.storage.commitgraph.ChangedPathFilter; +import org.eclipse.jgit.lib.Constants; +import org.eclipse.jgit.revwalk.RevCommit; +import org.eclipse.jgit.revwalk.RevWalk; +import org.eclipse.jgit.treewalk.TreeWalk; +import org.eclipse.jgit.util.StringUtils; + +import java.io.IOException; +import java.util.Arrays; +import java.util.List; +import java.util.stream.Collectors; + +/** + * Filter tree entries that modified the contents of particular file paths. + * <p> + * Equivalent to AndTreeFilter(PathFilter, AnyDiffFilter). This filter uses + * {@link org.eclipse.jgit.internal.storage.commitgraph.ChangedPathFilter} + * (bloom filters) when available to discard commits without diffing their + * trees. + * + * @since 7.3 + */ +public class ChangedPathTreeFilter extends TreeFilter { + + private TreeFilter pathFilter; + + private List<String> paths; + + private List<byte[]> rawPaths; + + /** + * Create a TreeFilter for trees modifying one or more user supplied paths. + * <p> + * Path strings are relative to the root of the repository. If the user's + * input should be assumed relative to a subdirectory of the repository the + * caller must prepend the subdirectory's path prior to creating the filter. + * <p> + * Path strings use '/' to delimit directories on all platforms. + * <p> + * Paths may appear in any order within the collection. Sorting may be done + * internally when the group is constructed if doing so will improve path + * matching performance. + * + * @param paths + * the paths to test against. Must have at least one entry. + * @return a new filter for the list of paths supplied. + */ + public static ChangedPathTreeFilter create(String... paths) { + return new ChangedPathTreeFilter(paths); + } + + private ChangedPathTreeFilter(String... paths) { + List<String> filtered = Arrays.stream(paths) + .map(s -> StringUtils.trim(s, '/')) + .collect(Collectors.toList()); + + if (filtered.size() == 0) + throw new IllegalArgumentException( + JGitText.get().atLeastOnePathIsRequired); + + if (filtered.stream().anyMatch(s -> s.isEmpty() || s.isBlank())) { + throw new IllegalArgumentException( + JGitText.get().emptyPathNotPermitted); + } + + this.paths = filtered; + this.rawPaths = this.paths.stream().map(Constants::encode) + .collect(Collectors.toList()); + if (filtered.size() == 1) { + this.pathFilter = PathFilter.create(paths[0]); + } else { + this.pathFilter = OrTreeFilter.create(Arrays.stream(paths) + .map(PathFilter::create).collect(Collectors.toList())); + } + } + + @Override + public boolean shouldTreeWalk(RevCommit c, RevWalk rw, + MutableBoolean cpfUsed) { + ChangedPathFilter cpf = c.getChangedPathFilter(rw); + if (cpf == null) { + return true; + } + if (cpfUsed != null) { + cpfUsed.orValue(true); + } + // return true if at least one path might exist in cpf + return rawPaths.stream().anyMatch(cpf::maybeContains); + } + + @Override + public boolean include(TreeWalk walker) throws IOException { + return pathFilter.include(walker) && ANY_DIFF.include(walker); + } + + @Override + public boolean shouldBeRecursive() { + return pathFilter.shouldBeRecursive() || ANY_DIFF.shouldBeRecursive(); + } + + @Override + public ChangedPathTreeFilter clone() { + return this; + } + + /** + * Get the paths this filter matches. + * + * @return the paths this filter matches. + */ + public List<String> getPaths() { + return paths; + } + + @Override + public String toString() { + return "(CHANGED_PATH(" + pathFilter.toString() + ")" // + + " AND " // + + ANY_DIFF.toString() + ")"; + } +} diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/treewalk/filter/OrTreeFilter.java b/org.eclipse.jgit/src/org/eclipse/jgit/treewalk/filter/OrTreeFilter.java index 3c18a9f98d..ce2382552b 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/treewalk/filter/OrTreeFilter.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/treewalk/filter/OrTreeFilter.java @@ -12,11 +12,14 @@ package org.eclipse.jgit.treewalk.filter; import java.io.IOException; +import java.util.Arrays; import java.util.Collection; import org.eclipse.jgit.errors.IncorrectObjectTypeException; import org.eclipse.jgit.errors.MissingObjectException; import org.eclipse.jgit.internal.JGitText; +import org.eclipse.jgit.revwalk.RevCommit; +import org.eclipse.jgit.revwalk.RevWalk; import org.eclipse.jgit.treewalk.TreeWalk; /** @@ -116,6 +119,13 @@ public abstract class OrTreeFilter extends TreeFilter { } @Override + public boolean shouldTreeWalk(RevCommit c, RevWalk rw, + MutableBoolean cpfUsed) { + return a.shouldTreeWalk(c, rw, cpfUsed) + || b.shouldTreeWalk(c, rw, cpfUsed); + } + + @Override public boolean shouldBeRecursive() { return a.shouldBeRecursive() || b.shouldBeRecursive(); } @@ -164,6 +174,13 @@ public abstract class OrTreeFilter extends TreeFilter { } @Override + public boolean shouldTreeWalk(RevCommit c, RevWalk rw, + MutableBoolean cpfUsed) { + return Arrays.stream(subfilters) + .anyMatch(t -> t.shouldTreeWalk(c, rw, cpfUsed)); + } + + @Override public boolean shouldBeRecursive() { for (TreeFilter f : subfilters) if (f.shouldBeRecursive()) diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/treewalk/filter/TreeFilter.java b/org.eclipse.jgit/src/org/eclipse/jgit/treewalk/filter/TreeFilter.java index a9066dc8f8..8159843312 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/treewalk/filter/TreeFilter.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/treewalk/filter/TreeFilter.java @@ -14,9 +14,12 @@ import java.io.IOException; import java.util.Optional; import java.util.Set; +import org.eclipse.jgit.annotations.Nullable; import org.eclipse.jgit.dircache.DirCacheIterator; import org.eclipse.jgit.errors.IncorrectObjectTypeException; import org.eclipse.jgit.errors.MissingObjectException; +import org.eclipse.jgit.revwalk.RevCommit; +import org.eclipse.jgit.revwalk.RevWalk; import org.eclipse.jgit.treewalk.TreeWalk; import org.eclipse.jgit.treewalk.WorkingTreeIterator; @@ -210,14 +213,38 @@ public abstract class TreeFilter { public abstract boolean shouldBeRecursive(); /** - * If this filter checks that at least one of the paths in a set has been + * Return true if the tree entries within this commit require + * {@link #include(TreeWalk)} to correctly determine whether they are + * interesting to report. + * <p> + * Otherwise, all tree entries within this commit are UNINTERESTING for this + * tree filter. + * + * @param c + * the commit being considered by the TreeFilter. + * @param rw + * the RevWalk used in retrieving relevant commit data. + * @param cpfUsed + * if not null, it reports if the changedPathFilter was used in + * this method + * @return True if the tree entries within c require + * {@link #include(TreeWalk)}. + * @since 7.3 + */ + public boolean shouldTreeWalk(RevCommit c, RevWalk rw, + @Nullable MutableBoolean cpfUsed) { + return true; + } + + /** + * If this filter checks that a specific set of paths have all been * modified, returns that set of paths to be checked against a changed path * filter. Otherwise, returns empty. * * @return a set of paths, or empty - * - * @since 6.7 + * @deprecated use {@code shouldTreeWalk} instead. */ + @Deprecated(since = "7.3") public Optional<Set<byte[]>> getPathsBestEffort() { return Optional.empty(); } @@ -242,4 +269,33 @@ public abstract class TreeFilter { } return n.replace('$', '.'); } + + /** + * Mutable wrapper to return a boolean in a function parameter. + * + * @since 7.3 + */ + public static class MutableBoolean { + private boolean value; + + /** + * Return the boolean value. + * + * @return The state of the internal boolean value. + */ + public boolean get() { + return value; + } + + void orValue(boolean v) { + value = value || v; + } + + /** + * Reset the boolean value. + */ + public void reset() { + value = false; + } + } } |