summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJonathan Tan <jonathantanmy@google.com>2023-04-18 15:20:02 -0700
committerJonathan Tan <jonathantanmy@google.com>2023-07-18 14:21:48 -0700
commit49beb5ae519e429d8868b8afe24007ae2d17003a (patch)
tree2f62e063dae6bd9d0f0eb040160362913a1cf5b5
parent5dc63514d07fbbbd55a287047d756984169cb23d (diff)
downloadjgit-49beb5ae519e429d8868b8afe24007ae2d17003a.tar.gz
jgit-49beb5ae519e429d8868b8afe24007ae2d17003a.zip
CommitGraphWriter: write changed-path filters
Add support for writing the BIDX and BDAT chunks of the commit graph file, as described in man gitformat-commit-graph(5). The ability to read such chunks will be added in a subsequent commit. This work is based on earlier work by Kyle Zhao (Ib863782af209f26381e3ca0a2c119b99e84b679c). Change-Id: Ic18e6f0eeec7da1e1ff31751aabda5e6952dbe6e Signed-off-by: kylezhao <kylezhao@tencent.com> Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
-rw-r--r--lib/BUILD4
-rw-r--r--org.eclipse.jgit.pgm/META-INF/services/org.eclipse.jgit.pgm.TextBuiltin1
-rw-r--r--org.eclipse.jgit.pgm/src/org/eclipse/jgit/pgm/debug/ReadChangedPathFilter.java83
-rw-r--r--org.eclipse.jgit.test/BUILD1
-rw-r--r--org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/commitgraph/CommitGraphWriterTest.java229
-rw-r--r--org.eclipse.jgit/BUILD1
-rw-r--r--org.eclipse.jgit/pom.xml6
-rw-r--r--org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/commitgraph/ChangedPathFilter.java124
-rw-r--r--org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/commitgraph/CommitGraphConstants.java4
-rw-r--r--org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/commitgraph/CommitGraphWriter.java116
-rw-r--r--org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/commitgraph/GraphCommits.java14
11 files changed, 570 insertions, 13 deletions
diff --git a/lib/BUILD b/lib/BUILD
index 6be9e575fe..29669b5d2f 100644
--- a/lib/BUILD
+++ b/lib/BUILD
@@ -21,6 +21,10 @@ java_library(
java_library(
name = "commons-codec",
+ visibility = [
+ "//org.eclipse.jgit:__pkg__",
+ "//org.eclipse.jgit.test:__pkg__",
+ ],
exports = ["@commons-codec//jar"],
)
diff --git a/org.eclipse.jgit.pgm/META-INF/services/org.eclipse.jgit.pgm.TextBuiltin b/org.eclipse.jgit.pgm/META-INF/services/org.eclipse.jgit.pgm.TextBuiltin
index ea1d1e3faa..08d37278de 100644
--- a/org.eclipse.jgit.pgm/META-INF/services/org.eclipse.jgit.pgm.TextBuiltin
+++ b/org.eclipse.jgit.pgm/META-INF/services/org.eclipse.jgit.pgm.TextBuiltin
@@ -46,6 +46,7 @@ org.eclipse.jgit.pgm.debug.BenchmarkReftable
org.eclipse.jgit.pgm.debug.DiffAlgorithms
org.eclipse.jgit.pgm.debug.LfsStore
org.eclipse.jgit.pgm.debug.MakeCacheTree
+org.eclipse.jgit.pgm.debug.ReadChangedPathFilter
org.eclipse.jgit.pgm.debug.ReadDirCache
org.eclipse.jgit.pgm.debug.ReadReftable
org.eclipse.jgit.pgm.debug.RebuildCommitGraph
diff --git a/org.eclipse.jgit.pgm/src/org/eclipse/jgit/pgm/debug/ReadChangedPathFilter.java b/org.eclipse.jgit.pgm/src/org/eclipse/jgit/pgm/debug/ReadChangedPathFilter.java
new file mode 100644
index 0000000000..6927de84e4
--- /dev/null
+++ b/org.eclipse.jgit.pgm/src/org/eclipse/jgit/pgm/debug/ReadChangedPathFilter.java
@@ -0,0 +1,83 @@
+/*
+ * Copyright (C) 2023, Google LLC
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Distribution License v. 1.0 which is available at
+ * https://www.eclipse.org/org/documents/edl-v10.php.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+package org.eclipse.jgit.pgm.debug;
+
+import java.io.FileInputStream;
+import java.util.HashSet;
+
+import org.eclipse.jgit.pgm.Command;
+import org.eclipse.jgit.pgm.TextBuiltin;
+import org.eclipse.jgit.util.NB;
+import org.kohsuke.args4j.Argument;
+
+/**
+ * Prints the contents of the BDAT chunk from commit-graph file.
+ * <p>
+ * This is a debugging tool for changed path filter development.
+ */
+@Command
+class ReadChangedPathFilter extends TextBuiltin {
+
+ static final int CHUNK_ID_OID_FANOUT = 0x4f494446; /* "OIDF" */
+
+ static final int CHUNK_ID_BLOOM_FILTER_INDEX = 0x42494458; /* "BIDX" */
+
+ static final int CHUNK_ID_BLOOM_FILTER_DATA = 0x42444154; /* "BDAT" */
+
+ @Argument(index = 0)
+ private String input;
+
+ static HashSet<String> changedPathStrings(byte[] data) {
+ int oidf_offset = -1;
+ int bidx_offset = -1;
+ int bdat_offset = -1;
+ for (int i = 8; i < data.length - 4; i += 12) {
+ switch (NB.decodeInt32(data, i)) {
+ case CHUNK_ID_OID_FANOUT:
+ oidf_offset = (int) NB.decodeInt64(data, i + 4);
+ break;
+ case CHUNK_ID_BLOOM_FILTER_INDEX:
+ bidx_offset = (int) NB.decodeInt64(data, i + 4);
+ break;
+ case CHUNK_ID_BLOOM_FILTER_DATA:
+ bdat_offset = (int) NB.decodeInt64(data, i + 4);
+ break;
+ }
+ }
+ bdat_offset += 12; // skip version, hash count, bits per entry
+ int commit_count = NB.decodeInt32(data, oidf_offset + 255 * 4);
+ int[] changed_path_length_cumuls = new int[commit_count];
+ for (int i = 0; i < commit_count; i++) {
+ changed_path_length_cumuls[i] = NB.decodeInt32(data,
+ bidx_offset + i * 4);
+ }
+ HashSet<String> changed_paths = new HashSet<>();
+ for (int i = 0; i < commit_count; i++) {
+ int prior_cumul = i == 0 ? 0 : changed_path_length_cumuls[i - 1];
+ String changed_path = "";
+ for (int j = prior_cumul; j < changed_path_length_cumuls[i]; j++) {
+ changed_path += data[bdat_offset + j] + ",";
+ }
+ changed_paths.add(changed_path);
+ }
+ return changed_paths;
+ }
+
+ /** {@inheritDoc} */
+ @Override
+ protected void run() throws Exception {
+ try (FileInputStream in = new FileInputStream(input)
+ ) {
+ byte[] data = in.readAllBytes();
+ outw.println(changedPathStrings(data).toString());
+ }
+ }
+}
diff --git a/org.eclipse.jgit.test/BUILD b/org.eclipse.jgit.test/BUILD
index 9494c64e32..bb15de0b2d 100644
--- a/org.eclipse.jgit.test/BUILD
+++ b/org.eclipse.jgit.test/BUILD
@@ -77,6 +77,7 @@ java_library(
resources = DATA,
deps = [
"//lib:assertj-core",
+ "//lib:commons-codec",
"//lib:junit",
"//lib:mockito",
"//lib:slf4j-simple",
diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/commitgraph/CommitGraphWriterTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/commitgraph/CommitGraphWriterTest.java
index 6c5e5e5605..e7f49496ad 100644
--- a/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/commitgraph/CommitGraphWriterTest.java
+++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/commitgraph/CommitGraphWriterTest.java
@@ -10,19 +10,24 @@
package org.eclipse.jgit.internal.storage.commitgraph;
+import static org.hamcrest.MatcherAssert.assertThat;
+import static org.hamcrest.Matchers.containsInAnyOrder;
import static org.junit.Assert.assertArrayEquals;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import java.io.ByteArrayOutputStream;
import java.util.Collections;
+import java.util.HashSet;
import java.util.Set;
+import org.eclipse.jgit.dircache.DirCacheEntry;
import org.eclipse.jgit.internal.storage.file.FileRepository;
import org.eclipse.jgit.junit.RepositoryTestCase;
import org.eclipse.jgit.junit.TestRepository;
import org.eclipse.jgit.lib.NullProgressMonitor;
import org.eclipse.jgit.lib.ObjectId;
+import org.eclipse.jgit.revwalk.RevBlob;
import org.eclipse.jgit.revwalk.RevCommit;
import org.eclipse.jgit.revwalk.RevWalk;
import org.eclipse.jgit.util.NB;
@@ -76,11 +81,20 @@ public class CommitGraphWriterTest extends RepositoryTestCase {
assertTrue(data.length > 0);
byte[] headers = new byte[8];
System.arraycopy(data, 0, headers, 0, 8);
- assertArrayEquals(new byte[] {'C', 'G', 'P', 'H', 1, 1, 4, 0}, headers);
- assertEquals(CommitGraphConstants.CHUNK_ID_OID_FANOUT, NB.decodeInt32(data, 8));
- assertEquals(CommitGraphConstants.CHUNK_ID_OID_LOOKUP, NB.decodeInt32(data, 20));
- assertEquals(CommitGraphConstants.CHUNK_ID_COMMIT_DATA, NB.decodeInt32(data, 32));
- assertEquals(CommitGraphConstants.CHUNK_ID_EXTRA_EDGE_LIST, NB.decodeInt32(data, 44));
+ assertArrayEquals(new byte[] { 'C', 'G', 'P', 'H', 1, 1, 6, 0 },
+ headers);
+ assertEquals(CommitGraphConstants.CHUNK_ID_OID_FANOUT,
+ NB.decodeInt32(data, 8));
+ assertEquals(CommitGraphConstants.CHUNK_ID_OID_LOOKUP,
+ NB.decodeInt32(data, 20));
+ assertEquals(CommitGraphConstants.CHUNK_ID_COMMIT_DATA,
+ NB.decodeInt32(data, 32));
+ assertEquals(CommitGraphConstants.CHUNK_ID_EXTRA_EDGE_LIST,
+ NB.decodeInt32(data, 44));
+ assertEquals(CommitGraphConstants.CHUNK_ID_BLOOM_FILTER_INDEX,
+ NB.decodeInt32(data, 56));
+ assertEquals(CommitGraphConstants.CHUNK_ID_BLOOM_FILTER_DATA,
+ NB.decodeInt32(data, 68));
}
@Test
@@ -101,13 +115,208 @@ public class CommitGraphWriterTest extends RepositoryTestCase {
assertTrue(data.length > 0);
byte[] headers = new byte[8];
System.arraycopy(data, 0, headers, 0, 8);
- assertArrayEquals(new byte[] {'C', 'G', 'P', 'H', 1, 1, 3, 0}, headers);
- assertEquals(CommitGraphConstants.CHUNK_ID_OID_FANOUT, NB.decodeInt32(data, 8));
- assertEquals(CommitGraphConstants.CHUNK_ID_OID_LOOKUP, NB.decodeInt32(data, 20));
- assertEquals(CommitGraphConstants.CHUNK_ID_COMMIT_DATA, NB.decodeInt32(data, 32));
+ assertArrayEquals(new byte[] { 'C', 'G', 'P', 'H', 1, 1, 5, 0 },
+ headers);
+ assertEquals(CommitGraphConstants.CHUNK_ID_OID_FANOUT,
+ NB.decodeInt32(data, 8));
+ assertEquals(CommitGraphConstants.CHUNK_ID_OID_LOOKUP,
+ NB.decodeInt32(data, 20));
+ assertEquals(CommitGraphConstants.CHUNK_ID_COMMIT_DATA,
+ NB.decodeInt32(data, 32));
+ assertEquals(CommitGraphConstants.CHUNK_ID_BLOOM_FILTER_INDEX,
+ NB.decodeInt32(data, 44));
+ assertEquals(CommitGraphConstants.CHUNK_ID_BLOOM_FILTER_DATA,
+ NB.decodeInt32(data, 56));
+ }
+
+ static HashSet<String> changedPathStrings(byte[] data) {
+ int oidf_offset = -1;
+ int bidx_offset = -1;
+ int bdat_offset = -1;
+ for (int i = 8; i < data.length - 4; i += 12) {
+ switch (NB.decodeInt32(data, i)) {
+ case CommitGraphConstants.CHUNK_ID_OID_FANOUT:
+ oidf_offset = (int) NB.decodeInt64(data, i + 4);
+ break;
+ case CommitGraphConstants.CHUNK_ID_BLOOM_FILTER_INDEX:
+ bidx_offset = (int) NB.decodeInt64(data, i + 4);
+ break;
+ case CommitGraphConstants.CHUNK_ID_BLOOM_FILTER_DATA:
+ bdat_offset = (int) NB.decodeInt64(data, i + 4);
+ break;
+ }
+ }
+ assertTrue(oidf_offset > 0);
+ assertTrue(bidx_offset > 0);
+ assertTrue(bdat_offset > 0);
+ bdat_offset += 12; // skip version, hash count, bits per entry
+ int commit_count = NB.decodeInt32(data, oidf_offset + 255 * 4);
+ int[] changed_path_length_cumuls = new int[commit_count];
+ for (int i = 0; i < commit_count; i++) {
+ changed_path_length_cumuls[i] = NB.decodeInt32(data,
+ bidx_offset + i * 4);
+ }
+ HashSet<String> changed_paths = new HashSet<>();
+ for (int i = 0; i < commit_count; i++) {
+ int prior_cumul = i == 0 ? 0 : changed_path_length_cumuls[i - 1];
+ String changed_path = "";
+ for (int j = prior_cumul; j < changed_path_length_cumuls[i]; j++) {
+ changed_path += data[bdat_offset + j] + ",";
+ }
+ changed_paths.add(changed_path);
+ }
+ return changed_paths;
+ }
+
+ /**
+ * Expected value generated using the following:
+ *
+ * <pre>
+ * # apply into git-repo: https://lore.kernel.org/git/cover.1684790529.git.jonathantanmy@google.com/
+ * (cd git-repo; make)
+ * git-repo/bin-wrappers/git init tested
+ * (cd tested; touch foo.txt; mkdir -p onedir/twodir; touch onedir/twodir/bar.txt)
+ * git-repo/bin-wrappers/git -C tested add foo.txt onedir
+ * git-repo/bin-wrappers/git -C tested commit -m first_commit
+ * (cd tested; mv foo.txt foo-new.txt; mv onedir/twodir/bar.txt onedir/twodir/bar-new.txt)
+ * git-repo/bin-wrappers/git -C tested add foo-new.txt onedir
+ * git-repo/bin-wrappers/git -C tested commit -a -m second_commit
+ * git-repo/bin-wrappers/git -C tested maintenance run
+ * git-repo/bin-wrappers/git -C tested commit-graph write --changed-paths
+ * (cd tested; $JGIT debug-read-changed-path-filter .git/objects/info/commit-graph)
+ * </pre>
+ *
+ * @throws Exception
+ */
+ @Test
+ public void testChangedPathFilterRootAndNested() throws Exception {
+ RevBlob emptyBlob = tr.blob(new byte[] {});
+ RevCommit root = tr.commit(tr.tree(tr.file("foo.txt", emptyBlob),
+ tr.file("onedir/twodir/bar.txt", emptyBlob)));
+ RevCommit tip = tr.commit(tr.tree(tr.file("foo-new.txt", emptyBlob),
+ tr.file("onedir/twodir/bar-new.txt", emptyBlob)), root);
+
+ Set<ObjectId> wants = Collections.singleton(tip);
+ NullProgressMonitor m = NullProgressMonitor.INSTANCE;
+ GraphCommits graphCommits = GraphCommits.fromWalk(m, wants, walk);
+ writer = new CommitGraphWriter(graphCommits);
+ writer.write(m, os);
+
+ HashSet<String> changedPaths = changedPathStrings(os.toByteArray());
+ assertThat(changedPaths, containsInAnyOrder(
+ "109,-33,2,60,20,79,-11,116,",
+ "119,69,63,-8,0,"));
+ }
+
+ /**
+ * Expected value generated using the following:
+ *
+ * <pre>
+ * git -C git-repo checkout todo get version number when it is merged
+ * (cd git-repo; make)
+ * git-repo/bin-wrappers/git init tested
+ * (cd tested; mkdir -p onedir/twodir; touch onedir/twodir/a.txt; touch onedir/twodir/b.txt)
+ * git-repo/bin-wrappers/git -C tested add onedir
+ * git-repo/bin-wrappers/git -C tested commit -m first_commit
+ * (cd tested; mv onedir/twodir/a.txt onedir/twodir/c.txt; mv onedir/twodir/b.txt onedir/twodir/d.txt)
+ * git-repo/bin-wrappers/git -C tested add onedir
+ * git-repo/bin-wrappers/git -C tested commit -a -m second_commit
+ * git-repo/bin-wrappers/git -C tested maintenance run
+ * git-repo/bin-wrappers/git -C tested commit-graph write --changed-paths
+ * (cd tested; $JGIT debug-read-changed-path-filter .git/objects/info/commit-graph)
+ * </pre>
+ *
+ * @throws Exception
+ */
+ @Test
+ public void testChangedPathFilterOverlappingNested() throws Exception {
+ RevBlob emptyBlob = tr.blob(new byte[] {});
+ RevCommit root = tr
+ .commit(tr.tree(tr.file("onedir/twodir/a.txt", emptyBlob),
+ tr.file("onedir/twodir/b.txt", emptyBlob)));
+ RevCommit tip = tr
+ .commit(tr.tree(tr.file("onedir/twodir/c.txt", emptyBlob),
+ tr.file("onedir/twodir/d.txt", emptyBlob)), root);
+
+ Set<ObjectId> wants = Collections.singleton(tip);
+ NullProgressMonitor m = NullProgressMonitor.INSTANCE;
+ GraphCommits graphCommits = GraphCommits.fromWalk(m, wants, walk);
+ writer = new CommitGraphWriter(graphCommits);
+ writer.write(m, os);
+
+ HashSet<String> changedPaths = changedPathStrings(os.toByteArray());
+ assertThat(changedPaths, containsInAnyOrder("61,30,23,-24,1,",
+ "-58,-51,-46,60,29,-121,113,90,"));
+ }
+
+ /**
+ * Expected value generated using the following:
+ *
+ * <pre>
+ * git -C git-repo checkout todo get version number when it is merged
+ * (cd git-repo; make)
+ * git-repo/bin-wrappers/git init tested
+ * (cd tested; touch 你好)
+ * git-repo/bin-wrappers/git -C tested add 你好
+ * git-repo/bin-wrappers/git -C tested commit -m first_commit
+ * git-repo/bin-wrappers/git -C tested maintenance run
+ * git-repo/bin-wrappers/git -C tested commit-graph write --changed-paths
+ * (cd tested; $JGIT debug-read-changed-path-filter .git/objects/info/commit-graph)
+ * </pre>
+ *
+ * @throws Exception
+ */
+ @Test
+ public void testChangedPathFilterHighBit() throws Exception {
+ RevBlob emptyBlob = tr.blob(new byte[] {});
+ // tr.file encodes using UTF-8
+ RevCommit root = tr.commit(tr.tree(tr.file("你好", emptyBlob)));
+
+ Set<ObjectId> wants = Collections.singleton(root);
+ NullProgressMonitor m = NullProgressMonitor.INSTANCE;
+ GraphCommits graphCommits = GraphCommits.fromWalk(m, wants, walk);
+ writer = new CommitGraphWriter(graphCommits);
+ writer.write(m, os);
+
+ HashSet<String> changedPaths = changedPathStrings(os.toByteArray());
+ assertThat(changedPaths, containsInAnyOrder("16,16,"));
+ }
+
+ @Test
+ public void testChangedPathFilterEmptyChange() throws Exception {
+ RevCommit root = commit();
+
+ Set<ObjectId> wants = Collections.singleton(root);
+ NullProgressMonitor m = NullProgressMonitor.INSTANCE;
+ GraphCommits graphCommits = GraphCommits.fromWalk(m, wants, walk);
+ writer = new CommitGraphWriter(graphCommits);
+ writer.write(m, os);
+
+ HashSet<String> changedPaths = changedPathStrings(os.toByteArray());
+ assertThat(changedPaths, containsInAnyOrder("0,"));
+ }
+
+ @Test
+ public void testChangedPathFilterManyChanges() throws Exception {
+ RevBlob emptyBlob = tr.blob(new byte[] {});
+ DirCacheEntry[] entries = new DirCacheEntry[513];
+ for (int i = 0; i < entries.length; i++) {
+ entries[i] = tr.file(i + ".txt", emptyBlob);
+ }
+
+ RevCommit root = tr.commit(tr.tree(entries));
+
+ Set<ObjectId> wants = Collections.singleton(root);
+ NullProgressMonitor m = NullProgressMonitor.INSTANCE;
+ GraphCommits graphCommits = GraphCommits.fromWalk(m, wants, walk);
+ writer = new CommitGraphWriter(graphCommits);
+ writer.write(m, os);
+
+ HashSet<String> changedPaths = changedPathStrings(os.toByteArray());
+ assertThat(changedPaths, containsInAnyOrder("-1,"));
}
RevCommit commit(RevCommit... parents) throws Exception {
return tr.commit(parents);
}
-}
+} \ No newline at end of file
diff --git a/org.eclipse.jgit/BUILD b/org.eclipse.jgit/BUILD
index e806e7d6d0..1e9b7708a0 100644
--- a/org.eclipse.jgit/BUILD
+++ b/org.eclipse.jgit/BUILD
@@ -20,6 +20,7 @@ java_library(
resources = RESOURCES,
deps = [
":insecure_cipher_factory",
+ "//lib:commons-codec",
"//lib:javaewah",
"//lib:slf4j-api",
],
diff --git a/org.eclipse.jgit/pom.xml b/org.eclipse.jgit/pom.xml
index 9d4e5b51a9..23f5d2392c 100644
--- a/org.eclipse.jgit/pom.xml
+++ b/org.eclipse.jgit/pom.xml
@@ -46,6 +46,12 @@
<artifactId>slf4j-api</artifactId>
</dependency>
+ <dependency>
+ <groupId>commons-codec</groupId>
+ <artifactId>commons-codec</artifactId>
+ <version>1.15</version>
+ </dependency>
+
</dependencies>
<build>
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/commitgraph/ChangedPathFilter.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/commitgraph/ChangedPathFilter.java
new file mode 100644
index 0000000000..e9b8971ee3
--- /dev/null
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/commitgraph/ChangedPathFilter.java
@@ -0,0 +1,124 @@
+/*
+ * Copyright (C) 2023, Google LLC
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Distribution License v. 1.0 which is available at
+ * https://www.eclipse.org/org/documents/edl-v10.php.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+package org.eclipse.jgit.internal.storage.commitgraph;
+
+import java.io.ByteArrayOutputStream;
+import java.nio.ByteBuffer;
+import java.util.Set;
+
+import org.apache.commons.codec.digest.MurmurHash3;
+
+/**
+ * A changed path filter for a commit.
+ *
+ * @since 6.7
+ */
+public class ChangedPathFilter {
+ /**
+ * The number of times a path is hashed, as described in man
+ * gitformat-commit-graph(5). The value of this constant is the only value
+ * JGit currently supports.
+ */
+ public static final int PATH_HASH_COUNT = 7;
+
+ /**
+ * The minimum bits per entry, as described in man
+ * gitformat-commit-graph(5). The value of this constant is the only value
+ * JGit currently supports.
+ */
+ public static final int BITS_PER_ENTRY = 10;
+
+ /**
+ * Seed value as described in man gitformat-commit-graph(5).
+ */
+ private static final int SEED1 = 0x293ae76f;
+
+ /**
+ * Seed value as described in man gitformat-commit-graph(5).
+ */
+ private static final int SEED2 = 0x7e646e2c;
+
+ /**
+ * A filter that matches every path.
+ */
+ public static final ChangedPathFilter FULL = new ChangedPathFilter(
+ new byte[] { (byte) 0xff }, 0, 1);
+
+ private static final ChangedPathFilter EMPTY = new ChangedPathFilter(
+ new byte[] { (byte) 0 }, 0, 1);
+
+ private final byte[] data;
+
+ private final int offset;
+
+ private final int length;
+
+ /**
+ * Constructs a changed path filter.
+ *
+ * @param data
+ * data (possibly read from a commit graph file)
+ * @param offset
+ * offset into data
+ * @param length
+ * length of data
+ */
+ private ChangedPathFilter(byte[] data, int offset, int length) {
+ this.data = data;
+ this.offset = offset;
+ this.length = length;
+ }
+
+ /**
+ * Returns a filter that matches all given paths.
+ * <p>
+ * Because of the nature of Bloom filters, this filter may also match paths
+ * not in the given set.
+ *
+ * @param paths
+ * the paths that the filter must match
+ * @return the corresponding filter
+ */
+ public static ChangedPathFilter fromPaths(Set<ByteBuffer> paths) {
+ if (paths.isEmpty()) {
+ return EMPTY;
+ }
+ byte[] bloom = new byte[-Math
+ .floorDiv(-paths.size() * ChangedPathFilter.BITS_PER_ENTRY, 8)];
+ for (ByteBuffer path : paths) {
+ add(bloom, path.array(), path.position(),
+ path.limit() - path.position());
+ }
+ return new ChangedPathFilter(bloom, 0, bloom.length);
+ }
+
+ private static void add(byte[] changedPathFilterData, byte[] path,
+ int offset, int length) {
+
+ int hash0 = MurmurHash3.hash32x86(path, offset, length, SEED1);
+ int hash1 = MurmurHash3.hash32x86(path, offset, length, SEED2);
+ for (int i = 0; i < PATH_HASH_COUNT; i++) {
+ int pos = Integer.remainderUnsigned(hash0 + i * hash1,
+ changedPathFilterData.length * 8);
+ changedPathFilterData[pos / 8] |= (byte) (1 << (pos % 8));
+ }
+ }
+
+ /**
+ * Writes this filter to the given stream.
+ *
+ * @param s
+ * stream to write to
+ */
+ public void writeTo(ByteArrayOutputStream s) {
+ s.write(data, offset, length);
+ }
+}
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/commitgraph/CommitGraphConstants.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/commitgraph/CommitGraphConstants.java
index 422d3be728..8d2789cf52 100644
--- a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/commitgraph/CommitGraphConstants.java
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/commitgraph/CommitGraphConstants.java
@@ -25,6 +25,10 @@ class CommitGraphConstants {
static final int CHUNK_ID_EXTRA_EDGE_LIST = 0x45444745; /* "EDGE" */
+ static final int CHUNK_ID_BLOOM_FILTER_INDEX = 0x42494458; /* "BIDX" */
+
+ static final int CHUNK_ID_BLOOM_FILTER_DATA = 0x42444154; /* "BDAT" */
+
/**
* First 4 bytes describe the chunk id. Value 0 is a terminating label.
* Other 8 bytes provide the byte-offset in current file for chunk to start.
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/commitgraph/CommitGraphWriter.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/commitgraph/CommitGraphWriter.java
index 9a57f38a7d..f1b4f55299 100644
--- a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/commitgraph/CommitGraphWriter.java
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/commitgraph/CommitGraphWriter.java
@@ -10,6 +10,8 @@
package org.eclipse.jgit.internal.storage.commitgraph;
+import static org.eclipse.jgit.internal.storage.commitgraph.CommitGraphConstants.CHUNK_ID_BLOOM_FILTER_DATA;
+import static org.eclipse.jgit.internal.storage.commitgraph.CommitGraphConstants.CHUNK_ID_BLOOM_FILTER_INDEX;
import static org.eclipse.jgit.internal.storage.commitgraph.CommitGraphConstants.CHUNK_ID_COMMIT_DATA;
import static org.eclipse.jgit.internal.storage.commitgraph.CommitGraphConstants.CHUNK_ID_EXTRA_EDGE_LIST;
import static org.eclipse.jgit.internal.storage.commitgraph.CommitGraphConstants.CHUNK_ID_OID_FANOUT;
@@ -24,21 +26,30 @@ import static org.eclipse.jgit.lib.Constants.COMMIT_GENERATION_NOT_COMPUTED;
import static org.eclipse.jgit.lib.Constants.COMMIT_GENERATION_UNKNOWN;
import static org.eclipse.jgit.lib.Constants.OBJECT_ID_LENGTH;
+import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InterruptedIOException;
import java.io.OutputStream;
+import java.nio.ByteBuffer;
import java.text.MessageFormat;
import java.util.ArrayList;
+import java.util.HashSet;
import java.util.List;
+import java.util.Optional;
import java.util.Stack;
import org.eclipse.jgit.annotations.NonNull;
+import org.eclipse.jgit.errors.CorruptObjectException;
+import org.eclipse.jgit.errors.IncorrectObjectTypeException;
import org.eclipse.jgit.errors.MissingObjectException;
import org.eclipse.jgit.internal.JGitText;
import org.eclipse.jgit.internal.storage.io.CancellableDigestOutputStream;
import org.eclipse.jgit.lib.ObjectId;
+import org.eclipse.jgit.lib.ObjectReader;
import org.eclipse.jgit.lib.ProgressMonitor;
import org.eclipse.jgit.revwalk.RevCommit;
+import org.eclipse.jgit.treewalk.EmptyTreeIterator;
+import org.eclipse.jgit.treewalk.TreeWalk;
import org.eclipse.jgit.util.NB;
/**
@@ -56,6 +67,8 @@ public class CommitGraphWriter {
private static final int GENERATION_NUMBER_MAX = 0x3FFFFFFF;
+ private static final int MAX_CHANGED_PATHS = 512;
+
private final int hashsz;
private final GraphCommits graphCommits;
@@ -111,7 +124,8 @@ public class CommitGraphWriter {
}
}
- private List<ChunkHeader> createChunks() {
+ private List<ChunkHeader> createChunks() throws MissingObjectException,
+ IncorrectObjectTypeException, CorruptObjectException, IOException {
List<ChunkHeader> chunks = new ArrayList<>();
chunks.add(new ChunkHeader(CHUNK_ID_OID_FANOUT, GRAPH_FANOUT_SIZE));
chunks.add(new ChunkHeader(CHUNK_ID_OID_LOOKUP,
@@ -122,6 +136,11 @@ public class CommitGraphWriter {
chunks.add(new ChunkHeader(CHUNK_ID_EXTRA_EDGE_LIST,
graphCommits.getExtraEdgeCnt() * 4));
}
+ BloomFilterChunks bloomFilterChunks = computeBloomFilterChunks();
+ chunks.add(new ChunkHeader(CHUNK_ID_BLOOM_FILTER_INDEX,
+ bloomFilterChunks.index));
+ chunks.add(new ChunkHeader(CHUNK_ID_BLOOM_FILTER_DATA,
+ bloomFilterChunks.data));
return chunks;
}
@@ -171,6 +190,14 @@ public class CommitGraphWriter {
case CHUNK_ID_EXTRA_EDGE_LIST:
writeExtraEdges(out);
break;
+ case CHUNK_ID_BLOOM_FILTER_INDEX:
+ case CHUNK_ID_BLOOM_FILTER_DATA:
+ if (!chunk.data.isPresent()) {
+ throw new IllegalStateException(
+ "data for this chunk must be precomputed"); //$NON-NLS-1$
+ }
+ chunk.data.get().writeTo(out);
+ break;
}
}
}
@@ -305,6 +332,72 @@ public class CommitGraphWriter {
return generations;
}
+ private static Optional<HashSet<ByteBuffer>> computeBloomFilterPaths(
+ ObjectReader or, RevCommit cmit) throws MissingObjectException,
+ IncorrectObjectTypeException, CorruptObjectException, IOException {
+ HashSet<ByteBuffer> paths = new HashSet<>();
+ try (TreeWalk walk = new TreeWalk(null, or)) {
+ walk.setRecursive(true);
+ if (cmit.getParentCount() == 0) {
+ walk.addTree(new EmptyTreeIterator());
+ } else {
+ walk.addTree(cmit.getParent(0).getTree());
+ }
+ walk.addTree(cmit.getTree());
+ while (walk.next()) {
+ if (walk.idEqual(0, 1)) {
+ continue;
+ }
+ byte[] rawPath = walk.getRawPath();
+ paths.add(ByteBuffer.wrap(rawPath));
+ for (int i = 0; i < rawPath.length; i++) {
+ if (rawPath[i] == '/') {
+ paths.add(ByteBuffer.wrap(rawPath, 0, i));
+ }
+ if (paths.size() > MAX_CHANGED_PATHS) {
+ return Optional.empty();
+ }
+ }
+ }
+ }
+ return Optional.of(paths);
+ }
+
+ private BloomFilterChunks computeBloomFilterChunks()
+ throws MissingObjectException, IncorrectObjectTypeException,
+ CorruptObjectException, IOException {
+
+ ByteArrayOutputStream index = new ByteArrayOutputStream();
+ ByteArrayOutputStream data = new ByteArrayOutputStream();
+
+ // Allocate scratch buffer for converting integers into
+ // big-endian bytes.
+ byte[] scratch = new byte[4];
+
+ NB.encodeInt32(scratch, 0, 1); // version 1
+ data.write(scratch);
+ NB.encodeInt32(scratch, 0, ChangedPathFilter.PATH_HASH_COUNT);
+ data.write(scratch);
+ NB.encodeInt32(scratch, 0, ChangedPathFilter.BITS_PER_ENTRY);
+ data.write(scratch);
+ int dataHeaderSize = data.size();
+
+ for (RevCommit cmit : graphCommits) {
+ Optional<HashSet<ByteBuffer>> paths = computeBloomFilterPaths(
+ graphCommits.getObjectReader(), cmit);
+ ChangedPathFilter cpf;
+ if (paths.isEmpty()) {
+ cpf = ChangedPathFilter.FULL;
+ } else {
+ cpf = ChangedPathFilter.fromPaths(paths.get());
+ }
+ cpf.writeTo(data);
+ NB.encodeInt32(scratch, 0, data.size() - dataHeaderSize);
+ index.write(scratch);
+ }
+ return new BloomFilterChunks(index, data);
+ }
+
private void writeExtraEdges(CancellableDigestOutputStream out)
throws IOException {
byte[] tmp = new byte[4];
@@ -331,9 +424,30 @@ public class CommitGraphWriter {
final long size;
+ final Optional<ByteArrayOutputStream> data;
+
public ChunkHeader(int id, long size) {
this.id = id;
this.size = size;
+ this.data = Optional.empty();
+ }
+
+ ChunkHeader(int id, ByteArrayOutputStream data) {
+ this.id = id;
+ this.size = data.size();
+ this.data = Optional.of(data);
+ }
+ }
+
+ private static class BloomFilterChunks {
+ final ByteArrayOutputStream index;
+
+ final ByteArrayOutputStream data;
+
+ BloomFilterChunks(ByteArrayOutputStream index,
+ ByteArrayOutputStream data) {
+ this.index = index;
+ this.data = data;
}
}
}
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/commitgraph/GraphCommits.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/commitgraph/GraphCommits.java
index a9a603ff57..c77d950377 100644
--- a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/commitgraph/GraphCommits.java
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/commitgraph/GraphCommits.java
@@ -23,6 +23,7 @@ import org.eclipse.jgit.lib.AnyObjectId;
import org.eclipse.jgit.lib.Constants;
import org.eclipse.jgit.lib.ObjectId;
import org.eclipse.jgit.lib.ObjectIdOwnerMap;
+import org.eclipse.jgit.lib.ObjectReader;
import org.eclipse.jgit.lib.ProgressMonitor;
import org.eclipse.jgit.revwalk.RevCommit;
import org.eclipse.jgit.revwalk.RevObject;
@@ -79,7 +80,7 @@ public class GraphCommits implements Iterable<RevCommit> {
commits.add(c);
}
pm.endTask();
- return new GraphCommits(commits);
+ return new GraphCommits(commits, walk.getObjectReader());
}
private final List<RevCommit> sortedCommits;
@@ -88,13 +89,17 @@ public class GraphCommits implements Iterable<RevCommit> {
private final int extraEdgeCnt;
+ private final ObjectReader objectReader;
+
/**
* Initialize the GraphCommits.
*
* @param commits
* list of commits with their headers already parsed.
+ * @param objectReader
+ * object reader
*/
- private GraphCommits(List<RevCommit> commits) {
+ private GraphCommits(List<RevCommit> commits, ObjectReader objectReader) {
Collections.sort(commits); // sorted by name
sortedCommits = commits;
commitPosMap = new ObjectIdOwnerMap<>();
@@ -107,6 +112,7 @@ public class GraphCommits implements Iterable<RevCommit> {
commitPosMap.add(new CommitWithPosition(c, i));
}
this.extraEdgeCnt = cnt;
+ this.objectReader = objectReader;
}
int getOidPosition(RevCommit c) throws MissingObjectException {
@@ -125,6 +131,10 @@ public class GraphCommits implements Iterable<RevCommit> {
return sortedCommits.size();
}
+ ObjectReader getObjectReader() {
+ return objectReader;
+ }
+
@Override
public Iterator<RevCommit> iterator() {
return sortedCommits.iterator();