diff options
author | Jonathan Tan <jonathantanmy@google.com> | 2023-04-18 15:20:02 -0700 |
---|---|---|
committer | Jonathan Tan <jonathantanmy@google.com> | 2023-07-18 14:21:48 -0700 |
commit | 49beb5ae519e429d8868b8afe24007ae2d17003a (patch) | |
tree | 2f62e063dae6bd9d0f0eb040160362913a1cf5b5 | |
parent | 5dc63514d07fbbbd55a287047d756984169cb23d (diff) | |
download | jgit-49beb5ae519e429d8868b8afe24007ae2d17003a.tar.gz jgit-49beb5ae519e429d8868b8afe24007ae2d17003a.zip |
CommitGraphWriter: write changed-path filters
Add support for writing the BIDX and BDAT chunks of the commit graph
file, as described in man gitformat-commit-graph(5). The ability to read
such chunks will be added in a subsequent commit.
This work is based on earlier work by Kyle Zhao
(Ib863782af209f26381e3ca0a2c119b99e84b679c).
Change-Id: Ic18e6f0eeec7da1e1ff31751aabda5e6952dbe6e
Signed-off-by: kylezhao <kylezhao@tencent.com>
Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
11 files changed, 570 insertions, 13 deletions
@@ -21,6 +21,10 @@ java_library( java_library( name = "commons-codec", + visibility = [ + "//org.eclipse.jgit:__pkg__", + "//org.eclipse.jgit.test:__pkg__", + ], exports = ["@commons-codec//jar"], ) diff --git a/org.eclipse.jgit.pgm/META-INF/services/org.eclipse.jgit.pgm.TextBuiltin b/org.eclipse.jgit.pgm/META-INF/services/org.eclipse.jgit.pgm.TextBuiltin index ea1d1e3faa..08d37278de 100644 --- a/org.eclipse.jgit.pgm/META-INF/services/org.eclipse.jgit.pgm.TextBuiltin +++ b/org.eclipse.jgit.pgm/META-INF/services/org.eclipse.jgit.pgm.TextBuiltin @@ -46,6 +46,7 @@ org.eclipse.jgit.pgm.debug.BenchmarkReftable org.eclipse.jgit.pgm.debug.DiffAlgorithms org.eclipse.jgit.pgm.debug.LfsStore org.eclipse.jgit.pgm.debug.MakeCacheTree +org.eclipse.jgit.pgm.debug.ReadChangedPathFilter org.eclipse.jgit.pgm.debug.ReadDirCache org.eclipse.jgit.pgm.debug.ReadReftable org.eclipse.jgit.pgm.debug.RebuildCommitGraph diff --git a/org.eclipse.jgit.pgm/src/org/eclipse/jgit/pgm/debug/ReadChangedPathFilter.java b/org.eclipse.jgit.pgm/src/org/eclipse/jgit/pgm/debug/ReadChangedPathFilter.java new file mode 100644 index 0000000000..6927de84e4 --- /dev/null +++ b/org.eclipse.jgit.pgm/src/org/eclipse/jgit/pgm/debug/ReadChangedPathFilter.java @@ -0,0 +1,83 @@ +/* + * Copyright (C) 2023, Google LLC + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Distribution License v. 1.0 which is available at + * https://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +package org.eclipse.jgit.pgm.debug; + +import java.io.FileInputStream; +import java.util.HashSet; + +import org.eclipse.jgit.pgm.Command; +import org.eclipse.jgit.pgm.TextBuiltin; +import org.eclipse.jgit.util.NB; +import org.kohsuke.args4j.Argument; + +/** + * Prints the contents of the BDAT chunk from commit-graph file. + * <p> + * This is a debugging tool for changed path filter development. + */ +@Command +class ReadChangedPathFilter extends TextBuiltin { + + static final int CHUNK_ID_OID_FANOUT = 0x4f494446; /* "OIDF" */ + + static final int CHUNK_ID_BLOOM_FILTER_INDEX = 0x42494458; /* "BIDX" */ + + static final int CHUNK_ID_BLOOM_FILTER_DATA = 0x42444154; /* "BDAT" */ + + @Argument(index = 0) + private String input; + + static HashSet<String> changedPathStrings(byte[] data) { + int oidf_offset = -1; + int bidx_offset = -1; + int bdat_offset = -1; + for (int i = 8; i < data.length - 4; i += 12) { + switch (NB.decodeInt32(data, i)) { + case CHUNK_ID_OID_FANOUT: + oidf_offset = (int) NB.decodeInt64(data, i + 4); + break; + case CHUNK_ID_BLOOM_FILTER_INDEX: + bidx_offset = (int) NB.decodeInt64(data, i + 4); + break; + case CHUNK_ID_BLOOM_FILTER_DATA: + bdat_offset = (int) NB.decodeInt64(data, i + 4); + break; + } + } + bdat_offset += 12; // skip version, hash count, bits per entry + int commit_count = NB.decodeInt32(data, oidf_offset + 255 * 4); + int[] changed_path_length_cumuls = new int[commit_count]; + for (int i = 0; i < commit_count; i++) { + changed_path_length_cumuls[i] = NB.decodeInt32(data, + bidx_offset + i * 4); + } + HashSet<String> changed_paths = new HashSet<>(); + for (int i = 0; i < commit_count; i++) { + int prior_cumul = i == 0 ? 0 : changed_path_length_cumuls[i - 1]; + String changed_path = ""; + for (int j = prior_cumul; j < changed_path_length_cumuls[i]; j++) { + changed_path += data[bdat_offset + j] + ","; + } + changed_paths.add(changed_path); + } + return changed_paths; + } + + /** {@inheritDoc} */ + @Override + protected void run() throws Exception { + try (FileInputStream in = new FileInputStream(input) + ) { + byte[] data = in.readAllBytes(); + outw.println(changedPathStrings(data).toString()); + } + } +} diff --git a/org.eclipse.jgit.test/BUILD b/org.eclipse.jgit.test/BUILD index 9494c64e32..bb15de0b2d 100644 --- a/org.eclipse.jgit.test/BUILD +++ b/org.eclipse.jgit.test/BUILD @@ -77,6 +77,7 @@ java_library( resources = DATA, deps = [ "//lib:assertj-core", + "//lib:commons-codec", "//lib:junit", "//lib:mockito", "//lib:slf4j-simple", diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/commitgraph/CommitGraphWriterTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/commitgraph/CommitGraphWriterTest.java index 6c5e5e5605..e7f49496ad 100644 --- a/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/commitgraph/CommitGraphWriterTest.java +++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/commitgraph/CommitGraphWriterTest.java @@ -10,19 +10,24 @@ package org.eclipse.jgit.internal.storage.commitgraph; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.containsInAnyOrder; import static org.junit.Assert.assertArrayEquals; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; import java.io.ByteArrayOutputStream; import java.util.Collections; +import java.util.HashSet; import java.util.Set; +import org.eclipse.jgit.dircache.DirCacheEntry; import org.eclipse.jgit.internal.storage.file.FileRepository; import org.eclipse.jgit.junit.RepositoryTestCase; import org.eclipse.jgit.junit.TestRepository; import org.eclipse.jgit.lib.NullProgressMonitor; import org.eclipse.jgit.lib.ObjectId; +import org.eclipse.jgit.revwalk.RevBlob; import org.eclipse.jgit.revwalk.RevCommit; import org.eclipse.jgit.revwalk.RevWalk; import org.eclipse.jgit.util.NB; @@ -76,11 +81,20 @@ public class CommitGraphWriterTest extends RepositoryTestCase { assertTrue(data.length > 0); byte[] headers = new byte[8]; System.arraycopy(data, 0, headers, 0, 8); - assertArrayEquals(new byte[] {'C', 'G', 'P', 'H', 1, 1, 4, 0}, headers); - assertEquals(CommitGraphConstants.CHUNK_ID_OID_FANOUT, NB.decodeInt32(data, 8)); - assertEquals(CommitGraphConstants.CHUNK_ID_OID_LOOKUP, NB.decodeInt32(data, 20)); - assertEquals(CommitGraphConstants.CHUNK_ID_COMMIT_DATA, NB.decodeInt32(data, 32)); - assertEquals(CommitGraphConstants.CHUNK_ID_EXTRA_EDGE_LIST, NB.decodeInt32(data, 44)); + assertArrayEquals(new byte[] { 'C', 'G', 'P', 'H', 1, 1, 6, 0 }, + headers); + assertEquals(CommitGraphConstants.CHUNK_ID_OID_FANOUT, + NB.decodeInt32(data, 8)); + assertEquals(CommitGraphConstants.CHUNK_ID_OID_LOOKUP, + NB.decodeInt32(data, 20)); + assertEquals(CommitGraphConstants.CHUNK_ID_COMMIT_DATA, + NB.decodeInt32(data, 32)); + assertEquals(CommitGraphConstants.CHUNK_ID_EXTRA_EDGE_LIST, + NB.decodeInt32(data, 44)); + assertEquals(CommitGraphConstants.CHUNK_ID_BLOOM_FILTER_INDEX, + NB.decodeInt32(data, 56)); + assertEquals(CommitGraphConstants.CHUNK_ID_BLOOM_FILTER_DATA, + NB.decodeInt32(data, 68)); } @Test @@ -101,13 +115,208 @@ public class CommitGraphWriterTest extends RepositoryTestCase { assertTrue(data.length > 0); byte[] headers = new byte[8]; System.arraycopy(data, 0, headers, 0, 8); - assertArrayEquals(new byte[] {'C', 'G', 'P', 'H', 1, 1, 3, 0}, headers); - assertEquals(CommitGraphConstants.CHUNK_ID_OID_FANOUT, NB.decodeInt32(data, 8)); - assertEquals(CommitGraphConstants.CHUNK_ID_OID_LOOKUP, NB.decodeInt32(data, 20)); - assertEquals(CommitGraphConstants.CHUNK_ID_COMMIT_DATA, NB.decodeInt32(data, 32)); + assertArrayEquals(new byte[] { 'C', 'G', 'P', 'H', 1, 1, 5, 0 }, + headers); + assertEquals(CommitGraphConstants.CHUNK_ID_OID_FANOUT, + NB.decodeInt32(data, 8)); + assertEquals(CommitGraphConstants.CHUNK_ID_OID_LOOKUP, + NB.decodeInt32(data, 20)); + assertEquals(CommitGraphConstants.CHUNK_ID_COMMIT_DATA, + NB.decodeInt32(data, 32)); + assertEquals(CommitGraphConstants.CHUNK_ID_BLOOM_FILTER_INDEX, + NB.decodeInt32(data, 44)); + assertEquals(CommitGraphConstants.CHUNK_ID_BLOOM_FILTER_DATA, + NB.decodeInt32(data, 56)); + } + + static HashSet<String> changedPathStrings(byte[] data) { + int oidf_offset = -1; + int bidx_offset = -1; + int bdat_offset = -1; + for (int i = 8; i < data.length - 4; i += 12) { + switch (NB.decodeInt32(data, i)) { + case CommitGraphConstants.CHUNK_ID_OID_FANOUT: + oidf_offset = (int) NB.decodeInt64(data, i + 4); + break; + case CommitGraphConstants.CHUNK_ID_BLOOM_FILTER_INDEX: + bidx_offset = (int) NB.decodeInt64(data, i + 4); + break; + case CommitGraphConstants.CHUNK_ID_BLOOM_FILTER_DATA: + bdat_offset = (int) NB.decodeInt64(data, i + 4); + break; + } + } + assertTrue(oidf_offset > 0); + assertTrue(bidx_offset > 0); + assertTrue(bdat_offset > 0); + bdat_offset += 12; // skip version, hash count, bits per entry + int commit_count = NB.decodeInt32(data, oidf_offset + 255 * 4); + int[] changed_path_length_cumuls = new int[commit_count]; + for (int i = 0; i < commit_count; i++) { + changed_path_length_cumuls[i] = NB.decodeInt32(data, + bidx_offset + i * 4); + } + HashSet<String> changed_paths = new HashSet<>(); + for (int i = 0; i < commit_count; i++) { + int prior_cumul = i == 0 ? 0 : changed_path_length_cumuls[i - 1]; + String changed_path = ""; + for (int j = prior_cumul; j < changed_path_length_cumuls[i]; j++) { + changed_path += data[bdat_offset + j] + ","; + } + changed_paths.add(changed_path); + } + return changed_paths; + } + + /** + * Expected value generated using the following: + * + * <pre> + * # apply into git-repo: https://lore.kernel.org/git/cover.1684790529.git.jonathantanmy@google.com/ + * (cd git-repo; make) + * git-repo/bin-wrappers/git init tested + * (cd tested; touch foo.txt; mkdir -p onedir/twodir; touch onedir/twodir/bar.txt) + * git-repo/bin-wrappers/git -C tested add foo.txt onedir + * git-repo/bin-wrappers/git -C tested commit -m first_commit + * (cd tested; mv foo.txt foo-new.txt; mv onedir/twodir/bar.txt onedir/twodir/bar-new.txt) + * git-repo/bin-wrappers/git -C tested add foo-new.txt onedir + * git-repo/bin-wrappers/git -C tested commit -a -m second_commit + * git-repo/bin-wrappers/git -C tested maintenance run + * git-repo/bin-wrappers/git -C tested commit-graph write --changed-paths + * (cd tested; $JGIT debug-read-changed-path-filter .git/objects/info/commit-graph) + * </pre> + * + * @throws Exception + */ + @Test + public void testChangedPathFilterRootAndNested() throws Exception { + RevBlob emptyBlob = tr.blob(new byte[] {}); + RevCommit root = tr.commit(tr.tree(tr.file("foo.txt", emptyBlob), + tr.file("onedir/twodir/bar.txt", emptyBlob))); + RevCommit tip = tr.commit(tr.tree(tr.file("foo-new.txt", emptyBlob), + tr.file("onedir/twodir/bar-new.txt", emptyBlob)), root); + + Set<ObjectId> wants = Collections.singleton(tip); + NullProgressMonitor m = NullProgressMonitor.INSTANCE; + GraphCommits graphCommits = GraphCommits.fromWalk(m, wants, walk); + writer = new CommitGraphWriter(graphCommits); + writer.write(m, os); + + HashSet<String> changedPaths = changedPathStrings(os.toByteArray()); + assertThat(changedPaths, containsInAnyOrder( + "109,-33,2,60,20,79,-11,116,", + "119,69,63,-8,0,")); + } + + /** + * Expected value generated using the following: + * + * <pre> + * git -C git-repo checkout todo get version number when it is merged + * (cd git-repo; make) + * git-repo/bin-wrappers/git init tested + * (cd tested; mkdir -p onedir/twodir; touch onedir/twodir/a.txt; touch onedir/twodir/b.txt) + * git-repo/bin-wrappers/git -C tested add onedir + * git-repo/bin-wrappers/git -C tested commit -m first_commit + * (cd tested; mv onedir/twodir/a.txt onedir/twodir/c.txt; mv onedir/twodir/b.txt onedir/twodir/d.txt) + * git-repo/bin-wrappers/git -C tested add onedir + * git-repo/bin-wrappers/git -C tested commit -a -m second_commit + * git-repo/bin-wrappers/git -C tested maintenance run + * git-repo/bin-wrappers/git -C tested commit-graph write --changed-paths + * (cd tested; $JGIT debug-read-changed-path-filter .git/objects/info/commit-graph) + * </pre> + * + * @throws Exception + */ + @Test + public void testChangedPathFilterOverlappingNested() throws Exception { + RevBlob emptyBlob = tr.blob(new byte[] {}); + RevCommit root = tr + .commit(tr.tree(tr.file("onedir/twodir/a.txt", emptyBlob), + tr.file("onedir/twodir/b.txt", emptyBlob))); + RevCommit tip = tr + .commit(tr.tree(tr.file("onedir/twodir/c.txt", emptyBlob), + tr.file("onedir/twodir/d.txt", emptyBlob)), root); + + Set<ObjectId> wants = Collections.singleton(tip); + NullProgressMonitor m = NullProgressMonitor.INSTANCE; + GraphCommits graphCommits = GraphCommits.fromWalk(m, wants, walk); + writer = new CommitGraphWriter(graphCommits); + writer.write(m, os); + + HashSet<String> changedPaths = changedPathStrings(os.toByteArray()); + assertThat(changedPaths, containsInAnyOrder("61,30,23,-24,1,", + "-58,-51,-46,60,29,-121,113,90,")); + } + + /** + * Expected value generated using the following: + * + * <pre> + * git -C git-repo checkout todo get version number when it is merged + * (cd git-repo; make) + * git-repo/bin-wrappers/git init tested + * (cd tested; touch 你好) + * git-repo/bin-wrappers/git -C tested add 你好 + * git-repo/bin-wrappers/git -C tested commit -m first_commit + * git-repo/bin-wrappers/git -C tested maintenance run + * git-repo/bin-wrappers/git -C tested commit-graph write --changed-paths + * (cd tested; $JGIT debug-read-changed-path-filter .git/objects/info/commit-graph) + * </pre> + * + * @throws Exception + */ + @Test + public void testChangedPathFilterHighBit() throws Exception { + RevBlob emptyBlob = tr.blob(new byte[] {}); + // tr.file encodes using UTF-8 + RevCommit root = tr.commit(tr.tree(tr.file("你好", emptyBlob))); + + Set<ObjectId> wants = Collections.singleton(root); + NullProgressMonitor m = NullProgressMonitor.INSTANCE; + GraphCommits graphCommits = GraphCommits.fromWalk(m, wants, walk); + writer = new CommitGraphWriter(graphCommits); + writer.write(m, os); + + HashSet<String> changedPaths = changedPathStrings(os.toByteArray()); + assertThat(changedPaths, containsInAnyOrder("16,16,")); + } + + @Test + public void testChangedPathFilterEmptyChange() throws Exception { + RevCommit root = commit(); + + Set<ObjectId> wants = Collections.singleton(root); + NullProgressMonitor m = NullProgressMonitor.INSTANCE; + GraphCommits graphCommits = GraphCommits.fromWalk(m, wants, walk); + writer = new CommitGraphWriter(graphCommits); + writer.write(m, os); + + HashSet<String> changedPaths = changedPathStrings(os.toByteArray()); + assertThat(changedPaths, containsInAnyOrder("0,")); + } + + @Test + public void testChangedPathFilterManyChanges() throws Exception { + RevBlob emptyBlob = tr.blob(new byte[] {}); + DirCacheEntry[] entries = new DirCacheEntry[513]; + for (int i = 0; i < entries.length; i++) { + entries[i] = tr.file(i + ".txt", emptyBlob); + } + + RevCommit root = tr.commit(tr.tree(entries)); + + Set<ObjectId> wants = Collections.singleton(root); + NullProgressMonitor m = NullProgressMonitor.INSTANCE; + GraphCommits graphCommits = GraphCommits.fromWalk(m, wants, walk); + writer = new CommitGraphWriter(graphCommits); + writer.write(m, os); + + HashSet<String> changedPaths = changedPathStrings(os.toByteArray()); + assertThat(changedPaths, containsInAnyOrder("-1,")); } RevCommit commit(RevCommit... parents) throws Exception { return tr.commit(parents); } -} +}
\ No newline at end of file diff --git a/org.eclipse.jgit/BUILD b/org.eclipse.jgit/BUILD index e806e7d6d0..1e9b7708a0 100644 --- a/org.eclipse.jgit/BUILD +++ b/org.eclipse.jgit/BUILD @@ -20,6 +20,7 @@ java_library( resources = RESOURCES, deps = [ ":insecure_cipher_factory", + "//lib:commons-codec", "//lib:javaewah", "//lib:slf4j-api", ], diff --git a/org.eclipse.jgit/pom.xml b/org.eclipse.jgit/pom.xml index 9d4e5b51a9..23f5d2392c 100644 --- a/org.eclipse.jgit/pom.xml +++ b/org.eclipse.jgit/pom.xml @@ -46,6 +46,12 @@ <artifactId>slf4j-api</artifactId> </dependency> + <dependency> + <groupId>commons-codec</groupId> + <artifactId>commons-codec</artifactId> + <version>1.15</version> + </dependency> + </dependencies> <build> diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/commitgraph/ChangedPathFilter.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/commitgraph/ChangedPathFilter.java new file mode 100644 index 0000000000..e9b8971ee3 --- /dev/null +++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/commitgraph/ChangedPathFilter.java @@ -0,0 +1,124 @@ +/* + * Copyright (C) 2023, Google LLC + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Distribution License v. 1.0 which is available at + * https://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +package org.eclipse.jgit.internal.storage.commitgraph; + +import java.io.ByteArrayOutputStream; +import java.nio.ByteBuffer; +import java.util.Set; + +import org.apache.commons.codec.digest.MurmurHash3; + +/** + * A changed path filter for a commit. + * + * @since 6.7 + */ +public class ChangedPathFilter { + /** + * The number of times a path is hashed, as described in man + * gitformat-commit-graph(5). The value of this constant is the only value + * JGit currently supports. + */ + public static final int PATH_HASH_COUNT = 7; + + /** + * The minimum bits per entry, as described in man + * gitformat-commit-graph(5). The value of this constant is the only value + * JGit currently supports. + */ + public static final int BITS_PER_ENTRY = 10; + + /** + * Seed value as described in man gitformat-commit-graph(5). + */ + private static final int SEED1 = 0x293ae76f; + + /** + * Seed value as described in man gitformat-commit-graph(5). + */ + private static final int SEED2 = 0x7e646e2c; + + /** + * A filter that matches every path. + */ + public static final ChangedPathFilter FULL = new ChangedPathFilter( + new byte[] { (byte) 0xff }, 0, 1); + + private static final ChangedPathFilter EMPTY = new ChangedPathFilter( + new byte[] { (byte) 0 }, 0, 1); + + private final byte[] data; + + private final int offset; + + private final int length; + + /** + * Constructs a changed path filter. + * + * @param data + * data (possibly read from a commit graph file) + * @param offset + * offset into data + * @param length + * length of data + */ + private ChangedPathFilter(byte[] data, int offset, int length) { + this.data = data; + this.offset = offset; + this.length = length; + } + + /** + * Returns a filter that matches all given paths. + * <p> + * Because of the nature of Bloom filters, this filter may also match paths + * not in the given set. + * + * @param paths + * the paths that the filter must match + * @return the corresponding filter + */ + public static ChangedPathFilter fromPaths(Set<ByteBuffer> paths) { + if (paths.isEmpty()) { + return EMPTY; + } + byte[] bloom = new byte[-Math + .floorDiv(-paths.size() * ChangedPathFilter.BITS_PER_ENTRY, 8)]; + for (ByteBuffer path : paths) { + add(bloom, path.array(), path.position(), + path.limit() - path.position()); + } + return new ChangedPathFilter(bloom, 0, bloom.length); + } + + private static void add(byte[] changedPathFilterData, byte[] path, + int offset, int length) { + + int hash0 = MurmurHash3.hash32x86(path, offset, length, SEED1); + int hash1 = MurmurHash3.hash32x86(path, offset, length, SEED2); + for (int i = 0; i < PATH_HASH_COUNT; i++) { + int pos = Integer.remainderUnsigned(hash0 + i * hash1, + changedPathFilterData.length * 8); + changedPathFilterData[pos / 8] |= (byte) (1 << (pos % 8)); + } + } + + /** + * Writes this filter to the given stream. + * + * @param s + * stream to write to + */ + public void writeTo(ByteArrayOutputStream s) { + s.write(data, offset, length); + } +} diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/commitgraph/CommitGraphConstants.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/commitgraph/CommitGraphConstants.java index 422d3be728..8d2789cf52 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/commitgraph/CommitGraphConstants.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/commitgraph/CommitGraphConstants.java @@ -25,6 +25,10 @@ class CommitGraphConstants { static final int CHUNK_ID_EXTRA_EDGE_LIST = 0x45444745; /* "EDGE" */ + static final int CHUNK_ID_BLOOM_FILTER_INDEX = 0x42494458; /* "BIDX" */ + + static final int CHUNK_ID_BLOOM_FILTER_DATA = 0x42444154; /* "BDAT" */ + /** * First 4 bytes describe the chunk id. Value 0 is a terminating label. * Other 8 bytes provide the byte-offset in current file for chunk to start. diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/commitgraph/CommitGraphWriter.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/commitgraph/CommitGraphWriter.java index 9a57f38a7d..f1b4f55299 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/commitgraph/CommitGraphWriter.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/commitgraph/CommitGraphWriter.java @@ -10,6 +10,8 @@ package org.eclipse.jgit.internal.storage.commitgraph; +import static org.eclipse.jgit.internal.storage.commitgraph.CommitGraphConstants.CHUNK_ID_BLOOM_FILTER_DATA; +import static org.eclipse.jgit.internal.storage.commitgraph.CommitGraphConstants.CHUNK_ID_BLOOM_FILTER_INDEX; import static org.eclipse.jgit.internal.storage.commitgraph.CommitGraphConstants.CHUNK_ID_COMMIT_DATA; import static org.eclipse.jgit.internal.storage.commitgraph.CommitGraphConstants.CHUNK_ID_EXTRA_EDGE_LIST; import static org.eclipse.jgit.internal.storage.commitgraph.CommitGraphConstants.CHUNK_ID_OID_FANOUT; @@ -24,21 +26,30 @@ import static org.eclipse.jgit.lib.Constants.COMMIT_GENERATION_NOT_COMPUTED; import static org.eclipse.jgit.lib.Constants.COMMIT_GENERATION_UNKNOWN; import static org.eclipse.jgit.lib.Constants.OBJECT_ID_LENGTH; +import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.InterruptedIOException; import java.io.OutputStream; +import java.nio.ByteBuffer; import java.text.MessageFormat; import java.util.ArrayList; +import java.util.HashSet; import java.util.List; +import java.util.Optional; import java.util.Stack; import org.eclipse.jgit.annotations.NonNull; +import org.eclipse.jgit.errors.CorruptObjectException; +import org.eclipse.jgit.errors.IncorrectObjectTypeException; import org.eclipse.jgit.errors.MissingObjectException; import org.eclipse.jgit.internal.JGitText; import org.eclipse.jgit.internal.storage.io.CancellableDigestOutputStream; import org.eclipse.jgit.lib.ObjectId; +import org.eclipse.jgit.lib.ObjectReader; import org.eclipse.jgit.lib.ProgressMonitor; import org.eclipse.jgit.revwalk.RevCommit; +import org.eclipse.jgit.treewalk.EmptyTreeIterator; +import org.eclipse.jgit.treewalk.TreeWalk; import org.eclipse.jgit.util.NB; /** @@ -56,6 +67,8 @@ public class CommitGraphWriter { private static final int GENERATION_NUMBER_MAX = 0x3FFFFFFF; + private static final int MAX_CHANGED_PATHS = 512; + private final int hashsz; private final GraphCommits graphCommits; @@ -111,7 +124,8 @@ public class CommitGraphWriter { } } - private List<ChunkHeader> createChunks() { + private List<ChunkHeader> createChunks() throws MissingObjectException, + IncorrectObjectTypeException, CorruptObjectException, IOException { List<ChunkHeader> chunks = new ArrayList<>(); chunks.add(new ChunkHeader(CHUNK_ID_OID_FANOUT, GRAPH_FANOUT_SIZE)); chunks.add(new ChunkHeader(CHUNK_ID_OID_LOOKUP, @@ -122,6 +136,11 @@ public class CommitGraphWriter { chunks.add(new ChunkHeader(CHUNK_ID_EXTRA_EDGE_LIST, graphCommits.getExtraEdgeCnt() * 4)); } + BloomFilterChunks bloomFilterChunks = computeBloomFilterChunks(); + chunks.add(new ChunkHeader(CHUNK_ID_BLOOM_FILTER_INDEX, + bloomFilterChunks.index)); + chunks.add(new ChunkHeader(CHUNK_ID_BLOOM_FILTER_DATA, + bloomFilterChunks.data)); return chunks; } @@ -171,6 +190,14 @@ public class CommitGraphWriter { case CHUNK_ID_EXTRA_EDGE_LIST: writeExtraEdges(out); break; + case CHUNK_ID_BLOOM_FILTER_INDEX: + case CHUNK_ID_BLOOM_FILTER_DATA: + if (!chunk.data.isPresent()) { + throw new IllegalStateException( + "data for this chunk must be precomputed"); //$NON-NLS-1$ + } + chunk.data.get().writeTo(out); + break; } } } @@ -305,6 +332,72 @@ public class CommitGraphWriter { return generations; } + private static Optional<HashSet<ByteBuffer>> computeBloomFilterPaths( + ObjectReader or, RevCommit cmit) throws MissingObjectException, + IncorrectObjectTypeException, CorruptObjectException, IOException { + HashSet<ByteBuffer> paths = new HashSet<>(); + try (TreeWalk walk = new TreeWalk(null, or)) { + walk.setRecursive(true); + if (cmit.getParentCount() == 0) { + walk.addTree(new EmptyTreeIterator()); + } else { + walk.addTree(cmit.getParent(0).getTree()); + } + walk.addTree(cmit.getTree()); + while (walk.next()) { + if (walk.idEqual(0, 1)) { + continue; + } + byte[] rawPath = walk.getRawPath(); + paths.add(ByteBuffer.wrap(rawPath)); + for (int i = 0; i < rawPath.length; i++) { + if (rawPath[i] == '/') { + paths.add(ByteBuffer.wrap(rawPath, 0, i)); + } + if (paths.size() > MAX_CHANGED_PATHS) { + return Optional.empty(); + } + } + } + } + return Optional.of(paths); + } + + private BloomFilterChunks computeBloomFilterChunks() + throws MissingObjectException, IncorrectObjectTypeException, + CorruptObjectException, IOException { + + ByteArrayOutputStream index = new ByteArrayOutputStream(); + ByteArrayOutputStream data = new ByteArrayOutputStream(); + + // Allocate scratch buffer for converting integers into + // big-endian bytes. + byte[] scratch = new byte[4]; + + NB.encodeInt32(scratch, 0, 1); // version 1 + data.write(scratch); + NB.encodeInt32(scratch, 0, ChangedPathFilter.PATH_HASH_COUNT); + data.write(scratch); + NB.encodeInt32(scratch, 0, ChangedPathFilter.BITS_PER_ENTRY); + data.write(scratch); + int dataHeaderSize = data.size(); + + for (RevCommit cmit : graphCommits) { + Optional<HashSet<ByteBuffer>> paths = computeBloomFilterPaths( + graphCommits.getObjectReader(), cmit); + ChangedPathFilter cpf; + if (paths.isEmpty()) { + cpf = ChangedPathFilter.FULL; + } else { + cpf = ChangedPathFilter.fromPaths(paths.get()); + } + cpf.writeTo(data); + NB.encodeInt32(scratch, 0, data.size() - dataHeaderSize); + index.write(scratch); + } + return new BloomFilterChunks(index, data); + } + private void writeExtraEdges(CancellableDigestOutputStream out) throws IOException { byte[] tmp = new byte[4]; @@ -331,9 +424,30 @@ public class CommitGraphWriter { final long size; + final Optional<ByteArrayOutputStream> data; + public ChunkHeader(int id, long size) { this.id = id; this.size = size; + this.data = Optional.empty(); + } + + ChunkHeader(int id, ByteArrayOutputStream data) { + this.id = id; + this.size = data.size(); + this.data = Optional.of(data); + } + } + + private static class BloomFilterChunks { + final ByteArrayOutputStream index; + + final ByteArrayOutputStream data; + + BloomFilterChunks(ByteArrayOutputStream index, + ByteArrayOutputStream data) { + this.index = index; + this.data = data; } } } diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/commitgraph/GraphCommits.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/commitgraph/GraphCommits.java index a9a603ff57..c77d950377 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/commitgraph/GraphCommits.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/commitgraph/GraphCommits.java @@ -23,6 +23,7 @@ import org.eclipse.jgit.lib.AnyObjectId; import org.eclipse.jgit.lib.Constants; import org.eclipse.jgit.lib.ObjectId; import org.eclipse.jgit.lib.ObjectIdOwnerMap; +import org.eclipse.jgit.lib.ObjectReader; import org.eclipse.jgit.lib.ProgressMonitor; import org.eclipse.jgit.revwalk.RevCommit; import org.eclipse.jgit.revwalk.RevObject; @@ -79,7 +80,7 @@ public class GraphCommits implements Iterable<RevCommit> { commits.add(c); } pm.endTask(); - return new GraphCommits(commits); + return new GraphCommits(commits, walk.getObjectReader()); } private final List<RevCommit> sortedCommits; @@ -88,13 +89,17 @@ public class GraphCommits implements Iterable<RevCommit> { private final int extraEdgeCnt; + private final ObjectReader objectReader; + /** * Initialize the GraphCommits. * * @param commits * list of commits with their headers already parsed. + * @param objectReader + * object reader */ - private GraphCommits(List<RevCommit> commits) { + private GraphCommits(List<RevCommit> commits, ObjectReader objectReader) { Collections.sort(commits); // sorted by name sortedCommits = commits; commitPosMap = new ObjectIdOwnerMap<>(); @@ -107,6 +112,7 @@ public class GraphCommits implements Iterable<RevCommit> { commitPosMap.add(new CommitWithPosition(c, i)); } this.extraEdgeCnt = cnt; + this.objectReader = objectReader; } int getOidPosition(RevCommit c) throws MissingObjectException { @@ -125,6 +131,10 @@ public class GraphCommits implements Iterable<RevCommit> { return sortedCommits.size(); } + ObjectReader getObjectReader() { + return objectReader; + } + @Override public Iterator<RevCommit> iterator() { return sortedCommits.iterator(); |