From ff0f7c174f1af853a158321468492324f05cf052 Mon Sep 17 00:00:00 2001 From: Jonathan Tan Date: Mon, 24 Apr 2023 12:55:30 -0700 Subject: [PATCH] CommitGraphLoader: read changed-path filters As described in the parent commit, add support for reading the BIDX and BDAT chunks of the commit graph file, as described in man gitformat- commit-graph(5). This work is based on earlier work by Kyle Zhao (I160f6b022afaa842c331fb9a086974e49dced7b2). Change-Id: I82e02e6a3a3b758e6bf9d7bbd2198f0ffe3a331b Signed-off-by: kylezhao Signed-off-by: Jonathan Tan --- .../storage/commitgraph/CommitGraphTest.java | 24 +++++++ .../commitgraph/ChangedPathFilter.java | 39 +++++++++++ .../storage/commitgraph/CommitGraph.java | 14 ++++ .../commitgraph/CommitGraphBuilder.java | 24 ++++++- .../commitgraph/CommitGraphLoader.java | 8 +++ .../storage/commitgraph/CommitGraphV1.java | 11 ++- .../GraphChangedPathFilterData.java | 68 +++++++++++++++++++ 7 files changed, 186 insertions(+), 2 deletions(-) create mode 100644 org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/commitgraph/GraphChangedPathFilterData.java diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/commitgraph/CommitGraphTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/commitgraph/CommitGraphTest.java index 97976564d8..180585d5e7 100644 --- a/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/commitgraph/CommitGraphTest.java +++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/commitgraph/CommitGraphTest.java @@ -10,9 +10,12 @@ package org.eclipse.jgit.internal.storage.commitgraph; +import static java.nio.charset.StandardCharsets.UTF_8; import static org.eclipse.jgit.lib.Constants.COMMIT_GENERATION_UNKNOWN; import static org.junit.Assert.assertArrayEquals; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; @@ -196,6 +199,27 @@ public class CommitGraphTest extends RepositoryTestCase { assertEquals(getGenerationNumber(c8), 5); } + @Test + public void testGraphComputeChangedPaths() throws Exception { + RevCommit a = tr.commit(tr.tree(tr.file("d/f", tr.blob("a")))); + RevCommit b = tr.commit(tr.tree(tr.file("d/f", tr.blob("a"))), a); + RevCommit c = tr.commit(tr.tree(tr.file("d/f", tr.blob("b"))), b); + + writeAndReadCommitGraph(Collections.singleton(c)); + ChangedPathFilter acpf = commitGraph + .getChangedPathFilter(commitGraph.findGraphPosition(a)); + assertTrue(acpf.maybeContains("d".getBytes(UTF_8))); + assertTrue(acpf.maybeContains("d/f".getBytes(UTF_8))); + ChangedPathFilter bcpf = commitGraph + .getChangedPathFilter(commitGraph.findGraphPosition(b)); + assertFalse(bcpf.maybeContains("d".getBytes(UTF_8))); + assertFalse(bcpf.maybeContains("d/f".getBytes(UTF_8))); + ChangedPathFilter ccpf = commitGraph + .getChangedPathFilter(commitGraph.findGraphPosition(c)); + assertTrue(ccpf.maybeContains("d".getBytes(UTF_8))); + assertTrue(ccpf.maybeContains("d/f".getBytes(UTF_8))); + } + void writeAndReadCommitGraph(Set wants) throws Exception { NullProgressMonitor m = NullProgressMonitor.INSTANCE; try (RevWalk walk = new RevWalk(db)) { diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/commitgraph/ChangedPathFilter.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/commitgraph/ChangedPathFilter.java index e9b8971ee3..53d921e8f8 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/commitgraph/ChangedPathFilter.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/commitgraph/ChangedPathFilter.java @@ -100,6 +100,23 @@ public class ChangedPathFilter { return new ChangedPathFilter(bloom, 0, bloom.length); } + /** + * Returns a filter read from a file. + * + * @param data + * data (read from a commit graph file) + * @param offset + * offset into data + * @param length + * length of data + * + * @return the corresponding filter + */ + public static ChangedPathFilter fromFile(byte[] data, int offset, + int length) { + return new ChangedPathFilter(data, offset, length); + } + private static void add(byte[] changedPathFilterData, byte[] path, int offset, int length) { @@ -112,6 +129,28 @@ public class ChangedPathFilter { } } + /** + * Checks if this changed path filter could contain path. + * + * @param path + * path to check existence of + * @return true if the filter could contain path, false if the filter + * definitely does not contain path + */ + public boolean maybeContains(byte[] path) { + int hash0 = MurmurHash3.hash32x86(path, 0, path.length, SEED1); + int hash1 = MurmurHash3.hash32x86(path, 0, path.length, SEED2); + int bloomFilterBits = length * 8; + for (int i = 0; i < PATH_HASH_COUNT; i++) { + int pos = Integer.remainderUnsigned(hash0 + i * hash1, + bloomFilterBits); + if ((data[offset + (pos / 8)] & (byte) (1 << (pos % 8))) == 0) { + return false; + } + } + return true; + } + /** * Writes this filter to the given stream. * diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/commitgraph/CommitGraph.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/commitgraph/CommitGraph.java index 7fb5956f97..d1178c2850 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/commitgraph/CommitGraph.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/commitgraph/CommitGraph.java @@ -47,6 +47,11 @@ public interface CommitGraph { return null; } + @Override + public ChangedPathFilter getChangedPathFilter(int graphPos) { + return null; + } + @Override public long getCommitCnt() { return 0; @@ -92,6 +97,15 @@ public interface CommitGraph { */ ObjectId getObjectId(int graphPos); + /** + * Get the changed path filter of the object at the commit-graph position. + * + * @param graphPos + * the position in the commit-graph of the object. + * @return the bloom filter or null if it's not found. + */ + ChangedPathFilter getChangedPathFilter(int graphPos); + /** * Obtain the total number of commits described by this commit-graph. * diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/commitgraph/CommitGraphBuilder.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/commitgraph/CommitGraphBuilder.java index a6af3bc592..8f02770745 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/commitgraph/CommitGraphBuilder.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/commitgraph/CommitGraphBuilder.java @@ -10,6 +10,8 @@ package org.eclipse.jgit.internal.storage.commitgraph; +import static org.eclipse.jgit.internal.storage.commitgraph.CommitGraphConstants.CHUNK_ID_BLOOM_FILTER_DATA; +import static org.eclipse.jgit.internal.storage.commitgraph.CommitGraphConstants.CHUNK_ID_BLOOM_FILTER_INDEX; import static org.eclipse.jgit.internal.storage.commitgraph.CommitGraphConstants.CHUNK_ID_COMMIT_DATA; import static org.eclipse.jgit.internal.storage.commitgraph.CommitGraphConstants.CHUNK_ID_EXTRA_EDGE_LIST; import static org.eclipse.jgit.internal.storage.commitgraph.CommitGraphConstants.CHUNK_ID_OID_FANOUT; @@ -35,6 +37,10 @@ class CommitGraphBuilder { private byte[] extraList; + private byte[] bloomFilterIndex; + + private byte[] bloomFilterData; + /** @return A builder of {@link CommitGraph}. */ static CommitGraphBuilder builder() { return new CommitGraphBuilder(OBJECT_ID_LENGTH); @@ -72,6 +78,20 @@ class CommitGraphBuilder { return this; } + CommitGraphBuilder addBloomFilterIndex(byte[] buffer) + throws CommitGraphFormatException { + assertChunkNotSeenYet(bloomFilterIndex, CHUNK_ID_BLOOM_FILTER_INDEX); + bloomFilterIndex = buffer; + return this; + } + + CommitGraphBuilder addBloomFilterData(byte[] buffer) + throws CommitGraphFormatException { + assertChunkNotSeenYet(bloomFilterData, CHUNK_ID_BLOOM_FILTER_DATA); + bloomFilterData = buffer; + return this; + } + CommitGraph build() throws CommitGraphFormatException { assertChunkNotNull(oidFanout, CHUNK_ID_OID_FANOUT); assertChunkNotNull(oidLookup, CHUNK_ID_OID_LOOKUP); @@ -81,7 +101,9 @@ class CommitGraphBuilder { oidLookup); GraphCommitData commitDataChunk = new GraphCommitData(hashLength, commitData, extraList); - return new CommitGraphV1(index, commitDataChunk); + GraphChangedPathFilterData cpfData = new GraphChangedPathFilterData( + bloomFilterIndex, bloomFilterData); + return new CommitGraphV1(index, commitDataChunk, cpfData); } private void assertChunkNotNull(Object object, int chunkId) diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/commitgraph/CommitGraphLoader.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/commitgraph/CommitGraphLoader.java index 571f5f4ebe..d6310e0a85 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/commitgraph/CommitGraphLoader.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/commitgraph/CommitGraphLoader.java @@ -10,6 +10,8 @@ package org.eclipse.jgit.internal.storage.commitgraph; +import static org.eclipse.jgit.internal.storage.commitgraph.CommitGraphConstants.CHUNK_ID_BLOOM_FILTER_DATA; +import static org.eclipse.jgit.internal.storage.commitgraph.CommitGraphConstants.CHUNK_ID_BLOOM_FILTER_INDEX; import static org.eclipse.jgit.internal.storage.commitgraph.CommitGraphConstants.CHUNK_ID_COMMIT_DATA; import static org.eclipse.jgit.internal.storage.commitgraph.CommitGraphConstants.CHUNK_ID_EXTRA_EDGE_LIST; import static org.eclipse.jgit.internal.storage.commitgraph.CommitGraphConstants.CHUNK_ID_OID_FANOUT; @@ -164,6 +166,12 @@ public class CommitGraphLoader { case CHUNK_ID_EXTRA_EDGE_LIST: builder.addExtraList(buffer); break; + case CHUNK_ID_BLOOM_FILTER_INDEX: + builder.addBloomFilterIndex(buffer); + break; + case CHUNK_ID_BLOOM_FILTER_DATA: + builder.addBloomFilterData(buffer); + break; default: LOG.warn(MessageFormat.format( JGitText.get().commitGraphChunkUnknown, diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/commitgraph/CommitGraphV1.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/commitgraph/CommitGraphV1.java index d520139bce..b0a9c83848 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/commitgraph/CommitGraphV1.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/commitgraph/CommitGraphV1.java @@ -24,9 +24,13 @@ class CommitGraphV1 implements CommitGraph { private final GraphCommitData commitData; - CommitGraphV1(GraphObjectIndex index, GraphCommitData commitData) { + private final GraphChangedPathFilterData cpfData; + + CommitGraphV1(GraphObjectIndex index, GraphCommitData commitData, + GraphChangedPathFilterData cpfData) { this.idx = index; this.commitData = commitData; + this.cpfData = cpfData; } @Override @@ -47,6 +51,11 @@ class CommitGraphV1 implements CommitGraph { return idx.getObjectId(graphPos); } + @Override + public ChangedPathFilter getChangedPathFilter(int graphPos) { + return cpfData.getChangedPathFilter(graphPos); + } + @Override public long getCommitCnt() { return idx.getCommitCnt(); diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/commitgraph/GraphChangedPathFilterData.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/commitgraph/GraphChangedPathFilterData.java new file mode 100644 index 0000000000..738a42a01d --- /dev/null +++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/commitgraph/GraphChangedPathFilterData.java @@ -0,0 +1,68 @@ +/* + * Copyright (C) 2023, Google LLC. + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Distribution License v. 1.0 which is available at + * https://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +package org.eclipse.jgit.internal.storage.commitgraph; + +import org.eclipse.jgit.util.NB; + +/** + * Represents the BIDX and BDAT data found in a commit graph file. + */ +class GraphChangedPathFilterData { + + private static final int BIDX_BYTES_PER_ENTRY = 4; + + private static final int BDAT_HEADER_BYTES = 12; + + private final byte[] bloomFilterIndex; + + private final byte[] bloomFilterData; + + /** + * Initialize the GraphChangedPathFilterData. + * + * @param bloomFilterIndex + * content of BIDX chunk, if it exists + * @param bloomFilterData + * content of BDAT chunk, if it exists + */ + GraphChangedPathFilterData(byte[] bloomFilterIndex, + byte[] bloomFilterData) { + + if ((bloomFilterIndex == null) != (bloomFilterData == null)) { + bloomFilterIndex = null; + bloomFilterData = null; + } + if (bloomFilterData != null + && (NB.decodeUInt32(bloomFilterData, + 4) != ChangedPathFilter.PATH_HASH_COUNT + || NB.decodeUInt32(bloomFilterData, + 8) != ChangedPathFilter.BITS_PER_ENTRY)) { + bloomFilterIndex = null; + bloomFilterData = null; + } + + this.bloomFilterIndex = bloomFilterIndex; + this.bloomFilterData = bloomFilterData; + } + + ChangedPathFilter getChangedPathFilter(int graphPos) { + if (bloomFilterIndex == null) { + return null; + } + int priorCumul = graphPos == 0 ? 0 + : NB.decodeInt32(bloomFilterIndex, + graphPos * BIDX_BYTES_PER_ENTRY - BIDX_BYTES_PER_ENTRY); + int cumul = NB.decodeInt32(bloomFilterIndex, graphPos * BIDX_BYTES_PER_ENTRY); + return ChangedPathFilter.fromFile(bloomFilterData, + priorCumul + BDAT_HEADER_BYTES, + cumul - priorCumul); + } +} -- 2.39.5