source.dussan.org Git - jgit.git/commitdiff
CommitGraphLoader: read changed-path filters 52/201852/13
author Jonathan Tan <jonathantanmy@google.com>
Mon, 24 Apr 2023 19:55:30 +0000 (12:55 -0700)
committer Jonathan Tan <jonathantanmy@google.com>
Tue, 18 Jul 2023 21:21:48 +0000 (14:21 -0700)
As described in the parent commit, add support for reading the BIDX and
BDAT chunks of the commit graph file, as described in man gitformat-
commit-graph(5).

This work is based on earlier work by Kyle Zhao
(I160f6b022afaa842c331fb9a086974e49dced7b2).

Change-Id: I82e02e6a3a3b758e6bf9d7bbd2198f0ffe3a331b
Signed-off-by: kylezhao <kylezhao@tencent.com>
Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/commitgraph/CommitGraphTest.java
org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/commitgraph/ChangedPathFilter.java
org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/commitgraph/CommitGraph.java
org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/commitgraph/CommitGraphBuilder.java
org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/commitgraph/CommitGraphLoader.java
org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/commitgraph/CommitGraphV1.java
org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/commitgraph/GraphChangedPathFilterData.java [new file with mode: 0644]

index 97976564d87ee66bcc4d09fa9c77015e1e29e35e..180585d5e71aec59eb37e254de203c79eb1b6c1d 100644 (file)
 
 package org.eclipse.jgit.internal.storage.commitgraph;
 
+import static java.nio.charset.StandardCharsets.UTF_8;
 import static org.eclipse.jgit.lib.Constants.COMMIT_GENERATION_UNKNOWN;
 import static org.junit.Assert.assertArrayEquals;
 import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
 
 import java.io.ByteArrayInputStream;
 import java.io.ByteArrayOutputStream;
@@ -196,6 +199,27 @@ public class CommitGraphTest extends RepositoryTestCase {
                assertEquals(getGenerationNumber(c8), 5);
        }
 
+       @Test
+       public void testGraphComputeChangedPaths() throws Exception {
+               RevCommit a = tr.commit(tr.tree(tr.file("d/f", tr.blob("a"))));
+               RevCommit b = tr.commit(tr.tree(tr.file("d/f", tr.blob("a"))), a);
+               RevCommit c = tr.commit(tr.tree(tr.file("d/f", tr.blob("b"))), b);
+
+               writeAndReadCommitGraph(Collections.singleton(c));
+               ChangedPathFilter acpf = commitGraph
+                               .getChangedPathFilter(commitGraph.findGraphPosition(a));
+               assertTrue(acpf.maybeContains("d".getBytes(UTF_8)));
+               assertTrue(acpf.maybeContains("d/f".getBytes(UTF_8)));
+               ChangedPathFilter bcpf = commitGraph
+                               .getChangedPathFilter(commitGraph.findGraphPosition(b));
+               assertFalse(bcpf.maybeContains("d".getBytes(UTF_8)));
+               assertFalse(bcpf.maybeContains("d/f".getBytes(UTF_8)));
+               ChangedPathFilter ccpf = commitGraph
+                               .getChangedPathFilter(commitGraph.findGraphPosition(c));
+               assertTrue(ccpf.maybeContains("d".getBytes(UTF_8)));
+               assertTrue(ccpf.maybeContains("d/f".getBytes(UTF_8)));
+       }
+
        void writeAndReadCommitGraph(Set<ObjectId> wants) throws Exception {
                NullProgressMonitor m = NullProgressMonitor.INSTANCE;
                try (RevWalk walk = new RevWalk(db)) {
index e9b8971ee3ccba2cd985148daf0ec33969e9ed1b..53d921e8f841a20c633af2f14e7aff9c2370538b 100644 (file)
@@ -100,6 +100,23 @@ public class ChangedPathFilter {
                return new ChangedPathFilter(bloom, 0, bloom.length);
        }
 
+       /**
+        * Returns a filter read from a file.
+        * <p>
+        * The filter is described by the {@code length} bytes of {@code data}
+        * that begin at {@code offset}.
+        *
+        * @param data
+        *            data (read from a commit graph file)
+        * @param offset
+        *            offset into data at which the filter begins
+        * @param length
+        *            number of bytes of data that belong to the filter
+        *
+        * @return the corresponding filter
+        */
+       public static ChangedPathFilter fromFile(byte[] data, int offset,
+                       int length) {
+               return new ChangedPathFilter(data, offset, length);
+       }
+
        private static void add(byte[] changedPathFilterData, byte[] path,
                        int offset, int length) {
 
@@ -112,6 +129,28 @@ public class ChangedPathFilter {
                }
        }
 
+       /**
+        * Checks if this changed path filter could contain path.
+        *
+        * @param path
+        *            path to check existence of
+        * @return true if the filter could contain path, false if the filter
+        *         definitely does not contain path
+        */
+       public boolean maybeContains(byte[] path) {
+               int hash0 = MurmurHash3.hash32x86(path, 0, path.length, SEED1);
+               int hash1 = MurmurHash3.hash32x86(path, 0, path.length, SEED2);
+               int bloomFilterBits = length * 8;
+               for (int i = 0; i < PATH_HASH_COUNT; i++) {
+                       int pos = Integer.remainderUnsigned(hash0 + i * hash1,
+                                       bloomFilterBits);
+                       if ((data[offset + (pos / 8)] & (byte) (1 << (pos % 8))) == 0) {
+                               return false;
+                       }
+               }
+               return true;
+       }
+
        /**
         * Writes this filter to the given stream.
         *
index 7fb5956f97fca0f0e821a25de6424396e2b7fa3c..d1178c28502e24881ac8d676e10e1de7dd4d3c06 100644 (file)
@@ -47,6 +47,11 @@ public interface CommitGraph {
                        return null;
                }
 
+               @Override
+               public ChangedPathFilter getChangedPathFilter(int graphPos) {
+                       // Always null: this implementation never supplies a filter,
+                       // whatever graphPos is.
+                       return null;
+               }
+
                @Override
                public long getCommitCnt() {
                        return 0;
@@ -92,6 +97,15 @@ public interface CommitGraph {
         */
        ObjectId getObjectId(int graphPos);
 
+       /**
+        * Get the changed path filter of the object at the commit-graph position.
+        *
+        * @param graphPos
+        *            the position in the commit-graph of the object.
+        * @return the changed path (Bloom) filter, or {@code null} if it's not
+        *         found.
+        */
+       ChangedPathFilter getChangedPathFilter(int graphPos);
+
        /**
         * Obtain the total number of commits described by this commit-graph.
         *
index a6af3bc592ce586ced43016f9ea2e0b021b8e4a4..8f02770745d08bb5011003e74fb2c9dd6df3d2da 100644 (file)
@@ -10,6 +10,8 @@
 
 package org.eclipse.jgit.internal.storage.commitgraph;
 
+import static org.eclipse.jgit.internal.storage.commitgraph.CommitGraphConstants.CHUNK_ID_BLOOM_FILTER_DATA;
+import static org.eclipse.jgit.internal.storage.commitgraph.CommitGraphConstants.CHUNK_ID_BLOOM_FILTER_INDEX;
 import static org.eclipse.jgit.internal.storage.commitgraph.CommitGraphConstants.CHUNK_ID_COMMIT_DATA;
 import static org.eclipse.jgit.internal.storage.commitgraph.CommitGraphConstants.CHUNK_ID_EXTRA_EDGE_LIST;
 import static org.eclipse.jgit.internal.storage.commitgraph.CommitGraphConstants.CHUNK_ID_OID_FANOUT;
@@ -35,6 +37,10 @@ class CommitGraphBuilder {
 
        private byte[] extraList;
 
+       private byte[] bloomFilterIndex;
+
+       private byte[] bloomFilterData;
+
        /** @return A builder of {@link CommitGraph}. */
        static CommitGraphBuilder builder() {
                return new CommitGraphBuilder(OBJECT_ID_LENGTH);
@@ -72,6 +78,20 @@ class CommitGraphBuilder {
                return this;
        }
 
+       /**
+        * Records the content of the Bloom filter index chunk.
+        *
+        * @param buffer
+        *            raw bytes of the chunk; kept by reference
+        * @return this builder
+        * @throws CommitGraphFormatException
+        *             raised by the "chunk not seen yet" check; presumably when
+        *             this chunk was already added (confirm against
+        *             assertChunkNotSeenYet)
+        */
+       CommitGraphBuilder addBloomFilterIndex(byte[] buffer)
+                       throws CommitGraphFormatException {
+               assertChunkNotSeenYet(bloomFilterIndex, CHUNK_ID_BLOOM_FILTER_INDEX);
+               bloomFilterIndex = buffer;
+               return this;
+       }
+
+       /**
+        * Records the content of the Bloom filter data chunk.
+        *
+        * @param buffer
+        *            raw bytes of the chunk; kept by reference
+        * @return this builder
+        * @throws CommitGraphFormatException
+        *             raised by the "chunk not seen yet" check; presumably when
+        *             this chunk was already added (confirm against
+        *             assertChunkNotSeenYet)
+        */
+       CommitGraphBuilder addBloomFilterData(byte[] buffer)
+                       throws CommitGraphFormatException {
+               assertChunkNotSeenYet(bloomFilterData, CHUNK_ID_BLOOM_FILTER_DATA);
+               bloomFilterData = buffer;
+               return this;
+       }
+
        CommitGraph build() throws CommitGraphFormatException {
                assertChunkNotNull(oidFanout, CHUNK_ID_OID_FANOUT);
                assertChunkNotNull(oidLookup, CHUNK_ID_OID_LOOKUP);
@@ -81,7 +101,9 @@ class CommitGraphBuilder {
                                oidLookup);
                GraphCommitData commitDataChunk = new GraphCommitData(hashLength,
                                commitData, extraList);
-               return new CommitGraphV1(index, commitDataChunk);
+               GraphChangedPathFilterData cpfData = new GraphChangedPathFilterData(
+                               bloomFilterIndex, bloomFilterData);
+               return new CommitGraphV1(index, commitDataChunk, cpfData);
        }
 
        private void assertChunkNotNull(Object object, int chunkId)
index 571f5f4ebeb05839589a576538f461dded0c872e..d6310e0a859d5943016678f25552f1c67d37a915 100644 (file)
@@ -10,6 +10,8 @@
 
 package org.eclipse.jgit.internal.storage.commitgraph;
 
+import static org.eclipse.jgit.internal.storage.commitgraph.CommitGraphConstants.CHUNK_ID_BLOOM_FILTER_DATA;
+import static org.eclipse.jgit.internal.storage.commitgraph.CommitGraphConstants.CHUNK_ID_BLOOM_FILTER_INDEX;
 import static org.eclipse.jgit.internal.storage.commitgraph.CommitGraphConstants.CHUNK_ID_COMMIT_DATA;
 import static org.eclipse.jgit.internal.storage.commitgraph.CommitGraphConstants.CHUNK_ID_EXTRA_EDGE_LIST;
 import static org.eclipse.jgit.internal.storage.commitgraph.CommitGraphConstants.CHUNK_ID_OID_FANOUT;
@@ -164,6 +166,12 @@ public class CommitGraphLoader {
                        case CHUNK_ID_EXTRA_EDGE_LIST:
                                builder.addExtraList(buffer);
                                break;
+                       case CHUNK_ID_BLOOM_FILTER_INDEX:
+                               builder.addBloomFilterIndex(buffer);
+                               break;
+                       case CHUNK_ID_BLOOM_FILTER_DATA:
+                               builder.addBloomFilterData(buffer);
+                               break;
                        default:
                                LOG.warn(MessageFormat.format(
                                                JGitText.get().commitGraphChunkUnknown,
index d520139bce02c3988aad95f840703678fd02c05c..b0a9c83848cb9e749a4f15cbc504c1eef305c59a 100644 (file)
@@ -24,9 +24,13 @@ class CommitGraphV1 implements CommitGraph {
 
        private final GraphCommitData commitData;
 
-       CommitGraphV1(GraphObjectIndex index, GraphCommitData commitData) {
+       private final GraphChangedPathFilterData cpfData;
+
+       CommitGraphV1(GraphObjectIndex index, GraphCommitData commitData,
+                       GraphChangedPathFilterData cpfData) {
                this.idx = index;
                this.commitData = commitData;
+               this.cpfData = cpfData;
        }
 
        @Override
@@ -47,6 +51,11 @@ class CommitGraphV1 implements CommitGraph {
                return idx.getObjectId(graphPos);
        }
 
+       @Override
+       public ChangedPathFilter getChangedPathFilter(int graphPos) {
+               // The lookup is delegated to the changed path filter data held by
+               // this graph.
+               return cpfData.getChangedPathFilter(graphPos);
+       }
+
        @Override
        public long getCommitCnt() {
                return idx.getCommitCnt();
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/commitgraph/GraphChangedPathFilterData.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/commitgraph/GraphChangedPathFilterData.java
new file mode 100644 (file)
index 0000000..738a42a
--- /dev/null
@@ -0,0 +1,68 @@
+/*
+ * Copyright (C) 2023, Google LLC.
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Distribution License v. 1.0 which is available at
+ * https://www.eclipse.org/org/documents/edl-v10.php.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+package org.eclipse.jgit.internal.storage.commitgraph;
+
+import org.eclipse.jgit.util.NB;
+
+/**
+ * Represents the BIDX and BDAT data found in a commit graph file.
+ */
+class GraphChangedPathFilterData {
+
+       private static final int BIDX_BYTES_PER_ENTRY = 4;
+
+       private static final int BDAT_HEADER_BYTES = 12;
+
+       private final byte[] bloomFilterIndex;
+
+       private final byte[] bloomFilterData;
+
+       /**
+        * Initialize the GraphChangedPathFilterData.
+        *
+        * @param bloomFilterIndex
+        *            content of BIDX chunk, if it exists
+        * @param bloomFilterData
+        *            content of BDAT chunk, if it exists
+        */
+       GraphChangedPathFilterData(byte[] bloomFilterIndex,
+                       byte[] bloomFilterData) {
+
+               if ((bloomFilterIndex == null) != (bloomFilterData == null)) {
+                       bloomFilterIndex = null;
+                       bloomFilterData = null;
+               }
+               if (bloomFilterData != null
+                               && (NB.decodeUInt32(bloomFilterData,
+                                               4) != ChangedPathFilter.PATH_HASH_COUNT
+                                               || NB.decodeUInt32(bloomFilterData,
+                                                               8) != ChangedPathFilter.BITS_PER_ENTRY)) {
+                       bloomFilterIndex = null;
+                       bloomFilterData = null;
+               }
+
+               this.bloomFilterIndex = bloomFilterIndex;
+               this.bloomFilterData = bloomFilterData;
+       }
+
+       ChangedPathFilter getChangedPathFilter(int graphPos) {
+               if (bloomFilterIndex == null) {
+                       return null;
+               }
+               int priorCumul = graphPos == 0 ? 0
+                               : NB.decodeInt32(bloomFilterIndex,
+                                               graphPos * BIDX_BYTES_PER_ENTRY - BIDX_BYTES_PER_ENTRY);
+               int cumul = NB.decodeInt32(bloomFilterIndex, graphPos * BIDX_BYTES_PER_ENTRY);
+               return ChangedPathFilter.fromFile(bloomFilterData,
+                               priorCumul + BDAT_HEADER_BYTES,
+                               cumul - priorCumul);
+       }
+}