diff options
author | Ivan Frade <ifrade@google.com> | 2025-04-15 13:22:06 -0700 |
---|---|---|
committer | Ivan Frade <ifrade@google.com> | 2025-04-15 16:16:58 -0700 |
commit | 9b51ec8f2ff1a118844227c1ea41f0f0fe05e2ef (patch) | |
tree | 39bd25bc40d912e003df01bd702ad32ef563f269 | |
parent | f627e89147c437de9d2f527b76bb599323919e73 (diff) | |
download | jgit-9b51ec8f2ff1a118844227c1ea41f0f0fe05e2ef.tar.gz jgit-9b51ec8f2ff1a118844227c1ea41f0f0fe05e2ef.zip |
MultiPackIndex: Add and implement #resolve() method
The reader offers the resolve functionality to find object ids with
certain prefix. The basic implementation iterates through packs and
calls resolve in their indexes. With multipack index, we can answer
the resolve directly for all the packs included.
Offer a resolve() method and implement it in the multipack index.
Change-Id: If5679652f149a41afe568c719ba40b291ae1b917
3 files changed, 241 insertions, 8 deletions
diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/midx/MultiPackIndexTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/midx/MultiPackIndexTest.java index da015bf930..ab452854b2 100644 --- a/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/midx/MultiPackIndexTest.java +++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/midx/MultiPackIndexTest.java @@ -13,16 +13,21 @@ import static org.junit.Assert.assertArrayEquals; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertTrue; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.IOException; +import java.util.ArrayList; +import java.util.HashSet; import java.util.List; import java.util.Map; +import java.util.Set; import org.eclipse.jgit.internal.storage.file.PackIndex; import org.eclipse.jgit.junit.FakeIndexFactory; import org.eclipse.jgit.junit.JGitTestUtil; +import org.eclipse.jgit.lib.AbbreviatedObjectId; import org.eclipse.jgit.lib.NullProgressMonitor; import org.eclipse.jgit.lib.ObjectId; import org.junit.Test; @@ -67,8 +72,7 @@ public class MultiPackIndexTest { assertNotNull(midx); assertArrayEquals(packNames, midx.getPackNames()); - MultiPackIndex.PackOffset oo = midx - .find(ObjectId.fromString(knownOid)); + MultiPackIndex.PackOffset oo = midx.find(ObjectId.fromString(knownOid)); assertEquals(knowOffset, oo.getOffset()); assertEquals(knownPackId, oo.getPackId()); @@ -133,7 +137,7 @@ public class MultiPackIndexTest { "0000000000000000000000000000000000000005", 12))); PackIndex idxTwo = FakeIndexFactory.indexOf(List.of( new FakeIndexFactory.IndexObject( - "0000000000000000000000000000000000000002", (1L<< 35)), + "0000000000000000000000000000000000000002", (1L << 35)), new FakeIndexFactory.IndexObject( "0000000000000000000000000000000000000003", 13))); Map<String, PackIndex> packs = Map.of("p1", idxOne, "p2", idxTwo); @@ -144,9 +148,11 @@ public class MultiPackIndexTest { MultiPackIndex midx = MultiPackIndexLoader .read(new ByteArrayInputStream(out.toByteArray())); assertEquals(2, midx.getPackNames().length); - assertInIndex(midx, 0, "0000000000000000000000000000000000000001", (1L << 34)); + assertInIndex(midx, 0, "0000000000000000000000000000000000000001", + (1L << 34)); assertInIndex(midx, 0, "0000000000000000000000000000000000000005", 12); - assertInIndex(midx, 1, "0000000000000000000000000000000000000002", (1L << 35)); + assertInIndex(midx, 1, "0000000000000000000000000000000000000002", + (1L << 35)); } @Test @@ -155,7 +161,8 @@ public class MultiPackIndexTest { // Most significant bit to 1 is still valid offset PackIndex idxOne = FakeIndexFactory.indexOf(List.of( new FakeIndexFactory.IndexObject( - "0000000000000000000000000000000000000001", 0xff00_0000), + "0000000000000000000000000000000000000001", + 0xff00_0000), new FakeIndexFactory.IndexObject( "0000000000000000000000000000000000000005", 12))); PackIndex idxTwo = FakeIndexFactory.indexOf(List.of( @@ -171,9 +178,161 @@ public class MultiPackIndexTest { MultiPackIndex midx = MultiPackIndexLoader .read(new ByteArrayInputStream(out.toByteArray())); assertEquals(2, midx.getPackNames().length); - assertInIndex(midx, 0, "0000000000000000000000000000000000000001", 0xff00_0000L); + assertInIndex(midx, 0, "0000000000000000000000000000000000000001", + 0xff00_0000L); assertInIndex(midx, 0, "0000000000000000000000000000000000000005", 12); } + + @Test + public void jgit_resolve() throws IOException { + AbbreviatedObjectId abbrev = AbbreviatedObjectId + .fromString("32fe829a1c"); + + PackIndex idxOne = indexWith( + // Noise + "0000000000000000000000000000000000000001", + "3000000000000000000000000000000000000005", + // One before abbrev + "32fe829a1b000000000000000000000000000001", + // matches + "32fe829a1c000000000000000000000000000001", + "32fe829a1c000000000000000000000000000100", + // One after abbrev + "32fe829a1d000000000000000000000000000000"); + PackIndex idxTwo = indexWith( + // Noise + "8888880000000000000000000000000000000002", + "bbbbbb0000000000000000000000000000000003", + // Match + "32fe829a1c000000000000000000000000000010"); + + Map<String, PackIndex> packs = Map.of("p1", idxOne, "p2", idxTwo); + MultiPackIndexWriter writer = new MultiPackIndexWriter(); + ByteArrayOutputStream out = new ByteArrayOutputStream(); + writer.write(NullProgressMonitor.INSTANCE, out, packs); + MultiPackIndex midx = MultiPackIndexLoader + .read(new ByteArrayInputStream(out.toByteArray())); + + + Set<ObjectId> results = new HashSet<>(); + midx.resolve(results, abbrev, 100); + + assertEquals(3, results.size()); + assertTrue(results.contains(ObjectId + .fromString("32fe829a1c000000000000000000000000000001"))); + assertTrue(results.contains(ObjectId + .fromString("32fe829a1c000000000000000000000000000010"))); + assertTrue(results.contains(ObjectId + .fromString("32fe829a1c000000000000000000000000000100"))); + + } + + @Test + public void jgit_resolve_matchLimit() throws IOException { + AbbreviatedObjectId abbrev = AbbreviatedObjectId + .fromString("32fe829a1c"); + + PackIndex idxOne = indexWith( + // Noise + "0000000000000000000000000000000000000001", + "3000000000000000000000000000000000000005", + // One before abbrev + "32fe829a1b000000000000000000000000000001", + // matches + "32fe829a1c000000000000000000000000000001", + "32fe829a1c000000000000000000000000000100", + // One after abbrev + "32fe829a1d000000000000000000000000000000"); + PackIndex idxTwo = indexWith( + // Noise + "8888880000000000000000000000000000000002", + "bbbbbb0000000000000000000000000000000003", + // Match + "32fe829a1c000000000000000000000000000010"); + + Map<String, PackIndex> packs = Map.of("p1", idxOne, "p2", idxTwo); + MultiPackIndexWriter writer = new MultiPackIndexWriter(); + ByteArrayOutputStream out = new ByteArrayOutputStream(); + writer.write(NullProgressMonitor.INSTANCE, out, packs); + MultiPackIndex midx = MultiPackIndexLoader + .read(new ByteArrayInputStream(out.toByteArray())); + + + Set<ObjectId> results = new HashSet<>(); + midx.resolve(results, abbrev, 2); + + assertEquals(2, results.size()); + assertTrue(results.contains(ObjectId + .fromString("32fe829a1c000000000000000000000000000001"))); + assertTrue(results.contains(ObjectId + .fromString("32fe829a1c000000000000000000000000000010"))); + } + + @Test + public void jgit_resolve_noMatches() throws IOException { + AbbreviatedObjectId abbrev = AbbreviatedObjectId + .fromString("4400000000"); + + PackIndex idxOne = indexWith( + "0000000000000000000000000000000000000001", + "3000000000000000000000000000000000000005", + "32fe829a1b000000000000000000000000000001", + "32fe829a1c000000000000000000000000000001", + "32fe829a1c000000000000000000000000000100", + "32fe829a1d000000000000000000000000000000"); + PackIndex idxTwo = indexWith( + // Noise + "8888880000000000000000000000000000000002", + "bbbbbb0000000000000000000000000000000003", + "32fe829a1c000000000000000000000000000010"); + + Map<String, PackIndex> packs = Map.of("p1", idxOne, "p2", idxTwo); + MultiPackIndexWriter writer = new MultiPackIndexWriter(); + ByteArrayOutputStream out = new ByteArrayOutputStream(); + writer.write(NullProgressMonitor.INSTANCE, out, packs); + MultiPackIndex midx = MultiPackIndexLoader + .read(new ByteArrayInputStream(out.toByteArray())); + + + Set<ObjectId> results = new HashSet<>(); + midx.resolve(results, abbrev, 200); + + assertEquals(0, results.size()); + } + + @Test + public void jgit_resolve_empty() throws IOException { + AbbreviatedObjectId abbrev = AbbreviatedObjectId + .fromString("4400000000"); + + PackIndex idxOne = FakeIndexFactory.indexOf(List.of()); + PackIndex idxTwo = FakeIndexFactory.indexOf(List.of()); + + Map<String, PackIndex> packs = Map.of("p1", idxOne, "p2", idxTwo); + MultiPackIndexWriter writer = new MultiPackIndexWriter(); + ByteArrayOutputStream out = new ByteArrayOutputStream(); + writer.write(NullProgressMonitor.INSTANCE, out, packs); + MultiPackIndex midx = MultiPackIndexLoader + .read(new ByteArrayInputStream(out.toByteArray())); + + + Set<ObjectId> results = new HashSet<>(); + midx.resolve(results, abbrev, 200); + + assertEquals(0, results.size()); + } + + private static PackIndex indexWith(String... oids) { + List<FakeIndexFactory.IndexObject> idxObjs = new ArrayList<>( + oids.length); + int offset = 12; + for (String oid : oids) { + idxObjs.add(new FakeIndexFactory.IndexObject(oid, offset)); + offset += 10; + } + return FakeIndexFactory.indexOf(idxObjs); + } + private static void assertInIndex(MultiPackIndex midx, int expectedPackId, String oid, long expectedOffset) { MultiPackIndex.PackOffset packOffset = midx diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/midx/MultiPackIndex.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/midx/MultiPackIndex.java index 845576d8c2..15b52391b8 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/midx/MultiPackIndex.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/midx/MultiPackIndex.java @@ -10,7 +10,11 @@ package org.eclipse.jgit.internal.storage.midx; +import java.util.Set; + +import org.eclipse.jgit.lib.AbbreviatedObjectId; import org.eclipse.jgit.lib.AnyObjectId; +import org.eclipse.jgit.lib.ObjectId; /** * An index over multiple packs @@ -48,6 +52,20 @@ public interface MultiPackIndex { PackOffset find(AnyObjectId objectId); /** + * Find objects matching the prefix abbreviation. + * + * @param matches + * set to add any located ObjectIds to. This is an output + * parameter. + * @param id + * prefix to search for. + * @param matchLimit + * maximum number of results to return. At most this many + * ObjectIds should be added to matches before returning. + */ + void resolve(Set<ObjectId> matches, AbbreviatedObjectId id, int matchLimit); + + /** * Memory size of this multipack index * * @return size of this multipack index in memory, in bytes diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/midx/MultiPackIndexV1.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/midx/MultiPackIndexV1.java index bb122c7a01..be752cc4b5 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/midx/MultiPackIndexV1.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/midx/MultiPackIndexV1.java @@ -12,12 +12,15 @@ package org.eclipse.jgit.internal.storage.midx; import java.nio.charset.StandardCharsets; import java.util.Arrays; +import java.util.Set; import org.eclipse.jgit.annotations.NonNull; import org.eclipse.jgit.annotations.Nullable; import org.eclipse.jgit.internal.JGitText; import org.eclipse.jgit.internal.storage.midx.MultiPackIndexLoader.MultiPackIndexFormatException; +import org.eclipse.jgit.lib.AbbreviatedObjectId; import org.eclipse.jgit.lib.AnyObjectId; +import org.eclipse.jgit.lib.ObjectId; import org.eclipse.jgit.util.NB; /** @@ -69,6 +72,12 @@ class MultiPackIndexV1 implements MultiPackIndex { } @Override + public void resolve(Set<ObjectId> matches, AbbreviatedObjectId id, + int matchLimit) { + idx.resolve(matches, id, matchLimit); + } + + @Override public long getMemorySize() { int packNamesSize = Arrays.stream(packNames) .mapToInt(s -> s.getBytes(StandardCharsets.UTF_8).length).sum(); @@ -146,7 +155,8 @@ class MultiPackIndexV1 implements MultiPackIndex { } long getMemorySize() { - return (long)byteArrayLengh(offsets) + byteArrayLengh(largeOffsets); + return (long) byteArrayLengh(offsets) + + byteArrayLengh(largeOffsets); } } @@ -226,6 +236,52 @@ class MultiPackIndexV1 implements MultiPackIndex { return -1; } + void resolve(Set<ObjectId> matches, AbbreviatedObjectId id, + int matchLimit) { + if (matches.size() >= matchLimit) { + return; + } + + if (oidLookup.length == 0) { + return; + } + + int high = fanoutTable[id.getFirstByte()]; + int low = id.getFirstByte() == 0 ? 0 + : fanoutTable[id.getFirstByte() - 1]; + do { + int p = (low + high) >>> 1; + int cmp = id.prefixCompare(oidLookup, idOffset(p)); + if (cmp < 0) { + high = p; + continue; + } + + if (cmp > 0) { + low = p + 1; + continue; + } + + // Got a match. + // We may have landed in the middle of the matches. Move + // backwards to the start of matches, then walk forwards. + while (0 < p + && id.prefixCompare(oidLookup, idOffset(p - 1)) == 0) { + p--; + } + while (p < high && id.prefixCompare(oidLookup, idOffset(p)) == 0 + && matches.size() < matchLimit) { + matches.add(ObjectId.fromRaw(oidLookup, idOffset(p))); + p++; + } + return; + } while (low < high); + } + + private int idOffset(int position) { + return position * hashLength; + } + long getMemorySize() { return 4L + byteArrayLengh(oidLookup) + (FANOUT * 4); } |