From 9b51ec8f2ff1a118844227c1ea41f0f0fe05e2ef Mon Sep 17 00:00:00 2001 From: Ivan Frade Date: Tue, 15 Apr 2025 13:22:06 -0700 Subject: MultiPackIndex: Add and implement #resolve() method The reader offers the resolve functionality to find object ids with certain prefix. The basic implementation iterates through packs and calls resolve in their indexes. With multipack index, we can answer the resolve directly for all the packs included. Offer a resolve() method and implement it in the multipack index. Change-Id: If5679652f149a41afe568c719ba40b291ae1b917 --- .../internal/storage/midx/MultiPackIndexTest.java | 173 ++++++++++++++++++++- .../jgit/internal/storage/midx/MultiPackIndex.java | 18 +++ .../internal/storage/midx/MultiPackIndexV1.java | 58 ++++++- 3 files changed, 241 insertions(+), 8 deletions(-) diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/midx/MultiPackIndexTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/midx/MultiPackIndexTest.java index da015bf930..ab452854b2 100644 --- a/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/midx/MultiPackIndexTest.java +++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/midx/MultiPackIndexTest.java @@ -13,16 +13,21 @@ import static org.junit.Assert.assertArrayEquals; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertTrue; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.IOException; +import java.util.ArrayList; +import java.util.HashSet; import java.util.List; import java.util.Map; +import java.util.Set; import org.eclipse.jgit.internal.storage.file.PackIndex; import org.eclipse.jgit.junit.FakeIndexFactory; import org.eclipse.jgit.junit.JGitTestUtil; +import org.eclipse.jgit.lib.AbbreviatedObjectId; import org.eclipse.jgit.lib.NullProgressMonitor; import org.eclipse.jgit.lib.ObjectId; import org.junit.Test; @@ -67,8 +72,7 @@ public class MultiPackIndexTest { assertNotNull(midx); assertArrayEquals(packNames, midx.getPackNames()); - MultiPackIndex.PackOffset oo = midx - .find(ObjectId.fromString(knownOid)); + MultiPackIndex.PackOffset oo = midx.find(ObjectId.fromString(knownOid)); assertEquals(knowOffset, oo.getOffset()); assertEquals(knownPackId, oo.getPackId()); @@ -133,7 +137,7 @@ public class MultiPackIndexTest { "0000000000000000000000000000000000000005", 12))); PackIndex idxTwo = FakeIndexFactory.indexOf(List.of( new FakeIndexFactory.IndexObject( - "0000000000000000000000000000000000000002", (1L<< 35)), + "0000000000000000000000000000000000000002", (1L << 35)), new FakeIndexFactory.IndexObject( "0000000000000000000000000000000000000003", 13))); Map packs = Map.of("p1", idxOne, "p2", idxTwo); @@ -144,9 +148,11 @@ public class MultiPackIndexTest { MultiPackIndex midx = MultiPackIndexLoader .read(new ByteArrayInputStream(out.toByteArray())); assertEquals(2, midx.getPackNames().length); - assertInIndex(midx, 0, "0000000000000000000000000000000000000001", (1L << 34)); + assertInIndex(midx, 0, "0000000000000000000000000000000000000001", + (1L << 34)); assertInIndex(midx, 0, "0000000000000000000000000000000000000005", 12); - assertInIndex(midx, 1, "0000000000000000000000000000000000000002", (1L << 35)); + assertInIndex(midx, 1, "0000000000000000000000000000000000000002", + (1L << 35)); } @Test @@ -155,7 +161,8 @@ public class MultiPackIndexTest { // Most significant bit to 1 is still valid offset PackIndex idxOne = FakeIndexFactory.indexOf(List.of( new FakeIndexFactory.IndexObject( - "0000000000000000000000000000000000000001", 0xff00_0000), + "0000000000000000000000000000000000000001", + 0xff00_0000), new FakeIndexFactory.IndexObject( "0000000000000000000000000000000000000005", 12))); PackIndex idxTwo = FakeIndexFactory.indexOf(List.of( @@ -171,9 +178,161 @@ public class MultiPackIndexTest { MultiPackIndex midx = MultiPackIndexLoader .read(new ByteArrayInputStream(out.toByteArray())); assertEquals(2, midx.getPackNames().length); - assertInIndex(midx, 0, "0000000000000000000000000000000000000001", 0xff00_0000L); + assertInIndex(midx, 0, "0000000000000000000000000000000000000001", + 0xff00_0000L); assertInIndex(midx, 0, "0000000000000000000000000000000000000005", 12); } + + @Test + public void jgit_resolve() throws IOException { + AbbreviatedObjectId abbrev = AbbreviatedObjectId + .fromString("32fe829a1c"); + + PackIndex idxOne = indexWith( + // Noise + "0000000000000000000000000000000000000001", + "3000000000000000000000000000000000000005", + // One before abbrev + "32fe829a1b000000000000000000000000000001", + // matches + "32fe829a1c000000000000000000000000000001", + "32fe829a1c000000000000000000000000000100", + // One after abbrev + "32fe829a1d000000000000000000000000000000"); + PackIndex idxTwo = indexWith( + // Noise + "8888880000000000000000000000000000000002", + "bbbbbb0000000000000000000000000000000003", + // Match + "32fe829a1c000000000000000000000000000010"); + + Map packs = Map.of("p1", idxOne, "p2", idxTwo); + MultiPackIndexWriter writer = new MultiPackIndexWriter(); + ByteArrayOutputStream out = new ByteArrayOutputStream(); + writer.write(NullProgressMonitor.INSTANCE, out, packs); + MultiPackIndex midx = MultiPackIndexLoader + .read(new ByteArrayInputStream(out.toByteArray())); + + + Set results = new HashSet<>(); + midx.resolve(results, abbrev, 100); + + assertEquals(3, results.size()); + assertTrue(results.contains(ObjectId + .fromString("32fe829a1c000000000000000000000000000001"))); + assertTrue(results.contains(ObjectId + .fromString("32fe829a1c000000000000000000000000000010"))); + assertTrue(results.contains(ObjectId + .fromString("32fe829a1c000000000000000000000000000100"))); + + } + + @Test + public void jgit_resolve_matchLimit() throws IOException { + AbbreviatedObjectId abbrev = AbbreviatedObjectId + .fromString("32fe829a1c"); + + PackIndex idxOne = indexWith( + // Noise + "0000000000000000000000000000000000000001", + "3000000000000000000000000000000000000005", + // One before abbrev + "32fe829a1b000000000000000000000000000001", + // matches + "32fe829a1c000000000000000000000000000001", + "32fe829a1c000000000000000000000000000100", + // One after abbrev + "32fe829a1d000000000000000000000000000000"); + PackIndex idxTwo = indexWith( + // Noise + "8888880000000000000000000000000000000002", + "bbbbbb0000000000000000000000000000000003", + // Match + "32fe829a1c000000000000000000000000000010"); + + Map packs = Map.of("p1", idxOne, "p2", idxTwo); + MultiPackIndexWriter writer = new MultiPackIndexWriter(); + ByteArrayOutputStream out = new ByteArrayOutputStream(); + writer.write(NullProgressMonitor.INSTANCE, out, packs); + MultiPackIndex midx = MultiPackIndexLoader + .read(new ByteArrayInputStream(out.toByteArray())); + + + Set results = new HashSet<>(); + midx.resolve(results, abbrev, 2); + + assertEquals(2, results.size()); + assertTrue(results.contains(ObjectId + .fromString("32fe829a1c000000000000000000000000000001"))); + assertTrue(results.contains(ObjectId + .fromString("32fe829a1c000000000000000000000000000010"))); + } + + @Test + public void jgit_resolve_noMatches() throws IOException { + AbbreviatedObjectId abbrev = AbbreviatedObjectId + .fromString("4400000000"); + + PackIndex idxOne = indexWith( + "0000000000000000000000000000000000000001", + "3000000000000000000000000000000000000005", + "32fe829a1b000000000000000000000000000001", + "32fe829a1c000000000000000000000000000001", + "32fe829a1c000000000000000000000000000100", + "32fe829a1d000000000000000000000000000000"); + PackIndex idxTwo = indexWith( + // Noise + "8888880000000000000000000000000000000002", + "bbbbbb0000000000000000000000000000000003", + "32fe829a1c000000000000000000000000000010"); + + Map packs = Map.of("p1", idxOne, "p2", idxTwo); + MultiPackIndexWriter writer = new MultiPackIndexWriter(); + ByteArrayOutputStream out = new ByteArrayOutputStream(); + writer.write(NullProgressMonitor.INSTANCE, out, packs); + MultiPackIndex midx = MultiPackIndexLoader + .read(new ByteArrayInputStream(out.toByteArray())); + + + Set results = new HashSet<>(); + midx.resolve(results, abbrev, 200); + + assertEquals(0, results.size()); + } + + @Test + public void jgit_resolve_empty() throws IOException { + AbbreviatedObjectId abbrev = AbbreviatedObjectId + .fromString("4400000000"); + + PackIndex idxOne = FakeIndexFactory.indexOf(List.of()); + PackIndex idxTwo = FakeIndexFactory.indexOf(List.of()); + + Map packs = Map.of("p1", idxOne, "p2", idxTwo); + MultiPackIndexWriter writer = new MultiPackIndexWriter(); + ByteArrayOutputStream out = new ByteArrayOutputStream(); + writer.write(NullProgressMonitor.INSTANCE, out, packs); + MultiPackIndex midx = MultiPackIndexLoader + .read(new ByteArrayInputStream(out.toByteArray())); + + + Set results = new HashSet<>(); + midx.resolve(results, abbrev, 200); + + assertEquals(0, results.size()); + } + + private static PackIndex indexWith(String... oids) { + List idxObjs = new ArrayList<>( + oids.length); + int offset = 12; + for (String oid : oids) { + idxObjs.add(new FakeIndexFactory.IndexObject(oid, offset)); + offset += 10; + } + return FakeIndexFactory.indexOf(idxObjs); + } + private static void assertInIndex(MultiPackIndex midx, int expectedPackId, String oid, long expectedOffset) { MultiPackIndex.PackOffset packOffset = midx diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/midx/MultiPackIndex.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/midx/MultiPackIndex.java index 845576d8c2..15b52391b8 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/midx/MultiPackIndex.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/midx/MultiPackIndex.java @@ -10,7 +10,11 @@ package org.eclipse.jgit.internal.storage.midx; +import java.util.Set; + +import org.eclipse.jgit.lib.AbbreviatedObjectId; import org.eclipse.jgit.lib.AnyObjectId; +import org.eclipse.jgit.lib.ObjectId; /** * An index over multiple packs @@ -47,6 +51,20 @@ public interface MultiPackIndex { */ PackOffset find(AnyObjectId objectId); + /** + * Find objects matching the prefix abbreviation. + * + * @param matches + * set to add any located ObjectIds to. This is an output + * parameter. + * @param id + * prefix to search for. + * @param matchLimit + * maximum number of results to return. At most this many + * ObjectIds should be added to matches before returning. + */ + void resolve(Set matches, AbbreviatedObjectId id, int matchLimit); + /** * Memory size of this multipack index * diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/midx/MultiPackIndexV1.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/midx/MultiPackIndexV1.java index bb122c7a01..be752cc4b5 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/midx/MultiPackIndexV1.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/midx/MultiPackIndexV1.java @@ -12,12 +12,15 @@ package org.eclipse.jgit.internal.storage.midx; import java.nio.charset.StandardCharsets; import java.util.Arrays; +import java.util.Set; import org.eclipse.jgit.annotations.NonNull; import org.eclipse.jgit.annotations.Nullable; import org.eclipse.jgit.internal.JGitText; import org.eclipse.jgit.internal.storage.midx.MultiPackIndexLoader.MultiPackIndexFormatException; +import org.eclipse.jgit.lib.AbbreviatedObjectId; import org.eclipse.jgit.lib.AnyObjectId; +import org.eclipse.jgit.lib.ObjectId; import org.eclipse.jgit.util.NB; /** @@ -68,6 +71,12 @@ class MultiPackIndexV1 implements MultiPackIndex { return result; } + @Override + public void resolve(Set matches, AbbreviatedObjectId id, + int matchLimit) { + idx.resolve(matches, id, matchLimit); + } + @Override public long getMemorySize() { int packNamesSize = Arrays.stream(packNames) @@ -146,7 +155,8 @@ class MultiPackIndexV1 implements MultiPackIndex { } long getMemorySize() { - return (long)byteArrayLengh(offsets) + byteArrayLengh(largeOffsets); + return (long) byteArrayLengh(offsets) + + byteArrayLengh(largeOffsets); } } @@ -226,6 +236,52 @@ class MultiPackIndexV1 implements MultiPackIndex { return -1; } + void resolve(Set matches, AbbreviatedObjectId id, + int matchLimit) { + if (matches.size() >= matchLimit) { + return; + } + + if (oidLookup.length == 0) { + return; + } + + int high = fanoutTable[id.getFirstByte()]; + int low = id.getFirstByte() == 0 ? 0 + : fanoutTable[id.getFirstByte() - 1]; + do { + int p = (low + high) >>> 1; + int cmp = id.prefixCompare(oidLookup, idOffset(p)); + if (cmp < 0) { + high = p; + continue; + } + + if (cmp > 0) { + low = p + 1; + continue; + } + + // Got a match. + // We may have landed in the middle of the matches. Move + // backwards to the start of matches, then walk forwards. + while (0 < p + && id.prefixCompare(oidLookup, idOffset(p - 1)) == 0) { + p--; + } + while (p < high && id.prefixCompare(oidLookup, idOffset(p)) == 0 + && matches.size() < matchLimit) { + matches.add(ObjectId.fromRaw(oidLookup, idOffset(p))); + p++; + } + return; + } while (low < high); + } + + private int idOffset(int position) { + return position * hashLength; + } + long getMemorySize() { return 4L + byteArrayLengh(oidLookup) + (FANOUT * 4); } -- cgit v1.2.3