aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorIvan Frade <ifrade@google.com>2025-01-23 15:49:30 -0800
committerIvan Frade <ifrade@google.com>2025-02-12 12:04:44 -0800
commit072e93fded7229f00002232b33ed91a0ef614ddb (patch)
treebe09481a186af4e3a55025f221a8306dae3c74d4
parentb3230f25e57858958f4fc1d01070dfc0bf08370e (diff)
downloadjgit-072e93fded7229f00002232b33ed91a0ef614ddb.tar.gz
jgit-072e93fded7229f00002232b33ed91a0ef614ddb.zip
midx.MultiPackIndexWriter: a writer for the multipack index format
From git documentation[1]: While the pack-indexes provide fast lookup per packfile, this performance degrades as the number of packfiles increases, because abbreviations need to inspect every packfile and we are more likely to have a miss on our most-recently-used packfile. For some large repositories, repacking into a single packfile is not feasible due to storage space or excessive repack times. (...) The multi-pack-index (MIDX for short) stores a list of objects and their offsets into multiple packfiles. (...) Thus, we can provide O(log N) lookup time for any number of packfiles. This is a writer of the multipack index format. The test only verifies the "shape" of the file, when we get a parser we can check also the values (especially the large offset handling). On the JGit repository, the multipack index generated by this writer passes the validation of `git multi-pack-index verify`. [1] https://git-scm.com/docs/pack-format#_multi_pack_index_midx_files_have_the_following_format Change-Id: I1fca599c4ebf28154f28f039c2c4cfe75b2dc79d
-rw-r--r--org.eclipse.jgit.test/exttst/org/eclipse/jgit/internal/storage/midx/CgitMidxCompatibilityTest.java210
-rw-r--r--org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/midx/MultiPackIndexWriterTest.java161
-rw-r--r--org.eclipse.jgit/resources/org/eclipse/jgit/internal/JGitText.properties2
-rw-r--r--org.eclipse.jgit/src/org/eclipse/jgit/internal/JGitText.java2
-rw-r--r--org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/midx/MultiPackIndexConstants.java55
-rw-r--r--org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/midx/MultiPackIndexWriter.java422
6 files changed, 852 insertions, 0 deletions
diff --git a/org.eclipse.jgit.test/exttst/org/eclipse/jgit/internal/storage/midx/CgitMidxCompatibilityTest.java b/org.eclipse.jgit.test/exttst/org/eclipse/jgit/internal/storage/midx/CgitMidxCompatibilityTest.java
new file mode 100644
index 0000000000..bbff7d433f
--- /dev/null
+++ b/org.eclipse.jgit.test/exttst/org/eclipse/jgit/internal/storage/midx/CgitMidxCompatibilityTest.java
@@ -0,0 +1,210 @@
+/*
+ * Copyright (C) 2025, Google Inc.
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Distribution License v. 1.0 which is available at
+ * https://www.eclipse.org/org/documents/edl-v10.php.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+package org.eclipse.jgit.internal.storage.midx;
+
+import static org.eclipse.jgit.internal.storage.midx.MultiPackIndexConstants.CHUNK_LOOKUP_WIDTH;
+import static org.eclipse.jgit.internal.storage.midx.MultiPackIndexConstants.MIDX_CHUNKID_OBJECTOFFSETS;
+import static org.eclipse.jgit.internal.storage.midx.MultiPackIndexConstants.MIDX_CHUNKID_OIDFANOUT;
+import static org.eclipse.jgit.internal.storage.midx.MultiPackIndexConstants.MIDX_CHUNKID_OIDLOOKUP;
+import static org.eclipse.jgit.internal.storage.midx.MultiPackIndexConstants.MIDX_CHUNKID_PACKNAMES;
+import static org.junit.Assert.assertArrayEquals;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.IOException;
+import java.nio.file.Files;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.eclipse.jgit.internal.storage.file.ObjectDirectory;
+import org.eclipse.jgit.internal.storage.file.Pack;
+import org.eclipse.jgit.internal.storage.file.PackFile;
+import org.eclipse.jgit.internal.storage.file.PackIndex;
+import org.eclipse.jgit.internal.storage.pack.PackExt;
+import org.eclipse.jgit.lib.NullProgressMonitor;
+import org.eclipse.jgit.test.resources.SampleDataRepositoryTestCase;
+import org.eclipse.jgit.util.NB;
+import org.junit.Test;
+
+public class CgitMidxCompatibilityTest extends SampleDataRepositoryTestCase {
+
+ @Test
+ public void jgitMidx_verifyByCgit()
+ throws IOException, InterruptedException {
+ byte[] jgitMidxBytes = generateJGitMidx();
+ writeMidx(jgitMidxBytes);
+ assertEquals("cgit exit code", 0, run_cgit_multipackindex_verify());
+ }
+
+ @Test
+ public void compareBasicChunkSizes()
+ throws IOException, InterruptedException {
+ // We cannot compare byte-by-byte because there are optional chunks and
+ // it is not guaranteed what cgit and jgit will generate
+ byte[] jgitMidxBytes = generateJGitMidx();
+ assertEquals("cgit exit code", 0, run_cgit_multipackindex_write());
+ byte[] cgitMidxBytes = readCgitMidx();
+
+ RawMultiPackIndex jgitMidx = new RawMultiPackIndex(jgitMidxBytes);
+ RawMultiPackIndex cgitMidx = new RawMultiPackIndex(cgitMidxBytes);
+
+ // This is a fixed sized chunk
+ assertEquals(256 * 4, cgitMidx.getChunkSize(MIDX_CHUNKID_OIDFANOUT));
+ assertArrayEquals(cgitMidx.getRawChunk(MIDX_CHUNKID_OIDFANOUT),
+ jgitMidx.getRawChunk(MIDX_CHUNKID_OIDFANOUT));
+
+ assertArrayEquals(cgitMidx.getRawChunk(MIDX_CHUNKID_OIDLOOKUP),
+ jgitMidx.getRawChunk(MIDX_CHUNKID_OIDLOOKUP));
+
+ // The spec has changed from padding packnames to a multiple of four, to
+ // move the packname chunk to the end of the file.
+ // git 2.48 pads the packs names to a multiple of 4
+ // jgit puts the chunk at the end
+ byte[] cgitPacknames = trimPadding(
+ cgitMidx.getRawChunk(MIDX_CHUNKID_PACKNAMES));
+ assertArrayEquals(cgitPacknames,
+ jgitMidx.getRawChunk(MIDX_CHUNKID_PACKNAMES));
+
+ assertArrayEquals(cgitMidx.getRawChunk(MIDX_CHUNKID_OBJECTOFFSETS),
+ jgitMidx.getRawChunk(MIDX_CHUNKID_OBJECTOFFSETS));
+
+ }
+
+ private byte[] generateJGitMidx() throws IOException {
+ Map<String, PackIndex> indexes = new HashMap<>();
+ for (Pack pack : db.getObjectDatabase().getPacks()) {
+ PackFile packFile = pack.getPackFile().create(PackExt.INDEX);
+ indexes.put(packFile.getName(), pack.getIndex());
+ }
+
+ MultiPackIndexWriter writer = new MultiPackIndexWriter();
+ ByteArrayOutputStream out = new ByteArrayOutputStream();
+ writer.write(NullProgressMonitor.INSTANCE, out, indexes);
+ return out.toByteArray();
+ }
+
+ private int run_cgit_multipackindex_write()
+ throws IOException, InterruptedException {
+ String[] command = new String[] { "git", "multi-pack-index", "write" };
+ Process proc = Runtime.getRuntime().exec(command, new String[0],
+ db.getDirectory());
+ return proc.waitFor();
+ }
+
+ private int run_cgit_multipackindex_verify()
+ throws IOException, InterruptedException {
+ String[] command = new String[] { "git", "multi-pack-index", "verify" };
+ Process proc = Runtime.getRuntime().exec(command, new String[0],
+ db.getDirectory());
+ return proc.waitFor();
+ }
+
+ private byte[] readCgitMidx() throws IOException {
+ File midx = getMIdxStandardLocation();
+ assertTrue("cgit multi-pack-index exists", midx.exists());
+ return Files.readAllBytes(midx.toPath());
+ }
+
+ private void writeMidx(byte[] midx) throws IOException {
+ File midxFile = getMIdxStandardLocation();
+ Files.write(midxFile.toPath(), midx);
+ }
+
+ private File getMIdxStandardLocation() {
+ return new File(
+ ((ObjectDirectory) db.getObjectDatabase()).getPackDirectory(),
+ "multi-pack-index");
+ }
+
+ private byte[] trimPadding(byte[] data) {
+ // Chunk MUST have one \0, we want to remove any extra \0
+ int newEnd = data.length - 1;
+ while (newEnd - 1 >= 0 && data[newEnd - 1] == 0) {
+ newEnd--;
+ }
+
+ if (newEnd == data.length - 1) {
+ return data;
+ }
+ return Arrays.copyOfRange(data, 0, newEnd + 1);
+ }
+
+ private static class RawMultiPackIndex {
+ private final List<ChunkSegment> chunks;
+
+ private final byte[] midx;
+
+ private RawMultiPackIndex(byte[] midx) {
+ this.chunks = readChunks(midx);
+ this.midx = midx;
+ }
+
+ long getChunkSize(int chunkId) {
+ int chunkPos = findChunkPosition(chunks, chunkId);
+ return chunks.get(chunkPos + 1).offset
+ - chunks.get(chunkPos).offset;
+ }
+
+ long getOffset(int chunkId) {
+ return chunks.get(findChunkPosition(chunks, chunkId)).offset;
+ }
+
+ private long getNextOffset(int chunkId) {
+ return chunks.get(findChunkPosition(chunks, chunkId) + 1).offset;
+ }
+
+ byte[] getRawChunk(int chunkId) {
+ int start = (int) getOffset(chunkId);
+ int end = (int) getNextOffset(chunkId);
+ return Arrays.copyOfRange(midx, start, end);
+ }
+
+ private static int findChunkPosition(List<ChunkSegment> chunks,
+ int id) {
+ int chunkPos = -1;
+ for (int i = 0; i < chunks.size(); i++) {
+ if (chunks.get(i).id() == id) {
+ chunkPos = i;
+ break;
+ }
+ }
+ if (chunkPos == -1) {
+ throw new IllegalStateException("Chunk doesn't exist");
+ }
+ return chunkPos;
+ }
+
+ private List<ChunkSegment> readChunks(byte[] midx) {
+ // Read the number of "chunkOffsets" (1 byte)
+ int chunkCount = midx[6];
+ byte[] lookupBuffer = new byte[CHUNK_LOOKUP_WIDTH
+ * (chunkCount + 1)];
+ System.arraycopy(midx, 12, lookupBuffer, 0, lookupBuffer.length);
+
+ List<ChunkSegment> chunks = new ArrayList<>(chunkCount + 1);
+ for (int i = 0; i <= chunkCount; i++) {
+ // chunks[chunkCount] is just a marker, in order to record the
+ // length of the last chunk.
+ int id = NB.decodeInt32(lookupBuffer, i * 12);
+ long offset = NB.decodeInt64(lookupBuffer, i * 12 + 4);
+ chunks.add(new ChunkSegment(id, offset));
+ }
+ return chunks;
+ }
+ }
+
+ private record ChunkSegment(int id, long offset) {
+ }
+}
diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/midx/MultiPackIndexWriterTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/midx/MultiPackIndexWriterTest.java
new file mode 100644
index 0000000000..82f3eb1e08
--- /dev/null
+++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/midx/MultiPackIndexWriterTest.java
@@ -0,0 +1,161 @@
+/*
+ * Copyright (C) 2025, Google Inc.
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Distribution License v. 1.0 which is available at
+ * https://www.eclipse.org/org/documents/edl-v10.php.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+package org.eclipse.jgit.internal.storage.midx;
+
+import static org.eclipse.jgit.internal.storage.midx.MultiPackIndexConstants.CHUNK_LOOKUP_WIDTH;
+import static org.eclipse.jgit.internal.storage.midx.MultiPackIndexConstants.MIDX_CHUNKID_LARGEOFFSETS;
+import static org.eclipse.jgit.internal.storage.midx.MultiPackIndexConstants.MIDX_CHUNKID_OBJECTOFFSETS;
+import static org.eclipse.jgit.internal.storage.midx.MultiPackIndexConstants.MIDX_CHUNKID_OIDFANOUT;
+import static org.eclipse.jgit.internal.storage.midx.MultiPackIndexConstants.MIDX_CHUNKID_OIDLOOKUP;
+import static org.eclipse.jgit.internal.storage.midx.MultiPackIndexConstants.MIDX_CHUNKID_PACKNAMES;
+import static org.eclipse.jgit.internal.storage.midx.MultiPackIndexConstants.MIDX_CHUNKID_REVINDEX;
+import static org.junit.Assert.assertEquals;
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Map;
+
+import org.eclipse.jgit.internal.storage.file.PackIndex;
+import org.eclipse.jgit.junit.FakeIndexFactory;
+import org.eclipse.jgit.junit.FakeIndexFactory.IndexObject;
+import org.eclipse.jgit.lib.NullProgressMonitor;
+import org.eclipse.jgit.util.NB;
+import org.junit.Test;
+
+public class MultiPackIndexWriterTest {
+
+ @Test
+ public void write_allSmallOffsets() throws IOException {
+ PackIndex index1 = indexOf(
+ object("0000000000000000000000000000000000000001", 500),
+ object("0000000000000000000000000000000000000003", 1500),
+ object("0000000000000000000000000000000000000005", 3000));
+ PackIndex index2 = indexOf(
+ object("0000000000000000000000000000000000000002", 500),
+ object("0000000000000000000000000000000000000004", 1500),
+ object("0000000000000000000000000000000000000006", 3000));
+
+ Map<String, PackIndex> data = Map.of("packname1", index1, "packname2",
+ index2);
+
+ MultiPackIndexWriter writer = new MultiPackIndexWriter();
+ ByteArrayOutputStream out = new ByteArrayOutputStream();
+ writer.write(NullProgressMonitor.INSTANCE, out, data);
+ // header (12 bytes)
+ // + chunkHeader (6 * 12 bytes)
+ // + fanout table (256 * 4 bytes)
+ // + OIDs (6 * 20 bytes)
+ // + (pack, offset) pairs (6 * 8)
+ // + RIDX (6 * 4 bytes)
+ // + packfile names (2 * 10)
+ // + checksum (20)
+ assertEquals(1340, out.size());
+ List<Integer> chunkIds = readChunkIds(out);
+ assertEquals(5, chunkIds.size());
+ assertEquals(0, chunkIds.indexOf(MIDX_CHUNKID_OIDFANOUT));
+ assertEquals(1, chunkIds.indexOf(MIDX_CHUNKID_OIDLOOKUP));
+ assertEquals(2, chunkIds.indexOf(MIDX_CHUNKID_OBJECTOFFSETS));
+ assertEquals(3, chunkIds.indexOf(MIDX_CHUNKID_REVINDEX));
+ assertEquals(4, chunkIds.indexOf(MIDX_CHUNKID_PACKNAMES));
+ }
+
+ @Test
+ public void write_smallOffset_limit() throws IOException {
+ PackIndex index1 = indexOf(
+ object("0000000000000000000000000000000000000001", 500),
+ object("0000000000000000000000000000000000000003", 1500),
+ object("0000000000000000000000000000000000000005", (1L << 32) -1));
+ PackIndex index2 = indexOf(
+ object("0000000000000000000000000000000000000002", 500),
+ object("0000000000000000000000000000000000000004", 1500),
+ object("0000000000000000000000000000000000000006", 3000));
+ Map<String, PackIndex> data =
+ Map.of("packname1", index1, "packname2", index2);
+
+ MultiPackIndexWriter writer = new MultiPackIndexWriter();
+ ByteArrayOutputStream out = new ByteArrayOutputStream();
+ writer.write(NullProgressMonitor.INSTANCE, out, data);
+ // header (12 bytes)
+ // + chunkHeader (6 * 12 bytes)
+ // + fanout table (256 * 4 bytes)
+ // + OIDs (6 * 20 bytes)
+ // + (pack, offset) pairs (6 * 8)
+ // + RIDX (6 * 4 bytes)
+ // + packfile names (2 * 10)
+ // + checksum (20)
+ assertEquals(1340, out.size());
+ List<Integer> chunkIds = readChunkIds(out);
+ assertEquals(5, chunkIds.size());
+ assertEquals(0, chunkIds.indexOf(MIDX_CHUNKID_OIDFANOUT));
+ assertEquals(1, chunkIds.indexOf(MIDX_CHUNKID_OIDLOOKUP));
+ assertEquals(2, chunkIds.indexOf(MIDX_CHUNKID_OBJECTOFFSETS));
+ assertEquals(3, chunkIds.indexOf(MIDX_CHUNKID_REVINDEX));
+ assertEquals(4, chunkIds.indexOf(MIDX_CHUNKID_PACKNAMES));
+ }
+
+ @Test
+ public void write_largeOffset() throws IOException {
+ PackIndex index1 = indexOf(
+ object("0000000000000000000000000000000000000001", 500),
+ object("0000000000000000000000000000000000000003", 1500),
+ object("0000000000000000000000000000000000000005", 1L << 32));
+ PackIndex index2 = indexOf(
+ object("0000000000000000000000000000000000000002", 500),
+ object("0000000000000000000000000000000000000004", 1500),
+ object("0000000000000000000000000000000000000006", 3000));
+ Map<String, PackIndex> data =
+ Map.of("packname1", index1, "packname2", index2);
+
+ MultiPackIndexWriter writer = new MultiPackIndexWriter();
+ ByteArrayOutputStream out = new ByteArrayOutputStream();
+ writer.write(NullProgressMonitor.INSTANCE, out, data);
+ // header (12 bytes)
+ // + chunkHeader (7 * 12 bytes)
+ // + fanout table (256 * 4 bytes)
+ // + OIDs (6 * 20 bytes)
+ // + (pack, offset) pairs (6 * 8)
+ // + (large-offset) (1 * 8)
+ // + RIDX (6 * 4 bytes)
+ // + packfile names (2 * 10)
+ // + checksum (20)
+ assertEquals(1360, out.size());
+ List<Integer> chunkIds = readChunkIds(out);
+ assertEquals(6, chunkIds.size());
+ assertEquals(0, chunkIds.indexOf(MIDX_CHUNKID_OIDFANOUT));
+ assertEquals(1, chunkIds.indexOf(MIDX_CHUNKID_OIDLOOKUP));
+ assertEquals(2, chunkIds.indexOf(MIDX_CHUNKID_OBJECTOFFSETS));
+ assertEquals(3, chunkIds.indexOf(MIDX_CHUNKID_LARGEOFFSETS));
+ assertEquals(4, chunkIds.indexOf(MIDX_CHUNKID_REVINDEX));
+ assertEquals(5, chunkIds.indexOf(MIDX_CHUNKID_PACKNAMES));
+ }
+
+ private List<Integer> readChunkIds(ByteArrayOutputStream out) {
+ List<Integer> chunkIds = new ArrayList<>();
+ byte[] raw = out.toByteArray();
+ int numChunks = raw[6];
+ int position = 12;
+ for (int i = 0; i < numChunks; i++) {
+ chunkIds.add(NB.decodeInt32(raw, position));
+ position += CHUNK_LOOKUP_WIDTH;
+ }
+ return chunkIds;
+ }
+
+ private static PackIndex indexOf(IndexObject... objs) {
+ return FakeIndexFactory.indexOf(Arrays.asList(objs));
+ }
+
+ private static IndexObject object(String name, long offset) {
+ return new IndexObject(name, offset);
+ }
+}
diff --git a/org.eclipse.jgit/resources/org/eclipse/jgit/internal/JGitText.properties b/org.eclipse.jgit/resources/org/eclipse/jgit/internal/JGitText.properties
index 7c47d3a04f..392f4e9954 100644
--- a/org.eclipse.jgit/resources/org/eclipse/jgit/internal/JGitText.properties
+++ b/org.eclipse.jgit/resources/org/eclipse/jgit/internal/JGitText.properties
@@ -529,6 +529,8 @@ mkDirsFailed=Creating directories for {0} failed
month=month
months=months
monthsAgo={0} months ago
+multiPackIndexUnexpectedSize=MultiPack index: expected %d bytes but out has %d bytes
+multiPackIndexWritingCancelled=Multipack index writing was canceled
multipleMergeBasesFor=Multiple merge bases for:\n {0}\n {1} found:\n {2}\n {3}
nameMustNotBeNullOrEmpty=Ref name must not be null or empty.
need2Arguments=Need 2 arguments
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/JGitText.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/JGitText.java
index 07e882498c..d7fe456946 100644
--- a/org.eclipse.jgit/src/org/eclipse/jgit/internal/JGitText.java
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/JGitText.java
@@ -559,6 +559,8 @@ public class JGitText extends TranslationBundle {
/***/ public String month;
/***/ public String months;
/***/ public String monthsAgo;
+ /***/ public String multiPackIndexUnexpectedSize;
+ /***/ public String multiPackIndexWritingCancelled;
/***/ public String multipleMergeBasesFor;
/***/ public String nameMustNotBeNullOrEmpty;
/***/ public String need2Arguments;
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/midx/MultiPackIndexConstants.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/midx/MultiPackIndexConstants.java
new file mode 100644
index 0000000000..6122a9a143
--- /dev/null
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/midx/MultiPackIndexConstants.java
@@ -0,0 +1,55 @@
+/*
+ * Copyright (C) 2025, Google Inc.
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Distribution License v. 1.0 which is available at
+ * https://www.eclipse.org/org/documents/edl-v10.php.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+package org.eclipse.jgit.internal.storage.midx;
+
+class MultiPackIndexConstants {
+ static final int MIDX_SIGNATURE = 0x4d494458; /* MIDX */
+
+ static final byte MIDX_VERSION = 1;
+
+ /**
+ * We infer the length of object IDs (OIDs) from this value:
+ *
+ * <pre>
+ * 1 => SHA-1
+ * 2 => SHA-256
+ * </pre>
+ */
+ static final byte OID_HASH_VERSION = 1;
+
+ static final int MULTIPACK_INDEX_FANOUT_SIZE = 4 * 256;
+
+ /**
+ * First 4 bytes describe the chunk id. Value 0 is a terminating label.
+ * Other 8 bytes provide the byte-offset in current file for chunk to start.
+ */
+ static final int CHUNK_LOOKUP_WIDTH = 12;
+
+ /** "PNAM" chunk */
+ static final int MIDX_CHUNKID_PACKNAMES = 0x504e414d;
+
+ /** "OIDF" chunk */
+ static final int MIDX_CHUNKID_OIDFANOUT = 0x4f494446;
+
+ /** "OIDL" chunk */
+ static final int MIDX_CHUNKID_OIDLOOKUP = 0x4f49444c;
+
+ /** "OOFF" chunk */
+ static final int MIDX_CHUNKID_OBJECTOFFSETS = 0x4f4f4646;
+
+ /** "LOFF" chunk */
+ static final int MIDX_CHUNKID_LARGEOFFSETS = 0x4c4f4646;
+
+ /** "RIDX" chunk */
+ static final int MIDX_CHUNKID_REVINDEX = 0x52494458;
+
+ private MultiPackIndexConstants() {
+ }
+}
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/midx/MultiPackIndexWriter.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/midx/MultiPackIndexWriter.java
new file mode 100644
index 0000000000..28d2fb2334
--- /dev/null
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/midx/MultiPackIndexWriter.java
@@ -0,0 +1,422 @@
+/*
+ * Copyright (C) 2025, Google Inc.
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Distribution License v. 1.0 which is available at
+ * https://www.eclipse.org/org/documents/edl-v10.php.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+package org.eclipse.jgit.internal.storage.midx;
+
+import static org.eclipse.jgit.internal.storage.midx.MultiPackIndexConstants.CHUNK_LOOKUP_WIDTH;
+import static org.eclipse.jgit.internal.storage.midx.MultiPackIndexConstants.MIDX_CHUNKID_LARGEOFFSETS;
+import static org.eclipse.jgit.internal.storage.midx.MultiPackIndexConstants.MIDX_CHUNKID_OBJECTOFFSETS;
+import static org.eclipse.jgit.internal.storage.midx.MultiPackIndexConstants.MIDX_CHUNKID_OIDFANOUT;
+import static org.eclipse.jgit.internal.storage.midx.MultiPackIndexConstants.MIDX_CHUNKID_OIDLOOKUP;
+import static org.eclipse.jgit.internal.storage.midx.MultiPackIndexConstants.MIDX_CHUNKID_PACKNAMES;
+import static org.eclipse.jgit.internal.storage.midx.MultiPackIndexConstants.MIDX_CHUNKID_REVINDEX;
+import static org.eclipse.jgit.internal.storage.midx.MultiPackIndexConstants.MIDX_SIGNATURE;
+import static org.eclipse.jgit.internal.storage.midx.MultiPackIndexConstants.MIDX_VERSION;
+import static org.eclipse.jgit.internal.storage.midx.MultiPackIndexConstants.MULTIPACK_INDEX_FANOUT_SIZE;
+import static org.eclipse.jgit.internal.storage.midx.MultiPackIndexConstants.OID_HASH_VERSION;
+import static org.eclipse.jgit.lib.Constants.OBJECT_ID_LENGTH;
+
+import java.io.IOException;
+import java.io.InterruptedIOException;
+import java.io.OutputStream;
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+
+import org.eclipse.jgit.internal.JGitText;
+import org.eclipse.jgit.internal.storage.file.PackIndex;
+import org.eclipse.jgit.internal.storage.io.CancellableDigestOutputStream;
+import org.eclipse.jgit.internal.storage.midx.PackIndexMerger.MidxMutableEntry;
+import org.eclipse.jgit.lib.ProgressMonitor;
+import org.eclipse.jgit.util.NB;
+
+/**
+ * Writes a collection of indexes as a multipack index.
+ * <p>
+ * See <a href=
+ * "https://git-scm.com/docs/pack-format#_multi_pack_index_midx_files_have_the_following_format">multipack
+ * index format spec</a>
+ */
+public class MultiPackIndexWriter {
+
+ private static final int LIMIT_31_BITS = (1 << 31) - 1;
+
+ private static final int MIDX_HEADER_SIZE = 12;
+
+ /**
+ * Writes the inputs in the multipack index format in the outputStream.
+ *
+ * @param monitor
+ * progress monitor
+ * @param outputStream
+ * stream to write the multipack index file
+ * @param inputs
+ * pairs of name and index for each pack to include in the
+ * multipack index.
+ * @throws IOException
+ * Error writing to the stream
+ */
+ public void write(ProgressMonitor monitor, OutputStream outputStream,
+ Map<String, PackIndex> inputs) throws IOException {
+ PackIndexMerger data = new PackIndexMerger(inputs);
+
+ // List of chunks in the order they need to be written
+ List<ChunkHeader> chunkHeaders = createChunkHeaders(data);
+ long expectedSize = calculateExpectedSize(chunkHeaders);
+ try (CancellableDigestOutputStream out = new CancellableDigestOutputStream(
+ monitor, outputStream)) {
+ writeHeader(out, chunkHeaders.size(), data.getPackCount());
+ writeChunkLookup(out, chunkHeaders);
+
+ WriteContext ctx = new WriteContext(out, data);
+ for (ChunkHeader chunk : chunkHeaders) {
+ chunk.writerFn.write(ctx);
+ }
+ writeCheckSum(out);
+ if (expectedSize != out.length()) {
+ throw new IllegalStateException(String.format(
+ JGitText.get().multiPackIndexUnexpectedSize,
+ expectedSize, out.length()));
+ }
+ } catch (InterruptedIOException e) {
+ throw new IOException(JGitText.get().multiPackIndexWritingCancelled,
+ e);
+ }
+ }
+
+ private static long calculateExpectedSize(List<ChunkHeader> chunks) {
+ int chunkLookup = (chunks.size() + 1) * CHUNK_LOOKUP_WIDTH;
+ long chunkContent = chunks.stream().mapToLong(c -> c.size).sum();
+ return /* header */ 12 + chunkLookup + chunkContent + /* checksum */ 20;
+ }
+
+ private List<ChunkHeader> createChunkHeaders(PackIndexMerger data) {
+ List<ChunkHeader> chunkHeaders = new ArrayList<>();
+ chunkHeaders.add(new ChunkHeader(MIDX_CHUNKID_OIDFANOUT,
+ MULTIPACK_INDEX_FANOUT_SIZE, this::writeFanoutTable));
+ chunkHeaders.add(new ChunkHeader(MIDX_CHUNKID_OIDLOOKUP,
+ (long) data.getUniqueObjectCount() * OBJECT_ID_LENGTH,
+ this::writeOidLookUp));
+ chunkHeaders.add(new ChunkHeader(MIDX_CHUNKID_OBJECTOFFSETS,
+ 8L * data.getUniqueObjectCount(), this::writeObjectOffsets));
+ if (data.needsLargeOffsetsChunk()) {
+ chunkHeaders.add(new ChunkHeader(MIDX_CHUNKID_LARGEOFFSETS,
+ 8L * data.getOffsetsOver31BitsCount(),
+ this::writeObjectLargeOffsets));
+ }
+ chunkHeaders.add(new ChunkHeader(MIDX_CHUNKID_REVINDEX,
+ 4L * data.getUniqueObjectCount(), this::writeRidx));
+
+ int packNamesSize = data.getPackNames().stream()
+ .mapToInt(String::length).map(i -> i + 1 /* null at the end */)
+ .sum();
+ chunkHeaders.add(new ChunkHeader(MIDX_CHUNKID_PACKNAMES, packNamesSize,
+ this::writePackfileNames));
+ return chunkHeaders;
+ }
+
+ /**
+ * Write the first 12 bytes of the multipack index.
+ * <p>
+ * These bytes include things like magic number, version, number of
+ * chunks...
+ *
+ * @param out
+ * output stream to write
+ * @param numChunks
+ * number of chunks this multipack index is going to have
+ * @param packCount
+ * number of packs covered by this multipack index
+ * @throws IOException
+ * error writing to the output stream
+ */
+ private void writeHeader(CancellableDigestOutputStream out, int numChunks,
+ int packCount) throws IOException {
+ byte[] headerBuffer = new byte[MIDX_HEADER_SIZE];
+ NB.encodeInt32(headerBuffer, 0, MIDX_SIGNATURE);
+ byte[] buff = { MIDX_VERSION, OID_HASH_VERSION, (byte) numChunks,
+ (byte) 0 };
+ System.arraycopy(buff, 0, headerBuffer, 4, 4);
+ NB.encodeInt32(headerBuffer, 8, packCount);
+ out.write(headerBuffer, 0, headerBuffer.length);
+ out.flush();
+ }
+
+ /**
+ * Write a table of "chunkId, start-offset", with a special value "0,
+ * end-of-previous_chunk", to mark the end.
+ *
+ * @param out
+ * output stream to write
+ * @param chunkHeaders
+ * list of chunks in the order they are expected to be written
+ * @throws IOException
+ * error writing to the output stream
+ */
+ private void writeChunkLookup(CancellableDigestOutputStream out,
+ List<ChunkHeader> chunkHeaders) throws IOException {
+
+ // first chunk will start at header + this lookup block
+ long chunkStart = MIDX_HEADER_SIZE
+ + (long) (chunkHeaders.size() + 1) * CHUNK_LOOKUP_WIDTH;
+ byte[] chunkEntry = new byte[CHUNK_LOOKUP_WIDTH];
+ for (ChunkHeader chunkHeader : chunkHeaders) {
+ NB.encodeInt32(chunkEntry, 0, chunkHeader.chunkId);
+ NB.encodeInt64(chunkEntry, 4, chunkStart);
+ out.write(chunkEntry);
+ chunkStart += chunkHeader.size;
+ }
+ // Terminating label for the block
+ // (chunkid 0, offset where the next block would start)
+ NB.encodeInt32(chunkEntry, 0, 0);
+ NB.encodeInt64(chunkEntry, 4, chunkStart);
+ out.write(chunkEntry);
+ }
+
+ /**
+ * Write the fanout table for the object ids
+ * <p>
+ * Table with 256 entries (one per possible first byte), where the ith entry, F[i], stores the
+ * number of OIDs with first byte at most i. Thus, F[255] stores the total
+ * number of objects.
+ *
+ * @param ctx
+ * write context
+ * @throws IOException
+ * error writing to the output stream
+ */
+
+ private void writeFanoutTable(WriteContext ctx) throws IOException {
+ byte[] tmp = new byte[4];
+ int[] fanout = new int[256];
+ Iterator<MidxMutableEntry> iterator = ctx.data.bySha1Iterator();
+ while (iterator.hasNext()) {
+ MidxMutableEntry e = iterator.next();
+ fanout[e.getObjectId().getFirstByte() & 0xff]++;
+ }
+ for (int i = 1; i < fanout.length; i++) {
+ fanout[i] += fanout[i - 1];
+ }
+ for (int n : fanout) {
+ NB.encodeInt32(tmp, 0, n);
+ ctx.out.write(tmp, 0, 4);
+ }
+ }
+
+ /**
+ * Write the OID lookup chunk
+ * <p>
+ * A list of OIDs in sha1 order.
+ *
+ * @param ctx
+ * write context
+ * @throws IOException
+ * error writing to the output stream
+ */
+ private void writeOidLookUp(WriteContext ctx) throws IOException {
+ byte[] tmp = new byte[OBJECT_ID_LENGTH];
+
+ Iterator<MidxMutableEntry> iterator = ctx.data.bySha1Iterator();
+ while (iterator.hasNext()) {
+ MidxMutableEntry e = iterator.next();
+ e.getObjectId().copyRawTo(tmp, 0);
+ ctx.out.write(tmp, 0, OBJECT_ID_LENGTH);
+ }
+ }
+
+ /**
+ * Write the object offsets chunk
+ * <p>
+ * A list of offsets, parallel to the list of OIDs. If the offset is too
+ * large (see {@link #fitsIn31bits(long)}), this contains the position in
+ * the large offsets list (marked with a 1 in the most significant bit).
+ *
+ * @param ctx
+ * write context
+ * @throws IOException
+ * error writing to the output stream
+ */
+ private void writeObjectOffsets(WriteContext ctx) throws IOException {
+ byte[] entry = new byte[8];
+ Iterator<MidxMutableEntry> iterator = ctx.data.bySha1Iterator();
+ while (iterator.hasNext()) {
+ MidxMutableEntry e = iterator.next();
+ NB.encodeInt32(entry, 0, e.getPackId());
+ if (!ctx.data.needsLargeOffsetsChunk()
+ || fitsIn31bits(e.getOffset())) {
+ NB.encodeInt32(entry, 4, (int) e.getOffset());
+ } else {
+ int offloadedPosition = ctx.largeOffsets.append(e.getOffset());
+ NB.encodeInt32(entry, 4, offloadedPosition | (1 << 31));
+ }
+ ctx.out.write(entry);
+ }
+ }
+
+ /**
+ * Writes the reverse index chunk
+ * <p>
+ * This stores the position of the objects in the main index, ordered first
+ * by pack and then by offset
+ *
+ * @param ctx
+ * write context
+ * @throws IOException
+ * error writing to the output stream
+ */
+ private void writeRidx(WriteContext ctx) throws IOException {
+ Map<Integer, List<OffsetPosition>> packOffsets = new HashMap<>(
+ ctx.data.getPackCount());
+ // TODO(ifrade): Brute force solution loading all offsets/packs in
+ // memory. We could also iterate reverse indexes looking up
+ // their position in the midx (and discarding if the pack doesn't
+ // match).
+ Iterator<MidxMutableEntry> iterator = ctx.data.bySha1Iterator();
+ int midxPosition = 0;
+ while (iterator.hasNext()) {
+ MidxMutableEntry e = iterator.next();
+ OffsetPosition op = new OffsetPosition(e.getOffset(), midxPosition);
+ midxPosition++;
+ packOffsets.computeIfAbsent(e.getPackId(), k -> new ArrayList<>())
+ .add(op);
+ }
+
+ for (int i = 0; i < ctx.data.getPackCount(); i++) {
+ List<OffsetPosition> offsetsForPack = packOffsets.get(i);
+ if (offsetsForPack.isEmpty()) {
+ continue;
+ }
+ offsetsForPack.sort(Comparator.comparing(OffsetPosition::offset));
+ byte[] ridxForPack = new byte[4 * offsetsForPack.size()];
+ for (int j = 0; j < offsetsForPack.size(); j++) {
+ NB.encodeInt32(ridxForPack, j * 4,
+ offsetsForPack.get(j).position);
+ }
+ ctx.out.write(ridxForPack);
+ }
+ }
+
+ /**
+ * Write the large offset chunk
+ * <p>
+ * A list of large offsets (long). The regular offset chunk will point to a
+ * position here.
+ *
+ * @param ctx
+ * writer context
+ * @throws IOException
+ * error writing to the output stream
+ */
+ private void writeObjectLargeOffsets(WriteContext ctx) throws IOException {
+ ctx.out.write(ctx.largeOffsets.offsets, 0,
+ ctx.largeOffsets.bytePosition);
+ }
+
+ /**
+ * Write the list of packfiles chunk
+ * <p>
+ * List of packfiles (in lexicographical order) with an \0 at the end
+ *
+ * @param ctx
+ * writer context
+ * @throws IOException
+ * error writing to the output stream
+ */
+ private void writePackfileNames(WriteContext ctx) throws IOException {
+ for (String packName : ctx.data.getPackNames()) {
+ // Spec doesn't talk about encoding.
+ ctx.out.write(packName.getBytes(StandardCharsets.UTF_8));
+ ctx.out.write(0);
+ }
+ }
+
+ /**
+ * Write final checksum of the data written to the stream
+ *
+ * @param out
+ * output stream used to write
+ * @throws IOException
+ * error writing to the output stream
+ */
+ private void writeCheckSum(CancellableDigestOutputStream out)
+ throws IOException {
+ out.write(out.getDigest());
+ out.flush();
+ }
+
+
+ private record OffsetPosition(long offset, int position) {
+ }
+
+ /**
+ * If there is at least one offset value larger than 2^32-1, then the large
+ * offset chunk must exist, and offsets larger than 2^31-1 must be stored in
+ * it instead
+ *
+ * @param offset object offset
+ *
+ * @return true if the offset fits in 31 bits
+ */
+ private static boolean fitsIn31bits(long offset) {
+ return offset <= LIMIT_31_BITS;
+ }
+
+ private static class LargeOffsets {
+ private final byte[] offsets;
+
+ private int bytePosition;
+
+ LargeOffsets(int largeOffsetsCount) {
+ offsets = new byte[largeOffsetsCount * 8];
+ bytePosition = 0;
+ }
+
+ /**
+ * Add an offset to the large offset chunk
+ *
+ * @param largeOffset
+ * a large offset
+ * @return the position of the just inserted offset (as in number of
+ * offsets, NOT in bytes)
+ */
+ int append(long largeOffset) {
+ int at = bytePosition;
+ NB.encodeInt64(offsets, at, largeOffset);
+ bytePosition += 8;
+ return at / 8;
+ }
+ }
+
+ private record ChunkHeader(int chunkId, long size, ChunkWriter writerFn) {
+ }
+
+ @FunctionalInterface
+ private interface ChunkWriter {
+ void write(WriteContext ctx) throws IOException;
+ }
+
+ private static class WriteContext {
+ final CancellableDigestOutputStream out;
+
+ final PackIndexMerger data;
+
+ final LargeOffsets largeOffsets;
+
+ WriteContext(CancellableDigestOutputStream out, PackIndexMerger data) {
+ this.out = out;
+ this.data = data;
+ this.largeOffsets = new LargeOffsets(
+ data.getOffsetsOver31BitsCount());
+ }
+ }
+}