]> source.dussan.org Git - jgit.git/commitdiff
PackReverseIndexWriter: write out version 1 reverse index file 81/197581/28
authorAnna Papitto <annapapitto@google.com>
Thu, 27 Apr 2023 18:01:30 +0000 (11:01 -0700)
committerAnna Papitto <annapapitto@google.com>
Fri, 28 Apr 2023 17:19:18 +0000 (10:19 -0700)
The reverse index for a pack is used to quickly find an object's
position in the pack's forward index based on that object's pack offset.
It is currently computed from the forward index by sorting the index
entries by the corresponding pack offset. This computation uses
bucket sort with insertion sort, which has an average runtime of
O(n log n) and worst case runtime of O(n^2); and memory usage of
3*size(int)*n because it maintains 3 int arrays, even after sorting is
completed. The computation must be performed every time that the reverse
index object is created in memory.

In contrast, Cgit persists a pack reverse index file to avoid
recomputing the reverse index ordering every time that it is needed.
Instead they write a file with format
https://git-scm.com/docs/pack-format#_pack_rev_files_have_the_format
which can later be read and parsed into an in-memory reverse index each
time it is needed.

Introduce these reverse index files to JGit. PackReverseIndexWriter
writes out a reverse index file to be read later when needed. Subclass
PackReverseIndexWriterV1 writes a file with the official version 1
format.

To avoid temporarily allocating an Integer collection while sorting and
writing out the contents, using memory 4*size(Integer)*n, use an
IntList and its #sort method, which uses quicksort.

Change-Id: I6437745777a16f723e2f1cfcce4e0d94e599dcee
Signed-off-by: Anna Papitto <annapapitto@google.com>
org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/file/PackReverseIndexWriterTest.java [new file with mode: 0644]
org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/file/PackReverseIndexWriterV1Test.java [new file with mode: 0644]
org.eclipse.jgit/resources/org/eclipse/jgit/internal/JGitText.properties
org.eclipse.jgit/src/org/eclipse/jgit/internal/JGitText.java
org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/PackReverseIndexWriter.java [new file with mode: 0644]
org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/PackReverseIndexWriterV1.java [new file with mode: 0644]

diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/file/PackReverseIndexWriterTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/file/PackReverseIndexWriterTest.java
new file mode 100644 (file)
index 0000000..2209764
--- /dev/null
@@ -0,0 +1,43 @@
+/*
+ * Copyright (C) 2023, Google LLC and others
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Distribution License v. 1.0 which is available at
+ * https://www.eclipse.org/org/documents/edl-v10.php.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+package org.eclipse.jgit.internal.storage.file;
+
+import static org.junit.Assert.assertThrows;
+import static org.junit.Assert.assertTrue;
+
+import java.io.ByteArrayOutputStream;
+
+import org.junit.Test;
+
+public class PackReverseIndexWriterTest {
+
+       @Test
+       public void createWriter_defaultVersion() {
+               PackReverseIndexWriter version1 = PackReverseIndexWriter
+                               .createWriter(new ByteArrayOutputStream());
+
+               assertTrue(version1 instanceof PackReverseIndexWriterV1);
+       }
+
+       @Test
+       public void createWriter_version1() {
+               PackReverseIndexWriter version1 = PackReverseIndexWriter
+                               .createWriter(new ByteArrayOutputStream(), 1);
+
+               assertTrue(version1 instanceof PackReverseIndexWriterV1);
+       }
+
+       @Test
+       public void createWriter_unsupportedVersion() {
+               assertThrows(IllegalArgumentException.class,
+                               () -> PackReverseIndexWriter
+                                               .createWriter(new ByteArrayOutputStream(), 2));
+       }
+}
diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/file/PackReverseIndexWriterV1Test.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/file/PackReverseIndexWriterV1Test.java
new file mode 100644 (file)
index 0000000..0124887
--- /dev/null
@@ -0,0 +1,122 @@
+/*
+ * Copyright (C) 2023, Google LLC and others
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Distribution License v. 1.0 which is available at
+ * https://www.eclipse.org/org/documents/edl-v10.php.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+package org.eclipse.jgit.internal.storage.file;
+
+import static org.eclipse.jgit.lib.Constants.OBJECT_ID_STRING_LENGTH;
+import static org.eclipse.jgit.lib.Constants.OBJ_BLOB;
+import static org.eclipse.jgit.lib.Constants.OBJ_COMMIT;
+import static org.eclipse.jgit.lib.Constants.OBJ_TREE;
+import static org.junit.Assert.assertArrayEquals;
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.eclipse.jgit.lib.ObjectId;
+import org.eclipse.jgit.transport.PackedObjectInfo;
+import org.junit.Test;
+
+public class PackReverseIndexWriterV1Test {
+
+       private static byte[] PACK_CHECKSUM = new byte[] { 'P', 'A', 'C', 'K', 'C',
+                       'H', 'E', 'C', 'K', 'S', 'U', 'M', '3', '4', '5', '6', '7', '8',
+                       '9', '0', };
+
+       @Test
+       public void write_noObjects() throws IOException {
+               ByteArrayOutputStream out = new ByteArrayOutputStream();
+               PackReverseIndexWriter writer = PackReverseIndexWriter.createWriter(out,
+                               1);
+               List<PackedObjectInfo> objectsSortedByName = new ArrayList<>();
+
+               writer.write(objectsSortedByName, PACK_CHECKSUM);
+
+               byte[] expected = new byte[] { 'R', 'I', 'D', 'X', // magic
+                               0x00, 0x00, 0x00, 0x01, // file version
+                               0x00, 0x00, 0x00, 0x01, // oid version
+                               // pack checksum to copy into at byte 12
+                               0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+                               // checksum
+                               (byte) 0xd1, 0x1d, 0x17, (byte) 0xd5, (byte) 0xa1, 0x5c,
+                               (byte) 0x8f, 0x45, 0x7e, 0x06, (byte) 0x91, (byte) 0xf2, 0x7e,
+                               0x20, 0x35, 0x2c, (byte) 0xdc, 0x4c, 0x46, (byte) 0xe4, };
+               System.arraycopy(PACK_CHECKSUM, 0, expected, 12, PACK_CHECKSUM.length);
+               assertArrayEquals(expected, out.toByteArray());
+       }
+
+       @Test
+       public void write_oneObject() throws IOException {
+               ByteArrayOutputStream out = new ByteArrayOutputStream();
+               PackReverseIndexWriter writer = PackReverseIndexWriter.createWriter(out,
+                               1);
+               PackedObjectInfo a = objectInfo("a", OBJ_COMMIT, 0);
+               List<PackedObjectInfo> objectsSortedByName = List.of(a);
+
+               writer.write(objectsSortedByName, PACK_CHECKSUM);
+
+               byte[] expected = new byte[] { 'R', 'I', 'D', 'X', // magic
+                               0x00, 0x00, 0x00, 0x01, // file version
+                               0x00, 0x00, 0x00, 0x01, // oid version
+                               0x00, 0x00, 0x00, 0x00, // 0: "a" -> 0
+                               // pack checksum to copy into at byte 16
+                               0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+                               // checksum
+                               0x48, 0x04, 0x29, 0x69, 0x2f, (byte) 0xf3, (byte) 0x80,
+                               (byte) 0xa1, (byte) 0xf5, (byte) 0x92, (byte) 0xc2, 0x21, 0x46,
+                               (byte) 0xd5, 0x08, 0x44, (byte) 0xa4, (byte) 0xf4, (byte) 0xc0,
+                               (byte) 0xec, };
+               System.arraycopy(PACK_CHECKSUM, 0, expected, 16, PACK_CHECKSUM.length);
+               assertArrayEquals(expected, out.toByteArray());
+       }
+
+       @Test
+       public void write_multipleObjects() throws IOException {
+               ByteArrayOutputStream out = new ByteArrayOutputStream();
+               PackReverseIndexWriter writer = PackReverseIndexWriter.createWriter(out,
+                               1);
+               PackedObjectInfo a = objectInfo("a", OBJ_BLOB, 20);
+               PackedObjectInfo b = objectInfo("b", OBJ_COMMIT, 0);
+               PackedObjectInfo c = objectInfo("c", OBJ_COMMIT, 52);
+               PackedObjectInfo d = objectInfo("d", OBJ_TREE, 7);
+               PackedObjectInfo e = objectInfo("e", OBJ_COMMIT, 38);
+               List<PackedObjectInfo> objectsSortedByName = List.of(a, b, c, d, e);
+
+               writer.write(objectsSortedByName, PACK_CHECKSUM);
+
+               byte[] expected = new byte[] { 'R', 'I', 'D', 'X', // magic
+                               0x00, 0x00, 0x00, 0x01, // file version
+                               0x00, 0x00, 0x00, 0x01, // oid version
+                               0x00, 0x00, 0x00, 0x01, // offset 0: "b" -> index @ 1
+                               0x00, 0x00, 0x00, 0x03, // offset 7: "d" -> index @ 3
+                               0x00, 0x00, 0x00, 0x00, // offset 20: "a" -> index @ 0
+                               0x00, 0x00, 0x00, 0x04, // offset 38: "e" -> index @ 4
+                               0x00, 0x00, 0x00, 0x02, // offset 52: "c" -> index @ 2
+                               // pack checksum to copy into at byte 32
+                               0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+                               // checksum
+                               (byte) 0xe3, 0x1c, (byte) 0xd3, (byte) 0xeb, (byte) 0x83, 0x47,
+                               (byte) 0x80, (byte) 0xb1, (byte) 0xe0, 0x3c, 0x2b, 0x25,
+                               (byte) 0xd6, 0x3e, (byte) 0xdc, (byte) 0xde, (byte) 0xbe, 0x4b,
+                               0x0a, (byte) 0xe2, };
+               System.arraycopy(PACK_CHECKSUM, 0, expected, 32, PACK_CHECKSUM.length);
+               assertArrayEquals(expected, out.toByteArray());
+       }
+
+       private static PackedObjectInfo objectInfo(String objectId, int type,
+                       long offset) {
+               assert (objectId.length() == 1);
+               PackedObjectInfo objectInfo = new PackedObjectInfo(
+                               ObjectId.fromString(objectId.repeat(OBJECT_ID_STRING_LENGTH)));
+               objectInfo.setType(type);
+               objectInfo.setOffset(offset);
+               return objectInfo;
+       }
+}
index 78dd9bd4c32614c9c324b93a56e0027b8dd6521c..bc8144f1c9a71e2664de5e3eafea1465cac1c380 100644 (file)
@@ -839,6 +839,7 @@ unsupportedGC=Unsupported garbage collector for repository type: {0}
 unsupportedMark=Mark not supported
 unsupportedOperationNotAddAtEnd=Not add-at-end: {0}
 unsupportedPackIndexVersion=Unsupported pack index version {0}
+unsupportedPackReverseIndexVersion=Unsupported pack reverse index version {0}
 unsupportedPackVersion=Unsupported pack version {0}.
 unsupportedReftableVersion=Unsupported reftable version {0}.
 unsupportedRepositoryDescription=Repository description not supported
index 28634cbc4dfe3d0b249a0910552122adcaece983..518e0b7d9b76e4ce3091c993b04f9371e059459f 100644 (file)
@@ -868,6 +868,7 @@ public class JGitText extends TranslationBundle {
        /***/ public String unsupportedMark;
        /***/ public String unsupportedOperationNotAddAtEnd;
        /***/ public String unsupportedPackIndexVersion;
+       /***/ public String unsupportedPackReverseIndexVersion;
        /***/ public String unsupportedPackVersion;
        /***/ public String unsupportedReftableVersion;
        /***/ public String unsupportedRepositoryDescription;
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/PackReverseIndexWriter.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/PackReverseIndexWriter.java
new file mode 100644 (file)
index 0000000..4c8417b
--- /dev/null
@@ -0,0 +1,149 @@
+/*
+ * Copyright (C) 2023, Google LLC and others
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Distribution License v. 1.0 which is available at
+ * https://www.eclipse.org/org/documents/edl-v10.php.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+package org.eclipse.jgit.internal.storage.file;
+
+import java.io.BufferedOutputStream;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.security.DigestOutputStream;
+import java.text.MessageFormat;
+import java.util.List;
+
+import org.eclipse.jgit.internal.JGitText;
+import org.eclipse.jgit.lib.Constants;
+import org.eclipse.jgit.transport.PackedObjectInfo;
+
+/**
+ * Writes reverse index files conforming to the requested version.
+ * <p>
+ * The reverse index file format is specified at
+ * https://git-scm.com/docs/pack-format#_pack_rev_files_have_the_format.
+ */
+public abstract class PackReverseIndexWriter {
+       /**
+        * Magic bytes that uniquely identify git reverse index files.
+        */
+       protected static byte[] MAGIC = { 'R', 'I', 'D', 'X' };
+
+       /**
+        * The first reverse index file version.
+        */
+       protected static final int VERSION_1 = 1;
+
+       /**
+        * Stream to write contents to while maintaining a checksum.
+        */
+       protected final DigestOutputStream out;
+
+       /**
+        * Stream to write primitive type contents to while maintaining a checksum.
+        */
+       protected final DataOutput dataOutput;
+
+       private static final int DEFAULT_VERSION = VERSION_1;
+
+       /**
+        * Construct the components of a PackReverseIndexWriter that are shared
+        * between subclasses.
+        *
+        * @param dst
+        *            the OutputStream that the instance will write contents to
+        */
+       protected PackReverseIndexWriter(OutputStream dst) {
+               out = new DigestOutputStream(
+                               dst instanceof BufferedOutputStream ? dst
+                                               : new BufferedOutputStream(dst),
+                               Constants.newMessageDigest());
+               dataOutput = new SimpleDataOutput(out);
+       }
+
+       /**
+        * Create a writer instance for the default file format version.
+        *
+        * @param dst
+        *            the OutputStream that contents will be written to
+        * @return the new writer instance
+        */
+       public static PackReverseIndexWriter createWriter(OutputStream dst) {
+               return createWriter(dst, DEFAULT_VERSION);
+       }
+
+       /**
+        * Create a writer instance for the specified file format version.
+        *
+        * @param dst
+        *            the OutputStream that contents will be written to
+        * @param version
+        *            the reverse index format version to write contents as
+        * @return the new writer instance
+        */
+       public static PackReverseIndexWriter createWriter(OutputStream dst,
+                       int version) {
+               if (version == VERSION_1) {
+                       return new PackReverseIndexWriterV1(dst);
+               }
+               throw new IllegalArgumentException(MessageFormat.format(
+                               JGitText.get().unsupportedPackReverseIndexVersion,
+                               Integer.toString(version)));
+       }
+
+       /**
+        * Write the contents of a reverse index file for the given objects.
+        *
+        * @param objectsByIndexPos
+        *            the objects whose forward index file positions should be
+        *            written, sorted by forward index file position (currently SHA1
+        *            ordering)
+        * @param packChecksum
+        *            the checksum of the corresponding pack file
+        * @throws IOException
+        *             if writing the output fails
+        */
+       public void write(
+                       List<? extends PackedObjectInfo> objectsByIndexPos,
+                       byte[] packChecksum) throws IOException {
+               writeHeader();
+               writeBody(objectsByIndexPos);
+               writeFooter(packChecksum);
+               out.flush();
+       }
+
+       /**
+        * Write the header of a reverse index file, usually the magic bytes and the
+        * file format version.
+        *
+        * @throws IOException
+        *             if writing the output fails
+        */
+       protected abstract void writeHeader() throws IOException;
+
+       /**
+        * Write the body of a reverse index file, usually the forward index
+        * positions of the given objects, sorted by those objects' pack file
+        * offsets.
+        *
+        * @param objectsSortedByIndexPosition
+        *            the objects whose forward index file positions should be
+        *            written, sorted by forward index file position; not modified
+        *            during method
+        * @throws IOException
+        *             if writing the output fails
+        */
+       protected abstract void writeBody(
+                       List<? extends PackedObjectInfo> objectsSortedByIndexPosition)
+                       throws IOException;
+
+       private void writeFooter(byte[] packChecksum) throws IOException {
+               out.write(packChecksum);
+               byte[] selfChecksum = out.getMessageDigest().digest();
+               out.write(selfChecksum);
+       }
+}
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/PackReverseIndexWriterV1.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/PackReverseIndexWriterV1.java
new file mode 100644 (file)
index 0000000..7630724
--- /dev/null
@@ -0,0 +1,75 @@
+/*
+ * Copyright (C) 2023, Google LLC and others
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Distribution License v. 1.0 which is available at
+ * https://www.eclipse.org/org/documents/edl-v10.php.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+package org.eclipse.jgit.internal.storage.file;
+
+import java.io.IOException;
+import java.io.OutputStream;
+import java.util.List;
+
+import org.eclipse.jgit.transport.PackedObjectInfo;
+import org.eclipse.jgit.util.IntList;
+import org.eclipse.jgit.util.IntList.IntComparator;
+
+/**
+ * Writes reverse index files following the version 1 format.
+ * <p>
+ * The file format is specified at
+ * https://git-scm.com/docs/pack-format#_pack_rev_files_have_the_format.
+ */
+final class PackReverseIndexWriterV1 extends PackReverseIndexWriter {
+       private static final int OID_VERSION_SHA1 = 1;
+
+       private static final int DEFAULT_OID_VERSION = OID_VERSION_SHA1;
+
+       PackReverseIndexWriterV1(final OutputStream dst) {
+               super(dst);
+       }
+
+       @Override
+       protected void writeHeader() throws IOException {
+               out.write(MAGIC);
+               dataOutput.writeInt(VERSION_1);
+               dataOutput.writeInt(DEFAULT_OID_VERSION);
+       }
+
+       @Override
+       protected void writeBody(List<? extends PackedObjectInfo> objectsByIndexPos)
+                       throws IOException {
+               IntList positionsByOffset = IntList.filledWithRange(0,
+                               objectsByIndexPos.size());
+               positionsByOffset
+                               .sort(new IndexPositionsByOffsetComparator(objectsByIndexPos));
+
+               for (int i = 0; i < positionsByOffset.size(); i++) {
+                       int indexPosition = positionsByOffset.get(i);
+                       dataOutput.writeInt(indexPosition);
+               }
+       }
+
+       private static class IndexPositionsByOffsetComparator
+                       implements IntComparator {
+               private List<? extends PackedObjectInfo> objectsByIndexPos;
+
+               private IndexPositionsByOffsetComparator(
+                               List<? extends PackedObjectInfo> objectsByIndexPos) {
+                       this.objectsByIndexPos = objectsByIndexPos;
+               }
+
+               @Override
+               public int compare(int firstIndexPosition, int secondIndexPosition) {
+                       return Long.compare(getOffset(firstIndexPosition),
+                                       getOffset(secondIndexPosition));
+               }
+
+               private long getOffset(int indexPosition) {
+                       return objectsByIndexPos.get(indexPosition).getOffset();
+               }
+       }
+}