aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAnna Papitto <annapapitto@google.com>2023-04-27 11:01:30 -0700
committerAnna Papitto <annapapitto@google.com>2023-04-28 10:19:18 -0700
commit64615b44e6f4a7f14a43ba19d4dd28f8483ed2eb (patch)
tree98081ecdbfddcfd13c79b9fbb2cbdd73620ad9f2
parent7d3f893d31a02ebf50b34fba7203f996d5162c62 (diff)
downloadjgit-64615b44e6f4a7f14a43ba19d4dd28f8483ed2eb.tar.gz
jgit-64615b44e6f4a7f14a43ba19d4dd28f8483ed2eb.zip
PackReverseIndexWriter: write out version 1 reverse index file
The reverse index for a pack is used to quickly find an object's position in the pack's forward index based on that object's pack offset. It is currently computed from the forward index by sorting the index entries by the corresponding pack offset. This computation uses bucket sort with insertion sort, which has an average runtime of O(n log n) and worst case runtime of O(n^2); and memory usage of 3*size(int)*n because it maintains 3 int arrays, even after sorting is completed. The computation must be performed every time that the reverse index object is created in memory. In contrast, Cgit persists a pack reverse index file to avoid recomputing the reverse index ordering every time that it is needed. Instead they write a file with format https://git-scm.com/docs/pack-format#_pack_rev_files_have_the_format which can later be read and parsed into an in-memory reverse index each time it is needed. Introduce these reverse index files to JGit. PackReverseIndexWriter writes out a reverse index file to be read later when needed. Subclass PackReverseIndexWriterV1 writes a file with the official version 1 format. To avoid temporarily allocating an Integer collection while sorting and writing out the contents, using memory 4*size(Integer)*n, use an IntList and its #sort method, which uses quicksort. Change-Id: I6437745777a16f723e2f1cfcce4e0d94e599dcee Signed-off-by: Anna Papitto <annapapitto@google.com>
-rw-r--r--org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/file/PackReverseIndexWriterTest.java43
-rw-r--r--org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/file/PackReverseIndexWriterV1Test.java122
-rw-r--r--org.eclipse.jgit/resources/org/eclipse/jgit/internal/JGitText.properties1
-rw-r--r--org.eclipse.jgit/src/org/eclipse/jgit/internal/JGitText.java1
-rw-r--r--org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/PackReverseIndexWriter.java149
-rw-r--r--org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/PackReverseIndexWriterV1.java75
6 files changed, 391 insertions, 0 deletions
diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/file/PackReverseIndexWriterTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/file/PackReverseIndexWriterTest.java
new file mode 100644
index 0000000000..220976471f
--- /dev/null
+++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/file/PackReverseIndexWriterTest.java
@@ -0,0 +1,43 @@
+/*
+ * Copyright (C) 2023, Google LLC and others
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Distribution License v. 1.0 which is available at
+ * https://www.eclipse.org/org/documents/edl-v10.php.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+package org.eclipse.jgit.internal.storage.file;
+
+import static org.junit.Assert.assertThrows;
+import static org.junit.Assert.assertTrue;
+
+import java.io.ByteArrayOutputStream;
+
+import org.junit.Test;
+
+public class PackReverseIndexWriterTest {
+
+ @Test
+ public void createWriter_defaultVersion() {
+ PackReverseIndexWriter version1 = PackReverseIndexWriter
+ .createWriter(new ByteArrayOutputStream());
+
+ assertTrue(version1 instanceof PackReverseIndexWriterV1);
+ }
+
+ @Test
+ public void createWriter_version1() {
+ PackReverseIndexWriter version1 = PackReverseIndexWriter
+ .createWriter(new ByteArrayOutputStream(), 1);
+
+ assertTrue(version1 instanceof PackReverseIndexWriterV1);
+ }
+
+ @Test
+ public void createWriter_unsupportedVersion() {
+ assertThrows(IllegalArgumentException.class,
+ () -> PackReverseIndexWriter
+ .createWriter(new ByteArrayOutputStream(), 2));
+ }
+}
diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/file/PackReverseIndexWriterV1Test.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/file/PackReverseIndexWriterV1Test.java
new file mode 100644
index 0000000000..0124887af1
--- /dev/null
+++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/file/PackReverseIndexWriterV1Test.java
@@ -0,0 +1,122 @@
+/*
+ * Copyright (C) 2023, Google LLC and others
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Distribution License v. 1.0 which is available at
+ * https://www.eclipse.org/org/documents/edl-v10.php.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+package org.eclipse.jgit.internal.storage.file;
+
+import static org.eclipse.jgit.lib.Constants.OBJECT_ID_STRING_LENGTH;
+import static org.eclipse.jgit.lib.Constants.OBJ_BLOB;
+import static org.eclipse.jgit.lib.Constants.OBJ_COMMIT;
+import static org.eclipse.jgit.lib.Constants.OBJ_TREE;
+import static org.junit.Assert.assertArrayEquals;
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.eclipse.jgit.lib.ObjectId;
+import org.eclipse.jgit.transport.PackedObjectInfo;
+import org.junit.Test;
+
+public class PackReverseIndexWriterV1Test {
+
+ private static byte[] PACK_CHECKSUM = new byte[] { 'P', 'A', 'C', 'K', 'C',
+ 'H', 'E', 'C', 'K', 'S', 'U', 'M', '3', '4', '5', '6', '7', '8',
+ '9', '0', };
+
+ @Test
+ public void write_noObjects() throws IOException {
+ ByteArrayOutputStream out = new ByteArrayOutputStream();
+ PackReverseIndexWriter writer = PackReverseIndexWriter.createWriter(out,
+ 1);
+ List<PackedObjectInfo> objectsSortedByName = new ArrayList<>();
+
+ writer.write(objectsSortedByName, PACK_CHECKSUM);
+
+ byte[] expected = new byte[] { 'R', 'I', 'D', 'X', // magic
+ 0x00, 0x00, 0x00, 0x01, // file version
+ 0x00, 0x00, 0x00, 0x01, // oid version
+ // pack checksum to copy into at byte 12
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ // checksum
+ (byte) 0xd1, 0x1d, 0x17, (byte) 0xd5, (byte) 0xa1, 0x5c,
+ (byte) 0x8f, 0x45, 0x7e, 0x06, (byte) 0x91, (byte) 0xf2, 0x7e,
+ 0x20, 0x35, 0x2c, (byte) 0xdc, 0x4c, 0x46, (byte) 0xe4, };
+ System.arraycopy(PACK_CHECKSUM, 0, expected, 12, PACK_CHECKSUM.length);
+ assertArrayEquals(expected, out.toByteArray());
+ }
+
+ @Test
+ public void write_oneObject() throws IOException {
+ ByteArrayOutputStream out = new ByteArrayOutputStream();
+ PackReverseIndexWriter writer = PackReverseIndexWriter.createWriter(out,
+ 1);
+ PackedObjectInfo a = objectInfo("a", OBJ_COMMIT, 0);
+ List<PackedObjectInfo> objectsSortedByName = List.of(a);
+
+ writer.write(objectsSortedByName, PACK_CHECKSUM);
+
+ byte[] expected = new byte[] { 'R', 'I', 'D', 'X', // magic
+ 0x00, 0x00, 0x00, 0x01, // file version
+ 0x00, 0x00, 0x00, 0x01, // oid version
+ 0x00, 0x00, 0x00, 0x00, // 0: "a" -> 0
+ // pack checksum to copy into at byte 16
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ // checksum
+ 0x48, 0x04, 0x29, 0x69, 0x2f, (byte) 0xf3, (byte) 0x80,
+ (byte) 0xa1, (byte) 0xf5, (byte) 0x92, (byte) 0xc2, 0x21, 0x46,
+ (byte) 0xd5, 0x08, 0x44, (byte) 0xa4, (byte) 0xf4, (byte) 0xc0,
+ (byte) 0xec, };
+ System.arraycopy(PACK_CHECKSUM, 0, expected, 16, PACK_CHECKSUM.length);
+ assertArrayEquals(expected, out.toByteArray());
+ }
+
+ @Test
+ public void write_multipleObjects() throws IOException {
+ ByteArrayOutputStream out = new ByteArrayOutputStream();
+ PackReverseIndexWriter writer = PackReverseIndexWriter.createWriter(out,
+ 1);
+ PackedObjectInfo a = objectInfo("a", OBJ_BLOB, 20);
+ PackedObjectInfo b = objectInfo("b", OBJ_COMMIT, 0);
+ PackedObjectInfo c = objectInfo("c", OBJ_COMMIT, 52);
+ PackedObjectInfo d = objectInfo("d", OBJ_TREE, 7);
+ PackedObjectInfo e = objectInfo("e", OBJ_COMMIT, 38);
+ List<PackedObjectInfo> objectsSortedByName = List.of(a, b, c, d, e);
+
+ writer.write(objectsSortedByName, PACK_CHECKSUM);
+
+ byte[] expected = new byte[] { 'R', 'I', 'D', 'X', // magic
+ 0x00, 0x00, 0x00, 0x01, // file version
+ 0x00, 0x00, 0x00, 0x01, // oid version
+ 0x00, 0x00, 0x00, 0x01, // offset 0: "b" -> index @ 1
+ 0x00, 0x00, 0x00, 0x03, // offset 7: "d" -> index @ 3
+ 0x00, 0x00, 0x00, 0x00, // offset 20: "a" -> index @ 0
+ 0x00, 0x00, 0x00, 0x04, // offset 38: "e" -> index @ 4
+ 0x00, 0x00, 0x00, 0x02, // offset 52: "c" -> index @ 2
+ // pack checksum to copy into at byte 32
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ // checksum
+ (byte) 0xe3, 0x1c, (byte) 0xd3, (byte) 0xeb, (byte) 0x83, 0x47,
+ (byte) 0x80, (byte) 0xb1, (byte) 0xe0, 0x3c, 0x2b, 0x25,
+ (byte) 0xd6, 0x3e, (byte) 0xdc, (byte) 0xde, (byte) 0xbe, 0x4b,
+ 0x0a, (byte) 0xe2, };
+ System.arraycopy(PACK_CHECKSUM, 0, expected, 32, PACK_CHECKSUM.length);
+ assertArrayEquals(expected, out.toByteArray());
+ }
+
+ private static PackedObjectInfo objectInfo(String objectId, int type,
+ long offset) {
+ assert (objectId.length() == 1);
+ PackedObjectInfo objectInfo = new PackedObjectInfo(
+ ObjectId.fromString(objectId.repeat(OBJECT_ID_STRING_LENGTH)));
+ objectInfo.setType(type);
+ objectInfo.setOffset(offset);
+ return objectInfo;
+ }
+}
diff --git a/org.eclipse.jgit/resources/org/eclipse/jgit/internal/JGitText.properties b/org.eclipse.jgit/resources/org/eclipse/jgit/internal/JGitText.properties
index 78dd9bd4c3..bc8144f1c9 100644
--- a/org.eclipse.jgit/resources/org/eclipse/jgit/internal/JGitText.properties
+++ b/org.eclipse.jgit/resources/org/eclipse/jgit/internal/JGitText.properties
@@ -839,6 +839,7 @@ unsupportedGC=Unsupported garbage collector for repository type: {0}
unsupportedMark=Mark not supported
unsupportedOperationNotAddAtEnd=Not add-at-end: {0}
unsupportedPackIndexVersion=Unsupported pack index version {0}
+unsupportedPackReverseIndexVersion=Unsupported pack reverse index version {0}
unsupportedPackVersion=Unsupported pack version {0}.
unsupportedReftableVersion=Unsupported reftable version {0}.
unsupportedRepositoryDescription=Repository description not supported
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/JGitText.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/JGitText.java
index 28634cbc4d..518e0b7d9b 100644
--- a/org.eclipse.jgit/src/org/eclipse/jgit/internal/JGitText.java
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/JGitText.java
@@ -868,6 +868,7 @@ public class JGitText extends TranslationBundle {
/***/ public String unsupportedMark;
/***/ public String unsupportedOperationNotAddAtEnd;
/***/ public String unsupportedPackIndexVersion;
+ /***/ public String unsupportedPackReverseIndexVersion;
/***/ public String unsupportedPackVersion;
/***/ public String unsupportedReftableVersion;
/***/ public String unsupportedRepositoryDescription;
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/PackReverseIndexWriter.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/PackReverseIndexWriter.java
new file mode 100644
index 0000000000..4c8417b115
--- /dev/null
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/PackReverseIndexWriter.java
@@ -0,0 +1,149 @@
+/*
+ * Copyright (C) 2023, Google LLC and others
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Distribution License v. 1.0 which is available at
+ * https://www.eclipse.org/org/documents/edl-v10.php.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+package org.eclipse.jgit.internal.storage.file;
+
+import java.io.BufferedOutputStream;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.security.DigestOutputStream;
+import java.text.MessageFormat;
+import java.util.List;
+
+import org.eclipse.jgit.internal.JGitText;
+import org.eclipse.jgit.lib.Constants;
+import org.eclipse.jgit.transport.PackedObjectInfo;
+
+/**
+ * Writes reverse index files conforming to the requested version.
+ * <p>
+ * The reverse index file format is specified at
+ * https://git-scm.com/docs/pack-format#_pack_rev_files_have_the_format.
+ */
+public abstract class PackReverseIndexWriter {
+ /**
+ * Magic bytes that uniquely identify git reverse index files.
+ */
+ protected static byte[] MAGIC = { 'R', 'I', 'D', 'X' };
+
+ /**
+ * The first reverse index file version.
+ */
+ protected static final int VERSION_1 = 1;
+
+ /**
+ * Stream to write contents to while maintaining a checksum.
+ */
+ protected final DigestOutputStream out;
+
+ /**
+ * Stream to write primitive type contents to while maintaining a checksum.
+ */
+ protected final DataOutput dataOutput;
+
+ private static final int DEFAULT_VERSION = VERSION_1;
+
+ /**
+ * Construct the components of a PackReverseIndexWriter that are shared
+ * between subclasses.
+ *
+ * @param dst
+ * the OutputStream that the instance will write contents to
+ */
+ protected PackReverseIndexWriter(OutputStream dst) {
+ out = new DigestOutputStream(
+ dst instanceof BufferedOutputStream ? dst
+ : new BufferedOutputStream(dst),
+ Constants.newMessageDigest());
+ dataOutput = new SimpleDataOutput(out);
+ }
+
+ /**
+ * Create a writer instance for the default file format version.
+ *
+ * @param dst
+ * the OutputStream that contents will be written to
+ * @return the new writer instance
+ */
+ public static PackReverseIndexWriter createWriter(OutputStream dst) {
+ return createWriter(dst, DEFAULT_VERSION);
+ }
+
+ /**
+ * Create a writer instance for the specified file format version.
+ *
+ * @param dst
+ * the OutputStream that contents will be written to
+ * @param version
+ * the reverse index format version to write contents as
+ * @return the new writer instance
+ */
+ public static PackReverseIndexWriter createWriter(OutputStream dst,
+ int version) {
+ if (version == VERSION_1) {
+ return new PackReverseIndexWriterV1(dst);
+ }
+ throw new IllegalArgumentException(MessageFormat.format(
+ JGitText.get().unsupportedPackReverseIndexVersion,
+ Integer.toString(version)));
+ }
+
+ /**
+ * Write the contents of a reverse index file for the given objects.
+ *
+ * @param objectsByIndexPos
+ * the objects whose forward index file positions should be
+ * written, sorted by forward index file position (currently SHA1
+ * ordering)
+ * @param packChecksum
+ * the checksum of the corresponding pack file
+ * @throws IOException
+ * if writing the output fails
+ */
+ public void write(
+ List<? extends PackedObjectInfo> objectsByIndexPos,
+ byte[] packChecksum) throws IOException {
+ writeHeader();
+ writeBody(objectsByIndexPos);
+ writeFooter(packChecksum);
+ out.flush();
+ }
+
+ /**
+ * Write the header of a reverse index file, usually the magic bytes and the
+ * file format version.
+ *
+ * @throws IOException
+ * if writing the output fails
+ */
+ protected abstract void writeHeader() throws IOException;
+
+ /**
+ * Write the body of a reverse index file, usually the forward index
+ * positions of the given objects, sorted by those objects' pack file
+ * offsets.
+ *
+ * @param objectsSortedByIndexPosition
+ * the objects whose forward index file positions should be
+ * written, sorted by forward index file position; not modified
+ * during method
+ * @throws IOException
+ * if writing the output fails
+ */
+ protected abstract void writeBody(
+ List<? extends PackedObjectInfo> objectsSortedByIndexPosition)
+ throws IOException;
+
+ private void writeFooter(byte[] packChecksum) throws IOException {
+ out.write(packChecksum);
+ byte[] selfChecksum = out.getMessageDigest().digest();
+ out.write(selfChecksum);
+ }
+}
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/PackReverseIndexWriterV1.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/PackReverseIndexWriterV1.java
new file mode 100644
index 0000000000..7630724d09
--- /dev/null
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/PackReverseIndexWriterV1.java
@@ -0,0 +1,75 @@
+/*
+ * Copyright (C) 2023, Google LLC and others
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Distribution License v. 1.0 which is available at
+ * https://www.eclipse.org/org/documents/edl-v10.php.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+package org.eclipse.jgit.internal.storage.file;
+
+import java.io.IOException;
+import java.io.OutputStream;
+import java.util.List;
+
+import org.eclipse.jgit.transport.PackedObjectInfo;
+import org.eclipse.jgit.util.IntList;
+import org.eclipse.jgit.util.IntList.IntComparator;
+
+/**
+ * Writes reverse index files following the version 1 format.
+ * <p>
+ * The file format is specified at
+ * https://git-scm.com/docs/pack-format#_pack_rev_files_have_the_format.
+ */
+final class PackReverseIndexWriterV1 extends PackReverseIndexWriter {
+ private static final int OID_VERSION_SHA1 = 1;
+
+ private static final int DEFAULT_OID_VERSION = OID_VERSION_SHA1;
+
+ PackReverseIndexWriterV1(final OutputStream dst) {
+ super(dst);
+ }
+
+ @Override
+ protected void writeHeader() throws IOException {
+ out.write(MAGIC);
+ dataOutput.writeInt(VERSION_1);
+ dataOutput.writeInt(DEFAULT_OID_VERSION);
+ }
+
+ @Override
+ protected void writeBody(List<? extends PackedObjectInfo> objectsByIndexPos)
+ throws IOException {
+ IntList positionsByOffset = IntList.filledWithRange(0,
+ objectsByIndexPos.size());
+ positionsByOffset
+ .sort(new IndexPositionsByOffsetComparator(objectsByIndexPos));
+
+ for (int i = 0; i < positionsByOffset.size(); i++) {
+ int indexPosition = positionsByOffset.get(i);
+ dataOutput.writeInt(indexPosition);
+ }
+ }
+
+ private static class IndexPositionsByOffsetComparator
+ implements IntComparator {
+ private List<? extends PackedObjectInfo> objectsByIndexPos;
+
+ private IndexPositionsByOffsetComparator(
+ List<? extends PackedObjectInfo> objectsByIndexPos) {
+ this.objectsByIndexPos = objectsByIndexPos;
+ }
+
+ @Override
+ public int compare(int firstIndexPosition, int secondIndexPosition) {
+ return Long.compare(getOffset(firstIndexPosition),
+ getOffset(secondIndexPosition));
+ }
+
+ private long getOffset(int indexPosition) {
+ return objectsByIndexPos.get(indexPosition).getOffset();
+ }
+ }
+}