--- /dev/null
+/*
+ * Copyright (C) 2022, Google LLC and others
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Distribution License v. 1.0 which is available at
+ * https://www.eclipse.org/org/documents/edl-v10.php.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+package org.eclipse.jgit.internal.storage.file;
+
+import static org.junit.Assert.assertArrayEquals;
+import static org.junit.Assert.assertEquals;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.eclipse.jgit.lib.Constants;
+import org.eclipse.jgit.lib.ObjectId;
+import org.eclipse.jgit.transport.PackedObjectInfo;
+import org.eclipse.jgit.util.BlockList;
+import org.junit.Test;
+
+public class PackObjectSizeIndexV1Test {
+ private static final ObjectId OBJ_ID = ObjectId
+ .fromString("b8b1d53172fb3fb19647adce4b38fab4371c2454");
+
+ private static final long GB = 1 << 30;
+
+ private static final int MAX_24BITS_UINT = 0xffffff;
+
+ @Test
+ public void write_24bPositions_32bSizes() throws IOException {
+ ByteArrayOutputStream out = new ByteArrayOutputStream();
+ PackObjectSizeIndexWriter writer = PackObjectSizeIndexWriter
+ .createWriter(out, 0);
+ List<PackedObjectInfo> objs = new ArrayList<>();
+ objs.add(blobWithSize(100));
+ objs.add(blobWithSize(400));
+ objs.add(blobWithSize(200));
+
+ writer.write(objs);
+ byte[] expected = new byte[] { -1, 's', 'i', 'z', // header
+ 0x01, // version
+ 0x00, 0x00, 0x00, 0x00, // minimum object size
+ 0x00, 0x00, 0x00, 0x03, // obj count
+ 0x18, // Unsigned 3 bytes
+ 0x00, 0x00, 0x00, 0x03, // 3 positions
+ 0x00, 0x00, 0x00, // positions
+ 0x00, 0x00, 0x01, //
+ 0x00, 0x00, 0x02, //
+ 0x00, // No more positions
+ 0x00, 0x00, 0x00, 0x64, // size 100
+ 0x00, 0x00, 0x01, (byte) 0x90, // size 400
+ 0x00, 0x00, 0x00, (byte) 0xc8, // size 200
+ 0x00, 0x00, 0x00, 0x00 // 64bit sizes counter
+ };
+ byte[] output = out.toByteArray();
+ assertArrayEquals(expected, output);
+ }
+
+ @Test
+ public void write_32bPositions_32bSizes() throws IOException {
+ ByteArrayOutputStream out = new ByteArrayOutputStream();
+ PackObjectSizeIndexWriter writer = PackObjectSizeIndexWriter
+ .createWriter(out, 0);
+ List<PackedObjectInfo> objs = new BlockList<>(9_000_000);
+ // The 24 bit range is full of commits and trees
+ PackedObjectInfo commit = objInfo(Constants.OBJ_COMMIT, 100);
+ for (int i = 0; i <= MAX_24BITS_UINT; i++) {
+ objs.add(commit);
+ }
+ objs.add(blobWithSize(100));
+ objs.add(blobWithSize(400));
+ objs.add(blobWithSize(200));
+
+ writer.write(objs);
+ byte[] expected = new byte[] { -1, 's', 'i', 'z', // header
+ 0x01, // version
+ 0x00, 0x00, 0x00, 0x00, // minimum object size
+ 0x00, 0x00, 0x00, 0x03, // obj count
+ (byte) 0x20, // Signed 4 bytes
+ 0x00, 0x00, 0x00, 0x03, // 3 positions
+ 0x01, 0x00, 0x00, 0x00, // positions
+ 0x01, 0x00, 0x00, 0x01, //
+ 0x01, 0x00, 0x00, 0x02, //
+ 0x00, // No more positions
+ 0x00, 0x00, 0x00, 0x64, // size 100
+ 0x00, 0x00, 0x01, (byte) 0x90, // size 400
+ 0x00, 0x00, 0x00, (byte) 0xc8, // size 200
+ 0x00, 0x00, 0x00, 0x00 // 64bit sizes counter
+ };
+ byte[] output = out.toByteArray();
+ assertArrayEquals(expected, output);
+ }
+
+ @Test
+ public void write_24b32bPositions_32bSizes() throws IOException {
+ ByteArrayOutputStream out = new ByteArrayOutputStream();
+ PackObjectSizeIndexWriter writer = PackObjectSizeIndexWriter
+ .createWriter(out, 0);
+ List<PackedObjectInfo> objs = new BlockList<>(9_000_000);
+ // The 24 bit range is full of commits and trees
+ PackedObjectInfo commit = objInfo(Constants.OBJ_COMMIT, 100);
+ for (int i = 0; i < MAX_24BITS_UINT; i++) {
+ objs.add(commit);
+ }
+ objs.add(blobWithSize(100));
+ objs.add(blobWithSize(400));
+ objs.add(blobWithSize(200));
+
+ writer.write(objs);
+ byte[] expected = new byte[] { -1, 's', 'i', 'z', // header
+ 0x01, // version
+ 0x00, 0x00, 0x00, 0x00, // minimum object size
+ 0x00, 0x00, 0x00, 0x03, // obj count
+ 0x18, // 3 bytes
+ 0x00, 0x00, 0x00, 0x01, // 1 position
+ (byte) 0xff, (byte) 0xff, (byte) 0xff,
+ (byte) 0x20, // 4 bytes (32 bits)
+ 0x00, 0x00, 0x00, 0x02, // 2 positions
+ 0x01, 0x00, 0x00, 0x00, // positions
+ 0x01, 0x00, 0x00, 0x01, //
+ 0x00, // No more positions
+ 0x00, 0x00, 0x00, 0x64, // size 100
+ 0x00, 0x00, 0x01, (byte) 0x90, // size 400
+ 0x00, 0x00, 0x00, (byte) 0xc8, // size 200
+ 0x00, 0x00, 0x00, 0x00 // 64bit sizes counter
+ };
+ byte[] output = out.toByteArray();
+ assertArrayEquals(expected, output);
+ }
+
+ @Test
+ public void write_64bitsSize() throws IOException {
+ ByteArrayOutputStream out = new ByteArrayOutputStream();
+ PackObjectSizeIndexWriter writer = PackObjectSizeIndexWriter
+ .createWriter(out, 0);
+ List<PackedObjectInfo> objs = new ArrayList<>();
+ objs.add(blobWithSize(100));
+ objs.add(blobWithSize(3 * GB));
+ objs.add(blobWithSize(4 * GB));
+ objs.add(blobWithSize(400));
+
+ writer.write(objs);
+ byte[] expected = new byte[] { -1, 's', 'i', 'z', // header
+ 0x01, // version
+ 0x00, 0x00, 0x00, 0x00, // minimum object size
+ 0x00, 0x00, 0x00, 0x04, // Object count
+ 0x18, // Unsigned 3 byte positions
+ 0x00, 0x00, 0x00, 0x04, // 4 positions
+ 0x00, 0x00, 0x00, // positions
+ 0x00, 0x00, 0x01, //
+ 0x00, 0x00, 0x02, //
+ 0x00, 0x00, 0x03, //
+ 0x00, // No more positions
+ 0x00, 0x00, 0x00, 0x64, // size 100
+ (byte) 0xff, (byte) 0xff, (byte) 0xff, (byte) 0xff, // -1 (3GB)
+ (byte) 0xff, (byte) 0xff, (byte) 0xff, (byte) 0xfe, // -2 (4GB)
+ 0x00, 0x00, 0x01, (byte) 0x90, // size 400
+ 0x00, 0x00, 0x00, (byte) 0x02, // 64bit sizes counter (2)
+ 0x00, 0x00, 0x00, 0x00, // size 3Gb
+ (byte) 0xc0, 0x00, 0x00, 0x00, //
+ 0x00, 0x00, 0x00, 0x01, // size 4GB
+ (byte) 0x00, 0x00, 0x00, 0x00, //
+ 0x00, 0x00, 0x00, 0x00 // 128bit sizes counter
+ };
+ byte[] output = out.toByteArray();
+ assertArrayEquals(expected, output);
+ }
+
+ @Test
+ public void write_allObjectsTooSmall() throws IOException {
+ ByteArrayOutputStream out = new ByteArrayOutputStream();
+ PackObjectSizeIndexWriter writer = PackObjectSizeIndexWriter
+ .createWriter(out, 1 << 10);
+ List<PackedObjectInfo> objs = new ArrayList<>();
+ // too small blobs
+ objs.add(blobWithSize(100));
+ objs.add(blobWithSize(200));
+ objs.add(blobWithSize(400));
+
+ writer.write(objs);
+ byte[] expected = new byte[] { -1, 's', 'i', 'z', // header
+ 0x01, // version
+ 0x00, 0x00, 0x04, 0x00, // minimum object size
+ 0x00, 0x00, 0x00, 0x00, // Object count
+ };
+ byte[] output = out.toByteArray();
+ assertArrayEquals(expected, output);
+ }
+
+ @Test
+ public void write_onlyBlobsIndexed() throws IOException {
+ ByteArrayOutputStream out = new ByteArrayOutputStream();
+ PackObjectSizeIndexWriter writer = PackObjectSizeIndexWriter
+ .createWriter(out, 0);
+ List<PackedObjectInfo> objs = new ArrayList<>();
+ objs.add(objInfo(Constants.OBJ_COMMIT, 1000));
+ objs.add(blobWithSize(100));
+ objs.add(objInfo(Constants.OBJ_TAG, 1000));
+ objs.add(blobWithSize(400));
+ objs.add(blobWithSize(200));
+
+ writer.write(objs);
+ byte[] expected = new byte[] { -1, 's', 'i', 'z', // header
+ 0x01, // version
+ 0x00, 0x00, 0x00, 0x00, // minimum object size
+ 0x00, 0x00, 0x00, 0x03, // Object count
+ 0x18, // Positions in 3 bytes
+ 0x00, 0x00, 0x00, 0x03, // 3 entries
+ 0x00, 0x00, 0x01, // positions
+ 0x00, 0x00, 0x03, //
+ 0x00, 0x00, 0x04, //
+ 0x00, // No more positions
+ 0x00, 0x00, 0x00, 0x64, // size 100
+ 0x00, 0x00, 0x01, (byte) 0x90, // size 400
+ 0x00, 0x00, 0x00, (byte) 0xc8, // size 200
+ 0x00, 0x00, 0x00, 0x00 // 64bit sizes counter
+ };
+ byte[] output = out.toByteArray();
+ assertArrayEquals(expected, output);
+ }
+
+ @Test
+ public void write_noObjects() throws IOException {
+ ByteArrayOutputStream out = new ByteArrayOutputStream();
+ PackObjectSizeIndexWriter writer = PackObjectSizeIndexWriter
+ .createWriter(out, 0);
+ List<PackedObjectInfo> objs = new ArrayList<>();
+
+ writer.write(objs);
+ byte[] expected = new byte[] { -1, 's', 'i', 'z', // header
+ 0x01, // version
+ 0x00, 0x00, 0x00, 0x00, // minimum object size
+ 0x00, 0x00, 0x00, 0x00, // Object count
+ };
+ byte[] output = out.toByteArray();
+ assertArrayEquals(expected, output);
+ }
+
+ @Test
+ public void read_empty() throws IOException {
+ ByteArrayOutputStream out = new ByteArrayOutputStream();
+ PackObjectSizeIndexWriter writer = PackObjectSizeIndexWriter
+ .createWriter(out, 0);
+ List<PackedObjectInfo> objs = new ArrayList<>();
+
+ writer.write(objs);
+ ByteArrayInputStream in = new ByteArrayInputStream(out.toByteArray());
+ PackObjectSizeIndex index = PackObjectSizeIndexLoader.load(in);
+ assertEquals(-1, index.getSize(0));
+ assertEquals(-1, index.getSize(1));
+ assertEquals(-1, index.getSize(1 << 30));
+ assertEquals(0, index.getThreshold());
+ }
+
+ @Test
+ public void read_only24bitsPositions() throws IOException {
+ ByteArrayOutputStream out = new ByteArrayOutputStream();
+ PackObjectSizeIndexWriter writer = PackObjectSizeIndexWriter
+ .createWriter(out, 0);
+ List<PackedObjectInfo> objs = new ArrayList<>();
+ objs.add(blobWithSize(100));
+ objs.add(blobWithSize(200));
+ objs.add(blobWithSize(400));
+ objs.add(blobWithSize(1500));
+
+ writer.write(objs);
+ ByteArrayInputStream in = new ByteArrayInputStream(out.toByteArray());
+ PackObjectSizeIndex index = PackObjectSizeIndexLoader.load(in);
+ assertEquals(100, index.getSize(0));
+ assertEquals(200, index.getSize(1));
+ assertEquals(400, index.getSize(2));
+ assertEquals(1500, index.getSize(3));
+ assertEquals(0, index.getThreshold());
+ }
+
+ @Test
+ public void read_only32bitsPositions() throws IOException {
+ ByteArrayOutputStream out = new ByteArrayOutputStream();
+ PackObjectSizeIndexWriter writer = PackObjectSizeIndexWriter
+ .createWriter(out, 2000);
+ List<PackedObjectInfo> objs = new ArrayList<>();
+ PackedObjectInfo smallObj = blobWithSize(100);
+ for (int i = 0; i <= MAX_24BITS_UINT; i++) {
+ objs.add(smallObj);
+ }
+ objs.add(blobWithSize(1000));
+ objs.add(blobWithSize(3000));
+ objs.add(blobWithSize(2500));
+ objs.add(blobWithSize(1000));
+
+ writer.write(objs);
+ ByteArrayInputStream in = new ByteArrayInputStream(out.toByteArray());
+ PackObjectSizeIndex index = PackObjectSizeIndexLoader.load(in);
+ assertEquals(-1, index.getSize(5));
+ assertEquals(-1, index.getSize(MAX_24BITS_UINT+1));
+ assertEquals(3000, index.getSize(MAX_24BITS_UINT+2));
+ assertEquals(2500, index.getSize(MAX_24BITS_UINT+3));
+ assertEquals(-1, index.getSize(MAX_24BITS_UINT+4)); // Not indexed
+ assertEquals(2000, index.getThreshold());
+ }
+
+ @Test
+ public void read_24and32BitsPositions() throws IOException {
+ ByteArrayOutputStream out = new ByteArrayOutputStream();
+ PackObjectSizeIndexWriter writer = PackObjectSizeIndexWriter
+ .createWriter(out, 2000);
+ List<PackedObjectInfo> objs = new ArrayList<>();
+ PackedObjectInfo smallObj = blobWithSize(100);
+ for (int i = 0; i <= MAX_24BITS_UINT; i++) {
+ if (i == 500 || i == 1000 || i == 1500) {
+ objs.add(blobWithSize(2500));
+ continue;
+ }
+ objs.add(smallObj);
+ }
+ objs.add(blobWithSize(3000));
+ objs.add(blobWithSize(1000));
+ objs.add(blobWithSize(2500));
+ objs.add(blobWithSize(1000));
+
+ writer.write(objs);
+ ByteArrayInputStream in = new ByteArrayInputStream(out.toByteArray());
+ PackObjectSizeIndex index = PackObjectSizeIndexLoader.load(in);
+ // 24 bit positions
+ assertEquals(-1, index.getSize(5));
+ assertEquals(2500, index.getSize(500));
+ // 32 bit positions
+ assertEquals(3000, index.getSize(MAX_24BITS_UINT+1));
+ assertEquals(-1, index.getSize(MAX_24BITS_UINT+2));
+ assertEquals(2500, index.getSize(MAX_24BITS_UINT+3));
+ assertEquals(-1, index.getSize(MAX_24BITS_UINT+4)); // Not indexed
+ assertEquals(2000, index.getThreshold());
+ }
+
+ @Test
+ public void read_only64bits() throws IOException {
+ ByteArrayOutputStream out = new ByteArrayOutputStream();
+ PackObjectSizeIndexWriter writer = PackObjectSizeIndexWriter
+ .createWriter(out, 0);
+ List<PackedObjectInfo> objs = new ArrayList<>();
+ objs.add(blobWithSize(3 * GB));
+ objs.add(blobWithSize(8 * GB));
+
+ writer.write(objs);
+ ByteArrayInputStream in = new ByteArrayInputStream(out.toByteArray());
+ PackObjectSizeIndex index = PackObjectSizeIndexLoader.load(in);
+ assertEquals(3 * GB, index.getSize(0));
+ assertEquals(8 * GB, index.getSize(1));
+ assertEquals(0, index.getThreshold());
+ }
+
+ @Test
+ public void read_withMinSize() throws IOException {
+ int minSize = 1000;
+ ByteArrayOutputStream out = new ByteArrayOutputStream();
+ PackObjectSizeIndexWriter writer = PackObjectSizeIndexWriter
+ .createWriter(out, minSize);
+ List<PackedObjectInfo> objs = new ArrayList<>();
+ objs.add(blobWithSize(3 * GB));
+ objs.add(blobWithSize(1500));
+ objs.add(blobWithSize(500));
+ objs.add(blobWithSize(1000));
+ objs.add(blobWithSize(2000));
+
+ writer.write(objs);
+ ByteArrayInputStream in = new ByteArrayInputStream(out.toByteArray());
+ PackObjectSizeIndex index = PackObjectSizeIndexLoader.load(in);
+ assertEquals(3 * GB, index.getSize(0));
+ assertEquals(1500, index.getSize(1));
+ assertEquals(-1, index.getSize(2));
+ assertEquals(1000, index.getSize(3));
+ assertEquals(2000, index.getSize(4));
+ assertEquals(minSize, index.getThreshold());
+ }
+
+ private static PackedObjectInfo blobWithSize(long size) {
+ return objInfo(Constants.OBJ_BLOB, size);
+ }
+
+ private static PackedObjectInfo objInfo(int type, long size) {
+ PackedObjectInfo objectInfo = new PackedObjectInfo(OBJ_ID);
+ objectInfo.setType(type);
+ objectInfo.setFullSize(size);
+ return objectInfo;
+ }
+}
--- /dev/null
+/*
+ * Copyright (C) 2022, Google LLC and others
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Distribution License v. 1.0 which is available at
+ * https://www.eclipse.org/org/documents/edl-v10.php.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+package org.eclipse.jgit.internal.storage.file;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.UnsupportedEncodingException;
+import java.util.Arrays;
+
+import org.eclipse.jgit.util.NB;
+
+/**
+ * Memory representation of the object-size index
+ *
+ * The object size index is a map from position in the primary idx (i.e.
+ * position of the object-id in lexicographical order) to size.
+ *
+ * Most of the positions fit in unsigned 3 bytes (up to 16 million)
+ */
+class PackObjectSizeIndexV1 implements PackObjectSizeIndex {
+
+ private static final byte BITS_24 = 0x18;
+
+ private static final byte BITS_32 = 0x20;
+
+ private final int threshold;
+
+ private final UInt24Array positions24;
+
+ private final int[] positions32;
+
+ /**
+ * Parallel array to concat(positions24, positions32) with the size of the
+ * objects.
+ *
+ * A value >= 0 is the size of the object. A negative value means the size
+ * doesn't fit in an int and |value|-1 is the position for the size in the
+ * size64 array e.g. a value of -1 is sizes64[0], -2 = sizes64[1], ...
+ */
+ private final int[] sizes32;
+
+ private final long[] sizes64;
+
+ static PackObjectSizeIndex parse(InputStream in) throws IOException {
+ /** Header and version already out of the input */
+ IndexInputStreamReader stream = new IndexInputStreamReader(in);
+ int threshold = stream.readInt(); // minSize
+ int objCount = stream.readInt();
+ if (objCount == 0) {
+ return new EmptyPackObjectSizeIndex(threshold);
+ }
+ return new PackObjectSizeIndexV1(stream, threshold, objCount);
+ }
+
+ private PackObjectSizeIndexV1(IndexInputStreamReader stream, int threshold,
+ int objCount) throws IOException {
+ this.threshold = threshold;
+ UInt24Array pos24 = null;
+ int[] pos32 = null;
+
+ byte positionEncoding;
+ while ((positionEncoding = stream.readByte()) != 0) {
+ if (Byte.compareUnsigned(positionEncoding, BITS_24) == 0) {
+ int sz = stream.readInt();
+ pos24 = new UInt24Array(stream.readNBytes(sz * 3));
+ } else if (Byte.compareUnsigned(positionEncoding, BITS_32) == 0) {
+ int sz = stream.readInt();
+ pos32 = stream.readIntArray(sz);
+ } else {
+ throw new UnsupportedEncodingException(
+ String.format("Unknown position encoding %s",
+ Integer.toHexString(positionEncoding)));
+ }
+ }
+ positions24 = pos24 != null ? pos24 : UInt24Array.EMPTY;
+ positions32 = pos32 != null ? pos32 : new int[0];
+
+ sizes32 = stream.readIntArray(objCount);
+ int c64sizes = stream.readInt();
+ if (c64sizes == 0) {
+ sizes64 = new long[0];
+ return;
+ }
+ sizes64 = stream.readLongArray(c64sizes);
+ int c128sizes = stream.readInt();
+ if (c128sizes != 0) {
+ // this MUST be 0 (we don't support 128 bits sizes yet)
+ throw new IOException("Unsupported sizes in object-size-index");
+ }
+ }
+
+ @Override
+ public long getSize(int idxOffset) {
+ int pos = -1;
+ if (!positions24.isEmpty() && idxOffset <= positions24.getLastValue()) {
+ pos = positions24.binarySearch(idxOffset);
+ } else if (positions32.length > 0 && idxOffset >= positions32[0]) {
+ int pos32 = Arrays.binarySearch(positions32, idxOffset);
+ if (pos32 >= 0) {
+ pos = pos32 + positions24.size();
+ }
+ }
+ if (pos < 0) {
+ return -1;
+ }
+
+ int objSize = sizes32[pos];
+ if (objSize < 0) {
+ int secondPos = Math.abs(objSize) - 1;
+ return sizes64[secondPos];
+ }
+ return objSize;
+ }
+
+ @Override
+ public long getObjectCount() {
+ return positions24.size() + positions32.length;
+ }
+
+ @Override
+ public int getThreshold() {
+ return threshold;
+ }
+
+ /**
+ * Wrapper to read parsed content from the byte stream
+ */
+ private static class IndexInputStreamReader {
+
+ private final byte[] buffer = new byte[8];
+
+ private final InputStream in;
+
+ IndexInputStreamReader(InputStream in) {
+ this.in = in;
+ }
+
+ int readInt() throws IOException {
+ int n = in.readNBytes(buffer, 0, 4);
+ if (n < 4) {
+ throw new IOException(
+ "Unable to read a full int from the stream");
+ }
+ return NB.decodeInt32(buffer, 0);
+ }
+
+ int[] readIntArray(int intsCount) throws IOException {
+ if (intsCount == 0) {
+ return new int[0];
+ }
+
+ int[] dest = new int[intsCount];
+ for (int i = 0; i < intsCount; i++) {
+ dest[i] = readInt();
+ }
+ return dest;
+ }
+
+ long readLong() throws IOException {
+ int n = in.readNBytes(buffer, 0, 8);
+ if (n < 8) {
+ throw new IOException(
+ "Unable to read a full int from the stream");
+ }
+ return NB.decodeInt64(buffer, 0);
+ }
+
+ long[] readLongArray(int longsCount) throws IOException {
+ if (longsCount == 0) {
+ return new long[0];
+ }
+
+ long[] dest = new long[longsCount];
+ for (int i = 0; i < longsCount; i++) {
+ dest[i] = readLong();
+ }
+ return dest;
+ }
+
+ byte readByte() throws IOException {
+ int n = in.readNBytes(buffer, 0, 1);
+ if (n != 1) {
+ throw new IOException("Cannot read byte from stream");
+ }
+ return buffer[0];
+ }
+
+ byte[] readNBytes(int sz) throws IOException {
+ return in.readNBytes(sz);
+ }
+ }
+
+ private static class EmptyPackObjectSizeIndex
+ implements PackObjectSizeIndex {
+
+ private final int threshold;
+
+ EmptyPackObjectSizeIndex(int threshold) {
+ this.threshold = threshold;
+ }
+
+ @Override
+ public long getSize(int idxOffset) {
+ return -1;
+ }
+
+ @Override
+ public long getObjectCount() {
+ return 0;
+ }
+
+ @Override
+ public int getThreshold() {
+ return threshold;
+ }
+ }
+}
--- /dev/null
+/*
+ * Copyright (C) 2022, Google LLC and others
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Distribution License v. 1.0 which is available at
+ * https://www.eclipse.org/org/documents/edl-v10.php.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+package org.eclipse.jgit.internal.storage.file;
+
+import java.io.BufferedOutputStream;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.util.List;
+
+import org.eclipse.jgit.lib.Constants;
+import org.eclipse.jgit.transport.PackedObjectInfo;
+import org.eclipse.jgit.util.NB;
+
+/**
+ * Write an object index in the output stream
+ */
+public abstract class PackObjectSizeIndexWriter {
+
+ private static final int MAX_24BITS_UINT = 0xffffff;
+
+ private static final PackObjectSizeIndexWriter NULL_WRITER = new PackObjectSizeIndexWriter() {
+ @Override
+ public void write(List<? extends PackedObjectInfo> objs) {
+ // Do nothing
+ }
+ };
+
+ /** Magic constant for the object size index file */
+ protected static final byte[] HEADER = { -1, 's', 'i', 'z' };
+
+ /**
+ * Returns a writer for the latest index version
+ *
+ * @param os
+ * Output stream where to write the index
+ * @param minSize
+ * objects strictly smaller than this size won't be added to the
+ * index. Negative size won't write AT ALL. Other sizes could write
+ * an empty index.
+ * @return the index writer
+ */
+ public static PackObjectSizeIndexWriter createWriter(OutputStream os,
+ int minSize) {
+ if (minSize < 0) {
+ return NULL_WRITER;
+ }
+ return new PackObjectSizeWriterV1(os, minSize);
+ }
+
+ /**
+ * Add the objects to the index
+ *
+ * @param objs
+ * objects in the pack, in sha1 order. Their position in the list
+ * matches their position in the primary index.
+ * @throws IOException
+ * problem writing to the stream
+ */
+ public abstract void write(List<? extends PackedObjectInfo> objs)
+ throws IOException;
+
+ /**
+ * Object size index v1.
+ *
+ * Store position (in the main index) to size as parallel arrays.
+ *
+ * <p>Positions in the main index fit well in unsigned 24 bits (16M) for most
+ * repositories, but some outliers have even more objects, so we need to
+ * store also 32 bits positions.
+ *
+ * <p>Sizes are stored as a first array parallel to positions. If a size
+ * doesn't fit in an element of that array, then we encode there a position
+ * on the next-size array. This "overflow" array doesn't have entries for
+ * all positions.
+ *
+ * <pre>
+ *
+ * positions [10, 500, 1000, 1001]
+ * sizes (32bits) [15MB, -1, 6MB, -2]
+ * ___/ ______/
+ * / /
+ * sizes (64 bits) [3GB, 6GB]
+ * </pre>
+ *
+ * <p>For sizes we use 32 bits as the first level and 64 for the rare objects
+ * over 2GB.
+ *
+ * <p>A 24/32/64 bits hierarchy of arrays saves space if we have a lot of small
+ * objects, but wastes space if we have only big ones. The min size to index is
+ * controlled by conf and in principle we want to index only rather
+ * big objects (e.g. > 10MB). We could support more dynamics read/write of sizes
+ * (e.g. 24 only if the threshold will include many of those objects) but it
+ * complicates a lot code and spec. If needed it could go for a v2 of the protocol.
+ *
+ * <p>Format:
+ *
+ * <li>A header with the magic number (4 bytes)
+ * <li>The index version (1 byte)
+ * <li>The minimum object size (4 bytes)
+ * <li>Total count of objects indexed (C, 4 bytes)
+ * (if count == 0, stop here)
+ *
+ * Blocks of
+ * <li>Size per entry in bits (1 byte, either 24 (0x18) or 32 (0x20))
+ * <li>Count of entries (4 bytes) (c, as a signed int)
+ * <li>positions encoded in s bytes each (i.e s*c bytes)
+ *
+ * <li>0 (as a "size-per-entry = 0", marking end of the section)
+ *
+ * <li>32 bit sizes (C * 4 bytes). Negative size means
+ * nextLevel[abs(size)-1]
+ * <li>Count of 64 bit sizes (s64) (or 0 if no more indirections)
+ * <li>64 bit sizes (s64 * 8 bytes)
+ * <li>0 (end)
+ */
+ static class PackObjectSizeWriterV1 extends PackObjectSizeIndexWriter {
+
+ private final OutputStream os;
+
+ private final int minObjSize;
+
+ private final byte[] intBuffer = new byte[4];
+
+ PackObjectSizeWriterV1(OutputStream os, int minSize) {
+ this.os = new BufferedOutputStream(os);
+ this.minObjSize = minSize;
+ }
+
+ @Override
+ public void write(List<? extends PackedObjectInfo> allObjects)
+ throws IOException {
+ os.write(HEADER);
+ writeUInt8(1); // Version
+ writeInt32(minObjSize);
+
+ PackedObjectStats stats = countIndexableObjects(allObjects);
+ int[] indexablePositions = findIndexablePositions(allObjects,
+ stats.indexableObjs);
+ writeInt32(indexablePositions.length); // Total # of objects
+ if (indexablePositions.length == 0) {
+ os.flush();
+ return;
+ }
+
+ // Positions that fit in 3 bytes
+ if (stats.pos24Bits > 0) {
+ writeUInt8(24);
+ writeInt32(stats.pos24Bits);
+ applyToRange(indexablePositions, 0, stats.pos24Bits,
+ this::writeInt24);
+ }
+ // Positions that fit in 4 bytes
+ // We only use 31 bits due to sign,
+ // but that covers 2 billion objs
+ if (stats.pos31Bits > 0) {
+ writeUInt8(32);
+ writeInt32(stats.pos31Bits);
+ applyToRange(indexablePositions, stats.pos24Bits,
+ stats.pos24Bits + stats.pos31Bits, this::writeInt32);
+ }
+ writeUInt8(0);
+ writeSizes(allObjects, indexablePositions, stats.sizeOver2GB);
+ os.flush();
+ }
+
+ private void writeUInt8(int i) throws IOException {
+ if (i > 255) {
+ throw new IllegalStateException(
+ "Number doesn't fit in a single byte");
+ }
+ NB.encodeInt32(intBuffer, 0, i);
+ os.write(intBuffer, 3, 1);
+ }
+
+ private void writeInt24(int i) throws IOException {
+ NB.encodeInt24(intBuffer, 1, i);
+ os.write(intBuffer, 1, 3);
+ }
+
+ private void writeInt32(int i) throws IOException {
+ NB.encodeInt32(intBuffer, 0, i);
+ os.write(intBuffer);
+ }
+
+ private void writeSizes(List<? extends PackedObjectInfo> allObjects,
+ int[] indexablePositions, int objsBiggerThan2Gb)
+ throws IOException {
+ if (indexablePositions.length == 0) {
+ writeInt32(0);
+ return;
+ }
+
+ byte[] sizes64bits = new byte[8 * objsBiggerThan2Gb];
+ int s64 = 0;
+ for (int i = 0; i < indexablePositions.length; i++) {
+ PackedObjectInfo info = allObjects.get(indexablePositions[i]);
+ if (info.getFullSize() < Integer.MAX_VALUE) {
+ writeInt32((int) info.getFullSize());
+ } else {
+ // Size needs more than 32 bits. Store -1 * offset in the
+ // next table as size.
+ writeInt32(-1 * (s64 + 1));
+ NB.encodeInt64(sizes64bits, s64 * 8, info.getFullSize());
+ s64++;
+ }
+ }
+ if (objsBiggerThan2Gb > 0) {
+ writeInt32(objsBiggerThan2Gb);
+ os.write(sizes64bits);
+ }
+ writeInt32(0);
+ }
+
+ private int[] findIndexablePositions(
+ List<? extends PackedObjectInfo> allObjects,
+ int indexableObjs) {
+ int[] positions = new int[indexableObjs];
+ int positionIdx = 0;
+ for (int i = 0; i < allObjects.size(); i++) {
+ PackedObjectInfo o = allObjects.get(i);
+ if (!shouldIndex(o)) {
+ continue;
+ }
+ positions[positionIdx++] = i;
+ }
+ return positions;
+ }
+
+ private PackedObjectStats countIndexableObjects(
+ List<? extends PackedObjectInfo> objs) {
+ PackedObjectStats stats = new PackedObjectStats();
+ for (int i = 0; i < objs.size(); i++) {
+ PackedObjectInfo o = objs.get(i);
+ if (!shouldIndex(o)) {
+ continue;
+ }
+ stats.indexableObjs++;
+ if (o.getFullSize() > Integer.MAX_VALUE) {
+ stats.sizeOver2GB++;
+ }
+ if (i <= MAX_24BITS_UINT) {
+ stats.pos24Bits++;
+ } else {
+ stats.pos31Bits++;
+ // i is a positive int, cannot be bigger than this
+ }
+ }
+ return stats;
+ }
+
+ private boolean shouldIndex(PackedObjectInfo o) {
+ return (o.getType() == Constants.OBJ_BLOB)
+ && (o.getFullSize() >= minObjSize);
+ }
+
+ private static class PackedObjectStats {
+ int indexableObjs;
+
+ int pos24Bits;
+
+ int pos31Bits;
+
+ int sizeOver2GB;
+ }
+
+ @FunctionalInterface
+ interface IntEncoder {
+ void encode(int i) throws IOException;
+ }
+
+ private static void applyToRange(int[] allPositions, int start, int end,
+ IntEncoder encoder) throws IOException {
+ for (int i = start; i < end; i++) {
+ encoder.encode(allPositions[i]);
+ }
+ }
+ }
+}