--- /dev/null
+/*
+ * Copyright (C) 2023, Google LLC. and others
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Distribution License v. 1.0 which is available at
+ * https://www.eclipse.org/org/documents/edl-v10.php.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+package org.eclipse.jgit.internal.storage.dfs;
+
+import static org.eclipse.jgit.lib.ConfigConstants.CONFIG_KEY_MIN_BYTES_OBJ_SIZE_INDEX;
+import static org.eclipse.jgit.lib.ConfigConstants.CONFIG_PACK_SECTION;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.assertEquals;
+
+import java.io.IOException;
+
+import org.eclipse.jgit.junit.JGitTestUtil;
+import org.eclipse.jgit.junit.TestRng;
+import org.eclipse.jgit.lib.Constants;
+import org.eclipse.jgit.lib.ObjectId;
+import org.eclipse.jgit.lib.ObjectInserter;
+import org.junit.Before;
+import org.junit.Test;
+
+/**
+ * Tests for {@link DfsReader#isNotLargerThan} and the object size index
+ * statistics it records in {@link DfsReaderIoStats}.
+ */
+public class DfsReaderTest {
+	// Fresh in-memory repository per test; populated in setUp().
+	InMemoryRepository db;
+
+	@Before
+	public void setUp() {
+		db = new InMemoryRepository(new DfsRepositoryDescription("test"));
+	}
+
+	@Test
+	public void isNotLargerThan_noObjectSizeIndex() throws IOException {
+		// Negative threshold disables writing the object size index, so
+		// every size check must fall back to reading the pack itself.
+		setObjectSizeIndexMinBytes(-1);
+		ObjectId obj = insertBlobWithSize(10);
+		try (DfsReader ctx = db.getObjectDatabase().newReader()) {
+			// Limits below/at/above the blob size of 10 bytes.
+			assertFalse(ctx.isNotLargerThan(obj, Constants.OBJ_BLOB, 0));
+			assertTrue(ctx.isNotLargerThan(obj, Constants.OBJ_BLOB, 10));
+			assertTrue(ctx.isNotLargerThan(obj, Constants.OBJ_BLOB, 40));
+			assertTrue(ctx.isNotLargerThan(obj, Constants.OBJ_BLOB, 50));
+			assertTrue(ctx.isNotLargerThan(obj, Constants.OBJ_BLOB, 100));
+
+			// All five calls are counted, but with no index present there
+			// must be neither index hits nor index misses.
+			assertEquals(5, ctx.stats.isNotLargerThanCallCount);
+			assertEquals(0, ctx.stats.objectSizeIndexMiss);
+			assertEquals(0, ctx.stats.objectSizeIndexHit);
+		}
+	}
+
+	/**
+	 * Insert a blob of {@code size} random bytes and flush it to the pack.
+	 *
+	 * @param size
+	 *            number of random bytes in the blob.
+	 * @return id of the inserted blob.
+	 * @throws IOException
+	 *             the object could not be written.
+	 */
+	private ObjectId insertBlobWithSize(int size)
+			throws IOException {
+		TestRng testRng = new TestRng(JGitTestUtil.getName());
+		ObjectId oid;
+		try (ObjectInserter ins = db.newObjectInserter()) {
+			oid = ins.insert(Constants.OBJ_BLOB,
+					testRng.nextBytes(size));
+			ins.flush();
+		}
+		return oid;
+	}
+
+	/**
+	 * Set the minimum object size for inclusion in the object size index.
+	 *
+	 * @param threshold
+	 *            minimum byte size; negative disables the index.
+	 */
+	private void setObjectSizeIndexMinBytes(int threshold) {
+		db.getConfig().setInt(CONFIG_PACK_SECTION, null,
+				CONFIG_KEY_MIN_BYTES_OBJ_SIZE_INDEX, threshold);
+	}
+}
import static org.eclipse.jgit.internal.storage.pack.PackExt.BITMAP_INDEX;
import static org.eclipse.jgit.internal.storage.pack.PackExt.COMMIT_GRAPH;
import static org.eclipse.jgit.internal.storage.pack.PackExt.INDEX;
+import static org.eclipse.jgit.internal.storage.pack.PackExt.OBJECT_SIZE_INDEX;
import static org.eclipse.jgit.internal.storage.pack.PackExt.PACK;
import static org.eclipse.jgit.internal.storage.pack.PackExt.REVERSE_INDEX;
import org.eclipse.jgit.internal.storage.commitgraph.CommitGraphLoader;
import org.eclipse.jgit.internal.storage.file.PackBitmapIndex;
import org.eclipse.jgit.internal.storage.file.PackIndex;
+import org.eclipse.jgit.internal.storage.file.PackObjectSizeIndex;
+import org.eclipse.jgit.internal.storage.file.PackObjectSizeIndexLoader;
import org.eclipse.jgit.internal.storage.file.PackReverseIndex;
import org.eclipse.jgit.internal.storage.file.PackReverseIndexFactory;
import org.eclipse.jgit.internal.storage.pack.BinaryDelta;
/** Index of compressed commit graph mapping entire object graph. */
private volatile CommitGraph commitGraph;
+	/** Lazily loaded object size index; null if absent or not yet loaded. */
+ private boolean objectSizeIndexLoadAttempted;
+
+ private volatile PackObjectSizeIndex objectSizeIndex;
+
/**
* Objects we have tried to read, and discovered to be corrupt.
* <p>
return reverseIndex;
}
+	/**
+	 * Get the object size index of this pack, loading it on first use.
+	 *
+	 * The outcome of the first load attempt (including "this pack has no
+	 * index") is remembered, so storage is consulted at most once.
+	 *
+	 * @param ctx
+	 *            reader context to read the index from the backing store
+	 *            if it is not already loaded in memory.
+	 * @return the object size index, or null if this pack has none.
+	 * @throws IOException
+	 *             the index exists but could not be read or parsed.
+	 */
+	private PackObjectSizeIndex getObjectSizeIndex(DfsReader ctx)
+			throws IOException {
+		if (objectSizeIndex != null) {
+			return objectSizeIndex;
+		}
+
+		if (objectSizeIndexLoadAttempted) {
+			// Pack doesn't have object size index
+			return null;
+		}
+
+		DfsStreamKey objSizeKey = desc.getStreamKey(OBJECT_SIZE_INDEX);
+		AtomicBoolean cacheHit = new AtomicBoolean(true);
+		try {
+			DfsBlockCache.Ref<PackObjectSizeIndex> sizeIdxRef = cache
+					.getOrLoadRef(objSizeKey, REF_POSITION, () -> {
+						// Loader ran, so the index was not in the block cache.
+						cacheHit.set(false);
+						return loadObjectSizeIndex(ctx, objSizeKey);
+					});
+			// NOTE(review): this also counts a "hit" when the cached value is
+			// null (pack known to have no index) — confirm that is intended.
+			if (cacheHit.get()) {
+				ctx.stats.objectSizeIndexCacheHit++;
+			}
+			PackObjectSizeIndex sizeIdx = sizeIdxRef.get();
+			if (sizeIdx != null) {
+				objectSizeIndex = sizeIdx;
+			}
+		} finally {
+			// Mark attempted even on failure, so we do not retry storage.
+			objectSizeIndexLoadAttempted = true;
+		}
+
+		return objectSizeIndex;
+	}
+
/**
* Check if an object is stored within this pack.
*
}
}
+	/**
+	 * Whether this packfile has an object size index attached.
+	 *
+	 * This loads the index if it is not already in memory; the result of
+	 * that load is cached for subsequent calls.
+	 *
+	 * @param ctx
+	 *            reader context to support reading from the backing store if
+	 *            the object size index is not already loaded in memory.
+	 * @return true if the packfile has an object size index.
+	 * @throws IOException
+	 *             a problem accessing storage while looking for the index
+	 */
+	boolean hasObjectSizeIndex(DfsReader ctx) throws IOException {
+		return getObjectSizeIndex(ctx) != null;
+	}
+
+	/**
+	 * Return minimum size for an object to be included in the object size
+	 * index.
+	 *
+	 * Caller must make sure the pack has an object size index with
+	 * {@link #hasObjectSizeIndex} before calling this method.
+	 *
+	 * @param ctx
+	 *            reader context to support reading from the backing store if
+	 *            the object size index is not already loaded in memory.
+	 * @return minimum size for indexing, in bytes.
+	 * @throws IOException
+	 *             this pack has no object size index, or there was a problem
+	 *             loading it.
+	 */
+	int getObjectSizeIndexThreshold(DfsReader ctx) throws IOException {
+		PackObjectSizeIndex index = getObjectSizeIndex(ctx);
+		if (index == null) {
+			throw new IOException("Asking threshold of non-existing obj-size"); //$NON-NLS-1$
+		}
+		return index.getThreshold();
+	}
+
+	/**
+	 * Return the size of the object from the object-size index. The object
+	 * should be a blob. Any other type is not indexed and returns -1.
+	 *
+	 * Caller MUST be sure that the object is in the pack (e.g. with
+	 * {@link #hasObject(DfsReader, AnyObjectId)}) and the pack has object size
+	 * index (e.g. with {@link #hasObjectSizeIndex(DfsReader)}) before asking
+	 * the indexed size.
+	 *
+	 * @param ctx
+	 *            reader context to support reading from the backing store if
+	 *            the object size index is not already loaded in memory.
+	 * @param id
+	 *            object id of an object in the pack
+	 * @return size of the object from the index. Negative if object is not in
+	 *         the index (below threshold or not a blob)
+	 * @throws IOException
+	 *             could not read the object size index. IO problem or the pack
+	 *             doesn't have it.
+	 * @throws IllegalArgumentException
+	 *             the object is not in this pack (precondition violated).
+	 * @throws IllegalStateException
+	 *             this pack has no object size index (precondition violated).
+	 */
+	long getIndexedObjectSize(DfsReader ctx, AnyObjectId id)
+			throws IOException {
+		// Position in the pack index doubles as the key of the size index.
+		int idxPosition = idx(ctx).findPosition(id);
+		if (idxPosition < 0) {
+			throw new IllegalArgumentException(
+					"Cannot get size from index since object is not in pack"); //$NON-NLS-1$
+		}
+
+		PackObjectSizeIndex sizeIdx = getObjectSizeIndex(ctx);
+		if (sizeIdx == null) {
+			throw new IllegalStateException(
+					"Asking indexed size from a pack without object size index"); //$NON-NLS-1$
+		}
+
+		return sizeIdx.getSize(idxPosition);
+	}
+
void representation(DfsObjectRepresentation r, final long pos,
DfsReader ctx, PackReverseIndex rev)
throws IOException {
revidx);
}
+	/**
+	 * Read the object size index from storage and wrap it in a cache ref.
+	 *
+	 * Invoked by the block cache loader in {@code getObjectSizeIndex} when
+	 * the index is not already cached.
+	 *
+	 * @param ctx
+	 *            reader used to open the index file and record IO stats.
+	 * @param objectSizeIndexKey
+	 *            cache key under which the loaded index is stored.
+	 * @return cache ref holding the parsed index, or a ref with a null value
+	 *         if this pack has no object size index file.
+	 * @throws IOException
+	 *             the index file exists but could not be parsed.
+	 */
+	private DfsBlockCache.Ref<PackObjectSizeIndex> loadObjectSizeIndex(
+			DfsReader ctx, DfsStreamKey objectSizeIndexKey) throws IOException {
+		ctx.stats.readObjectSizeIndex++;
+		long start = System.nanoTime();
+		long size = 0;
+		Exception parsingError = null;
+		try (ReadableChannel rc = ctx.db.openFile(desc, OBJECT_SIZE_INDEX)) {
+			try {
+				objectSizeIndex = PackObjectSizeIndexLoader
+						.load(Channels.newInputStream(rc));
+				size = rc.position();
+			} catch (IOException e) {
+				// Defer: the channel must close before we rethrow below.
+				parsingError = e;
+			}
+		} catch (IOException e) {
+			// No object size index in this pack
+			return new DfsBlockCache.Ref<>(objectSizeIndexKey, REF_POSITION, 0,
+					null);
+		}
+
+		if (parsingError != null) {
+			throw new IOException(
+					MessageFormat.format(DfsText.get().shortReadOfIndex,
+							desc.getFileName(OBJECT_SIZE_INDEX)),
+					parsingError);
+		}
+
+		ctx.stats.readObjectSizeIndexBytes += size;
+		ctx.stats.readObjectSizeIndexMicros += elapsedMicros(start);
+		return new DfsBlockCache.Ref<>(objectSizeIndexKey, REF_POSITION, size,
+				objectSizeIndex);
+	}
+
private DfsBlockCache.Ref<PackBitmapIndex> loadBitmapIndex(DfsReader ctx,
DfsStreamKey bitmapKey) throws IOException {
ctx.stats.readBitmap++;
import org.eclipse.jgit.lib.AsyncObjectSizeQueue;
import org.eclipse.jgit.lib.BitmapIndex;
import org.eclipse.jgit.lib.BitmapIndex.BitmapBuilder;
+import org.eclipse.jgit.lib.Constants;
import org.eclipse.jgit.lib.InflaterCache;
import org.eclipse.jgit.lib.ObjectId;
import org.eclipse.jgit.lib.ObjectLoader;
throw new MissingObjectException(objectId.copy(), typeHint);
}
+
+	/**
+	 * {@inheritDoc}
+	 *
+	 * Uses the pack's object size index for blobs when available; otherwise
+	 * (or when the index cannot answer) falls back to reading the object
+	 * size from the pack.
+	 */
+	@Override
+	public boolean isNotLargerThan(AnyObjectId objectId, int typeHint,
+			long limit) throws MissingObjectException,
+			IncorrectObjectTypeException, IOException {
+		DfsPackFile pack = findPackWithObject(objectId);
+		if (pack == null) {
+			if (typeHint == OBJ_ANY) {
+				throw new MissingObjectException(objectId.copy(),
+						JGitText.get().unknownObjectType2);
+			}
+			throw new MissingObjectException(objectId.copy(), typeHint);
+		}
+
+		stats.isNotLargerThanCallCount += 1;
+		// Only blobs are indexed by size; anything else reads the pack.
+		if (typeHint != Constants.OBJ_BLOB || !pack.hasObjectSizeIndex(this)) {
+			return pack.getObjectSize(this, objectId) <= limit;
+		}
+
+		// Negative size means the blob is not in the index.
+		long sz = pack.getIndexedObjectSize(this, objectId);
+		if (sz < 0) {
+			stats.objectSizeIndexMiss += 1;
+		} else {
+			stats.objectSizeIndexHit += 1;
+		}
+
+		// Got size from index or we didn't but we are sure it should be there.
+		if (sz >= 0 || pack.getObjectSizeIndexThreshold(this) <= limit) {
+			return sz <= limit;
+		}
+
+		return pack.getObjectSize(this, objectId) <= limit;
+	}
+
+	/**
+	 * Find the pack containing the object, checking the {@code last} pack
+	 * first, then the current pack list, then a rescanned list if dirty.
+	 *
+	 * @param objectId
+	 *            object to locate.
+	 * @return the pack containing the object, or null if not found. On
+	 *         success {@code last} points at the returned pack.
+	 * @throws IOException
+	 *             packs could not be scanned.
+	 */
+	private DfsPackFile findPackWithObject(AnyObjectId objectId)
+			throws IOException {
+		if (last != null && !skipGarbagePack(last)
+				&& last.hasObject(this, objectId)) {
+			return last;
+		}
+		PackList packList = db.getPackList();
+		// hasImpl doesn't check "last", but leaves "last" pointing to the pack
+		// with the object
+		if (hasImpl(packList, objectId)) {
+			return last;
+		} else if (packList.dirty()) {
+			// Pack list changed under us; retry once with a fresh scan.
+			if (hasImpl(db.getPackList(), objectId)) {
+				return last;
+			}
+		}
+		return null;
+	}
+
private long getObjectSizeImpl(PackList packList, AnyObjectId objectId)
throws IOException {
for (DfsPackFile pack : packList.packs) {
package org.eclipse.jgit.internal.storage.dfs;
+import org.eclipse.jgit.lib.AnyObjectId;
+
/**
* IO statistics for a {@link org.eclipse.jgit.internal.storage.dfs.DfsReader}.
*/
public class DfsReaderIoStats {
/** POJO to accumulate IO statistics. */
public static class Accumulator {
+
/** Number of times the reader explicitly called scanPacks. */
long scanPacks;
/** Total number of cache hits for commit graphs. */
long commitGraphCacheHit;
+ /** Total number of cache hits for object size indexes. */
+ long objectSizeIndexCacheHit;
+
/** Total number of complete pack indexes read into memory. */
long readIdx;
/** Total number of complete commit graphs read into memory. */
long readCommitGraph;
+ /** Total number of object size indexes added into memory. */
+ long readObjectSizeIndex;
+
/** Total number of bytes read from pack indexes. */
long readIdxBytes;
/** Total number of bytes read from commit graphs. */
long readCommitGraphBytes;
+	/** Total number of bytes read from object size indexes. */
+ long readObjectSizeIndexBytes;
+
/** Total microseconds spent reading pack indexes. */
long readIdxMicros;
/** Total microseconds spent creating commit graphs. */
long readCommitGraphMicros;
+	/** Total microseconds spent creating object size indexes. */
+ long readObjectSizeIndexMicros;
+
/** Total number of bytes read from bitmap indexes. */
long readBitmapIdxBytes;
/** Total microseconds spent inflating compressed bytes. */
long inflationMicros;
+	/** Count of queries for the size of an object via {@code isNotLargerThan}. */
+ long isNotLargerThanCallCount;
+
+ /** Object was below threshold in the object size index */
+ long objectSizeIndexMiss;
+
+ /** Object size found in the object size index */
+ long objectSizeIndexHit;
+
Accumulator() {
}
}
return stats.commitGraphCacheHit;
}
+	/**
+	 * Get total number of object size index cache hits.
+	 *
+	 * A hit means the index was already in the block cache and did not need
+	 * to be read from storage.
+	 *
+	 * @return total number of object size index cache hits.
+	 */
+	public long getObjectSizeIndexCacheHits() {
+		return stats.objectSizeIndexCacheHit;
+	}
+
/**
* Get total number of complete pack indexes read into memory.
*
return stats.readBitmap;
}
+	/**
+	 * Get total number of complete object size indexes read into memory.
+	 *
+	 * Each read parses the index from the backing store.
+	 *
+	 * @return total number of complete object size indexes read into memory.
+	 */
+	public long getReadObjectSizeIndexCount() {
+		return stats.readObjectSizeIndex;
+	}
+
/**
* Get total number of bytes read from pack indexes.
*
public long getInflationMicros() {
return stats.inflationMicros;
}
+
+	/**
+	 * Get count of invocations to
+	 * {@link DfsReader#isNotLargerThan(AnyObjectId, int, long)}.
+	 * <p>
+	 * Each call could use the object-size index or not.
+	 *
+	 * @return how many times the size of an object was checked with
+	 *         {@link DfsReader#isNotLargerThan(AnyObjectId, int, long)}
+	 */
+	public long getIsNotLargerThanCallCount() {
+		return stats.isNotLargerThanCallCount;
+	}
+
+	/**
+	 * Get number of times the size of a blob was found in the object size
+	 * index.
+	 * <p>
+	 * This counts only queries for blobs on packs with an object size index.
+	 *
+	 * @return count of object size index hits
+	 */
+	public long getObjectSizeIndexHits() {
+		return stats.objectSizeIndexHit;
+	}
+
+	/**
+	 * Get number of times the size of an object was not found in the object
+	 * size index. This usually means it was below the threshold.
+	 * <p>
+	 * This counts only queries for blobs on packs with an object size index.
+	 *
+	 * @return count of object size index misses.
+	 */
+	public long getObjectSizeIndexMisses() {
+		return stats.objectSizeIndexMiss;
+	}
}