aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorIvan Frade <ifrade@google.com>2022-01-05 09:50:54 -0800
committerIvan Frade <ifrade@google.com>2025-07-21 13:28:27 +0300
commitd9ac2f31e0ac931e57ec11f0ce76eaea8b459c75 (patch)
tree64445d373be5c70b5f587b73a8fc1465cde51224
parent8929300aa80fb11fce2e6b3f411de02c716e7afb (diff)
downloadjgit-d9ac2f31e0ac931e57ec11f0ce76eaea8b459c75.tar.gz
jgit-d9ac2f31e0ac931e57ec11f0ce76eaea8b459c75.zip
ObjectDirectoryPackParser: Write object-size index with the pack
ObjectDirectoryPack{Parser,Inserter} is not symmetrical to the Pack{Parser,Inserter} combination. In the Pack* version, the inserter takes care of writing the indices on #flush. In the ObjectDirectory* size, this is done by the parser. Make ObjectDirectoryPackParser write the object size index. Change-Id: I5a1c091857928b141a07920a5c10e931ffe9bfa2
-rw-r--r--org.eclipse.jgit.test/tst/org/eclipse/jgit/transport/ObjectDirectoryPackParserTest.java227
-rw-r--r--org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/ObjectDirectoryPackParser.java48
2 files changed, 275 insertions, 0 deletions
diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/transport/ObjectDirectoryPackParserTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/transport/ObjectDirectoryPackParserTest.java
new file mode 100644
index 0000000000..b17c577087
--- /dev/null
+++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/transport/ObjectDirectoryPackParserTest.java
@@ -0,0 +1,227 @@
+/*
+ * Copyright (C) 2021, Google LLC. and others
+ * Copyright (C) 2008, Imran M Yousuf <imyousuf@smartitengineering.com>
+ * Copyright (C) 2007-2008, Robin Rosenberg <robin.rosenberg@dewire.com>
+ * Copyright (C) 2008, Shawn O. Pearce <spearce@spearce.org> and others
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Distribution License v. 1.0 which is available at
+ * https://www.eclipse.org/org/documents/edl-v10.php.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+package org.eclipse.jgit.transport;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import java.io.ByteArrayInputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.security.MessageDigest;
+import java.util.zip.Deflater;
+
+import org.eclipse.jgit.internal.storage.file.ObjectDirectoryPackParser;
+import org.eclipse.jgit.internal.storage.file.Pack;
+import org.eclipse.jgit.junit.JGitTestUtil;
+import org.eclipse.jgit.junit.RepositoryTestCase;
+import org.eclipse.jgit.junit.TestRepository;
+import org.eclipse.jgit.lib.ConfigConstants;
+import org.eclipse.jgit.lib.Constants;
+import org.eclipse.jgit.lib.NullProgressMonitor;
+import org.eclipse.jgit.lib.ObjectId;
+import org.eclipse.jgit.lib.ObjectInserter;
+import org.eclipse.jgit.lib.Repository;
+import org.eclipse.jgit.revwalk.RevBlob;
+import org.eclipse.jgit.storage.file.FileBasedConfig;
+import org.eclipse.jgit.util.NB;
+import org.eclipse.jgit.util.TemporaryBuffer;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+/**
+ * Pack parsing is covered in {@link PackParserTest}.
+ *
+ * Here we test ObjectDirectoryPackParser specific parts. e.g. that is creates
+ * the object-size index.
+ */
+public class ObjectDirectoryPackParserTest extends RepositoryTestCase {
+
+ @Before
+ public void setup() throws IOException {
+ FileBasedConfig jGitConfig = mockSystemReader.getJGitConfig();
+ jGitConfig.setInt(ConfigConstants.CONFIG_PACK_SECTION, null,
+ ConfigConstants.CONFIG_KEY_MIN_BYTES_OBJ_SIZE_INDEX, 7);
+ jGitConfig.save();
+ }
+
+ /**
+ * Test indexing one of the test packs in the egit repo. It has deltas.
+ *
+ * @throws IOException
+ */
+ @Test
+ public void testGitPack() throws IOException {
+ File packFile = JGitTestUtil.getTestResourceFile("pack-34be9032ac282b11fa9babdc2b2a93ca996c9c2f.pack");
+ try (InputStream is = new FileInputStream(packFile)) {
+ ObjectDirectoryPackParser p = index(is);
+ p.parse(NullProgressMonitor.INSTANCE);
+
+ Pack pack = p.getPack();
+ assertTrue(pack.hasObjectSizeIndex());
+
+ // Only blobs in the pack
+ ObjectId blob1 = ObjectId
+ .fromString("6ff87c4664981e4397625791c8ea3bbb5f2279a3");
+ ObjectId blob2 = ObjectId
+ .fromString("5b6e7c66c276e7610d4a73c70ec1a1f7c1003259");
+ assertEquals(18787, pack.getIndexedObjectSize(blob1));
+ assertEquals(18009, pack.getIndexedObjectSize(blob2));
+
+ // Indexed sizes match object db sizes
+ assertEquals(db.getObjectDatabase().open(blob1).getSize(),
+ pack.getIndexedObjectSize(blob1));
+ assertEquals(db.getObjectDatabase().open(blob2).getSize(),
+ pack.getIndexedObjectSize(blob2));
+
+ }
+ }
+
+ /**
+ * This is just another pack. It so happens that we have two convenient pack to
+ * test with in the repository.
+ *
+ * @throws IOException
+ */
+ @Test
+ public void testAnotherGitPack() throws IOException {
+ File packFile = JGitTestUtil.getTestResourceFile("pack-df2982f284bbabb6bdb59ee3fcc6eb0983e20371.pack");
+ try (InputStream is = new FileInputStream(packFile)) {
+ ObjectDirectoryPackParser p = index(is);
+ p.parse(NullProgressMonitor.INSTANCE);
+ Pack pack = p.getPack();
+
+ // Blob smaller than threshold:
+ assertEquals(-1, pack.getIndexedObjectSize(ObjectId
+ .fromString("15fae9e651043de0fd1deef588aa3fbf5a7a41c6")));
+
+ // Blob bigger than threshold
+ assertEquals(10, pack.getIndexedObjectSize(ObjectId
+ .fromString("8230f48330e0055d9e0bc5a2a77718f6dd9324b8")));
+
+ // A commit (not indexed)
+ assertEquals(-1, pack.getIndexedObjectSize(ObjectId
+ .fromString("d0114ab8ac326bab30e3a657a0397578c5a1af88")));
+
+ // Object not in pack
+ assertEquals(-1, pack.getIndexedObjectSize(ObjectId
+ .fromString("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")));
+ }
+ }
+
+ @Test
+ public void testTinyThinPack() throws Exception {
+ // less than 16 bytes, so its length fits in a single byte later
+ String base = "abcdefghijklmn";
+ RevBlob a;
+ try (TestRepository d = new TestRepository<Repository>(db)) {
+ a = d.blob(base);
+ }
+
+ TemporaryBuffer.Heap pack = new TemporaryBuffer.Heap(1024);
+
+ packHeader(pack, 1);
+
+ pack.write(Constants.OBJ_REF_DELTA << 4 | 4);
+ a.copyRawTo(pack);
+ deflate(pack, new byte[] { (byte) base.length(), // size of the base
+ (byte) (base.length() + 1), // size after reconstruction
+ 0x1, 'b' }); // append one byte
+
+ digest(pack);
+
+ ObjectDirectoryPackParser p = index(new ByteArrayInputStream(pack.toByteArray()));
+ p.setAllowThin(true);
+ p.parse(NullProgressMonitor.INSTANCE);
+
+ Pack writtenPack = p.getPack();
+ // base
+ assertEquals(base.length(), writtenPack.getIndexedObjectSize(a));
+ // undeltified blob
+ assertEquals(base.length() + 1,
+ writtenPack.getIndexedObjectSize(ObjectId.fromString(
+ "f177875498138143c9657cc52b049ad4d20d5223")));
+ }
+
+ @Test
+ public void testPackWithDuplicateBlob() throws Exception {
+ final byte[] data = Constants.encode("0123456789abcdefg");
+ RevBlob blob;
+ try (TestRepository<Repository> d = new TestRepository<>(db)) {
+ blob = d.blob(data);
+ assertTrue(db.getObjectDatabase().has(blob));
+ }
+
+ TemporaryBuffer.Heap pack = new TemporaryBuffer.Heap(1024);
+ packHeader(pack, 1);
+ pack.write(Constants.OBJ_BLOB << 4 | 0x80 | 1);
+ pack.write(1);
+ deflate(pack, data);
+ digest(pack);
+
+ ObjectDirectoryPackParser p = index(
+ new ByteArrayInputStream(pack.toByteArray()));
+ p.setAllowThin(false);
+ p.parse(NullProgressMonitor.INSTANCE);
+
+ assertEquals(data.length, p.getPack().getIndexedObjectSize(blob));
+ }
+
+ private static void packHeader(TemporaryBuffer.Heap tinyPack, int cnt)
+ throws IOException {
+ final byte[] hdr = new byte[8];
+ NB.encodeInt32(hdr, 0, 2);
+ NB.encodeInt32(hdr, 4, cnt);
+
+ tinyPack.write(Constants.PACK_SIGNATURE);
+ tinyPack.write(hdr, 0, 8);
+ }
+
+ private static void deflate(TemporaryBuffer.Heap tinyPack,
+ final byte[] content)
+ throws IOException {
+ final Deflater deflater = new Deflater();
+ final byte[] buf = new byte[128];
+ deflater.setInput(content, 0, content.length);
+ deflater.finish();
+ do {
+ final int n = deflater.deflate(buf, 0, buf.length);
+ if (n > 0)
+ tinyPack.write(buf, 0, n);
+ } while (!deflater.finished());
+ }
+
+ private static void digest(TemporaryBuffer.Heap buf) throws IOException {
+ MessageDigest md = Constants.newMessageDigest();
+ md.update(buf.toByteArray());
+ buf.write(md.digest());
+ }
+
+ private ObjectInserter inserter;
+
+ @After
+ public void release() {
+ if (inserter != null) {
+ inserter.close();
+ }
+ }
+
+ private ObjectDirectoryPackParser index(InputStream in) throws IOException {
+ if (inserter == null)
+ inserter = db.newObjectInserter();
+ return (ObjectDirectoryPackParser) inserter.newPackParser(in);
+ }
+}
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/ObjectDirectoryPackParser.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/ObjectDirectoryPackParser.java
index 746e124e1f..d97d5a7ccd 100644
--- a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/ObjectDirectoryPackParser.java
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/ObjectDirectoryPackParser.java
@@ -72,6 +72,12 @@ public class ObjectDirectoryPackParser extends PackParser {
*/
private File tmpIdx;
+ /**
+ * Path of the object-size index created for the pack, to filter quickly
+ * objects by size in partial clones
+ */
+ private File tmpObjectSizeIndex;
+
/** Read/write handle to {@link #tmpPack} while it is being parsed. */
private RandomAccessFile out;
@@ -163,6 +169,7 @@ public class ObjectDirectoryPackParser extends PackParser {
throws IOException {
tmpPack = File.createTempFile("incoming_", ".pack", db.getDirectory()); //$NON-NLS-1$ //$NON-NLS-2$
tmpIdx = new File(db.getDirectory(), baseName(tmpPack) + ".idx"); //$NON-NLS-1$
+
try {
out = new RandomAccessFile(tmpPack, "rw"); //$NON-NLS-1$
@@ -178,6 +185,14 @@ public class ObjectDirectoryPackParser extends PackParser {
tmpPack.setReadOnly();
tmpIdx.setReadOnly();
+ if (pconfig.isWriteObjSizeIndex()) {
+ tmpObjectSizeIndex = new File(db.getDirectory(),
+ baseName(tmpPack)
+ + PackExt.OBJECT_SIZE_INDEX.getExtension());
+ writeObjectSizeIndex(pconfig.getMinBytesForObjSizeIndex());
+ tmpObjectSizeIndex.setReadOnly();
+ }
+
return renameAndOpenPack(getLockMessage());
} finally {
if (def != null)
@@ -295,6 +310,9 @@ public class ObjectDirectoryPackParser extends PackParser {
tmpIdx.deleteOnExit();
if (tmpPack != null && !tmpPack.delete() && tmpPack.exists())
tmpPack.deleteOnExit();
+ if (tmpObjectSizeIndex != null && !tmpObjectSizeIndex.delete()
+ && tmpObjectSizeIndex.exists())
+ tmpPack.deleteOnExit();
}
@Override
@@ -395,6 +413,15 @@ public class ObjectDirectoryPackParser extends PackParser {
}
}
+ private void writeObjectSizeIndex(int minSize) throws IOException {
+ try (FileOutputStream os = new FileOutputStream(tmpObjectSizeIndex)) {
+ PackObjectSizeIndexWriter iw = PackObjectSizeIndexWriter
+ .createWriter(os, minSize);
+ iw.write(getSortedObjectList(null));
+ os.getChannel().force(true);
+ }
+ }
+
private PackLock renameAndOpenPack(String lockMessage)
throws IOException {
if (!keepEmpty && getObjectCount() == 0) {
@@ -469,6 +496,27 @@ public class ObjectDirectoryPackParser extends PackParser {
JGitText.get().cannotMoveIndexTo, finalIdx), e);
}
+ if (pconfig.isWriteObjSizeIndex() && tmpObjectSizeIndex != null) {
+ PackFile finalObjectSizeIndex = finalPack
+ .create(PackExt.OBJECT_SIZE_INDEX);
+ try {
+ FileUtils.rename(tmpObjectSizeIndex, finalObjectSizeIndex,
+ StandardCopyOption.ATOMIC_MOVE);
+ } catch (IOException e) {
+ cleanupTemporaryFiles();
+ keep.unlock();
+ if (!finalPack.delete())
+ finalPack.deleteOnExit();
+ if (!finalIdx.delete()) {
+ finalIdx.deleteOnExit();
+ }
+ throw new IOException(MessageFormat
+ .format(JGitText.get().cannotMoveIndexTo,
+ finalObjectSizeIndex),
+ e);
+ }
+ }
+
boolean interrupted = false;
try {
FileSnapshot snapshot = FileSnapshot.save(finalPack);