diff options
author | Ivan Frade <ifrade@google.com> | 2022-01-05 09:50:54 -0800 |
---|---|---|
committer | Ivan Frade <ifrade@google.com> | 2025-07-21 13:28:27 +0300 |
commit | d9ac2f31e0ac931e57ec11f0ce76eaea8b459c75 (patch) | |
tree | 64445d373be5c70b5f587b73a8fc1465cde51224 | |
parent | 8929300aa80fb11fce2e6b3f411de02c716e7afb (diff) | |
download | jgit-d9ac2f31e0ac931e57ec11f0ce76eaea8b459c75.tar.gz jgit-d9ac2f31e0ac931e57ec11f0ce76eaea8b459c75.zip |
ObjectDirectoryPackParser: Write object-size index with the pack
ObjectDirectoryPack{Parser,Inserter} is not symmetrical to the
Pack{Parser,Inserter} combination. In the Pack* version, the inserter
takes care of writing the indices on #flush. In the ObjectDirectory*
side, this is done by the parser.
Make ObjectDirectoryPackParser write the object size index.
Change-Id: I5a1c091857928b141a07920a5c10e931ffe9bfa2
2 files changed, 275 insertions, 0 deletions
diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/transport/ObjectDirectoryPackParserTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/transport/ObjectDirectoryPackParserTest.java new file mode 100644 index 0000000000..b17c577087 --- /dev/null +++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/transport/ObjectDirectoryPackParserTest.java @@ -0,0 +1,227 @@ +/* + * Copyright (C) 2021, Google LLC. and others + * Copyright (C) 2008, Imran M Yousuf <imyousuf@smartitengineering.com> + * Copyright (C) 2007-2008, Robin Rosenberg <robin.rosenberg@dewire.com> + * Copyright (C) 2008, Shawn O. Pearce <spearce@spearce.org> and others + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Distribution License v. 1.0 which is available at + * https://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + */ +package org.eclipse.jgit.transport; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +import java.io.ByteArrayInputStream; +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.security.MessageDigest; +import java.util.zip.Deflater; + +import org.eclipse.jgit.internal.storage.file.ObjectDirectoryPackParser; +import org.eclipse.jgit.internal.storage.file.Pack; +import org.eclipse.jgit.junit.JGitTestUtil; +import org.eclipse.jgit.junit.RepositoryTestCase; +import org.eclipse.jgit.junit.TestRepository; +import org.eclipse.jgit.lib.ConfigConstants; +import org.eclipse.jgit.lib.Constants; +import org.eclipse.jgit.lib.NullProgressMonitor; +import org.eclipse.jgit.lib.ObjectId; +import org.eclipse.jgit.lib.ObjectInserter; +import org.eclipse.jgit.lib.Repository; +import org.eclipse.jgit.revwalk.RevBlob; +import org.eclipse.jgit.storage.file.FileBasedConfig; +import org.eclipse.jgit.util.NB; +import org.eclipse.jgit.util.TemporaryBuffer; +import org.junit.After; +import 
org.junit.Before; +import org.junit.Test; + +/** + * Pack parsing is covered in {@link PackParserTest}. + * + * Here we test ObjectDirectoryPackParser specific parts. e.g. that is creates + * the object-size index. + */ +public class ObjectDirectoryPackParserTest extends RepositoryTestCase { + + @Before + public void setup() throws IOException { + FileBasedConfig jGitConfig = mockSystemReader.getJGitConfig(); + jGitConfig.setInt(ConfigConstants.CONFIG_PACK_SECTION, null, + ConfigConstants.CONFIG_KEY_MIN_BYTES_OBJ_SIZE_INDEX, 7); + jGitConfig.save(); + } + + /** + * Test indexing one of the test packs in the egit repo. It has deltas. + * + * @throws IOException + */ + @Test + public void testGitPack() throws IOException { + File packFile = JGitTestUtil.getTestResourceFile("pack-34be9032ac282b11fa9babdc2b2a93ca996c9c2f.pack"); + try (InputStream is = new FileInputStream(packFile)) { + ObjectDirectoryPackParser p = index(is); + p.parse(NullProgressMonitor.INSTANCE); + + Pack pack = p.getPack(); + assertTrue(pack.hasObjectSizeIndex()); + + // Only blobs in the pack + ObjectId blob1 = ObjectId + .fromString("6ff87c4664981e4397625791c8ea3bbb5f2279a3"); + ObjectId blob2 = ObjectId + .fromString("5b6e7c66c276e7610d4a73c70ec1a1f7c1003259"); + assertEquals(18787, pack.getIndexedObjectSize(blob1)); + assertEquals(18009, pack.getIndexedObjectSize(blob2)); + + // Indexed sizes match object db sizes + assertEquals(db.getObjectDatabase().open(blob1).getSize(), + pack.getIndexedObjectSize(blob1)); + assertEquals(db.getObjectDatabase().open(blob2).getSize(), + pack.getIndexedObjectSize(blob2)); + + } + } + + /** + * This is just another pack. It so happens that we have two convenient pack to + * test with in the repository. 
+ * + * @throws IOException + */ + @Test + public void testAnotherGitPack() throws IOException { + File packFile = JGitTestUtil.getTestResourceFile("pack-df2982f284bbabb6bdb59ee3fcc6eb0983e20371.pack"); + try (InputStream is = new FileInputStream(packFile)) { + ObjectDirectoryPackParser p = index(is); + p.parse(NullProgressMonitor.INSTANCE); + Pack pack = p.getPack(); + + // Blob smaller than threshold: + assertEquals(-1, pack.getIndexedObjectSize(ObjectId + .fromString("15fae9e651043de0fd1deef588aa3fbf5a7a41c6"))); + + // Blob bigger than threshold + assertEquals(10, pack.getIndexedObjectSize(ObjectId + .fromString("8230f48330e0055d9e0bc5a2a77718f6dd9324b8"))); + + // A commit (not indexed) + assertEquals(-1, pack.getIndexedObjectSize(ObjectId + .fromString("d0114ab8ac326bab30e3a657a0397578c5a1af88"))); + + // Object not in pack + assertEquals(-1, pack.getIndexedObjectSize(ObjectId + .fromString("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"))); + } + } + + @Test + public void testTinyThinPack() throws Exception { + // less than 16 bytes, so its length fits in a single byte later + String base = "abcdefghijklmn"; + RevBlob a; + try (TestRepository d = new TestRepository<Repository>(db)) { + a = d.blob(base); + } + + TemporaryBuffer.Heap pack = new TemporaryBuffer.Heap(1024); + + packHeader(pack, 1); + + pack.write(Constants.OBJ_REF_DELTA << 4 | 4); + a.copyRawTo(pack); + deflate(pack, new byte[] { (byte) base.length(), // size of the base + (byte) (base.length() + 1), // size after reconstruction + 0x1, 'b' }); // append one byte + + digest(pack); + + ObjectDirectoryPackParser p = index(new ByteArrayInputStream(pack.toByteArray())); + p.setAllowThin(true); + p.parse(NullProgressMonitor.INSTANCE); + + Pack writtenPack = p.getPack(); + // base + assertEquals(base.length(), writtenPack.getIndexedObjectSize(a)); + // undeltified blob + assertEquals(base.length() + 1, + writtenPack.getIndexedObjectSize(ObjectId.fromString( + "f177875498138143c9657cc52b049ad4d20d5223"))); + 
} + + @Test + public void testPackWithDuplicateBlob() throws Exception { + final byte[] data = Constants.encode("0123456789abcdefg"); + RevBlob blob; + try (TestRepository<Repository> d = new TestRepository<>(db)) { + blob = d.blob(data); + assertTrue(db.getObjectDatabase().has(blob)); + } + + TemporaryBuffer.Heap pack = new TemporaryBuffer.Heap(1024); + packHeader(pack, 1); + pack.write(Constants.OBJ_BLOB << 4 | 0x80 | 1); + pack.write(1); + deflate(pack, data); + digest(pack); + + ObjectDirectoryPackParser p = index( + new ByteArrayInputStream(pack.toByteArray())); + p.setAllowThin(false); + p.parse(NullProgressMonitor.INSTANCE); + + assertEquals(data.length, p.getPack().getIndexedObjectSize(blob)); + } + + private static void packHeader(TemporaryBuffer.Heap tinyPack, int cnt) + throws IOException { + final byte[] hdr = new byte[8]; + NB.encodeInt32(hdr, 0, 2); + NB.encodeInt32(hdr, 4, cnt); + + tinyPack.write(Constants.PACK_SIGNATURE); + tinyPack.write(hdr, 0, 8); + } + + private static void deflate(TemporaryBuffer.Heap tinyPack, + final byte[] content) + throws IOException { + final Deflater deflater = new Deflater(); + final byte[] buf = new byte[128]; + deflater.setInput(content, 0, content.length); + deflater.finish(); + do { + final int n = deflater.deflate(buf, 0, buf.length); + if (n > 0) + tinyPack.write(buf, 0, n); + } while (!deflater.finished()); + } + + private static void digest(TemporaryBuffer.Heap buf) throws IOException { + MessageDigest md = Constants.newMessageDigest(); + md.update(buf.toByteArray()); + buf.write(md.digest()); + } + + private ObjectInserter inserter; + + @After + public void release() { + if (inserter != null) { + inserter.close(); + } + } + + private ObjectDirectoryPackParser index(InputStream in) throws IOException { + if (inserter == null) + inserter = db.newObjectInserter(); + return (ObjectDirectoryPackParser) inserter.newPackParser(in); + } +} diff --git 
a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/ObjectDirectoryPackParser.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/ObjectDirectoryPackParser.java index 746e124e1f..d97d5a7ccd 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/ObjectDirectoryPackParser.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/ObjectDirectoryPackParser.java @@ -72,6 +72,12 @@ public class ObjectDirectoryPackParser extends PackParser { */ private File tmpIdx; + /** + * Path of the object-size index created for the pack, to filter quickly + * objects by size in partial clones + */ + private File tmpObjectSizeIndex; + /** Read/write handle to {@link #tmpPack} while it is being parsed. */ private RandomAccessFile out; @@ -163,6 +169,7 @@ public class ObjectDirectoryPackParser extends PackParser { throws IOException { tmpPack = File.createTempFile("incoming_", ".pack", db.getDirectory()); //$NON-NLS-1$ //$NON-NLS-2$ tmpIdx = new File(db.getDirectory(), baseName(tmpPack) + ".idx"); //$NON-NLS-1$ + try { out = new RandomAccessFile(tmpPack, "rw"); //$NON-NLS-1$ @@ -178,6 +185,14 @@ public class ObjectDirectoryPackParser extends PackParser { tmpPack.setReadOnly(); tmpIdx.setReadOnly(); + if (pconfig.isWriteObjSizeIndex()) { + tmpObjectSizeIndex = new File(db.getDirectory(), + baseName(tmpPack) + + PackExt.OBJECT_SIZE_INDEX.getExtension()); + writeObjectSizeIndex(pconfig.getMinBytesForObjSizeIndex()); + tmpObjectSizeIndex.setReadOnly(); + } + return renameAndOpenPack(getLockMessage()); } finally { if (def != null) @@ -295,6 +310,9 @@ public class ObjectDirectoryPackParser extends PackParser { tmpIdx.deleteOnExit(); if (tmpPack != null && !tmpPack.delete() && tmpPack.exists()) tmpPack.deleteOnExit(); + if (tmpObjectSizeIndex != null && !tmpObjectSizeIndex.delete() + && tmpObjectSizeIndex.exists()) + tmpPack.deleteOnExit(); } @Override @@ -395,6 +413,15 @@ public class ObjectDirectoryPackParser extends PackParser { } } + 
private void writeObjectSizeIndex(int minSize) throws IOException { + try (FileOutputStream os = new FileOutputStream(tmpObjectSizeIndex)) { + PackObjectSizeIndexWriter iw = PackObjectSizeIndexWriter + .createWriter(os, minSize); + iw.write(getSortedObjectList(null)); + os.getChannel().force(true); + } + } + private PackLock renameAndOpenPack(String lockMessage) throws IOException { if (!keepEmpty && getObjectCount() == 0) { @@ -469,6 +496,27 @@ public class ObjectDirectoryPackParser extends PackParser { JGitText.get().cannotMoveIndexTo, finalIdx), e); } + if (pconfig.isWriteObjSizeIndex() && tmpObjectSizeIndex != null) { + PackFile finalObjectSizeIndex = finalPack + .create(PackExt.OBJECT_SIZE_INDEX); + try { + FileUtils.rename(tmpObjectSizeIndex, finalObjectSizeIndex, + StandardCopyOption.ATOMIC_MOVE); + } catch (IOException e) { + cleanupTemporaryFiles(); + keep.unlock(); + if (!finalPack.delete()) + finalPack.deleteOnExit(); + if (!finalIdx.delete()) { + finalIdx.deleteOnExit(); + } + throw new IOException(MessageFormat + .format(JGitText.get().cannotMoveIndexTo, + finalObjectSizeIndex), + e); + } + } + boolean interrupted = false; try { FileSnapshot snapshot = FileSnapshot.save(finalPack); |