package org.eclipse.jgit.internal.storage.dfs;
import static org.eclipse.jgit.internal.storage.dfs.DfsObjDatabase.PackSource.GC;
+import static org.eclipse.jgit.internal.storage.dfs.DfsObjDatabase.PackSource.GC_REST;
import static org.eclipse.jgit.internal.storage.dfs.DfsObjDatabase.PackSource.INSERT;
import static org.eclipse.jgit.internal.storage.dfs.DfsObjDatabase.PackSource.UNREACHABLE_GARBAGE;
+import static org.eclipse.jgit.internal.storage.pack.PackExt.PACK;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotEquals;
}
}
+ @Test
+ public void testEstimateGcPackSizeInNewRepo() throws Exception {
+ RevCommit commit0 = commit().message("0").create();
+ RevCommit commit1 = commit().message("1").parent(commit0).create();
+ git.update("master", commit1);
+
+ // Packs start out as INSERT.
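+ // The estimate for the combined pack is the sum of the input pack
+ // sizes, counting a single 12-byte header and 20-byte trailer (32
+ // bytes) instead of one per input pack.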
+ long inputPacksSize = 32;
+ assertEquals(2, odb.getPacks().length);
+ for (DfsPackFile pack : odb.getPacks()) {
+ assertEquals(INSERT, pack.getPackDescription().getPackSource());
+ inputPacksSize += pack.getPackDescription().getFileSize(PACK) - 32;
+ }
+
+ gcNoTtl();
+
+ // INSERT packs are combined into a single GC pack.
+ assertEquals(1, odb.getPacks().length);
+ DfsPackFile pack = odb.getPacks()[0];
+ assertEquals(GC, pack.getPackDescription().getPackSource());
+ assertEquals(inputPacksSize,
+ pack.getPackDescription().getEstimatedPackSize());
+ }
+
+ @Test
+ public void testEstimateGcPackSizeWithAnExistingGcPack() throws Exception {
+ RevCommit commit0 = commit().message("0").create();
+ RevCommit commit1 = commit().message("1").parent(commit0).create();
+ git.update("master", commit1);
+
+ gcNoTtl();
+
+ RevCommit commit2 = commit().message("2").parent(commit1).create();
+ git.update("master", commit2);
+
+ // There will be one INSERT pack and one GC pack.
+ assertEquals(2, odb.getPacks().length);
+ boolean gcPackFound = false;
+ boolean insertPackFound = false;
+ long inputPacksSize = 32;
+ for (DfsPackFile pack : odb.getPacks()) {
+ DfsPackDescription d = pack.getPackDescription();
+ if (d.getPackSource() == GC) {
+ gcPackFound = true;
+ } else if (d.getPackSource() == INSERT) {
+ insertPackFound = true;
+ } else {
+ fail("unexpected " + d.getPackSource());
+ }
+ inputPacksSize += d.getFileSize(PACK) - 32;
+ }
+ assertTrue(gcPackFound);
+ assertTrue(insertPackFound);
+
+ gcNoTtl();
+
+ // INSERT pack is combined into the GC pack.
+ DfsPackFile pack = odb.getPacks()[0];
+ assertEquals(GC, pack.getPackDescription().getPackSource());
+ assertEquals(inputPacksSize,
+ pack.getPackDescription().getEstimatedPackSize());
+ }
+
+ @Test
+ public void testEstimateGcRestPackSizeInNewRepo() throws Exception {
+ RevCommit commit0 = commit().message("0").create();
+ RevCommit commit1 = commit().message("1").parent(commit0).create();
+ git.update("refs/notes/note1", commit1);
+
+ // Packs start out as INSERT.
+ long inputPacksSize = 32;
+ assertEquals(2, odb.getPacks().length);
+ for (DfsPackFile pack : odb.getPacks()) {
+ assertEquals(INSERT, pack.getPackDescription().getPackSource());
+ inputPacksSize += pack.getPackDescription().getFileSize(PACK) - 32;
+ }
+
+ gcNoTtl();
+
+ // INSERT packs are combined into a single GC_REST pack.
+ assertEquals(1, odb.getPacks().length);
+ DfsPackFile pack = odb.getPacks()[0];
+ assertEquals(GC_REST, pack.getPackDescription().getPackSource());
+ assertEquals(inputPacksSize,
+ pack.getPackDescription().getEstimatedPackSize());
+ }
+
+ @Test
+ public void testEstimateGcRestPackSizeWithAnExistingGcPack()
+ throws Exception {
+ RevCommit commit0 = commit().message("0").create();
+ RevCommit commit1 = commit().message("1").parent(commit0).create();
+ git.update("refs/notes/note1", commit1);
+
+ gcNoTtl();
+
+ RevCommit commit2 = commit().message("2").parent(commit1).create();
+ git.update("refs/notes/note2", commit2);
+
+ // There will be one INSERT pack and one GC_REST pack.
+ assertEquals(2, odb.getPacks().length);
+ boolean gcRestPackFound = false;
+ boolean insertPackFound = false;
+ long inputPacksSize = 32;
+ for (DfsPackFile pack : odb.getPacks()) {
+ DfsPackDescription d = pack.getPackDescription();
+ if (d.getPackSource() == GC_REST) {
+ gcRestPackFound = true;
+ } else if (d.getPackSource() == INSERT) {
+ insertPackFound = true;
+ } else {
+ fail("unexpected " + d.getPackSource());
+ }
+ inputPacksSize += d.getFileSize(PACK) - 32;
+ }
+ assertTrue(gcRestPackFound);
+ assertTrue(insertPackFound);
+
+ gcNoTtl();
+
+ // INSERT pack is combined into the GC_REST pack.
+ DfsPackFile pack = odb.getPacks()[0];
+ assertEquals(GC_REST, pack.getPackDescription().getPackSource());
+ assertEquals(inputPacksSize,
+ pack.getPackDescription().getEstimatedPackSize());
+ }
+
+ @Test
+ public void testEstimateGcPackSizesWithGcAndGcRestPacks() throws Exception {
+ RevCommit commit0 = commit().message("0").create();
+ git.update("head", commit0);
+ RevCommit commit1 = commit().message("1").parent(commit0).create();
+ git.update("refs/notes/note1", commit1);
+
+ gcNoTtl();
+
+ RevCommit commit2 = commit().message("2").parent(commit1).create();
+ git.update("refs/notes/note2", commit2);
+
+ // There will be one INSERT, one GC, and one GC_REST pack.
+ assertEquals(3, odb.getPacks().length);
+ boolean gcPackFound = false;
+ boolean gcRestPackFound = false;
+ boolean insertPackFound = false;
+ long gcPackSize = 0;
+ long gcRestPackSize = 0;
+ long insertPackSize = 0;
+ for (DfsPackFile pack : odb.getPacks()) {
+ DfsPackDescription d = pack.getPackDescription();
+ if (d.getPackSource() == GC) {
+ gcPackFound = true;
+ gcPackSize = d.getFileSize(PACK);
+ } else if (d.getPackSource() == GC_REST) {
+ gcRestPackFound = true;
+ gcRestPackSize = d.getFileSize(PACK);
+ } else if (d.getPackSource() == INSERT) {
+ insertPackFound = true;
+ insertPackSize = d.getFileSize(PACK);
+ } else {
+ fail("unexpected " + d.getPackSource());
+ }
+ }
+ assertTrue(gcPackFound);
+ assertTrue(gcRestPackFound);
+ assertTrue(insertPackFound);
+
+ gcNoTtl();
+
+ // In this test the INSERT pack would be combined into the GC_REST
+ // pack. But since there is no good heuristic to know whether a new
+ // pack will be combined into the GC pack or the GC_REST pack, its
+ // size is included in the estimate of both.
+ assertEquals(2, odb.getPacks().length);
+ gcPackFound = false;
+ gcRestPackFound = false;
+ for (DfsPackFile pack : odb.getPacks()) {
+ DfsPackDescription d = pack.getPackDescription();
+ if (d.getPackSource() == GC) {
+ gcPackFound = true;
+ assertEquals(gcPackSize + insertPackSize - 32,
+ pack.getPackDescription().getEstimatedPackSize());
+ } else if (d.getPackSource() == GC_REST) {
+ gcRestPackFound = true;
+ assertEquals(gcRestPackSize + insertPackSize - 32,
+ pack.getPackDescription().getEstimatedPackSize());
+ } else {
+ fail("unexpected " + d.getPackSource());
+ }
+ }
+ assertTrue(gcPackFound);
+ assertTrue(gcRestPackFound);
+ }
+
+ @Test
+ public void testEstimateUnreachableGarbagePackSize() throws Exception {
+ RevCommit commit0 = commit().message("0").create();
+ RevCommit commit1 = commit().message("1").parent(commit0).create();
+ git.update("master", commit0);
+
+ assertTrue("commit0 reachable", isReachable(repo, commit0));
+ assertFalse("commit1 garbage", isReachable(repo, commit1));
+
+ // Packs start out as INSERT.
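+ // Record each pack's file size so the GC and UNREACHABLE_GARBAGE
+ // size estimates can be verified after gc runs.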
+ long packSize0 = 0;
+ long packSize1 = 0;
+ assertEquals(2, odb.getPacks().length);
+ for (DfsPackFile pack : odb.getPacks()) {
+ DfsPackDescription d = pack.getPackDescription();
+ assertEquals(INSERT, d.getPackSource());
+ if (isObjectInPack(commit0, pack)) {
+ packSize0 = d.getFileSize(PACK);
+ } else if (isObjectInPack(commit1, pack)) {
+ packSize1 = d.getFileSize(PACK);
+ } else {
+ fail("expected object not found in the pack");
+ }
+ }
+
+ gcNoTtl();
+
+ assertEquals(2, odb.getPacks().length);
+ for (DfsPackFile pack : odb.getPacks()) {
+ DfsPackDescription d = pack.getPackDescription();
+ if (d.getPackSource() == GC) {
+ // Even though only commit0 ends up in the GC pack, there is no
+ // good way to know that up front, so both pack sizes are included
+ // in the estimated size of the GC pack.
+ assertEquals(packSize0 + packSize1 - 32,
+ d.getEstimatedPackSize());
+ } else if (d.getPackSource() == UNREACHABLE_GARBAGE) {
+ // commit1 is moved to UNREACHABLE_GARBAGE pack.
+ assertEquals(packSize1, d.getEstimatedPackSize());
+ } else {
+ fail("unexpected " + d.getPackSource());
+ }
+ }
+ }
+
private TestRepository<InMemoryRepository>.CommitBuilder commit() {
return git.commit();
}
--- /dev/null
+/*
+ * Copyright (C) 2017, Google Inc.
+ * and other copyright owners as documented in the project's IP log.
+ *
+ * This program and the accompanying materials are made available
+ * under the terms of the Eclipse Distribution License v1.0 which
+ * accompanies this distribution, is reproduced below, and is
+ * available at http://www.eclipse.org/org/documents/edl-v10.php
+ *
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials provided
+ * with the distribution.
+ *
+ * - Neither the name of the Eclipse Foundation, Inc. nor the
+ * names of its contributors may be used to endorse or promote
+ * products derived from this software without specific prior
+ * written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
+ * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+package org.eclipse.jgit.internal.storage.dfs;
+
+import static org.eclipse.jgit.internal.storage.dfs.DfsObjDatabase.PackSource.COMPACT;
+import static org.eclipse.jgit.internal.storage.dfs.DfsObjDatabase.PackSource.INSERT;
+import static org.eclipse.jgit.internal.storage.pack.PackExt.PACK;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import java.io.IOException;
+
+import org.eclipse.jgit.junit.TestRepository;
+import org.eclipse.jgit.revwalk.RevCommit;
+import org.junit.Before;
+import org.junit.Test;
+
+public class DfsPackCompactorTest {
+ private TestRepository<InMemoryRepository> git;
+ private InMemoryRepository repo;
+ private DfsObjDatabase odb;
+
+ @Before
+ public void setUp() throws IOException {
+ DfsRepositoryDescription desc = new DfsRepositoryDescription("test");
+ git = new TestRepository<>(new InMemoryRepository(desc));
+ repo = git.getRepository();
+ odb = repo.getObjectDatabase();
+ }
+
+ @Test
+ public void testEstimateCompactPackSizeInNewRepo() throws Exception {
+ RevCommit commit0 = commit().message("0").create();
+ RevCommit commit1 = commit().message("1").parent(commit0).create();
+ git.update("master", commit1);
+
+ // Packs start out as INSERT.
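+ // 32 = one 12-byte pack header plus one 20-byte trailer, counted once
+ // for the combined pack and subtracted from each input pack below.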
+ long inputPacksSize = 32;
+ assertEquals(2, odb.getPacks().length);
+ for (DfsPackFile pack : odb.getPacks()) {
+ assertEquals(INSERT, pack.getPackDescription().getPackSource());
+ inputPacksSize += pack.getPackDescription().getFileSize(PACK) - 32;
+ }
+
+ compact();
+
+ // INSERT packs are compacted into a single COMPACT pack.
+ assertEquals(1, odb.getPacks().length);
+ DfsPackFile pack = odb.getPacks()[0];
+ assertEquals(COMPACT, pack.getPackDescription().getPackSource());
+ assertEquals(inputPacksSize,
+ pack.getPackDescription().getEstimatedPackSize());
+ }
+
+ @Test
+ public void testEstimateCompactPackSizeWithAnExistingCompactPack()
+ throws Exception {
+ RevCommit commit0 = commit().message("0").create();
+ RevCommit commit1 = commit().message("1").parent(commit0).create();
+ git.update("master", commit1);
+
+ compact();
+
+ RevCommit commit2 = commit().message("2").parent(commit1).create();
+ git.update("master", commit2);
+
+ // There will be one INSERT pack and one COMPACT pack.
+ assertEquals(2, odb.getPacks().length);
+ boolean compactPackFound = false;
+ boolean insertPackFound = false;
+ long inputPacksSize = 32;
+ for (DfsPackFile pack : odb.getPacks()) {
+ DfsPackDescription packDescription = pack.getPackDescription();
+ if (packDescription.getPackSource() == COMPACT) {
+ compactPackFound = true;
+ }
+ if (packDescription.getPackSource() == INSERT) {
+ insertPackFound = true;
+ }
+ inputPacksSize += packDescription.getFileSize(PACK) - 32;
+ }
+ assertTrue(compactPackFound);
+ assertTrue(insertPackFound);
+
+ compact();
+
+ // INSERT pack is combined into the COMPACT pack.
+ DfsPackFile pack = odb.getPacks()[0];
+ assertEquals(COMPACT, pack.getPackDescription().getPackSource());
+ assertEquals(inputPacksSize,
+ pack.getPackDescription().getEstimatedPackSize());
+ }
+
+ private TestRepository<InMemoryRepository>.CommitBuilder commit() {
+ return git.commit();
+ }
+
+ private void compact() throws IOException {
+ DfsPackCompactor compactor = new DfsPackCompactor(repo);
+ compactor.autoAdd();
+ compactor.compact(null);
+ odb.clearCache();
+ }
+}
package org.eclipse.jgit.internal.storage.dfs;
+import static org.eclipse.jgit.internal.storage.dfs.DfsObjDatabase.PackSource.COMPACT;
import static org.eclipse.jgit.internal.storage.dfs.DfsObjDatabase.PackSource.GC;
import static org.eclipse.jgit.internal.storage.dfs.DfsObjDatabase.PackSource.GC_REST;
import static org.eclipse.jgit.internal.storage.dfs.DfsObjDatabase.PackSource.GC_TXN;
+import static org.eclipse.jgit.internal.storage.dfs.DfsObjDatabase.PackSource.INSERT;
+import static org.eclipse.jgit.internal.storage.dfs.DfsObjDatabase.PackSource.RECEIVE;
import static org.eclipse.jgit.internal.storage.dfs.DfsObjDatabase.PackSource.UNREACHABLE_GARBAGE;
import static org.eclipse.jgit.internal.storage.pack.PackExt.BITMAP_INDEX;
import static org.eclipse.jgit.internal.storage.pack.PackExt.INDEX;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
+import java.util.EnumSet;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.eclipse.jgit.internal.JGitText;
import org.eclipse.jgit.internal.storage.dfs.DfsObjDatabase.PackSource;
import org.eclipse.jgit.internal.storage.file.PackIndex;
+import org.eclipse.jgit.internal.storage.file.PackReverseIndex;
import org.eclipse.jgit.internal.storage.pack.PackExt;
import org.eclipse.jgit.internal.storage.pack.PackWriter;
import org.eclipse.jgit.internal.storage.reftree.RefTreeNames;
pw.setTagTargets(tagTargets);
pw.preparePack(pm, allHeads, PackWriter.NONE);
if (0 < pw.getObjectCount())
- writePack(GC, pw, pm);
+ writePack(GC, pw, pm,
+ estimateGcPackSize(INSERT, RECEIVE, COMPACT, GC));
}
}
pw.excludeObjects(packedObjs);
pw.preparePack(pm, nonHeads, allHeads);
if (0 < pw.getObjectCount())
- writePack(GC_REST, pw, pm);
+ writePack(GC_REST, pw, pm,
+ estimateGcPackSize(INSERT, RECEIVE, COMPACT, GC_REST));
}
}
pw.excludeObjects(packedObjs);
pw.preparePack(pm, txnHeads, PackWriter.NONE);
if (0 < pw.getObjectCount())
- writePack(GC_TXN, pw, pm);
+ writePack(GC_TXN, pw, pm, 0 /* unknown pack size */);
}
}
pw.setDeltaBaseAsOffset(true);
pw.setReuseDeltaCommits(true);
pm.beginTask(JGitText.get().findingGarbage, objectsBefore());
+ long estimatedPackSize = 12 + 20; // header and trailer sizes.
for (DfsPackFile oldPack : packsBefore) {
PackIndex oldIdx = oldPack.getPackIndex(ctx);
+ PackReverseIndex oldRevIdx = oldPack.getReverseIdx(ctx);
+ long maxOffset = oldPack.getPackDescription().getFileSize(PACK)
+ - 20; // pack size - trailer size.
for (PackIndex.MutableEntry ent : oldIdx) {
pm.update(1);
ObjectId id = ent.toObjectId();
if (pool.lookupOrNull(id) != null || anyPackHas(id))
continue;
- int type = oldPack.getObjectType(ctx, ent.getOffset());
+ long offset = ent.getOffset();
+ int type = oldPack.getObjectType(ctx, offset);
pw.addObject(pool.lookupAny(id, type));
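+ // Estimate the object's size in the pack as the distance to the
+ // next object's offset, or to the start of the pack trailer for
+ // the last object.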
+ long objSize = oldRevIdx.findNextOffset(offset, maxOffset)
+ - offset;
+ estimatedPackSize += objSize;
}
}
pm.endTask();
if (0 < pw.getObjectCount())
- writePack(UNREACHABLE_GARBAGE, pw, pm);
+ writePack(UNREACHABLE_GARBAGE, pw, pm, estimatedPackSize);
}
}
return pw;
}
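+ /**
+ * Estimate the size of a pack produced by combining all existing packs
+ * of the given sources, based on their current pack file sizes.
+ *
+ * @param first
+ * required pack source to include in the estimate.
+ * @param rest
+ * additional pack sources to include in the estimate.
+ * @return estimated size in bytes of the combined pack.
+ */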
+ private long estimateGcPackSize(PackSource first, PackSource... rest) {
+ EnumSet<PackSource> sourceSet = EnumSet.of(first, rest);
+ // Every pack file contains 12 bytes of header and 20 bytes of trailer.
+ // Include the final pack file header and trailer size here and ignore
+ // the same from individual pack files.
+ long size = 32;
+ for (DfsPackDescription pack : getSourcePacks()) {
+ if (sourceSet.contains(pack.getPackSource())) {
+ size += pack.getFileSize(PACK) - 32;
+ }
+ }
+ return size;
+ }
+
private DfsPackDescription writePack(PackSource source, PackWriter pw,
- ProgressMonitor pm) throws IOException {
- DfsPackDescription pack = repo.getObjectDatabase().newPack(source);
+ ProgressMonitor pm, long estimatedPackSize) throws IOException {
+ DfsPackDescription pack = repo.getObjectDatabase().newPack(source,
+ estimatedPackSize);
newPackDesc.add(pack);
try (DfsOutputStream out = objdb.writeFile(pack, PACK)) {
protected abstract DfsPackDescription newPack(PackSource source)
throws IOException;
+ /**
+ * Generate a new unique name for a pack file.
+ *
+ * <p>
+ * The default implementation of this method is equivalent to
+ * {@code newPack(source).setEstimatedPackSize(estimatedPackSize)}. Clients
+ * can override this method to use the given {@code estimatedPackSize} more
+ * efficiently when creating the new {@link DfsPackDescription} object.
+ *
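+ * <p>
+ * A minimal sketch of an override; {@code reserveSpace} stands in for a
+ * hypothetical hook of the subclass and is not part of this API:
+ *
+ * <pre>
+ * protected DfsPackDescription newPack(PackSource source,
+ * long estimatedPackSize) throws IOException {
+ * DfsPackDescription pack = newPack(source);
+ * pack.setEstimatedPackSize(estimatedPackSize);
+ * reserveSpace(pack, estimatedPackSize); // hypothetical subclass hook
+ * return pack;
+ * }
+ * </pre>
+ *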
+ * @param source
+ * where the pack stream is created.
+ * @param estimatedPackSize
+ * the estimated size of the pack.
+ * @return a unique name for the pack file. Must not collide with any other
+ * pack file name in the same DFS.
+ * @throws IOException
+ * a new unique pack description cannot be generated.
+ */
+ protected DfsPackDescription newPack(PackSource source,
+ long estimatedPackSize) throws IOException {
+ DfsPackDescription pack = newPack(source);
+ pack.setEstimatedPackSize(estimatedPackSize);
+ return pack;
+ }
+
/**
* Commit a pack and index pair that was written to the DFS.
* <p>
}
boolean rollback = true;
- DfsPackDescription pack = objdb.newPack(COMPACT);
+ DfsPackDescription pack = objdb.newPack(COMPACT,
+ estimatePackSize());
try {
writePack(objdb, pack, pw, pm);
writeIndex(objdb, pack, pw);
}
}
+ private long estimatePackSize() {
+ // Every pack file contains 12 bytes of header and 20 bytes of trailer.
+ // Include the final pack file header and trailer size here and ignore
+ // the same from individual pack files.
+ long size = 32;
+ for (DfsPackFile pack : srcPacks) {
+ size += pack.getPackDescription().getFileSize(PACK) - 32;
+ }
+ return size;
+ }
+
/** @return all of the source packs that fed into this compaction. */
public List<DfsPackDescription> getSourcePacks() {
return toPrune();
private int indexVersion;
+ private long estimatedPackSize;
+
/**
* Initialize a description by pack name and repository.
* <p>
return size == null ? 0 : size.longValue();
}
+ /**
+ * @param estimatedPackSize
+ * estimated size of the .pack file in bytes. If 0 the pack file
+ * size is unknown.
+ * @return {@code this}
+ */
+ public DfsPackDescription setEstimatedPackSize(long estimatedPackSize) {
+ this.estimatedPackSize = Math.max(0, estimatedPackSize);
+ return this;
+ }
+
+ /**
+ * @return estimated size of the .pack file in bytes. If 0 the pack file
+ * size is unknown.
+ */
+ public long getEstimatedPackSize() {
+ return estimatedPackSize;
+ }
+
/** @return number of objects in the pack. */
public long getObjectCount() {
return objectCount;