summaryrefslogtreecommitdiffstats
path: root/org.eclipse.jgit
diff options
context:
space:
mode:
author: Shawn Pearce <spearce@spearce.org> 2013-03-08 11:02:04 -0800
committer: Shawn Pearce <spearce@spearce.org> 2013-03-08 11:07:51 -0800
commit: bb002c619bc373059c4f2494da7870f5679ba845 (patch)
tree: 9838623d08ff4e465b2df125d011b2a04286fdac /org.eclipse.jgit
parent: 3ad454497cdb6a917c00b01c735f5a469b8fb2ff (diff)
download: jgit-bb002c619bc373059c4f2494da7870f5679ba845.tar.gz
jgit-bb002c619bc373059c4f2494da7870f5679ba845.zip
Avoid repacking unreachable garbage in DfsGarbageCollector
If a repository has a significant amount of unreachable garbage, the final phase that coalesces it can take longer than any other part of the garbage collection run. Provide a setting that lets applications tune the size threshold above which existing garbage packs are no longer coalesced and are simply left on disk as they are. Change-Id: I5f11a998a7185c75ece3271d8bc6181bb83f54c1
Diffstat (limited to 'org.eclipse.jgit')
-rw-r--r-- org.eclipse.jgit/src/org/eclipse/jgit/storage/dfs/DfsGarbageCollector.java 57
1 files changed, 52 insertions, 5 deletions
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/storage/dfs/DfsGarbageCollector.java b/org.eclipse.jgit/src/org/eclipse/jgit/storage/dfs/DfsGarbageCollector.java
index 6027eadc42..76b36a416a 100644
--- a/org.eclipse.jgit/src/org/eclipse/jgit/storage/dfs/DfsGarbageCollector.java
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/storage/dfs/DfsGarbageCollector.java
@@ -46,12 +46,11 @@ package org.eclipse.jgit.storage.dfs;
import static org.eclipse.jgit.storage.dfs.DfsObjDatabase.PackSource.GC;
import static org.eclipse.jgit.storage.dfs.DfsObjDatabase.PackSource.UNREACHABLE_GARBAGE;
import static org.eclipse.jgit.storage.pack.PackExt.BITMAP_INDEX;
-import static org.eclipse.jgit.storage.pack.PackExt.PACK;
import static org.eclipse.jgit.storage.pack.PackExt.INDEX;
+import static org.eclipse.jgit.storage.pack.PackExt.PACK;
import java.io.IOException;
import java.util.ArrayList;
-import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
@@ -69,6 +68,7 @@ import org.eclipse.jgit.revwalk.RevWalk;
import org.eclipse.jgit.storage.dfs.DfsObjDatabase.PackSource;
import org.eclipse.jgit.storage.file.PackIndex;
import org.eclipse.jgit.storage.pack.PackConfig;
+import org.eclipse.jgit.storage.pack.PackExt;
import org.eclipse.jgit.storage.pack.PackWriter;
import org.eclipse.jgit.util.io.CountingOutputStream;
@@ -90,6 +90,8 @@ public class DfsGarbageCollector {
private PackConfig packConfig;
+ private long coalesceGarbageLimit = 50 << 20;
+
private Map<String, Ref> refsBefore;
private List<DfsPackFile> packsBefore;
@@ -139,6 +141,38 @@ public class DfsGarbageCollector {
return this;
}
+ /** @return garbage packs smaller than this size will be repacked. */
+ public long getCoalesceGarbageLimit() {
+ return coalesceGarbageLimit;
+ }
+
+ /**
+ * Set the byte size limit for garbage packs to be repacked.
+ * <p>
+ * Any UNREACHABLE_GARBAGE pack smaller than this limit will be repacked at
+ * the end of the run. This allows the garbage collector to coalesce
+ * unreachable objects into a single file.
+ * <p>
+ * If an UNREACHABLE_GARBAGE pack is already larger than this limit it will
+ * be left alone by the garbage collector. This avoids unnecessary disk IO
+ * reading and copying the objects.
+ * <p>
+ * If limit is set to 0 the UNREACHABLE_GARBAGE coalesce is disabled.<br>
+ * If limit is set to {@link Long#MAX_VALUE}, everything is coalesced.
+ * <p>
+ * Keeping unreachable garbage prevents race conditions with repository
+ * changes that may suddenly need an object whose only copy was stored in
+ * the UNREACHABLE_GARBAGE pack.
+ *
+ * @param limit
+ * size in bytes.
+ * @return {@code this}
+ */
+ public DfsGarbageCollector setCoalesceGarbageLimit(long limit) {
+ coalesceGarbageLimit = limit;
+ return this;
+ }
+
/**
* Create a single new pack file containing all of the live objects.
* <p>
@@ -167,7 +201,7 @@ public class DfsGarbageCollector {
objdb.clearCache();
refsBefore = repo.getAllRefs();
- packsBefore = Arrays.asList(objdb.getPacks());
+ packsBefore = packsToRebuild();
if (packsBefore.isEmpty())
return true;
@@ -203,6 +237,19 @@ public class DfsGarbageCollector {
}
}
+ private List<DfsPackFile> packsToRebuild() throws IOException {
+ DfsPackFile[] packs = objdb.getPacks();
+ List<DfsPackFile> out = new ArrayList<DfsPackFile>(packs.length);
+ for (DfsPackFile p : packs) {
+ DfsPackDescription d = p.getPackDescription();
+ if (d.getPackSource() != UNREACHABLE_GARBAGE)
+ out.add(p);
+ else if (d.getFileSize(PackExt.PACK) < coalesceGarbageLimit)
+ out.add(p);
+ }
+ return out;
+ }
+
/** @return all of the source packs that fed into this compaction. */
public List<DfsPackDescription> getSourcePacks() {
return toPrune();
@@ -264,9 +311,9 @@ public class DfsGarbageCollector {
PackWriter pw = newPackWriter();
try {
RevWalk pool = new RevWalk(ctx);
+ pm.beginTask("Finding garbage", (int) getObjectsBefore());
for (DfsPackFile oldPack : packsBefore) {
PackIndex oldIdx = oldPack.getPackIndex(ctx);
- pm.beginTask("Finding garbage", (int) oldIdx.getObjectCount());
for (PackIndex.MutableEntry ent : oldIdx) {
pm.update(1);
ObjectId id = ent.toObjectId();
@@ -276,8 +323,8 @@ public class DfsGarbageCollector {
int type = oldPack.getObjectType(ctx, ent.getOffset());
pw.addObject(pool.lookupAny(id, type));
}
- pm.endTask();
}
+ pm.endTask();
if (0 < pw.getObjectCount())
writePack(UNREACHABLE_GARBAGE, pw, pm);
} finally {