diff options
author | Shawn Pearce <spearce@spearce.org> | 2013-03-08 11:02:04 -0800 |
---|---|---|
committer | Shawn Pearce <spearce@spearce.org> | 2013-03-08 11:07:51 -0800 |
commit | bb002c619bc373059c4f2494da7870f5679ba845 (patch) | |
tree | 9838623d08ff4e465b2df125d011b2a04286fdac /org.eclipse.jgit | |
parent | 3ad454497cdb6a917c00b01c735f5a469b8fb2ff (diff) | |
download | jgit-bb002c619bc373059c4f2494da7870f5679ba845.tar.gz jgit-bb002c619bc373059c4f2494da7870f5679ba845.zip |
Avoid repacking unreachable garbage in DfsGarbageCollector
If a repository has significant amounts of unreachable garbage the
final phase to coalesce it can take longer than any other part of the
garbage collection phase. Provide a setting for applications to tweak
the threshold where coalescing ends and files just remain on disk.
Change-Id: I5f11a998a7185c75ece3271d8bc6181bb83f54c1
Diffstat (limited to 'org.eclipse.jgit')
-rw-r--r-- | org.eclipse.jgit/src/org/eclipse/jgit/storage/dfs/DfsGarbageCollector.java | 57 |
1 file changed, 52 insertions, 5 deletions
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/storage/dfs/DfsGarbageCollector.java b/org.eclipse.jgit/src/org/eclipse/jgit/storage/dfs/DfsGarbageCollector.java index 6027eadc42..76b36a416a 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/storage/dfs/DfsGarbageCollector.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/storage/dfs/DfsGarbageCollector.java @@ -46,12 +46,11 @@ package org.eclipse.jgit.storage.dfs; import static org.eclipse.jgit.storage.dfs.DfsObjDatabase.PackSource.GC; import static org.eclipse.jgit.storage.dfs.DfsObjDatabase.PackSource.UNREACHABLE_GARBAGE; import static org.eclipse.jgit.storage.pack.PackExt.BITMAP_INDEX; -import static org.eclipse.jgit.storage.pack.PackExt.PACK; import static org.eclipse.jgit.storage.pack.PackExt.INDEX; +import static org.eclipse.jgit.storage.pack.PackExt.PACK; import java.io.IOException; import java.util.ArrayList; -import java.util.Arrays; import java.util.Collections; import java.util.HashSet; import java.util.List; @@ -69,6 +68,7 @@ import org.eclipse.jgit.revwalk.RevWalk; import org.eclipse.jgit.storage.dfs.DfsObjDatabase.PackSource; import org.eclipse.jgit.storage.file.PackIndex; import org.eclipse.jgit.storage.pack.PackConfig; +import org.eclipse.jgit.storage.pack.PackExt; import org.eclipse.jgit.storage.pack.PackWriter; import org.eclipse.jgit.util.io.CountingOutputStream; @@ -90,6 +90,8 @@ public class DfsGarbageCollector { private PackConfig packConfig; + private long coalesceGarbageLimit = 50 << 20; + private Map<String, Ref> refsBefore; private List<DfsPackFile> packsBefore; @@ -139,6 +141,38 @@ public class DfsGarbageCollector { return this; } + /** @return garbage packs smaller than this size will be repacked. */ + public long getCoalesceGarbageLimit() { + return coalesceGarbageLimit; + } + + /** + * Set the byte size limit for garbage packs to be repacked. + * <p> + * Any UNREACHABLE_GARBAGE pack smaller than this limit will be repacked at + * the end of the run. 
This allows the garbage collector to coalesce + * unreachable objects into a single file. + * <p> + * If an UNREACHABLE_GARBAGE pack is already larger than this limit it will + * be left alone by the garbage collector. This avoids unnecessary disk IO + * reading and copying the objects. + * <p> + * If limit is set to 0 the UNREACHABLE_GARBAGE coalesce is disabled.<br> + * If limit is set to {@link Long#MAX_VALUE}, everything is coalesced. + * <p> + * Keeping unreachable garbage prevents race conditions with repository + * changes that may suddenly need an object whose only copy was stored in + * the UNREACHABLE_GARBAGE pack. + * + * @param limit + * size in bytes. + * @return {@code this} + */ + public DfsGarbageCollector setCoalesceGarbageLimit(long limit) { + coalesceGarbageLimit = limit; + return this; + } + /** * Create a single new pack file containing all of the live objects. * <p> @@ -167,7 +201,7 @@ public class DfsGarbageCollector { objdb.clearCache(); refsBefore = repo.getAllRefs(); - packsBefore = Arrays.asList(objdb.getPacks()); + packsBefore = packsToRebuild(); if (packsBefore.isEmpty()) return true; @@ -203,6 +237,19 @@ public class DfsGarbageCollector { } } + private List<DfsPackFile> packsToRebuild() throws IOException { + DfsPackFile[] packs = objdb.getPacks(); + List<DfsPackFile> out = new ArrayList<DfsPackFile>(packs.length); + for (DfsPackFile p : packs) { + DfsPackDescription d = p.getPackDescription(); + if (d.getPackSource() != UNREACHABLE_GARBAGE) + out.add(p); + else if (d.getFileSize(PackExt.PACK) < coalesceGarbageLimit) + out.add(p); + } + return out; + } + /** @return all of the source packs that fed into this compaction. 
*/ public List<DfsPackDescription> getSourcePacks() { return toPrune(); @@ -264,9 +311,9 @@ public class DfsGarbageCollector { PackWriter pw = newPackWriter(); try { RevWalk pool = new RevWalk(ctx); + pm.beginTask("Finding garbage", (int) getObjectsBefore()); for (DfsPackFile oldPack : packsBefore) { PackIndex oldIdx = oldPack.getPackIndex(ctx); - pm.beginTask("Finding garbage", (int) oldIdx.getObjectCount()); for (PackIndex.MutableEntry ent : oldIdx) { pm.update(1); ObjectId id = ent.toObjectId(); @@ -276,8 +323,8 @@ public class DfsGarbageCollector { int type = oldPack.getObjectType(ctx, ent.getOffset()); pw.addObject(pool.lookupAny(id, type)); } - pm.endTask(); } + pm.endTask(); if (0 < pw.getObjectCount()) writePack(UNREACHABLE_GARBAGE, pw, pm); } finally { |