import static org.eclipse.jgit.storage.dfs.DfsObjDatabase.PackSource.GC;
import static org.eclipse.jgit.storage.dfs.DfsObjDatabase.PackSource.UNREACHABLE_GARBAGE;
import static org.eclipse.jgit.storage.pack.PackExt.BITMAP_INDEX;
-import static org.eclipse.jgit.storage.pack.PackExt.PACK;
import static org.eclipse.jgit.storage.pack.PackExt.INDEX;
+import static org.eclipse.jgit.storage.pack.PackExt.PACK;
import java.io.IOException;
import java.util.ArrayList;
-import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import org.eclipse.jgit.storage.dfs.DfsObjDatabase.PackSource;
import org.eclipse.jgit.storage.file.PackIndex;
import org.eclipse.jgit.storage.pack.PackConfig;
+import org.eclipse.jgit.storage.pack.PackExt;
import org.eclipse.jgit.storage.pack.PackWriter;
import org.eclipse.jgit.util.io.CountingOutputStream;
private PackConfig packConfig;
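+ // Coalesce garbage packs below this size; default 50 MiB (50 << 20 bytes).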
+ private long coalesceGarbageLimit = 50 << 20;
+
private Map<String, Ref> refsBefore;
private List<DfsPackFile> packsBefore;
return this;
}
+ /** @return size limit in bytes; garbage packs smaller than this are repacked. */
+ public long getCoalesceGarbageLimit() {
+ return coalesceGarbageLimit;
+ }
+
+ /**
+ * Set the byte size limit for garbage packs to be repacked.
+ * <p>
+ * Any UNREACHABLE_GARBAGE pack smaller than this limit will be repacked at
+ * the end of the run. This allows the garbage collector to coalesce
+ * unreachable objects into a single file.
+ * <p>
+ * If an UNREACHABLE_GARBAGE pack is already at or above this limit, it
+ * will be left alone by the garbage collector. This avoids unnecessary
+ * disk IO reading and copying the objects.
+ * <p>
+ * If the limit is set to 0, coalescing of UNREACHABLE_GARBAGE packs is
+ * disabled.<br>
+ * If the limit is set to {@link Long#MAX_VALUE}, every UNREACHABLE_GARBAGE
+ * pack is coalesced.
+ * <p>
+ * Keeping unreachable garbage prevents race conditions with repository
+ * changes that may suddenly need an object whose only copy was stored in
+ * the UNREACHABLE_GARBAGE pack.
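+ * <p>
+ * A minimal usage sketch; {@code repo} (a DfsRepository) and {@code pm} (a
+ * ProgressMonitor) are assumed to exist elsewhere:
+ *
+ * <pre>
+ * DfsGarbageCollector gc = new DfsGarbageCollector(repo);
+ * gc.setCoalesceGarbageLimit(10 &lt;&lt; 20); // coalesce garbage packs under 10 MiB
+ * gc.pack(pm);
+ * </pre>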
+ *
+ * @param limit
+ * size in bytes.
+ * @return {@code this}
+ */
+ public DfsGarbageCollector setCoalesceGarbageLimit(long limit) {
+ coalesceGarbageLimit = limit;
+ return this;
+ }
+
/**
* Create a single new pack file containing all of the live objects.
* <p>
objdb.clearCache();
refsBefore = repo.getAllRefs();
- packsBefore = Arrays.asList(objdb.getPacks());
+ packsBefore = packsToRebuild();
if (packsBefore.isEmpty())
return true;
}
}
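+ /** @return packs to scan: every live pack, plus garbage packs under the coalesce limit. */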
+ private List<DfsPackFile> packsToRebuild() throws IOException {
+ DfsPackFile[] packs = objdb.getPacks();
+ List<DfsPackFile> out = new ArrayList<DfsPackFile>(packs.length);
+ for (DfsPackFile p : packs) {
+ DfsPackDescription d = p.getPackDescription();
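+ // Always rebuild live packs; garbage packs join only when
+ // they are small enough to be worth coalescing.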
+ if (d.getPackSource() != UNREACHABLE_GARBAGE)
+ out.add(p);
+ else if (d.getFileSize(PackExt.PACK) < coalesceGarbageLimit)
+ out.add(p);
+ }
+ return out;
+ }
+
/** @return all of the source packs that fed into this compaction. */
public List<DfsPackDescription> getSourcePacks() {
return toPrune();
PackWriter pw = newPackWriter();
try {
RevWalk pool = new RevWalk(ctx);
+ pm.beginTask("Finding garbage", (int) getObjectsBefore());
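+ // One task spans the whole scan; endTask() runs after all packs are processed.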
for (DfsPackFile oldPack : packsBefore) {
PackIndex oldIdx = oldPack.getPackIndex(ctx);
- pm.beginTask("Finding garbage", (int) oldIdx.getObjectCount());
for (PackIndex.MutableEntry ent : oldIdx) {
pm.update(1);
ObjectId id = ent.toObjectId();
int type = oldPack.getObjectType(ctx, ent.getOffset());
pw.addObject(pool.lookupAny(id, type));
}
- pm.endTask();
}
+ pm.endTask();
if (0 < pw.getObjectCount())
writePack(UNREACHABLE_GARBAGE, pw, pm);
} finally {