From 3c27ee1a916592d76a92032c57e66d775f830e45 Mon Sep 17 00:00:00 2001 From: Shawn Pearce Date: Tue, 16 Apr 2013 17:54:23 -0700 Subject: [PATCH] Support excluding objects during DFS compaction By excluding objects the compactor can avoid storing objects that are already well packed in the base GC packs, or any other pack not being replaced by the current compaction operation. For deltas the base object is still included even if the base exists in another exclusion set. This favors keeping deltas for recent history, to support faster fetch operations for clients. Change-Id: Ie822fe075fe5072fe3171450fda2f0ca507796a1 --- .../storage/dfs/DfsPackCompactor.java | 133 ++++++++++++++---- 1 file changed, 102 insertions(+), 31 deletions(-) diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsPackCompactor.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsPackCompactor.java index ddd6ff7c0e..ea563926b2 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsPackCompactor.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsPackCompactor.java @@ -46,6 +46,7 @@ package org.eclipse.jgit.internal.storage.dfs; import static org.eclipse.jgit.internal.storage.dfs.DfsObjDatabase.PackSource.COMPACT; import static org.eclipse.jgit.internal.storage.pack.PackExt.INDEX; import static org.eclipse.jgit.internal.storage.pack.PackExt.PACK; +import static org.eclipse.jgit.internal.storage.pack.StoredObjectRepresentation.PACK_DELTA; import java.io.IOException; import java.util.ArrayList; @@ -56,6 +57,7 @@ import java.util.List; import org.eclipse.jgit.errors.IncorrectObjectTypeException; import org.eclipse.jgit.internal.JGitText; import org.eclipse.jgit.internal.storage.file.PackIndex; +import org.eclipse.jgit.internal.storage.file.PackReverseIndex; import org.eclipse.jgit.internal.storage.pack.PackWriter; import org.eclipse.jgit.lib.AnyObjectId; import org.eclipse.jgit.lib.NullProgressMonitor; @@ -88,12 +90,18 @@ public class DfsPackCompactor { private final List srcPacks; + private final List exclude; + private final List newPacks; private final List newStats; private int autoAddSize; + private RevWalk rw; + private RevFlag added; + private RevFlag isBase; + /** * Initialize a pack compactor. * @@ -104,6 +112,7 @@ public class DfsPackCompactor { repo = repository; autoAddSize = 5 * 1024 * 1024; // 5 MiB srcPacks = new ArrayList(); + exclude = new ArrayList(4); newPacks = new ArrayList(1); newStats = new ArrayList(1); } @@ -141,10 +150,48 @@ public class DfsPackCompactor { DfsPackDescription d = pack.getPackDescription(); if (d.getFileSize(PACK) < autoAddSize) add(pack); + else + exclude(pack); } return this; } + /** + * Exclude objects from the compacted pack. + * + * @param set + * objects to not include. + * @return {@code this}. + */ + public DfsPackCompactor exclude(PackWriter.ObjectIdSet set) { + exclude.add(set); + return this; + } + + /** + * Exclude objects from the compacted pack. + * + * @param pack + * objects to not include. + * @return {@code this}. + * @throws IOException + * pack index cannot be loaded. + */ + public DfsPackCompactor exclude(DfsPackFile pack) throws IOException { + final PackIndex idx; + DfsReader ctx = (DfsReader) repo.newObjectReader(); + try { + idx = pack.getPackIndex(ctx); + } finally { + ctx.release(); + } + return exclude(new PackWriter.ObjectIdSet() { + public boolean contains(AnyObjectId id) { + return idx.hasObject(id); + } + }); + } + /** * Compact the pack files together. * @@ -200,6 +247,7 @@ public class DfsPackCompactor { pw.release(); } } finally { + rw = null; ctx.release(); } } @@ -239,50 +287,73 @@ public class DfsPackCompactor { } }); - RevWalk rw = new RevWalk(ctx); - RevFlag added = rw.newFlag("ADDED"); //$NON-NLS-1$ + rw = new RevWalk(ctx); + added = rw.newFlag("ADDED"); //$NON-NLS-1$ + isBase = rw.newFlag("IS_BASE"); //$NON-NLS-1$ + List baseObjects = new BlockList(); pm.beginTask(JGitText.get().countingObjects, ProgressMonitor.UNKNOWN); for (DfsPackFile src : srcPacks) { - List want = new BlockList(); - for (PackIndex.MutableEntry ent : src.getPackIndex(ctx)) { - ObjectId id = ent.toObjectId(); - RevObject obj = rw.lookupOrNull(id); - if (obj == null || !obj.has(added)) - want.add(new ObjectIdWithOffset(id, ent.getOffset())); - } + List want = toInclude(src, ctx); + if (want.isEmpty()) + continue; - // Sort objects by the order they appear in the pack file, for - // two benefits. Scanning object type information is faster when - // the pack is traversed in order, and this allows the PackWriter - // to be given the new objects in a relatively sane newest-first - // ordering without additional logic, like unpacking commits and - // walking a commit queue. - Collections.sort(want, new Comparator() { - public int compare(ObjectIdWithOffset a, ObjectIdWithOffset b) { - return Long.signum(a.offset - b.offset); - } - }); - - // Only pack each object at most once into the output file. The - // PackWriter will later select a representation to reuse, which - // may be the version in this pack, or may be from another pack if - // the object was copied here to complete a thin pack and is larger - // than a delta from another pack. This is actually somewhat common - // if an object is modified frequently, such as the top level tree. + PackReverseIndex rev = src.getReverseIdx(ctx); + DfsObjectRepresentation rep = new DfsObjectRepresentation(src); for (ObjectIdWithOffset id : want) { int type = src.getObjectType(ctx, id.offset); RevObject obj = rw.lookupAny(id, type); - if (!obj.has(added)) { - pm.update(1); - pw.addObject(obj); - obj.add(added); + if (obj.has(added)) + continue; + + pm.update(1); + pw.addObject(obj); + obj.add(added); + + src.representation(rep, id.offset, ctx, rev); + if (rep.getFormat() != PACK_DELTA) + continue; + + RevObject base = rw.lookupAny(rep.getDeltaBase(), type); + if (!base.has(added) && !base.has(isBase)) { + baseObjects.add(base); + base.add(isBase); } } } + for (RevObject obj : baseObjects) { + if (!obj.has(added)) { + pm.update(1); + pw.addObject(obj); + obj.add(added); + } + } pm.endTask(); } + private List toInclude(DfsPackFile src, DfsReader ctx) + throws IOException { + PackIndex srcIdx = src.getPackIndex(ctx); + List want = new BlockList( + (int) srcIdx.getObjectCount()); + SCAN: for (PackIndex.MutableEntry ent : srcIdx) { + ObjectId id = ent.toObjectId(); + RevObject obj = rw.lookupOrNull(id); + if (obj != null && (obj.has(added) || obj.has(isBase))) + continue; + for (PackWriter.ObjectIdSet e : exclude) + if (e.contains(id)) + continue SCAN; + want.add(new ObjectIdWithOffset(id, ent.getOffset())); + } + Collections.sort(want, new Comparator() { + public int compare(ObjectIdWithOffset a, ObjectIdWithOffset b) { + return Long.signum(a.offset - b.offset); + } + }); + return want; + } + private static void writePack(DfsObjDatabase objdb, DfsPackDescription pack, PackWriter pw, ProgressMonitor pm) throws IOException { -- 2.39.5