From 5d446f410d7044fba165ad3deee7ac83864f0e96 Mon Sep 17 00:00:00 2001 From: Shawn Pearce Date: Fri, 5 Apr 2013 10:02:01 -0700 Subject: Support cutting existing delta chains longer than the max depth Some packs built by JGit have incredibly long delta chains due to a long standing bug in PackWriter. Google has packs created by JGit's DfsGarbageCollector with chains of 6000 objects long, or more. Inflating objects at the end of this 6000 long chain is impossible to complete within a reasonable time bound. It could take a beefy system hours to perform even using the heavily optimized native C implementation of Git, let alone with JGit. Enable pack.cutDeltaChains to be set in a configuration file to permit the PackWriter to determine the length of each delta chain and clip the chain at arbitrary points to fit within pack.depth. Delta chain cycles are still possible, but no attempt is made to detect them. A trivial chain of A->B->A will iterate for the full pack.depth configured limit (e.g. 50) and then pick an object to store as non-delta. When cutting chains the object list is walked in reverse to try and take advantage of existing chain computations. The assumption here is most deltas are near the end of the list, and their bases are near the front of the list. Going up from the tail attempts to reuse chainLength computations by relying on the memoized value in the delta base. The chainLength field in ObjectToPack is overloaded into the depth field normally used by DeltaWindow. This is acceptable because the chain cut happens before delta search, and the chainLength is reset to 0 if delta search will follow. Change-Id: Ida4fde9558f3abbbb77ade398d2af3941de9c812 --- .../jgit/internal/storage/pack/ObjectToPack.java | 12 ++++++ .../jgit/internal/storage/pack/PackWriter.java | 47 ++++++++++++++++++---- .../org/eclipse/jgit/storage/pack/PackConfig.java | 29 +++++++++++++ 3 files changed, 81 insertions(+), 7 deletions(-) (limited to 'org.eclipse.jgit') diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/pack/ObjectToPack.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/pack/ObjectToPack.java index 73d80d8462..33cb6af1e2 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/pack/ObjectToPack.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/pack/ObjectToPack.java @@ -194,6 +194,18 @@ public class ObjectToPack extends PackedObjectInfo { flags = (d << DELTA_SHIFT) | (flags & NON_DELTA_MASK); } + final int getChainLength() { + return getDeltaDepth(); + } + + final void setChainLength(int len) { + setDeltaDepth(len); + } + + final void clearChainLength() { + flags &= NON_DELTA_MASK; + } + final boolean wantWrite() { return getOffset() == 1; } diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/pack/PackWriter.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/pack/PackWriter.java index 34bf0eb16b..38210f3622 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/pack/PackWriter.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/pack/PackWriter.java @@ -1100,6 +1100,11 @@ public class PackWriter { } endPhase(monitor); stats.timeSearchingForReuse = System.currentTimeMillis() - start; + + if (config.isReuseDeltas() && config.getCutDeltaChains()) { + cutDeltaChains(objectsLists[OBJ_TREE]); + cutDeltaChains(objectsLists[OBJ_BLOB]); + } } private void searchForReuse(ProgressMonitor monitor, List list) @@ -1110,6 +1115,29 @@ public class PackWriter { pruneEdgesFromObjectList(list); } + private void cutDeltaChains(BlockList list) + throws IOException { + int max = config.getMaxDeltaDepth(); + for (int idx = list.size() - 1; idx >= 0; idx--) { + int d = 0; + ObjectToPack b = list.get(idx).getDeltaBase(); + while (b != null) { + if (d < b.getChainLength()) + break; + b.setChainLength(++d); + if (d >= max && b.isDeltaRepresentation()) { + reselectNonDelta(b); + break; + } + b = b.getDeltaBase(); + } + } + if (config.isDeltaCompress()) { + for (ObjectToPack otp : list) + otp.clearChainLength(); + } + } + private void searchForDeltas(ProgressMonitor monitor) throws MissingObjectException, IncorrectObjectTypeException, IOException { @@ -1257,6 +1285,17 @@ public class PackWriter { return cnt; } + private void reselectNonDelta(ObjectToPack otp) throws IOException { + otp.clearDeltaBase(); + otp.clearReuseAsIs(); + boolean old = reuseDeltas; + reuseDeltas = false; + reuseSupport.selectObjectRepresentation(this, + NullProgressMonitor.INSTANCE, + Collections.singleton(otp)); + reuseDeltas = old; + } + private void searchForDeltas(final ProgressMonitor monitor, final ObjectToPack[] list, final int cnt) throws MissingObjectException, IncorrectObjectTypeException, @@ -1448,13 +1487,7 @@ public class PackWriter { // (for example due to a concurrent repack) and a different base // was chosen, forcing a cycle. Select something other than a // delta, and write this object. - // - reuseDeltas = false; - otp.clearDeltaBase(); - otp.clearReuseAsIs(); - reuseSupport.selectObjectRepresentation(this, - NullProgressMonitor.INSTANCE, - Collections.singleton(otp)); + reselectNonDelta(otp); } otp.markWantWrite(); diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/PackConfig.java b/org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/PackConfig.java index 562b2c0a2c..c1cda39de7 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/PackConfig.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/PackConfig.java @@ -169,6 +169,7 @@ public class PackConfig { private boolean buildBitmaps = DEFAULT_BUILD_BITMAPS; + private boolean cutDeltaChains; /** Create a default configuration. */ public PackConfig() { @@ -221,6 +222,7 @@ public class PackConfig { this.executor = cfg.executor; this.indexVersion = cfg.indexVersion; this.buildBitmaps = cfg.buildBitmaps; + this.cutDeltaChains = cfg.cutDeltaChains; } /** @@ -373,6 +375,31 @@ public class PackConfig { this.maxDeltaDepth = maxDeltaDepth; } + /** + * @return true if existing delta chains should be cut at + * {@link #getMaxDeltaDepth()}. Default is false, allowing existing + * chains to be of any length. + */ + public boolean getCutDeltaChains() { + return cutDeltaChains; + } + + /** + * Enable cutting existing delta chains at {@link #getMaxDeltaDepth()}. + * + * By default this is disabled and existing chains are kept at whatever + * length a prior packer was configured to create. This allows objects to be + * packed one with a large depth (for example 250), and later to quickly + * repack the repository with a shorter depth (such as 50), but reusing the + * complete delta chains created by the earlier 250 depth. + * + * @param cut + * true to cut existing chains. + */ + public void setCutDeltaChains(boolean cut) { + cutDeltaChains = cut; + } + /** * Get the number of objects to try when looking for a delta base. * @@ -686,6 +713,8 @@ public class PackConfig { setReuseObjects(rc.getBoolean("pack", "reuseobjects", isReuseObjects())); //$NON-NLS-1$ //$NON-NLS-2$ setDeltaCompress(rc.getBoolean( "pack", "deltacompression", isDeltaCompress())); //$NON-NLS-1$ //$NON-NLS-2$ + setCutDeltaChains(rc.getBoolean( + "pack", "cutdeltachains", getCutDeltaChains())); //$NON-NLS-1$ //$NON-NLS-2$ setBuildBitmaps(rc.getBoolean("pack", "buildbitmaps", isBuildBitmaps())); //$NON-NLS-1$ //$NON-NLS-2$ } -- cgit v1.2.3