Browse Source

Support cutting existing delta chains longer than the max depth

Some packs built by JGit have incredibly long delta chains due to a
long standing bug in PackWriter. Google has packs created by JGit's
DfsGarbageCollector with chains of 6000 objects long, or more.

Inflating objects at the end of this 6000 long chain is impossible
to complete within a reasonable time bound. It could take a beefy
system hours to perform even using the heavily optimized native C
implementation of Git, let alone with JGit.

Enable pack.cutDeltaChains to be set in a configuration file to
permit the PackWriter to determine the length of each delta chain
and clip the chain at arbitrary points to fit within pack.depth.

Delta chain cycles are still possible, but no attempt is made to
detect them. A trivial chain of A->B->A will iterate for the full
pack.depth configured limit (e.g. 50) and then pick an object to
store as non-delta.

When cutting chains the object list is walked in reverse to try
and take advantage of existing chain computations. The assumption
here is most deltas are near the end of the list, and their bases
are near the front of the list. Going up from the tail attempts to
reuse chainLength computations by relying on the memoized value in
the delta base.

The chainLength field in ObjectToPack is overloaded into the depth
field normally used by DeltaWindow. This is acceptable because the
chain cut happens before delta search, and the chainLength is reset
to 0 if delta search will follow.

Change-Id: Ida4fde9558f3abbbb77ade398d2af3941de9c812
tags/v3.0.0.201305080800-m7
Shawn Pearce 11 years ago
parent
commit
5d446f410d

+ 12
- 0
org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/pack/ObjectToPack.java View File

@@ -194,6 +194,18 @@ public class ObjectToPack extends PackedObjectInfo {
flags = (d << DELTA_SHIFT) | (flags & NON_DELTA_MASK);
}

final int getChainLength() {
return getDeltaDepth();
}

final void setChainLength(int len) {
setDeltaDepth(len);
}

final void clearChainLength() {
flags &= NON_DELTA_MASK;
}

final boolean wantWrite() {
return getOffset() == 1;
}

+ 40
- 7
org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/pack/PackWriter.java View File

@@ -1100,6 +1100,11 @@ public class PackWriter {
}
endPhase(monitor);
stats.timeSearchingForReuse = System.currentTimeMillis() - start;

if (config.isReuseDeltas() && config.getCutDeltaChains()) {
cutDeltaChains(objectsLists[OBJ_TREE]);
cutDeltaChains(objectsLists[OBJ_BLOB]);
}
}

private void searchForReuse(ProgressMonitor monitor, List<ObjectToPack> list)
@@ -1110,6 +1115,29 @@ public class PackWriter {
pruneEdgesFromObjectList(list);
}

private void cutDeltaChains(BlockList<ObjectToPack> list)
throws IOException {
int max = config.getMaxDeltaDepth();
for (int idx = list.size() - 1; idx >= 0; idx--) {
int d = 0;
ObjectToPack b = list.get(idx).getDeltaBase();
while (b != null) {
if (d < b.getChainLength())
break;
b.setChainLength(++d);
if (d >= max && b.isDeltaRepresentation()) {
reselectNonDelta(b);
break;
}
b = b.getDeltaBase();
}
}
if (config.isDeltaCompress()) {
for (ObjectToPack otp : list)
otp.clearChainLength();
}
}

private void searchForDeltas(ProgressMonitor monitor)
throws MissingObjectException, IncorrectObjectTypeException,
IOException {
@@ -1257,6 +1285,17 @@ public class PackWriter {
return cnt;
}

private void reselectNonDelta(ObjectToPack otp) throws IOException {
otp.clearDeltaBase();
otp.clearReuseAsIs();
boolean old = reuseDeltas;
reuseDeltas = false;
reuseSupport.selectObjectRepresentation(this,
NullProgressMonitor.INSTANCE,
Collections.singleton(otp));
reuseDeltas = old;
}

private void searchForDeltas(final ProgressMonitor monitor,
final ObjectToPack[] list, final int cnt)
throws MissingObjectException, IncorrectObjectTypeException,
@@ -1448,13 +1487,7 @@ public class PackWriter {
// (for example due to a concurrent repack) and a different base
// was chosen, forcing a cycle. Select something other than a
// delta, and write this object.
//
reuseDeltas = false;
otp.clearDeltaBase();
otp.clearReuseAsIs();
reuseSupport.selectObjectRepresentation(this,
NullProgressMonitor.INSTANCE,
Collections.singleton(otp));
reselectNonDelta(otp);
}
otp.markWantWrite();


+ 29
- 0
org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/PackConfig.java View File

@@ -169,6 +169,7 @@ public class PackConfig {

private boolean buildBitmaps = DEFAULT_BUILD_BITMAPS;

private boolean cutDeltaChains;

/** Create a default configuration. */
public PackConfig() {
@@ -221,6 +222,7 @@ public class PackConfig {
this.executor = cfg.executor;
this.indexVersion = cfg.indexVersion;
this.buildBitmaps = cfg.buildBitmaps;
this.cutDeltaChains = cfg.cutDeltaChains;
}

/**
@@ -373,6 +375,31 @@ public class PackConfig {
this.maxDeltaDepth = maxDeltaDepth;
}

/**
* @return true if existing delta chains should be cut at
* {@link #getMaxDeltaDepth()}. Default is false, allowing existing
* chains to be of any length.
*/
public boolean getCutDeltaChains() {
return cutDeltaChains;
}

/**
* Enable cutting existing delta chains at {@link #getMaxDeltaDepth()}.
*
* By default this is disabled and existing chains are kept at whatever
* length a prior packer was configured to create. This allows objects to be
* packed one with a large depth (for example 250), and later to quickly
* repack the repository with a shorter depth (such as 50), but reusing the
* complete delta chains created by the earlier 250 depth.
*
* @param cut
* true to cut existing chains.
*/
public void setCutDeltaChains(boolean cut) {
cutDeltaChains = cut;
}

/**
* Get the number of objects to try when looking for a delta base.
*
@@ -686,6 +713,8 @@ public class PackConfig {
setReuseObjects(rc.getBoolean("pack", "reuseobjects", isReuseObjects())); //$NON-NLS-1$ //$NON-NLS-2$
setDeltaCompress(rc.getBoolean(
"pack", "deltacompression", isDeltaCompress())); //$NON-NLS-1$ //$NON-NLS-2$
setCutDeltaChains(rc.getBoolean(
"pack", "cutdeltachains", getCutDeltaChains())); //$NON-NLS-1$ //$NON-NLS-2$
setBuildBitmaps(rc.getBoolean("pack", "buildbitmaps", isBuildBitmaps())); //$NON-NLS-1$ //$NON-NLS-2$
}


Loading…
Cancel
Save