Browse Source

Fix missing deltas near type boundaries

Delta search was discarding discovered deltas if an object appeared
near a type boundary in the delta search window. This has caused JGit
to produce larger pack files than other implementations of the packing
algorithm.

Delta search works by pushing prior objects into a search window, an
ordered list of objects to attempt to delta compress the next object
against. (The window size is bounded, avoiding O(N^2) behavior.)

For implementation reasons multiple object types can appear in the
input list, and the window. PackWriter commonly passes both trees and
blobs in the input list handed to the DeltaWindow algorithm. The pack
file format requires an object to only delta compress against the same
type, so the DeltaWindow algorithm must stop doing comparisions if a
blob would be compared to a tree.

Because the input list is sorted by object type and the window is
recently considered prior objects, once a wrong type is discovered in
the window the search algorithm stops and uses the current result.

Unfortunately the termination condition was discarding any found
delta by setting deltaBase and deltaBuf to null when it was trying
to break the window search.

When this bug occurs, the state of the DeltaWindow looks like this:

                                 current
                                  |
                                 \ /
  input list:  tree0 tree1 blob1 blob2

  window:      blob1 tree1 tree0
                / \
                 |
              res.prev

As the loop iterates to the right across the window, it first finds
that blob1 is a suitable delta base for blob2, and temporarily holds
this in the bestDelta/deltaBuf fields. It then considers tree1, but
tree1 has the wrong type (blob != tree), so the window loop must give
up and fall through the remaining code.

Moving the condition up and discarding the window contents allows
the bestDelta/deltaBuf to be kept, letting the final file delta
compress blob1 against blob0.

The impact of this bug (and its fix) on real world repositories is
likely minimal. The boundary from blob to tree happens approximately
once in the search, as the input list is sorted by type. Only the
first window size worth of blobs (e.g. 10 or 250) were failing to
produce a delta in the final file.

This bug fix does produce significantly different results for small
test repositories created in the unit test suite, such as when a pack
may contains 6 objects (2 commits, 2 trees, 2 blobs).  Packing test
cases can now better sample different output pack file sizes depending
on delta compression and object reuse flags in PackConfig.

Change-Id: Ibec09398d0305d4dbc0c66fce1daaf38eb71148f
tags/v4.7.0.201704051617-r
Shawn Pearce 7 years ago
parent
commit
61d4922928

+ 17
- 11
org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/file/PackWriterTest.java View File

ObjectId.fromString("c59759f143fb1fe21c197981df75a7ee00290799"), ObjectId.fromString("c59759f143fb1fe21c197981df75a7ee00290799"),
ObjectId.fromString("aabf2ffaec9b497f0950352b3e582d73035c2035"), ObjectId.fromString("aabf2ffaec9b497f0950352b3e582d73035c2035"),
ObjectId.fromString("902d5476fa249b7abc9d84c611577a81381f0327"), ObjectId.fromString("902d5476fa249b7abc9d84c611577a81381f0327"),
ObjectId.fromString("5b6e7c66c276e7610d4a73c70ec1a1f7c1003259"),
ObjectId.fromString("6ff87c4664981e4397625791c8ea3bbb5f2279a3") };
ObjectId.fromString("6ff87c4664981e4397625791c8ea3bbb5f2279a3") ,
ObjectId.fromString("5b6e7c66c276e7610d4a73c70ec1a1f7c1003259") };
try (final RevWalk parser = new RevWalk(db)) { try (final RevWalk parser = new RevWalk(db)) {
final RevObject forcedOrderRevs[] = new RevObject[forcedOrder.length]; final RevObject forcedOrderRevs[] = new RevObject[forcedOrder.length];
for (int i = 0; i < forcedOrder.length; i++) for (int i = 0; i < forcedOrder.length; i++)
*/ */
@Test @Test
public void testWritePack2SizeDeltasVsNoDeltas() throws Exception { public void testWritePack2SizeDeltasVsNoDeltas() throws Exception {
config.setReuseDeltas(false);
config.setDeltaCompress(false);
testWritePack2(); testWritePack2();
final long sizePack2NoDeltas = os.size(); final long sizePack2NoDeltas = os.size();
tearDown(); tearDown();
ObjectId.fromString("aabf2ffaec9b497f0950352b3e582d73035c2035"), ObjectId.fromString("aabf2ffaec9b497f0950352b3e582d73035c2035"),
ObjectId.fromString("902d5476fa249b7abc9d84c611577a81381f0327"), ObjectId.fromString("902d5476fa249b7abc9d84c611577a81381f0327"),
ObjectId.fromString("4b825dc642cb6eb9a060e54bf8d69288fbee4904"), ObjectId.fromString("4b825dc642cb6eb9a060e54bf8d69288fbee4904"),
ObjectId.fromString("5b6e7c66c276e7610d4a73c70ec1a1f7c1003259"),
ObjectId.fromString("6ff87c4664981e4397625791c8ea3bbb5f2279a3") };
ObjectId.fromString("6ff87c4664981e4397625791c8ea3bbb5f2279a3"),
ObjectId.fromString("5b6e7c66c276e7610d4a73c70ec1a1f7c1003259") };


assertEquals(expectedOrder.length, writer.getObjectCount()); assertEquals(expectedOrder.length, writer.getObjectCount());
verifyObjectsOrder(expectedOrder); verifyObjectsOrder(expectedOrder);
ObjectId.fromString("c59759f143fb1fe21c197981df75a7ee00290799"), ObjectId.fromString("c59759f143fb1fe21c197981df75a7ee00290799"),
ObjectId.fromString("aabf2ffaec9b497f0950352b3e582d73035c2035"), ObjectId.fromString("aabf2ffaec9b497f0950352b3e582d73035c2035"),
ObjectId.fromString("902d5476fa249b7abc9d84c611577a81381f0327"), ObjectId.fromString("902d5476fa249b7abc9d84c611577a81381f0327"),
ObjectId.fromString("5b6e7c66c276e7610d4a73c70ec1a1f7c1003259"),
ObjectId.fromString("6ff87c4664981e4397625791c8ea3bbb5f2279a3") };
if (deltaReuse) {
// objects order influenced (swapped) by delta-base first rule
ObjectId temp = expectedOrder[4];
expectedOrder[4] = expectedOrder[5];
expectedOrder[5] = temp;
ObjectId.fromString("6ff87c4664981e4397625791c8ea3bbb5f2279a3") ,
ObjectId.fromString("5b6e7c66c276e7610d4a73c70ec1a1f7c1003259") };
if (!config.isReuseDeltas() && !config.isDeltaCompress()) {
// If no deltas are in the file the final two entries swap places.
swap(expectedOrder, 4, 5);
} }
assertEquals(expectedOrder.length, writer.getObjectCount()); assertEquals(expectedOrder.length, writer.getObjectCount());
verifyObjectsOrder(expectedOrder); verifyObjectsOrder(expectedOrder);
.computeName().name()); .computeName().name());
} }


private static void swap(ObjectId[] arr, int a, int b) {
ObjectId tmp = arr[a];
arr[a] = arr[b];
arr[b] = tmp;
}

private void writeVerifyPack4(final boolean thin) throws IOException { private void writeVerifyPack4(final boolean thin) throws IOException {
final HashSet<ObjectId> interestings = new HashSet<ObjectId>(); final HashSet<ObjectId> interestings = new HashSet<ObjectId>();
interestings.add(ObjectId interestings.add(ObjectId

+ 10
- 6
org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/pack/DeltaWindow.java View File

clear(n); clear(n);
} }
res.set(next); res.set(next);
clearWindowOnTypeSwitch();


if (res.object.isEdge() || res.object.doNotAttemptDelta()) { if (res.object.isEdge() || res.object.doNotAttemptDelta()) {
// We don't actually want to make a delta for // We don't actually want to make a delta for
return DeltaIndex.estimateIndexSize(len) - len; return DeltaIndex.estimateIndexSize(len) - len;
} }


private void clearWindowOnTypeSwitch() {
DeltaWindowEntry p = res.prev;
if (!p.empty() && res.type() != p.type()) {
for (; p != res; p = p.prev) {
clear(p);
}
}
}

private void clear(DeltaWindowEntry ent) { private void clear(DeltaWindowEntry ent) {
if (ent.index != null) if (ent.index != null)
loaded -= ent.index.getIndexSize(); loaded -= ent.index.getIndexSize();


private boolean delta(final DeltaWindowEntry src) private boolean delta(final DeltaWindowEntry src)
throws IOException { throws IOException {
// Objects must use only the same type as their delta base.
if (src.type() != res.type()) {
keepInWindow();
return NEXT_RES;
}

// If the sizes are radically different, this is a bad pairing. // If the sizes are radically different, this is a bad pairing.
if (res.size() < src.size() >>> 4) if (res.size() < src.size() >>> 4)
return NEXT_SRC; return NEXT_SRC;

Loading…
Cancel
Save