]> source.dussan.org Git - jgit.git/commitdiff
Fix missing deltas near type boundaries 04/90604/3
authorShawn Pearce <spearce@spearce.org>
Wed, 8 Feb 2017 05:00:30 +0000 (21:00 -0800)
committerShawn Pearce <spearce@spearce.org>
Wed, 8 Feb 2017 22:36:24 +0000 (14:36 -0800)
Delta search was discarding discovered deltas if an object appeared
near a type boundary in the delta search window. This has caused JGit
to produce larger pack files than other implementations of the packing
algorithm.

Delta search works by pushing prior objects into a search window, an
ordered list of objects to attempt to delta compress the next object
against. (The window size is bounded, avoiding O(N^2) behavior.)

For implementation reasons multiple object types can appear in the
input list, and the window. PackWriter commonly passes both trees and
blobs in the input list handed to the DeltaWindow algorithm. The pack
file format requires an object to only delta compress against the same
type, so the DeltaWindow algorithm must stop doing comparisions if a
blob would be compared to a tree.

Because the input list is sorted by object type and the window is
recently considered prior objects, once a wrong type is discovered in
the window the search algorithm stops and uses the current result.

Unfortunately the termination condition was discarding any found
delta by setting deltaBase and deltaBuf to null when it was trying
to break the window search.

When this bug occurs, the state of the DeltaWindow looks like this:

                                 current
                                  |
                                 \ /
  input list:  tree0 tree1 blob1 blob2

  window:      blob1 tree1 tree0
                / \
                 |
              res.prev

As the loop iterates to the right across the window, it first finds
that blob1 is a suitable delta base for blob2, and temporarily holds
this in the bestDelta/deltaBuf fields. It then considers tree1, but
tree1 has the wrong type (blob != tree), so the window loop must give
up and fall through the remaining code.

Moving the condition up and discarding the window contents allows
the bestDelta/deltaBuf to be kept, letting the final file delta
compress blob1 against blob0.

The impact of this bug (and its fix) on real world repositories is
likely minimal. The boundary from blob to tree happens approximately
once in the search, as the input list is sorted by type. Only the
first window size worth of blobs (e.g. 10 or 250) were failing to
produce a delta in the final file.

This bug fix does produce significantly different results for small
test repositories created in the unit test suite, such as when a pack
may contains 6 objects (2 commits, 2 trees, 2 blobs).  Packing test
cases can now better sample different output pack file sizes depending
on delta compression and object reuse flags in PackConfig.

Change-Id: Ibec09398d0305d4dbc0c66fce1daaf38eb71148f

org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/file/PackWriterTest.java
org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/pack/DeltaWindow.java

index 2e579524861ac8b7c4362c8b177a65d0ae8a89b0..e4dfe7587cdb72aecbf57a325abb2ae9276c5076 100644 (file)
@@ -364,8 +364,8 @@ public class PackWriterTest extends SampleDataRepositoryTestCase {
                                ObjectId.fromString("c59759f143fb1fe21c197981df75a7ee00290799"),
                                ObjectId.fromString("aabf2ffaec9b497f0950352b3e582d73035c2035"),
                                ObjectId.fromString("902d5476fa249b7abc9d84c611577a81381f0327"),
-                               ObjectId.fromString("5b6e7c66c276e7610d4a73c70ec1a1f7c1003259"),
-                               ObjectId.fromString("6ff87c4664981e4397625791c8ea3bbb5f2279a3") };
+                               ObjectId.fromString("6ff87c4664981e4397625791c8ea3bbb5f2279a3") ,
+                               ObjectId.fromString("5b6e7c66c276e7610d4a73c70ec1a1f7c1003259") };
                try (final RevWalk parser = new RevWalk(db)) {
                        final RevObject forcedOrderRevs[] = new RevObject[forcedOrder.length];
                        for (int i = 0; i < forcedOrder.length; i++)
@@ -412,6 +412,8 @@ public class PackWriterTest extends SampleDataRepositoryTestCase {
         */
        @Test
        public void testWritePack2SizeDeltasVsNoDeltas() throws Exception {
+               config.setReuseDeltas(false);
+               config.setDeltaCompress(false);
                testWritePack2();
                final long sizePack2NoDeltas = os.size();
                tearDown();
@@ -739,8 +741,8 @@ public class PackWriterTest extends SampleDataRepositoryTestCase {
                                ObjectId.fromString("aabf2ffaec9b497f0950352b3e582d73035c2035"),
                                ObjectId.fromString("902d5476fa249b7abc9d84c611577a81381f0327"),
                                ObjectId.fromString("4b825dc642cb6eb9a060e54bf8d69288fbee4904"),
-                               ObjectId.fromString("5b6e7c66c276e7610d4a73c70ec1a1f7c1003259"),
-                               ObjectId.fromString("6ff87c4664981e4397625791c8ea3bbb5f2279a3") };
+                               ObjectId.fromString("6ff87c4664981e4397625791c8ea3bbb5f2279a3"),
+                               ObjectId.fromString("5b6e7c66c276e7610d4a73c70ec1a1f7c1003259") };
 
                assertEquals(expectedOrder.length, writer.getObjectCount());
                verifyObjectsOrder(expectedOrder);
@@ -763,13 +765,11 @@ public class PackWriterTest extends SampleDataRepositoryTestCase {
                                ObjectId.fromString("c59759f143fb1fe21c197981df75a7ee00290799"),
                                ObjectId.fromString("aabf2ffaec9b497f0950352b3e582d73035c2035"),
                                ObjectId.fromString("902d5476fa249b7abc9d84c611577a81381f0327"),
-                               ObjectId.fromString("5b6e7c66c276e7610d4a73c70ec1a1f7c1003259"),
-                               ObjectId.fromString("6ff87c4664981e4397625791c8ea3bbb5f2279a3") };
-               if (deltaReuse) {
-                       // objects order influenced (swapped) by delta-base first rule
-                       ObjectId temp = expectedOrder[4];
-                       expectedOrder[4] = expectedOrder[5];
-                       expectedOrder[5] = temp;
+                               ObjectId.fromString("6ff87c4664981e4397625791c8ea3bbb5f2279a3") ,
+                               ObjectId.fromString("5b6e7c66c276e7610d4a73c70ec1a1f7c1003259") };
+               if (!config.isReuseDeltas() && !config.isDeltaCompress()) {
+                       // If no deltas are in the file the final two entries swap places.
+                       swap(expectedOrder, 4, 5);
                }
                assertEquals(expectedOrder.length, writer.getObjectCount());
                verifyObjectsOrder(expectedOrder);
@@ -777,6 +777,12 @@ public class PackWriterTest extends SampleDataRepositoryTestCase {
                                .computeName().name());
        }
 
+       private static void swap(ObjectId[] arr, int a, int b) {
+               ObjectId tmp = arr[a];
+               arr[a] = arr[b];
+               arr[b] = tmp;
+       }
+
        private void writeVerifyPack4(final boolean thin) throws IOException {
                final HashSet<ObjectId> interestings = new HashSet<ObjectId>();
                interestings.add(ObjectId
index 19d06a23f859563d59a4a12576a87c47f288a624..73b285afc17c7b7d789130c14a025c56c3f4b0ed 100644 (file)
@@ -160,6 +160,7 @@ final class DeltaWindow {
                                                clear(n);
                                }
                                res.set(next);
+                               clearWindowOnTypeSwitch();
 
                                if (res.object.isEdge() || res.object.doNotAttemptDelta()) {
                                        // We don't actually want to make a delta for
@@ -194,6 +195,15 @@ final class DeltaWindow {
                return DeltaIndex.estimateIndexSize(len) - len;
        }
 
+       private void clearWindowOnTypeSwitch() {
+               DeltaWindowEntry p = res.prev;
+               if (!p.empty() && res.type() != p.type()) {
+                       for (; p != res; p = p.prev) {
+                               clear(p);
+                       }
+               }
+       }
+
        private void clear(DeltaWindowEntry ent) {
                if (ent.index != null)
                        loaded -= ent.index.getIndexSize();
@@ -258,12 +268,6 @@ final class DeltaWindow {
 
        private boolean delta(final DeltaWindowEntry src)
                        throws IOException {
-               // Objects must use only the same type as their delta base.
-               if (src.type() != res.type()) {
-                       keepInWindow();
-                       return NEXT_RES;
-               }
-
                // If the sizes are radically different, this is a bad pairing.
                if (res.size() < src.size() >>> 4)
                        return NEXT_SRC;