From d67b18353754f9d8fe2d0a41e7812068bbbca1dd Mon Sep 17 00:00:00 2001 From: Shawn Pearce Date: Mon, 6 Feb 2017 16:04:29 -0800 Subject: Prefer smaller GC files during DFS garbage collection In 8ac65d33ed7a94f77cb066271669feebf9b882fc PackWriter changed its behavior to always prefer the last object representation presented to it by the ObjectReuseAsIs implementation. This was a fix to avoid delta chain cycles. Unfortunately it can lead to suboptimal compression when concurrent GCs are run on the same repository. One case is automatic GC running (with default settings) in parallel to a manual GC that has disabled delta reuse in order to generate new smaller deltas for the entire history of the repository. Running GC with no-reuse generally requires more CPU time, which also translates to a longer running time. This can lead to a race where the automatic GC completes before the no-reuse GC, leaving the repository in a state such as: no-reuse GC: size 1 GiB, mtime = 18:45 auto GC: size 8 GiB, mtime = 17:30 With the default sort ordering, the smaller no-reuse GC pack is sorted earlier in the pack list, due to its more recent mtime. During object reuse in a future GC, these smaller representations are considered first by PackWriter, but are all discarded when the auto GC file from 17:30 is examined second (due to its older mtime). Work around this in two ways. Well formed DFS repositories should have at most 1 GC pack. If 2 or more GC packs exist, break the sorting tie by selecting the smaller file earlier in the pack list. This allows all normal read code paths to favor the smaller file, which places less pressure on the DfsBlockCache. If any GC race happens, readers serving clone requests will prefer the file that is smaller. During object reuse, flip this ordering so that the smaller file is last. This allows PackWriter to see smaller deltas last, replacing larger representations that were previously considered from other pack files. Change-Id: I0b7dc8bb9711c82abd6bd16643f518cfccc6d31a --- .../storage/dfs/DfsGarbageCollectorTest.java | 68 ++++++++++++++++++++++ 1 file changed, 68 insertions(+) (limited to 'org.eclipse.jgit.test/tst/org/eclipse/jgit') diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/dfs/DfsGarbageCollectorTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/dfs/DfsGarbageCollectorTest.java index 771aadc841..c70b6f2990 100644 --- a/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/dfs/DfsGarbageCollectorTest.java +++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/dfs/DfsGarbageCollectorTest.java @@ -9,10 +9,12 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNotEquals; import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertSame; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; import java.io.IOException; +import java.util.Collections; import java.util.concurrent.TimeUnit; import org.eclipse.jgit.internal.storage.dfs.DfsObjDatabase.PackSource; @@ -21,8 +23,10 @@ import org.eclipse.jgit.junit.TestRepository; import org.eclipse.jgit.lib.AnyObjectId; import org.eclipse.jgit.lib.Ref; import org.eclipse.jgit.lib.Repository; +import org.eclipse.jgit.revwalk.RevBlob; import org.eclipse.jgit.revwalk.RevCommit; import org.eclipse.jgit.revwalk.RevWalk; +import org.eclipse.jgit.storage.pack.PackConfig; import org.eclipse.jgit.util.SystemReader; import org.junit.After; import org.junit.Before; @@ -74,6 +78,70 @@ public class DfsGarbageCollectorTest { assertTrue("commit1 in pack", isObjectInPack(commit1, pack)); } + @Test + public void testRacyNoReusePrefersSmaller() throws Exception { + StringBuilder msg = new StringBuilder(); + for (int i = 0; i < 100; i++) { + msg.append(i).append(": i am a teapot\n"); + } + RevBlob a = git.blob(msg.toString()); + RevCommit c0 = git.commit() + .add("tea", a) + .message("0") + .create(); + + msg.append("short and stout\n"); + RevBlob b = git.blob(msg.toString()); + RevCommit c1 = git.commit().parent(c0).tick(1) + .add("tea", b) + .message("1") + .create(); + git.update("master", c1); + + PackConfig cfg = new PackConfig(); + cfg.setReuseObjects(false); + cfg.setReuseDeltas(false); + cfg.setDeltaCompress(false); + cfg.setThreads(1); + DfsGarbageCollector gc = new DfsGarbageCollector(repo); + gc.setGarbageTtl(0, TimeUnit.MILLISECONDS); // disable TTL + gc.setPackConfig(cfg); + run(gc); + + assertEquals(1, odb.getPacks().length); + DfsPackDescription large = odb.getPacks()[0].getPackDescription(); + assertSame(PackSource.GC, large.getPackSource()); + + cfg.setDeltaCompress(true); + gc = new DfsGarbageCollector(repo); + gc.setGarbageTtl(0, TimeUnit.MILLISECONDS); // disable TTL + gc.setPackConfig(cfg); + run(gc); + + assertEquals(1, odb.getPacks().length); + DfsPackDescription small = odb.getPacks()[0].getPackDescription(); + assertSame(PackSource.GC, small.getPackSource()); + assertTrue( + "delta compression pack is smaller", + small.getFileSize(PACK) < large.getFileSize(PACK)); + assertTrue( + "large pack is older", + large.getLastModified() < small.getLastModified()); + + // Forcefully reinsert the older larger GC pack. + odb.commitPack(Collections.singleton(large), null); + odb.clearCache(); + assertEquals(2, odb.getPacks().length); + + gc = new DfsGarbageCollector(repo); + gc.setGarbageTtl(0, TimeUnit.MILLISECONDS); // disable TTL + run(gc); + + assertEquals(1, odb.getPacks().length); + DfsPackDescription rebuilt = odb.getPacks()[0].getPackDescription(); + assertEquals(small.getFileSize(PACK), rebuilt.getFileSize(PACK)); + } + @Test public void testCollectionWithGarbage() throws Exception { RevCommit commit0 = commit().message("0").create(); -- cgit v1.2.3