From 5e250e45befd07e1d9893c6dde4c43059c5d8fd9 Mon Sep 17 00:00:00 2001 From: Thirumala Reddy Mutchukota Date: Fri, 21 Apr 2017 12:26:33 -0700 Subject: [PATCH] Delete expired garbage even when there is no GC pack present. Delete the condition to check whether the garbage pack creation time is older than the last GC operation, because it's not possible to find the last GC operation time when there is no GC pack. Add additional tests to make sure the contents of the expired garbage packs are considered during the GC operation and any actively referenced objects from the garbage packs are copied successfully into the GC pack before deleting the garbage pack. Change-Id: I09e8b2656de8ba7f9b996724ad1961d908e937b6 Signed-off-by: Thirumala Reddy Mutchukota --- .../storage/dfs/DfsGarbageCollectorTest.java | 135 ++++++++++++++---- .../storage/dfs/DfsGarbageCollector.java | 53 +------ 2 files changed, 116 insertions(+), 72 deletions(-) diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/dfs/DfsGarbageCollectorTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/dfs/DfsGarbageCollectorTest.java index c70b6f2990..32002fd01d 100644 --- a/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/dfs/DfsGarbageCollectorTest.java +++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/dfs/DfsGarbageCollectorTest.java @@ -7,7 +7,6 @@ import static org.eclipse.jgit.internal.storage.dfs.DfsObjDatabase.PackSource.UN import static org.eclipse.jgit.internal.storage.pack.PackExt.PACK; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertNotEquals; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertSame; import static org.junit.Assert.assertTrue; @@ -180,41 +179,133 @@ public class DfsGarbageCollectorTest { RevCommit commit1 = commit().message("1").parent(commit0).create(); git.update("master", commit0); - gcNoTtl(); gcWithTtl(); - - // The repository has an UNREACHABLE_GARBAGE pack that could have - // expired, but since we never purge the most recent UNREACHABLE_GARBAGE - // pack, it must have survived the GC. - boolean commit1Found = false; + // The repository should have a GC pack with commit0 and an + // UNREACHABLE_GARBAGE pack with commit1. + assertEquals(2, odb.getPacks().length); + boolean gcPackFound = false; + boolean garbagePackFound = false; for (DfsPackFile pack : odb.getPacks()) { DfsPackDescription d = pack.getPackDescription(); if (d.getPackSource() == GC) { + gcPackFound = true; assertTrue("has commit0", isObjectInPack(commit0, pack)); assertFalse("no commit1", isObjectInPack(commit1, pack)); } else if (d.getPackSource() == UNREACHABLE_GARBAGE) { - commit1Found |= isObjectInPack(commit1, pack); + garbagePackFound = true; + assertFalse("no commit0", isObjectInPack(commit0, pack)); + assertTrue("has commit1", isObjectInPack(commit1, pack)); } else { fail("unexpected " + d.getPackSource()); } } - assertTrue("garbage commit1 still readable", commit1Found); + assertTrue("gc pack found", gcPackFound); + assertTrue("garbage pack found", garbagePackFound); + + gcWithTtl(); + // The gc operation should have removed UNREACHABLE_GARBAGE pack along with commit1. + DfsPackFile[] packs = odb.getPacks(); + assertEquals(1, packs.length); + + assertEquals(GC, packs[0].getPackDescription().getPackSource()); + assertTrue("has commit0", isObjectInPack(commit0, packs[0])); + assertFalse("no commit1", isObjectInPack(commit1, packs[0])); + } + + @Test + public void testCollectionWithGarbageAndRereferencingGarbage() + throws Exception { + RevCommit commit0 = commit().message("0").create(); + RevCommit commit1 = commit().message("1").parent(commit0).create(); + git.update("master", commit0); - // Find oldest UNREACHABLE_GARBAGE; it will be pruned by next GC. - DfsPackDescription oldestGarbagePack = null; + gcWithTtl(); + // The repository should have a GC pack with commit0 and an + // UNREACHABLE_GARBAGE pack with commit1. + assertEquals(2, odb.getPacks().length); + boolean gcPackFound = false; + boolean garbagePackFound = false; for (DfsPackFile pack : odb.getPacks()) { DfsPackDescription d = pack.getPackDescription(); - if (d.getPackSource() == UNREACHABLE_GARBAGE) { - oldestGarbagePack = oldestPack(oldestGarbagePack, d); + if (d.getPackSource() == GC) { + gcPackFound = true; + assertTrue("has commit0", isObjectInPack(commit0, pack)); + assertFalse("no commit1", isObjectInPack(commit1, pack)); + } else if (d.getPackSource() == UNREACHABLE_GARBAGE) { + garbagePackFound = true; + assertFalse("no commit0", isObjectInPack(commit0, pack)); + assertTrue("has commit1", isObjectInPack(commit1, pack)); + } else { + fail("unexpected " + d.getPackSource()); } } - assertNotNull("has UNREACHABLE_GARBAGE", oldestGarbagePack); + assertTrue("gc pack found", gcPackFound); + assertTrue("garbage pack found", garbagePackFound); + + git.update("master", commit1); gcWithTtl(); - assertTrue("has packs", odb.getPacks().length > 0); - for (DfsPackFile pack : odb.getPacks()) { - assertNotEquals(oldestGarbagePack, pack.getPackDescription()); - } + // The gc operation should have removed the UNREACHABLE_GARBAGE pack and + // moved commit1 into GC pack. + DfsPackFile[] packs = odb.getPacks(); + assertEquals(1, packs.length); + + assertEquals(GC, packs[0].getPackDescription().getPackSource()); + assertTrue("has commit0", isObjectInPack(commit0, packs[0])); + assertTrue("has commit1", isObjectInPack(commit1, packs[0])); + } + + @Test + public void testCollectionWithPureGarbageAndGarbagePacksPurged() + throws Exception { + RevCommit commit0 = commit().message("0").create(); + RevCommit commit1 = commit().message("1").parent(commit0).create(); + + gcWithTtl(); + // The repository should have a single UNREACHABLE_GARBAGE pack with commit0 + // and commit1. + DfsPackFile[] packs = odb.getPacks(); + assertEquals(1, packs.length); + + assertEquals(UNREACHABLE_GARBAGE, packs[0].getPackDescription().getPackSource()); + assertTrue("has commit0", isObjectInPack(commit0, packs[0])); + assertTrue("has commit1", isObjectInPack(commit1, packs[0])); + + gcWithTtl(); + // The gc operation should have removed UNREACHABLE_GARBAGE pack along + // with commit0 and commit1. + assertEquals(0, odb.getPacks().length); + } + + @Test + public void testCollectionWithPureGarbageAndRereferencingGarbage() + throws Exception { + RevCommit commit0 = commit().message("0").create(); + RevCommit commit1 = commit().message("1").parent(commit0).create(); + + gcWithTtl(); + // The repository should have a single UNREACHABLE_GARBAGE pack with commit0 + // and commit1. + DfsPackFile[] packs = odb.getPacks(); + assertEquals(1, packs.length); + + DfsPackDescription pack = packs[0].getPackDescription(); + assertEquals(UNREACHABLE_GARBAGE, pack.getPackSource()); + assertTrue("has commit0", isObjectInPack(commit0, packs[0])); + assertTrue("has commit1", isObjectInPack(commit1, packs[0])); + + git.update("master", commit0); + + gcWithTtl(); + // The gc operation should have moved commit0 into the GC pack and + // removed UNREACHABLE_GARBAGE along with commit1. + packs = odb.getPacks(); + assertEquals(1, packs.length); + + pack = packs[0].getPackDescription(); + assertEquals(GC, pack.getPackSource()); + assertTrue("has commit0", isObjectInPack(commit0, packs[0])); + assertFalse("no commit1", isObjectInPack(commit1, packs[0])); } @Test @@ -588,14 +679,6 @@ public class DfsGarbageCollectorTest { } } - private static DfsPackDescription oldestPack(DfsPackDescription a, - DfsPackDescription b) { - if (a != null && a.getLastModified() < b.getLastModified()) { - return a; - } - return b; - } - private int countPacks(PackSource source) throws IOException { int cnt = 0; for (DfsPackFile pack : odb.getPacks()) { diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsGarbageCollector.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsGarbageCollector.java index d3803024a5..de447de4d8 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsGarbageCollector.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsGarbageCollector.java @@ -58,7 +58,6 @@ import java.io.IOException; import java.util.ArrayList; import java.util.Calendar; import java.util.Collection; -import java.util.Collections; import java.util.EnumSet; import java.util.GregorianCalendar; import java.util.HashSet; @@ -242,13 +241,6 @@ public class DfsGarbageCollector { Collection refsBefore = getAllRefs(); readPacksBefore(); - if (packsBefore.isEmpty()) { - if (!expiredGarbagePacks.isEmpty()) { - objdb.commitPack(noPacks(), toPrune()); - } - return true; - } - allHeads = new HashSet<>(); nonHeads = new HashSet<>(); txnHeads = new HashSet<>(); @@ -307,13 +299,12 @@ public class DfsGarbageCollector { packsBefore = new ArrayList<>(packs.length); expiredGarbagePacks = new ArrayList<>(packs.length); - long mostRecentGC = mostRecentGC(packs); long now = SystemReader.getInstance().getCurrentTime(); for (DfsPackFile p : packs) { DfsPackDescription d = p.getPackDescription(); if (d.getPackSource() != UNREACHABLE_GARBAGE) { packsBefore.add(p); - } else if (packIsExpiredGarbage(d, mostRecentGC, now)) { + } else if (packIsExpiredGarbage(d, now)) { expiredGarbagePacks.add(p); } else if (packIsCoalesceableGarbage(d, now)) { packsBefore.add(p); @@ -321,39 +312,13 @@ public class DfsGarbageCollector { } } - private static long mostRecentGC(DfsPackFile[] packs) { - long r = 0; - for (DfsPackFile p : packs) { - DfsPackDescription d = p.getPackDescription(); - if (d.getPackSource() == GC || d.getPackSource() == GC_REST) { - r = Math.max(r, d.getLastModified()); - } - } - return r; - } - - private boolean packIsExpiredGarbage(DfsPackDescription d, - long mostRecentGC, long now) { - // It should be safe to remove an UNREACHABLE_GARBAGE pack if it: - // - // (a) Predates the most recent prior run of this class. This check - // ensures the graph traversal algorithm had a chance to consider - // all objects in this pack and copied them into a GC or GC_REST - // pack if the graph contained live edges to the objects. - // - // This check is safe because of the ordering of packing; the GC - // packs are written first and then the UNREACHABLE_GARBAGE is - // constructed. Any UNREACHABLE_GARBAGE dated earlier than the GC - // was input to the prior GC's graph traversal. - // - // (b) Is older than garbagePackTtl. This check gives concurrent - // inserter threads sufficient time to identify an object is not - // in the graph and should have a new copy written, rather than - // relying on something from an UNREACHABLE_GARBAGE pack. - // - // Both (a) and (b) must be met to safely remove UNREACHABLE_GARBAGE. + private boolean packIsExpiredGarbage(DfsPackDescription d, long now) { + // Consider the garbage pack as expired when it's older than + // garbagePackTtl. This check gives concurrent inserter threads + // sufficient time to identify an object is not in the graph and should + // have a new copy written, rather than relying on something from an + // UNREACHABLE_GARBAGE pack. return d.getPackSource() == UNREACHABLE_GARBAGE - && d.getLastModified() < mostRecentGC && garbageTtlMillis > 0 && now - d.getLastModified() >= garbageTtlMillis; } @@ -605,8 +570,4 @@ public class DfsGarbageCollector { DfsBlockCache.getInstance().getOrCreate(pack, null); return pack; } - - private static List noPacks() { - return Collections.emptyList(); - } } -- 2.39.5