summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/dfs/DfsGarbageCollectorTest.java239
-rw-r--r--org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsGarbageCollector.java117
2 files changed, 345 insertions, 11 deletions
diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/dfs/DfsGarbageCollectorTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/dfs/DfsGarbageCollectorTest.java
new file mode 100644
index 0000000000..c7125bb9fe
--- /dev/null
+++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/dfs/DfsGarbageCollectorTest.java
@@ -0,0 +1,239 @@
+package org.eclipse.jgit.internal.storage.dfs;
+
+import static org.eclipse.jgit.internal.storage.dfs.DfsObjDatabase.PackSource.GC;
+import static org.eclipse.jgit.internal.storage.dfs.DfsObjDatabase.PackSource.INSERT;
+import static org.eclipse.jgit.internal.storage.dfs.DfsObjDatabase.PackSource.UNREACHABLE_GARBAGE;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNotEquals;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
+import java.io.IOException;
+import java.util.concurrent.TimeUnit;
+
+import org.eclipse.jgit.internal.storage.dfs.DfsObjDatabase.PackSource;
+import org.eclipse.jgit.junit.TestRepository;
+import org.eclipse.jgit.lib.AnyObjectId;
+import org.eclipse.jgit.lib.Ref;
+import org.eclipse.jgit.lib.Repository;
+import org.eclipse.jgit.revwalk.RevCommit;
+import org.eclipse.jgit.revwalk.RevWalk;
+import org.junit.Before;
+import org.junit.Test;
+
+public class DfsGarbageCollectorTest {
+ private TestRepository<InMemoryRepository> git;
+ private InMemoryRepository repo;
+ private DfsObjDatabase odb;
+
+ @Before
+ public void setUp() throws IOException {
+ DfsRepositoryDescription desc = new DfsRepositoryDescription("test");
+ git = new TestRepository<>(new InMemoryRepository(desc));
+ repo = git.getRepository();
+ odb = repo.getObjectDatabase();
+ }
+
+ @Test
+ public void testCollectionWithNoGarbage() throws Exception {
+ RevCommit commit0 = commit().message("0").create();
+ RevCommit commit1 = commit().message("1").parent(commit0).create();
+ git.update("master", commit1);
+
+ assertTrue("commit0 reachable", isReachable(repo, commit0));
+ assertTrue("commit1 reachable", isReachable(repo, commit1));
+
+ // Packs start out as INSERT.
+ assertEquals(2, odb.getPacks().length);
+ for (DfsPackFile pack : odb.getPacks()) {
+ assertEquals(INSERT, pack.getPackDescription().getPackSource());
+ }
+
+ gcNoTtl();
+
+ // Single GC pack present with all objects.
+ assertEquals(1, odb.getPacks().length);
+ DfsPackFile pack = odb.getPacks()[0];
+ assertEquals(GC, pack.getPackDescription().getPackSource());
+ assertTrue("commit0 in pack", isObjectInPack(commit0, pack));
+ assertTrue("commit1 in pack", isObjectInPack(commit1, pack));
+ }
+
+ @Test
+ public void testCollectionWithGarbage() throws Exception {
+ RevCommit commit0 = commit().message("0").create();
+ RevCommit commit1 = commit().message("1").parent(commit0).create();
+ git.update("master", commit0);
+
+ assertTrue("commit0 reachable", isReachable(repo, commit0));
+ assertFalse("commit1 garbage", isReachable(repo, commit1));
+ gcNoTtl();
+
+ assertEquals(2, odb.getPacks().length);
+ DfsPackFile gc = null;
+ DfsPackFile garbage = null;
+ for (DfsPackFile pack : odb.getPacks()) {
+ DfsPackDescription d = pack.getPackDescription();
+ if (d.getPackSource() == GC) {
+ gc = pack;
+ } else if (d.getPackSource() == UNREACHABLE_GARBAGE) {
+ garbage = pack;
+ } else {
+ fail("unexpected " + d.getPackSource());
+ }
+ }
+
+ assertNotNull("created GC pack", gc);
+ assertTrue(isObjectInPack(commit0, gc));
+
+ assertNotNull("created UNREACHABLE_GARBAGE pack", garbage);
+ assertTrue(isObjectInPack(commit1, garbage));
+ }
+
+ @Test
+ public void testCollectionWithGarbageAndGarbagePacksPurged()
+ throws Exception {
+ RevCommit commit0 = commit().message("0").create();
+ RevCommit commit1 = commit().message("1").parent(commit0).create();
+ git.update("master", commit0);
+
+ gcNoTtl();
+ gcWithTtl();
+
+ // The repository has an UNREACHABLE_GARBAGE pack that could have
+ // expired, but since we never purge the most recent UNREACHABLE_GARBAGE
+ // pack, it must have survived the GC.
+ boolean commit1Found = false;
+ for (DfsPackFile pack : odb.getPacks()) {
+ DfsPackDescription d = pack.getPackDescription();
+ if (d.getPackSource() == GC) {
+ assertTrue("has commit0", isObjectInPack(commit0, pack));
+ assertFalse("no commit1", isObjectInPack(commit1, pack));
+ } else if (d.getPackSource() == UNREACHABLE_GARBAGE) {
+ commit1Found |= isObjectInPack(commit1, pack);
+ } else {
+ fail("unexpected " + d.getPackSource());
+ }
+ }
+ assertTrue("garbage commit1 still readable", commit1Found);
+
+ // Find oldest UNREACHABLE_GARBAGE; it will be pruned by next GC.
+ DfsPackDescription oldestGarbagePack = null;
+ for (DfsPackFile pack : odb.getPacks()) {
+ DfsPackDescription d = pack.getPackDescription();
+ if (d.getPackSource() == UNREACHABLE_GARBAGE) {
+ oldestGarbagePack = oldestPack(oldestGarbagePack, d);
+ }
+ }
+ assertNotNull("has UNREACHABLE_GARBAGE", oldestGarbagePack);
+
+ gcWithTtl();
+ assertTrue("has packs", odb.getPacks().length > 0);
+ for (DfsPackFile pack : odb.getPacks()) {
+ assertNotEquals(oldestGarbagePack, pack.getPackDescription());
+ }
+ }
+
+ @Test
+ public void testCollectionWithGarbageCoalescence() throws Exception {
+ RevCommit commit0 = commit().message("0").create();
+ RevCommit commit1 = commit().message("1").parent(commit0).create();
+ git.update("master", commit0);
+
+ for (int i = 0; i < 3; i++) {
+ commit1 = commit().message("g" + i).parent(commit1).create();
+
+ // Make sure we don't have more than 1 UNREACHABLE_GARBAGE pack
+ // because they're coalesced.
+ gcNoTtl();
+ assertEquals(1, countPacks(UNREACHABLE_GARBAGE));
+ }
+ }
+
+ @Test
+ public void testCollectionWithGarbageNoCoalescence() throws Exception {
+ RevCommit commit0 = commit().message("0").create();
+ RevCommit commit1 = commit().message("1").parent(commit0).create();
+ git.update("master", commit0);
+
+ for (int i = 0; i < 3; i++) {
+ commit1 = commit().message("g" + i).parent(commit1).create();
+
+ DfsGarbageCollector gc = new DfsGarbageCollector(repo);
+ gc.setCoalesceGarbageLimit(0);
+ gc.setGarbageTtl(0, TimeUnit.MILLISECONDS);
+ run(gc);
+ assertEquals(1 + i, countPacks(UNREACHABLE_GARBAGE));
+ }
+ }
+
+ private TestRepository<InMemoryRepository>.CommitBuilder commit() {
+ return git.commit();
+ }
+
+ private void gcNoTtl() throws IOException {
+ DfsGarbageCollector gc = new DfsGarbageCollector(repo);
+ gc.setGarbageTtl(0, TimeUnit.MILLISECONDS); // disable TTL
+ run(gc);
+ }
+
+ private void gcWithTtl() throws InterruptedException, IOException {
+ // Wait for the system clock to move by at least 1 millisecond.
+ // This allows the DfsGarbageCollector to recognize the boundary.
+ long start = System.currentTimeMillis();
+ do {
+ Thread.sleep(10);
+ } while (System.currentTimeMillis() <= start);
+
+ DfsGarbageCollector gc = new DfsGarbageCollector(repo);
+ gc.setGarbageTtl(1, TimeUnit.MILLISECONDS);
+ run(gc);
+ }
+
+ private void run(DfsGarbageCollector gc) throws IOException {
+ assertTrue("gc repacked", gc.pack(null));
+ odb.clearCache();
+ }
+
+ private static boolean isReachable(Repository repo, AnyObjectId id)
+ throws IOException {
+ try (RevWalk rw = new RevWalk(repo)) {
+ for (Ref ref : repo.getAllRefs().values()) {
+ rw.markStart(rw.parseCommit(ref.getObjectId()));
+ }
+ for (RevCommit next; (next = rw.next()) != null;) {
+ if (AnyObjectId.equals(next, id)) {
+ return true;
+ }
+ }
+ }
+ return false;
+ }
+
+ private boolean isObjectInPack(AnyObjectId id, DfsPackFile pack)
+ throws IOException {
+ try (DfsReader reader = new DfsReader(odb)) {
+ return pack.hasObject(reader, id);
+ }
+ }
+
+ private static DfsPackDescription oldestPack(DfsPackDescription a,
+ DfsPackDescription b) {
+ if (a != null && a.getLastModified() < b.getLastModified()) {
+ return a;
+ }
+ return b;
+ }
+
+ private int countPacks(PackSource source) throws IOException {
+ int cnt = 0;
+ for (DfsPackFile pack : odb.getPacks()) {
+ if (pack.getPackDescription().getPackSource() == source) {
+ cnt++;
+ }
+ }
+ return cnt;
+ }
+}
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsGarbageCollector.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsGarbageCollector.java
index 2883f4ecc1..55b2d2f127 100644
--- a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsGarbageCollector.java
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsGarbageCollector.java
@@ -54,9 +54,11 @@ import static org.eclipse.jgit.internal.storage.pack.PackExt.PACK;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
+import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
+import java.util.concurrent.TimeUnit;
import org.eclipse.jgit.internal.JGitText;
import org.eclipse.jgit.internal.storage.dfs.DfsObjDatabase.PackSource;
@@ -93,10 +95,14 @@ public class DfsGarbageCollector {
private PackConfig packConfig;
+ // See pack(), below, for how these two variables interact.
private long coalesceGarbageLimit = 50 << 20;
+ private long garbageTtlMillis = TimeUnit.DAYS.toMillis(1);
private long startTimeMillis;
private List<DfsPackFile> packsBefore;
+ private List<DfsPackFile> expiredGarbagePacks;
+
private Set<ObjectId> allHeads;
private Set<ObjectId> nonHeads;
private Set<ObjectId> txnHeads;
@@ -168,6 +174,34 @@ public class DfsGarbageCollector {
}
/**
+ * @return garbage packs older than this limit (in milliseconds) will be
+ * pruned as part of the garbage collection process if the value is
+ * > 0, otherwise garbage packs are retained.
+ */
+ public long getGarbageTtlMillis() {
+ return garbageTtlMillis;
+ }
+
+ /**
+ * Set the time to live for garbage objects.
+ * <p>
+ * Any UNREACHABLE_GARBAGE older than this limit will be pruned at the end
+ * of the run.
+ * <p>
+ * If timeToLiveMillis is set to 0, UNREACHABLE_GARBAGE purging is disabled.
+ *
+ * @param ttl
+ * Time to live whatever unit is specified.
+ * @param unit
+ * The specified time unit.
+ * @return {@code this}
+ */
+ public DfsGarbageCollector setGarbageTtl(long ttl, TimeUnit unit) {
+ garbageTtlMillis = unit.toMillis(ttl);
+ return this;
+ }
+
+ /**
* Create a single new pack file containing all of the live objects.
* <p>
* This method safely decides which packs can be expired after the new pack
@@ -189,6 +223,12 @@ public class DfsGarbageCollector {
if (packConfig.getIndexVersion() != 2)
throw new IllegalStateException(
JGitText.get().supportOnlyPackIndexVersion2);
+ if (garbageTtlMillis > 0) {
+ // We disable coalescing because the coalescing step will keep
+ // refreshing the UNREACHABLE_GARBAGE pack and we wouldn't
+ // actually prune anything.
+ coalesceGarbageLimit = 0;
+ }
startTimeMillis = System.currentTimeMillis();
ctx = (DfsReader) objdb.newReader();
@@ -197,9 +237,14 @@ public class DfsGarbageCollector {
objdb.clearCache();
Collection<Ref> refsBefore = getAllRefs();
- packsBefore = packsToRebuild();
- if (packsBefore.isEmpty())
+ readPacksBefore();
+
+ if (packsBefore.isEmpty()) {
+ if (!expiredGarbagePacks.isEmpty()) {
+ objdb.commitPack(noPacks(), toPrune());
+ }
return true;
+ }
allHeads = new HashSet<ObjectId>();
nonHeads = new HashSet<ObjectId>();
@@ -254,17 +299,60 @@ public class DfsGarbageCollector {
return refs;
}
- private List<DfsPackFile> packsToRebuild() throws IOException {
+ private void readPacksBefore() throws IOException {
DfsPackFile[] packs = objdb.getPacks();
- List<DfsPackFile> out = new ArrayList<DfsPackFile>(packs.length);
+ packsBefore = new ArrayList<DfsPackFile>(packs.length);
+ expiredGarbagePacks = new ArrayList<DfsPackFile>(packs.length);
+
+ long mostRecentGC = mostRecentGC(packs);
+ long now = System.currentTimeMillis();
+ for (DfsPackFile p : packs) {
+ DfsPackDescription d = p.getPackDescription();
+ if (d.getPackSource() != UNREACHABLE_GARBAGE) {
+ packsBefore.add(p);
+ } else if (packIsExpiredGarbage(d, mostRecentGC, now)) {
+ expiredGarbagePacks.add(p);
+ } else if (d.getFileSize(PackExt.PACK) < coalesceGarbageLimit) {
+ packsBefore.add(p);
+ }
+ }
+ }
+
+ private static long mostRecentGC(DfsPackFile[] packs) {
+ long r = 0;
for (DfsPackFile p : packs) {
DfsPackDescription d = p.getPackDescription();
- if (d.getPackSource() != UNREACHABLE_GARBAGE)
- out.add(p);
- else if (d.getFileSize(PackExt.PACK) < coalesceGarbageLimit)
- out.add(p);
+ if (d.getPackSource() == GC || d.getPackSource() == GC_REST) {
+ r = Math.max(r, d.getLastModified());
+ }
}
- return out;
+ return r;
+ }
+
+ private boolean packIsExpiredGarbage(DfsPackDescription d,
+ long mostRecentGC, long now) {
+ // It should be safe to remove an UNREACHABLE_GARBAGE pack if it:
+ //
+ // (a) Predates the most recent prior run of this class. This check
+ // ensures the graph traversal algorithm had a chance to consider
+ // all objects in this pack and copied them into a GC or GC_REST
+ // pack if the graph contained live edges to the objects.
+ //
+ // This check is safe because of the ordering of packing; the GC
+ // packs are written first and then the UNREACHABLE_GARBAGE is
+ // constructed. Any UNREACHABLE_GARBAGE dated earlier than the GC
+ // was input to the prior GC's graph traversal.
+ //
+ // (b) Is older than garbagePackTtl. This check gives concurrent
+ // inserter threads sufficient time to identify an object is not
+ // in the graph and should have a new copy written, rather than
+ // relying on something from an UNREACHABLE_GARBAGE pack.
+ //
+ // Both (a) and (b) must be met to safely remove UNREACHABLE_GARBAGE.
+ return d.getPackSource() == UNREACHABLE_GARBAGE
+ && d.getLastModified() < mostRecentGC
+ && garbageTtlMillis > 0
+ && now - d.getLastModified() >= garbageTtlMillis;
}
/** @return all of the source packs that fed into this compaction. */
@@ -285,8 +373,12 @@ public class DfsGarbageCollector {
private List<DfsPackDescription> toPrune() {
int cnt = packsBefore.size();
List<DfsPackDescription> all = new ArrayList<DfsPackDescription>(cnt);
- for (DfsPackFile pack : packsBefore)
+ for (DfsPackFile pack : packsBefore) {
+ all.add(pack.getPackDescription());
+ }
+ for (DfsPackFile pack : expiredGarbagePacks) {
all.add(pack.getPackDescription());
+ }
return all;
}
@@ -329,7 +421,6 @@ public class DfsGarbageCollector {
}
private void packGarbage(ProgressMonitor pm) throws IOException {
- // TODO(sop) This is ugly. The garbage pack needs to be deleted.
PackConfig cfg = new PackConfig(packConfig);
cfg.setReuseDeltas(true);
cfg.setReuseObjects(true);
@@ -420,4 +511,8 @@ public class DfsGarbageCollector {
DfsBlockCache.getInstance().getOrCreate(pack, null);
return pack;
}
+
+ private static List<DfsPackDescription> noPacks() {
+ return Collections.emptyList();
+ }
}