]> source.dussan.org Git - jgit.git/commitdiff
dfs: write reftable from DfsGarbageCollector 72/101472/18
authorShawn Pearce <spearce@spearce.org>
Thu, 10 Aug 2017 23:41:26 +0000 (16:41 -0700)
committerShawn Pearce <spearce@spearce.org>
Tue, 5 Sep 2017 16:10:16 +0000 (09:10 -0700)
If a ReftableConfig has been supplied by the caller, write out a
reftable as a sibling of the the GC pack, alongside the heads.

To bootstrap from a non-reftable system, the refs are read from the
DfsRefDatabase if no GC reftables are present.  Its assumed the
references are fully current, and do not need to be merged with any
other reftables.  Any non-GC reftables will be pruned at the end of
the GC cycle, just like any packs that were replaced.

If a GC reftable is present, all existing reftables are compacted, and
references from DfsRefDatabase are only used to seed the packer.  Its
assumed these are consistent with each other.

Change-Id: Ie397eb58aaaefb6865c816d9b39de3ac12998019

org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/dfs/DfsGarbageCollectorTest.java
org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsGarbageCollector.java

index e4dcc2e87366ac673a736459f6fb177514b997f0..55a5f726de61258e26d4e8caed04d59447277534 100644 (file)
@@ -5,6 +5,7 @@ import static org.eclipse.jgit.internal.storage.dfs.DfsObjDatabase.PackSource.GC
 import static org.eclipse.jgit.internal.storage.dfs.DfsObjDatabase.PackSource.INSERT;
 import static org.eclipse.jgit.internal.storage.dfs.DfsObjDatabase.PackSource.UNREACHABLE_GARBAGE;
 import static org.eclipse.jgit.internal.storage.pack.PackExt.PACK;
+import static org.eclipse.jgit.internal.storage.pack.PackExt.REFTABLE;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertNotNull;
@@ -13,19 +14,29 @@ import static org.junit.Assert.assertTrue;
 import static org.junit.Assert.fail;
 
 import java.io.IOException;
+import java.nio.charset.StandardCharsets;
 import java.util.Collections;
 import java.util.concurrent.TimeUnit;
 
 import org.eclipse.jgit.internal.storage.dfs.DfsObjDatabase.PackSource;
+import org.eclipse.jgit.internal.storage.reftable.RefCursor;
+import org.eclipse.jgit.internal.storage.reftable.ReftableConfig;
+import org.eclipse.jgit.internal.storage.reftable.ReftableReader;
+import org.eclipse.jgit.internal.storage.reftable.ReftableWriter;
 import org.eclipse.jgit.junit.MockSystemReader;
 import org.eclipse.jgit.junit.TestRepository;
 import org.eclipse.jgit.lib.AnyObjectId;
+import org.eclipse.jgit.lib.BatchRefUpdate;
+import org.eclipse.jgit.lib.NullProgressMonitor;
+import org.eclipse.jgit.lib.ObjectId;
+import org.eclipse.jgit.lib.ObjectIdRef;
 import org.eclipse.jgit.lib.Ref;
 import org.eclipse.jgit.lib.Repository;
 import org.eclipse.jgit.revwalk.RevBlob;
 import org.eclipse.jgit.revwalk.RevCommit;
 import org.eclipse.jgit.revwalk.RevWalk;
 import org.eclipse.jgit.storage.pack.PackConfig;
+import org.eclipse.jgit.transport.ReceiveCommand;
 import org.eclipse.jgit.util.SystemReader;
 import org.junit.After;
 import org.junit.Before;
@@ -653,6 +664,185 @@ public class DfsGarbageCollectorTest {
                assertEquals(2, odb.getPacks().length);
        }
 
+       @SuppressWarnings("boxing")
+       @Test
+       public void producesNewReftable() throws Exception {
+               String master = "refs/heads/master";
+               RevCommit commit0 = commit().message("0").create();
+               RevCommit commit1 = commit().message("1").parent(commit0).create();
+
+               BatchRefUpdate bru = git.getRepository().getRefDatabase()
+                               .newBatchUpdate();
+               bru.addCommand(new ReceiveCommand(ObjectId.zeroId(), commit1, master));
+               for (int i = 1; i <= 5100; i++) {
+                       bru.addCommand(new ReceiveCommand(ObjectId.zeroId(), commit0,
+                                       String.format("refs/pulls/%04d", i)));
+               }
+               try (RevWalk rw = new RevWalk(git.getRepository())) {
+                       bru.execute(rw, NullProgressMonitor.INSTANCE);
+               }
+
+               DfsGarbageCollector gc = new DfsGarbageCollector(repo);
+               gc.setReftableConfig(new ReftableConfig());
+               run(gc);
+
+               // Single GC pack present with all objects.
+               assertEquals(1, odb.getPacks().length);
+               DfsPackFile pack = odb.getPacks()[0];
+               DfsPackDescription desc = pack.getPackDescription();
+               assertEquals(GC, desc.getPackSource());
+               assertTrue("commit0 in pack", isObjectInPack(commit0, pack));
+               assertTrue("commit1 in pack", isObjectInPack(commit1, pack));
+
+               // Sibling REFTABLE is also present.
+               assertTrue(desc.hasFileExt(REFTABLE));
+               ReftableWriter.Stats stats = desc.getReftableStats();
+               assertNotNull(stats);
+               assertTrue(stats.totalBytes() > 0);
+               assertEquals(5101, stats.refCount());
+               assertEquals(1, stats.minUpdateIndex());
+               assertEquals(1, stats.maxUpdateIndex());
+
+               DfsReftable table = new DfsReftable(DfsBlockCache.getInstance(), desc);
+               try (DfsReader ctx = odb.newReader();
+                               ReftableReader rr = table.open(ctx);
+                               RefCursor rc = rr.seekRef("refs/pulls/5100")) {
+                       assertTrue(rc.next());
+                       assertEquals(commit0, rc.getRef().getObjectId());
+                       assertFalse(rc.next());
+               }
+       }
+
+       @Test
+       public void leavesNonGcReftablesIfNotConfigured() throws Exception {
+               String master = "refs/heads/master";
+               RevCommit commit0 = commit().message("0").create();
+               RevCommit commit1 = commit().message("1").parent(commit0).create();
+               git.update(master, commit1);
+
+               DfsPackDescription t1 = odb.newPack(INSERT);
+               try (DfsOutputStream out = odb.writeFile(t1, REFTABLE)) {
+                       out.write("ignored".getBytes(StandardCharsets.UTF_8));
+                       t1.addFileExt(REFTABLE);
+               }
+               odb.commitPack(Collections.singleton(t1), null);
+
+               DfsGarbageCollector gc = new DfsGarbageCollector(repo);
+               gc.setReftableConfig(null);
+               run(gc);
+
+               // Single GC pack present with all objects.
+               assertEquals(1, odb.getPacks().length);
+               DfsPackFile pack = odb.getPacks()[0];
+               DfsPackDescription desc = pack.getPackDescription();
+               assertEquals(GC, desc.getPackSource());
+               assertTrue("commit0 in pack", isObjectInPack(commit0, pack));
+               assertTrue("commit1 in pack", isObjectInPack(commit1, pack));
+
+               // Only INSERT REFTABLE above is present.
+               DfsReftable[] tables = odb.getReftables();
+               assertEquals(1, tables.length);
+               assertEquals(t1, tables[0].getPackDescription());
+       }
+
+       @Test
+       public void prunesNonGcReftables() throws Exception {
+               String master = "refs/heads/master";
+               RevCommit commit0 = commit().message("0").create();
+               RevCommit commit1 = commit().message("1").parent(commit0).create();
+               git.update(master, commit1);
+
+               DfsPackDescription t1 = odb.newPack(INSERT);
+               try (DfsOutputStream out = odb.writeFile(t1, REFTABLE)) {
+                       out.write("ignored".getBytes(StandardCharsets.UTF_8));
+                       t1.addFileExt(REFTABLE);
+               }
+               odb.commitPack(Collections.singleton(t1), null);
+               odb.clearCache();
+
+               DfsGarbageCollector gc = new DfsGarbageCollector(repo);
+               gc.setReftableConfig(new ReftableConfig());
+               run(gc);
+
+               // Single GC pack present with all objects.
+               assertEquals(1, odb.getPacks().length);
+               DfsPackFile pack = odb.getPacks()[0];
+               DfsPackDescription desc = pack.getPackDescription();
+               assertEquals(GC, desc.getPackSource());
+               assertTrue("commit0 in pack", isObjectInPack(commit0, pack));
+               assertTrue("commit1 in pack", isObjectInPack(commit1, pack));
+
+               // Only sibling GC REFTABLE is present.
+               DfsReftable[] tables = odb.getReftables();
+               assertEquals(1, tables.length);
+               assertEquals(desc, tables[0].getPackDescription());
+               assertTrue(desc.hasFileExt(REFTABLE));
+       }
+
+       @Test
+       public void compactsReftables() throws Exception {
+               String master = "refs/heads/master";
+               RevCommit commit0 = commit().message("0").create();
+               RevCommit commit1 = commit().message("1").parent(commit0).create();
+               git.update(master, commit1);
+
+               DfsGarbageCollector gc = new DfsGarbageCollector(repo);
+               gc.setReftableConfig(new ReftableConfig());
+               run(gc);
+
+               DfsPackDescription t1 = odb.newPack(INSERT);
+               Ref next = new ObjectIdRef.PeeledNonTag(Ref.Storage.LOOSE,
+                               "refs/heads/next", commit0.copy());
+               try (DfsOutputStream out = odb.writeFile(t1, REFTABLE)) {
+                       ReftableWriter w = new ReftableWriter();
+                       w.setMinUpdateIndex(42);
+                       w.setMaxUpdateIndex(42);
+                       w.begin(out);
+                       w.sortAndWriteRefs(Collections.singleton(next));
+                       w.finish();
+                       t1.addFileExt(REFTABLE);
+                       t1.setReftableStats(w.getStats());
+               }
+               odb.commitPack(Collections.singleton(t1), null);
+
+               gc = new DfsGarbageCollector(repo);
+               gc.setReftableConfig(new ReftableConfig());
+               run(gc);
+
+               // Single GC pack present with all objects.
+               assertEquals(1, odb.getPacks().length);
+               DfsPackFile pack = odb.getPacks()[0];
+               DfsPackDescription desc = pack.getPackDescription();
+               assertEquals(GC, desc.getPackSource());
+               assertTrue("commit0 in pack", isObjectInPack(commit0, pack));
+               assertTrue("commit1 in pack", isObjectInPack(commit1, pack));
+
+               // Only sibling GC REFTABLE is present.
+               DfsReftable[] tables = odb.getReftables();
+               assertEquals(1, tables.length);
+               assertEquals(desc, tables[0].getPackDescription());
+               assertTrue(desc.hasFileExt(REFTABLE));
+
+               // GC reftable contains the compaction.
+               DfsReftable table = new DfsReftable(DfsBlockCache.getInstance(), desc);
+               try (DfsReader ctx = odb.newReader();
+                               ReftableReader rr = table.open(ctx);
+                               RefCursor rc = rr.allRefs()) {
+                       assertEquals(1, rr.minUpdateIndex());
+                       assertEquals(42, rr.maxUpdateIndex());
+
+                       assertTrue(rc.next());
+                       assertEquals(master, rc.getRef().getName());
+                       assertEquals(commit1, rc.getRef().getObjectId());
+
+                       assertTrue(rc.next());
+                       assertEquals(next.getName(), rc.getRef().getName());
+                       assertEquals(commit0, rc.getRef().getObjectId());
+
+                       assertFalse(rc.next());
+               }
+       }
+
        private TestRepository<InMemoryRepository>.CommitBuilder commit() {
                return git.commit();
        }
index ce2b05382a7d47be4e9f9dbc9b53f43e3af9a469..7914d587a3fda76852578e8cb26f0bc96d831dac 100644 (file)
@@ -50,13 +50,16 @@ import static org.eclipse.jgit.internal.storage.dfs.DfsObjDatabase.PackSource.GC
 import static org.eclipse.jgit.internal.storage.dfs.DfsObjDatabase.PackSource.INSERT;
 import static org.eclipse.jgit.internal.storage.dfs.DfsObjDatabase.PackSource.RECEIVE;
 import static org.eclipse.jgit.internal.storage.dfs.DfsObjDatabase.PackSource.UNREACHABLE_GARBAGE;
+import static org.eclipse.jgit.internal.storage.dfs.DfsPackCompactor.configureReftable;
 import static org.eclipse.jgit.internal.storage.pack.PackExt.BITMAP_INDEX;
 import static org.eclipse.jgit.internal.storage.pack.PackExt.INDEX;
 import static org.eclipse.jgit.internal.storage.pack.PackExt.PACK;
+import static org.eclipse.jgit.internal.storage.pack.PackExt.REFTABLE;
 import static org.eclipse.jgit.internal.storage.pack.PackWriter.NONE;
 
 import java.io.IOException;
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.Calendar;
 import java.util.Collection;
 import java.util.EnumSet;
@@ -72,6 +75,9 @@ import org.eclipse.jgit.internal.storage.file.PackIndex;
 import org.eclipse.jgit.internal.storage.file.PackReverseIndex;
 import org.eclipse.jgit.internal.storage.pack.PackExt;
 import org.eclipse.jgit.internal.storage.pack.PackWriter;
+import org.eclipse.jgit.internal.storage.reftable.ReftableCompactor;
+import org.eclipse.jgit.internal.storage.reftable.ReftableConfig;
+import org.eclipse.jgit.internal.storage.reftable.ReftableWriter;
 import org.eclipse.jgit.internal.storage.reftree.RefTreeNames;
 import org.eclipse.jgit.lib.AnyObjectId;
 import org.eclipse.jgit.lib.Constants;
@@ -94,14 +100,15 @@ public class DfsGarbageCollector {
        private final DfsObjDatabase objdb;
 
        private final List<DfsPackDescription> newPackDesc;
-
        private final List<PackStatistics> newPackStats;
-
        private final List<ObjectIdSet> newPackObj;
 
        private DfsReader ctx;
 
        private PackConfig packConfig;
+       private ReftableConfig reftableConfig;
+       private long reftableInitialMinUpdateIndex = 1;
+       private long reftableInitialMaxUpdateIndex = 1;
 
        // See packIsCoalesceableGarbage(), below, for how these two variables
        // interact.
@@ -110,8 +117,10 @@ public class DfsGarbageCollector {
 
        private long startTimeMillis;
        private List<DfsPackFile> packsBefore;
+       private List<DfsReftable> reftablesBefore;
        private List<DfsPackFile> expiredGarbagePacks;
 
+       private Collection<Ref> refsBefore;
        private Set<ObjectId> allHeadsAndTags;
        private Set<ObjectId> allTags;
        private Set<ObjectId> nonHeads;
@@ -151,6 +160,57 @@ public class DfsGarbageCollector {
                return this;
        }
 
+       /**
+        * @param cfg
+        *            configuration to write a reftable. Reftable writing is
+        *            disabled (default) when {@code cfg} is {@code null}.
+        * @return {@code this}
+        */
+       public DfsGarbageCollector setReftableConfig(ReftableConfig cfg) {
+               reftableConfig = cfg;
+               return this;
+       }
+
+       /**
+        * Set minUpdateIndex for the initial reftable created during conversion.
+        * <p>
+        * <b>Warning:</b> A setting {@code != 1} <b>disables cache refreshes</b>
+        * normally performed at the start of {@link #pack(ProgressMonitor)}.
+        * Callers must ensure the reference cache is current and will have been
+        * read before the pack list.
+        *
+        * @param u
+        *            minUpdateIndex for the initial reftable created by scanning
+        *            {@link DfsRefDatabase#getRefs(String)}. Ignored unless caller
+        *            has also set {@link #setReftableConfig(ReftableConfig)}.
+        *            Defaults to {@code 1}. Must be {@code u >= 0}.
+        * @return {@code this}
+        */
+       public DfsGarbageCollector setReftableInitialMinUpdateIndex(long u) {
+               reftableInitialMinUpdateIndex = Math.max(u, 0);
+               return this;
+       }
+
+       /**
+        * Set maxUpdateIndex for the initial reftable created during conversion.
+        * <p>
+        * <b>Warning:</b> A setting {@code != 1} <b>disables cache refreshes</b>
+        * normally performed at the start of {@link #pack(ProgressMonitor)}.
+        * Callers must ensure the reference cache is current and will have been
+        * read before the pack list.
+        *
+        * @param u
+        *            maxUpdateIndex for the initial reftable created by scanning
+        *            {@link DfsRefDatabase#getRefs(String)}. Ignored unless caller
+        *            has also set {@link #setReftableConfig(ReftableConfig)}.
+        *            Defaults to {@code 1}. Must be {@code u >= 0}.
+        * @return {@code this}
+        */
+       public DfsGarbageCollector setReftableInitialMaxUpdateIndex(long u) {
+               reftableInitialMaxUpdateIndex = Math.max(0, u);
+               return this;
+       }
+
        /** @return garbage packs smaller than this size will be repacked. */
        public long getCoalesceGarbageLimit() {
                return coalesceGarbageLimit;
@@ -240,8 +300,9 @@ public class DfsGarbageCollector {
                        refdb.refresh();
                        objdb.clearCache();
 
-                       Collection<Ref> refsBefore = getAllRefs();
+                       refsBefore = getAllRefs();
                        readPacksBefore();
+                       readReftablesBefore();
 
                        Set<ObjectId> allHeads = new HashSet<>();
                        allHeadsAndTags = new HashSet<>();
@@ -333,6 +394,11 @@ public class DfsGarbageCollector {
                }
        }
 
+       private void readReftablesBefore() throws IOException {
+               DfsReftable[] tables = objdb.getReftables();
+               reftablesBefore = new ArrayList<>(Arrays.asList(tables));
+       }
+
        private boolean packIsExpiredGarbage(DfsPackDescription d, long now) {
                // Consider the garbage pack as expired when it's older than
                // garbagePackTtl. This check gives concurrent inserter threads
@@ -407,7 +473,7 @@ public class DfsGarbageCollector {
        }
 
        /** @return all of the source packs that fed into this compaction. */
-       public List<DfsPackDescription> getSourcePacks() {
+       public Set<DfsPackDescription> getSourcePacks() {
                return toPrune();
        }
 
@@ -421,28 +487,37 @@ public class DfsGarbageCollector {
                return newPackStats;
        }
 
-       private List<DfsPackDescription> toPrune() {
-               int cnt = packsBefore.size();
-               List<DfsPackDescription> all = new ArrayList<>(cnt);
+       private Set<DfsPackDescription> toPrune() {
+               Set<DfsPackDescription> toPrune = new HashSet<>();
                for (DfsPackFile pack : packsBefore) {
-                       all.add(pack.getPackDescription());
+                       toPrune.add(pack.getPackDescription());
+               }
+               if (reftableConfig != null) {
+                       for (DfsReftable table : reftablesBefore) {
+                               toPrune.add(table.getPackDescription());
+                       }
                }
                for (DfsPackFile pack : expiredGarbagePacks) {
-                       all.add(pack.getPackDescription());
+                       toPrune.add(pack.getPackDescription());
                }
-               return all;
+               return toPrune;
        }
 
        private void packHeads(ProgressMonitor pm) throws IOException {
-               if (allHeadsAndTags.isEmpty())
+               if (allHeadsAndTags.isEmpty()) {
+                       writeReftable();
                        return;
+               }
 
                try (PackWriter pw = newPackWriter()) {
                        pw.setTagTargets(tagTargets);
                        pw.preparePack(pm, allHeadsAndTags, NONE, NONE, allTags);
-                       if (0 < pw.getObjectCount())
-                               writePack(GC, pw, pm,
-                                               estimateGcPackSize(INSERT, RECEIVE, COMPACT, GC));
+                       if (0 < pw.getObjectCount()) {
+                               long estSize = estimateGcPackSize(INSERT, RECEIVE, COMPACT, GC);
+                               writePack(GC, pw, pm, estSize);
+                       } else {
+                               writeReftable();
+                       }
                }
        }
 
@@ -560,6 +635,10 @@ public class DfsGarbageCollector {
                                estimatedPackSize);
                newPackDesc.add(pack);
 
+               if (source == GC && reftableConfig != null) {
+                       writeReftable(pack);
+               }
+
                try (DfsOutputStream out = objdb.writeFile(pack, PACK)) {
                        pw.writePack(pm, pm, out);
                        pack.addFileExt(PACK);
@@ -592,4 +671,60 @@ public class DfsGarbageCollector {
                newPackObj.add(pw.getObjectSet());
                return pack;
        }
+
+       private void writeReftable() throws IOException {
+               if (reftableConfig != null) {
+                       DfsPackDescription pack = objdb.newPack(GC);
+                       newPackDesc.add(pack);
+                       writeReftable(pack);
+               }
+       }
+
+       private void writeReftable(DfsPackDescription pack) throws IOException {
+               if (!hasGcReftable()) {
+                       writeReftable(pack, refsBefore);
+                       return;
+               }
+
+               try (ReftableStack stack = ReftableStack.open(ctx, reftablesBefore)) {
+                       ReftableCompactor compact = new ReftableCompactor();
+                       compact.addAll(stack.readers());
+                       compact.setIncludeDeletes(false);
+                       compactReftable(pack, compact);
+               }
+       }
+
+       private boolean hasGcReftable() {
+               for (DfsReftable table : reftablesBefore) {
+                       if (table.getPackDescription().getPackSource() == GC) {
+                               return true;
+                       }
+               }
+               return false;
+       }
+
+       private void writeReftable(DfsPackDescription pack, Collection<Ref> refs)
+                       throws IOException {
+               try (DfsOutputStream out = objdb.writeFile(pack, REFTABLE)) {
+                       ReftableConfig cfg = configureReftable(reftableConfig, out);
+                       ReftableWriter writer = new ReftableWriter(cfg)
+                                       .setMinUpdateIndex(reftableInitialMinUpdateIndex)
+                                       .setMaxUpdateIndex(reftableInitialMaxUpdateIndex)
+                                       .begin(out)
+                                       .sortAndWriteRefs(refs)
+                                       .finish();
+                       pack.addFileExt(REFTABLE);
+                       pack.setReftableStats(writer.getStats());
+               }
+       }
+
+       private void compactReftable(DfsPackDescription pack,
+                       ReftableCompactor compact) throws IOException {
+               try (DfsOutputStream out = objdb.writeFile(pack, REFTABLE)) {
+                       compact.setConfig(configureReftable(reftableConfig, out));
+                       compact.compact(out);
+                       pack.addFileExt(REFTABLE);
+                       pack.setReftableStats(compact.getStats());
+               }
+       }
 }