From 40051505d7aaccfe2efaf5f3022f1d99a3976554 Mon Sep 17 00:00:00 2001 From: Shawn Pearce Date: Tue, 12 Jan 2016 10:50:36 -0800 Subject: GC: Pack RefTrees in their own pack The RefTree graph needs to be quickly accessed to read references. It is also a distinct graph disconnected from the rest of the repository. Store the commit and tree objects in their own pack. Change-Id: Icbb735be8fa91ccbf0708ca3a219b364e11a6b83 --- .../internal/storage/dfs/DfsGarbageCollector.java | 38 +++++-- .../jgit/internal/storage/dfs/DfsObjDatabase.java | 7 ++ .../org/eclipse/jgit/internal/storage/file/GC.java | 61 ++++++---- .../internal/storage/reftree/RefTreeNames.java | 124 +++++++++++++++++++++ 4 files changed, 197 insertions(+), 33 deletions(-) create mode 100644 org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/reftree/RefTreeNames.java diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsGarbageCollector.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsGarbageCollector.java index bb51fc4ca7..c48a49da7a 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsGarbageCollector.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsGarbageCollector.java @@ -44,18 +44,18 @@ package org.eclipse.jgit.internal.storage.dfs; import static org.eclipse.jgit.internal.storage.dfs.DfsObjDatabase.PackSource.GC; +import static org.eclipse.jgit.internal.storage.dfs.DfsObjDatabase.PackSource.GC_TXN; import static org.eclipse.jgit.internal.storage.dfs.DfsObjDatabase.PackSource.UNREACHABLE_GARBAGE; import static org.eclipse.jgit.internal.storage.pack.PackExt.BITMAP_INDEX; import static org.eclipse.jgit.internal.storage.pack.PackExt.INDEX; import static org.eclipse.jgit.internal.storage.pack.PackExt.PACK; -import static org.eclipse.jgit.lib.RefDatabase.ALL; import java.io.IOException; import java.util.ArrayList; +import java.util.Collection; import java.util.Collections; import java.util.HashSet; import java.util.List; 
-import java.util.Map; import java.util.Set; import org.eclipse.jgit.internal.JGitText; @@ -63,6 +63,7 @@ import org.eclipse.jgit.internal.storage.dfs.DfsObjDatabase.PackSource; import org.eclipse.jgit.internal.storage.file.PackIndex; import org.eclipse.jgit.internal.storage.pack.PackExt; import org.eclipse.jgit.internal.storage.pack.PackWriter; +import org.eclipse.jgit.internal.storage.reftree.RefTreeNames; import org.eclipse.jgit.lib.AnyObjectId; import org.eclipse.jgit.lib.Constants; import org.eclipse.jgit.lib.NullProgressMonitor; @@ -94,14 +95,11 @@ public class DfsGarbageCollector { private long coalesceGarbageLimit = 50 << 20; - private Map refsBefore; - private List packsBefore; private Set allHeads; - private Set nonHeads; - + private Set txnHeads; private Set tagTargets; /** @@ -197,19 +195,22 @@ public class DfsGarbageCollector { refdb.refresh(); objdb.clearCache(); - refsBefore = refdb.getRefs(ALL); + Collection refsBefore = RefTreeNames.allRefs(refdb); packsBefore = packsToRebuild(); if (packsBefore.isEmpty()) return true; allHeads = new HashSet(); nonHeads = new HashSet(); + txnHeads = new HashSet(); tagTargets = new HashSet(); - for (Ref ref : refsBefore.values()) { + for (Ref ref : refsBefore) { if (ref.isSymbolic() || ref.getObjectId() == null) continue; if (isHead(ref)) allHeads.add(ref.getObjectId()); + else if (RefTreeNames.isRefTree(refdb, ref.getName())) + txnHeads.add(ref.getObjectId()); else nonHeads.add(ref.getObjectId()); if (ref.getPeeledObjectId() != null) @@ -221,6 +222,7 @@ public class DfsGarbageCollector { try { packHeads(pm); packRest(pm); + packRefTreeGraph(pm); packGarbage(pm); objdb.commitPack(newPackDesc, toPrune()); rollback = false; @@ -276,12 +278,11 @@ public class DfsGarbageCollector { try (PackWriter pw = newPackWriter()) { pw.setTagTargets(tagTargets); - pw.preparePack(pm, allHeads, Collections. 
emptySet()); + pw.preparePack(pm, allHeads, none()); if (0 < pw.getObjectCount()) writePack(GC, pw, pm); } } - private void packRest(ProgressMonitor pm) throws IOException { if (nonHeads.isEmpty()) return; @@ -295,6 +296,23 @@ public class DfsGarbageCollector { } } + private void packRefTreeGraph(ProgressMonitor pm) throws IOException { + if (txnHeads.isEmpty()) + return; + + try (PackWriter pw = newPackWriter()) { + for (ObjectIdSet packedObjs : newPackObj) + pw.excludeObjects(packedObjs); + pw.preparePack(pm, txnHeads, none()); + if (0 < pw.getObjectCount()) + writePack(GC_TXN, pw, pm); + } + } + + private static Set none() { + return Collections. emptySet(); + } + private void packGarbage(ProgressMonitor pm) throws IOException { // TODO(sop) This is ugly. The garbage pack needs to be deleted. PackConfig cfg = new PackConfig(packConfig); diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsObjDatabase.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsObjDatabase.java index 5f491ff2fd..3641560ee9 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsObjDatabase.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsObjDatabase.java @@ -90,6 +90,13 @@ public abstract class DfsObjDatabase extends ObjectDatabase { */ GC(1), + /** + * RefTreeGraph pack was created by Git garbage collection. + * + * @see DfsGarbageCollector + */ + GC_TXN(1), + /** * The pack was created by compacting multiple packs together. *

diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/GC.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/GC.java index a5c95b3bca..8677164a67 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/GC.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/GC.java @@ -45,7 +45,6 @@ package org.eclipse.jgit.internal.storage.file; import static org.eclipse.jgit.internal.storage.pack.PackExt.BITMAP_INDEX; import static org.eclipse.jgit.internal.storage.pack.PackExt.INDEX; -import static org.eclipse.jgit.lib.RefDatabase.ALL; import java.io.File; import java.io.FileOutputStream; @@ -63,11 +62,9 @@ import java.util.Comparator; import java.util.Date; import java.util.HashMap; import java.util.HashSet; -import java.util.Iterator; import java.util.LinkedList; import java.util.List; import java.util.Map; -import java.util.Map.Entry; import java.util.Objects; import java.util.Set; import java.util.TreeMap; @@ -80,6 +77,7 @@ import org.eclipse.jgit.errors.NoWorkTreeException; import org.eclipse.jgit.internal.JGitText; import org.eclipse.jgit.internal.storage.pack.PackExt; import org.eclipse.jgit.internal.storage.pack.PackWriter; +import org.eclipse.jgit.internal.storage.reftree.RefTreeNames; import org.eclipse.jgit.lib.ConfigConstants; import org.eclipse.jgit.lib.Constants; import org.eclipse.jgit.lib.FileMode; @@ -128,7 +126,7 @@ public class GC { * difference between the current refs and the refs which existed during * last {@link #repack()}. */ - private Map lastPackedRefs; + private Collection lastPackedRefs; /** * Holds the starting time of the last repack() execution. This is needed in @@ -362,17 +360,20 @@ public class GC { // during last repack(). Only those refs will survive which have been // added or modified since the last repack. Only these can save existing // loose refs from being pruned. 
- Map newRefs; + Collection newRefs; if (lastPackedRefs == null || lastPackedRefs.isEmpty()) newRefs = getAllRefs(); else { - newRefs = new HashMap(); - for (Iterator> i = getAllRefs().entrySet() - .iterator(); i.hasNext();) { - Entry newEntry = i.next(); - Ref old = lastPackedRefs.get(newEntry.getKey()); - if (!equals(newEntry.getValue(), old)) - newRefs.put(newEntry.getKey(), newEntry.getValue()); + Map last = new HashMap<>(); + for (Ref r : lastPackedRefs) { + last.put(r.getName(), r); + } + newRefs = new ArrayList<>(); + for (Ref r : getAllRefs()) { + Ref old = last.get(r.getName()); + if (!equals(r, old)) { + newRefs.add(r); + } } } @@ -384,10 +385,10 @@ public class GC { // leave this method. ObjectWalk w = new ObjectWalk(repo); try { - for (Ref cr : newRefs.values()) + for (Ref cr : newRefs) w.markStart(w.parseAny(cr.getObjectId())); if (lastPackedRefs != null) - for (Ref lpr : lastPackedRefs.values()) + for (Ref lpr : lastPackedRefs) w.markUninteresting(w.parseAny(lpr.getObjectId())); removeReferenced(deletionCandidates, w); } finally { @@ -405,11 +406,11 @@ public class GC { // additional reflog entries not handled during last repack() ObjectWalk w = new ObjectWalk(repo); try { - for (Ref ar : getAllRefs().values()) + for (Ref ar : getAllRefs()) for (ObjectId id : listRefLogObjects(ar, lastRepackTime)) w.markStart(w.parseAny(id)); if (lastPackedRefs != null) - for (Ref lpr : lastPackedRefs.values()) + for (Ref lpr : lastPackedRefs) w.markUninteresting(w.parseAny(lpr.getObjectId())); removeReferenced(deletionCandidates, w); } finally { @@ -530,19 +531,23 @@ public class GC { Collection toBeDeleted = repo.getObjectDatabase().getPacks(); long time = System.currentTimeMillis(); - Map refsBefore = getAllRefs(); + Collection refsBefore = getAllRefs(); Set allHeads = new HashSet(); Set nonHeads = new HashSet(); + Set txnHeads = new HashSet(); Set tagTargets = new HashSet(); Set indexObjects = listNonHEADIndexObjects(); + RefDatabase refdb = repo.getRefDatabase(); 
- for (Ref ref : refsBefore.values()) { + for (Ref ref : refsBefore) { nonHeads.addAll(listRefLogObjects(ref, 0)); if (ref.isSymbolic() || ref.getObjectId() == null) continue; if (ref.getName().startsWith(Constants.R_HEADS)) allHeads.add(ref.getObjectId()); + else if (RefTreeNames.isRefTree(refdb, ref.getName())) + txnHeads.add(ref.getObjectId()); else nonHeads.add(ref.getObjectId()); if (ref.getPeeledObjectId() != null) @@ -572,6 +577,11 @@ public class GC { if (rest != null) ret.add(rest); } + if (!txnHeads.isEmpty()) { + PackFile txn = writePack(txnHeads, null, null, excluded); + if (txn != null) + ret.add(txn); + } try { deleteOldPacks(toBeDeleted, ret); } catch (ParseException e) { @@ -624,11 +634,16 @@ public class GC { * @return a map where names of refs point to ref objects * @throws IOException */ - private Map getAllRefs() throws IOException { - Map ret = repo.getRefDatabase().getRefs(ALL); - for (Ref ref : repo.getRefDatabase().getAdditionalRefs()) - ret.put(ref.getName(), ref); - return ret; + private Collection getAllRefs() throws IOException { + Collection refs = RefTreeNames.allRefs(repo.getRefDatabase()); + List addl = repo.getRefDatabase().getAdditionalRefs(); + if (!addl.isEmpty()) { + List all = new ArrayList<>(refs.size() + addl.size()); + all.addAll(refs); + all.addAll(addl); + return all; + } + return refs; } /** diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/reftree/RefTreeNames.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/reftree/RefTreeNames.java new file mode 100644 index 0000000000..239a745277 --- /dev/null +++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/reftree/RefTreeNames.java @@ -0,0 +1,124 @@ +/* + * Copyright (C) 2016, Google Inc. + * and other copyright owners as documented in the project's IP log. 
+ * + * This program and the accompanying materials are made available + * under the terms of the Eclipse Distribution License v1.0 which + * accompanies this distribution, is reproduced below, and is + * available at http://www.eclipse.org/org/documents/edl-v10.php + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * - Neither the name of the Eclipse Foundation, Inc. nor the + * names of its contributors may be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +package org.eclipse.jgit.internal.storage.reftree; + +import static org.eclipse.jgit.lib.RefDatabase.ALL; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; + +import org.eclipse.jgit.lib.Ref; +import org.eclipse.jgit.lib.RefDatabase; + +/** Magic reference name logic for RefTrees. */ +public class RefTreeNames { + /** + * Suffix used on a {@link RefTreeDatabase#getTxnNamespace()} for user data. + *

+ * A {@link RefTreeDatabase}'s namespace may include a subspace (e.g. + * {@code "refs/txn/stage/"}) containing commit objects from the usual user + * portion of the repository (e.g. {@code "refs/heads/"}). These should be + * packed by the garbage collector alongside other user content rather than + * with the RefTree. + */ + private static final String STAGE = "stage/"; //$NON-NLS-1$ + + /** + * Determine if the reference is likely to be a RefTree. + * + * @param refdb + * database instance. + * @param ref + * reference name. + * @return {@code true} if the reference is a RefTree. + */ + public static boolean isRefTree(RefDatabase refdb, String ref) { + if (refdb instanceof RefTreeDatabase) { + RefTreeDatabase b = (RefTreeDatabase) refdb; + if (ref.equals(b.getTxnCommitted())) { + return true; + } + + String namespace = b.getTxnNamespace(); + if (namespace != null + && ref.startsWith(namespace) + && !ref.startsWith(namespace + STAGE)) { + return true; + } + } + return false; + } + + /** + * Snapshot all references from a RefTreeDatabase and its bootstrap. + *

+ * There may be name conflicts with multiple {@link Ref} objects containing + * the same name in the returned collection. + * + * @param refdb + * database instance. + * @return all known references. + * @throws IOException + * references cannot be enumerated. + */ + public static Collection allRefs(RefDatabase refdb) + throws IOException { + Collection refs = refdb.getRefs(ALL).values(); + if (!(refdb instanceof RefTreeDatabase)) { + return refs; + } + + RefDatabase bootstrap = ((RefTreeDatabase) refdb).getBootstrap(); + Collection br = bootstrap.getRefs(ALL).values(); + List all = new ArrayList<>(refs.size() + br.size()); + all.addAll(refs); + all.addAll(br); + return all; + } + + private RefTreeNames() { + } +} -- cgit v1.2.3