Browse Source

GC: Pack RefTrees in their own pack

The RefTree graph needs to be quickly accessed to read references.
It is also a distinct graph, disconnected from the rest of the
repository. Store the commit and tree objects in their own pack.

Change-Id: Icbb735be8fa91ccbf0708ca3a219b364e11a6b83
tags/v4.2.0.201601211800-r
Shawn Pearce 8 years ago
parent
commit
40051505d7

+ 28
- 10
org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsGarbageCollector.java View File

@@ -44,18 +44,18 @@
package org.eclipse.jgit.internal.storage.dfs;

import static org.eclipse.jgit.internal.storage.dfs.DfsObjDatabase.PackSource.GC;
import static org.eclipse.jgit.internal.storage.dfs.DfsObjDatabase.PackSource.GC_TXN;
import static org.eclipse.jgit.internal.storage.dfs.DfsObjDatabase.PackSource.UNREACHABLE_GARBAGE;
import static org.eclipse.jgit.internal.storage.pack.PackExt.BITMAP_INDEX;
import static org.eclipse.jgit.internal.storage.pack.PackExt.INDEX;
import static org.eclipse.jgit.internal.storage.pack.PackExt.PACK;
import static org.eclipse.jgit.lib.RefDatabase.ALL;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.eclipse.jgit.internal.JGitText;
@@ -63,6 +63,7 @@ import org.eclipse.jgit.internal.storage.dfs.DfsObjDatabase.PackSource;
import org.eclipse.jgit.internal.storage.file.PackIndex;
import org.eclipse.jgit.internal.storage.pack.PackExt;
import org.eclipse.jgit.internal.storage.pack.PackWriter;
import org.eclipse.jgit.internal.storage.reftree.RefTreeNames;
import org.eclipse.jgit.lib.AnyObjectId;
import org.eclipse.jgit.lib.Constants;
import org.eclipse.jgit.lib.NullProgressMonitor;
@@ -94,14 +95,11 @@ public class DfsGarbageCollector {

private long coalesceGarbageLimit = 50 << 20;

private Map<String, Ref> refsBefore;

private List<DfsPackFile> packsBefore;

private Set<ObjectId> allHeads;

private Set<ObjectId> nonHeads;
private Set<ObjectId> txnHeads;
private Set<ObjectId> tagTargets;

/**
@@ -197,19 +195,22 @@ public class DfsGarbageCollector {
refdb.refresh();
objdb.clearCache();

refsBefore = refdb.getRefs(ALL);
Collection<Ref> refsBefore = RefTreeNames.allRefs(refdb);
packsBefore = packsToRebuild();
if (packsBefore.isEmpty())
return true;

allHeads = new HashSet<ObjectId>();
nonHeads = new HashSet<ObjectId>();
txnHeads = new HashSet<ObjectId>();
tagTargets = new HashSet<ObjectId>();
for (Ref ref : refsBefore.values()) {
for (Ref ref : refsBefore) {
if (ref.isSymbolic() || ref.getObjectId() == null)
continue;
if (isHead(ref))
allHeads.add(ref.getObjectId());
else if (RefTreeNames.isRefTree(refdb, ref.getName()))
txnHeads.add(ref.getObjectId());
else
nonHeads.add(ref.getObjectId());
if (ref.getPeeledObjectId() != null)
@@ -221,6 +222,7 @@ public class DfsGarbageCollector {
try {
packHeads(pm);
packRest(pm);
packRefTreeGraph(pm);
packGarbage(pm);
objdb.commitPack(newPackDesc, toPrune());
rollback = false;
@@ -276,12 +278,11 @@ public class DfsGarbageCollector {

try (PackWriter pw = newPackWriter()) {
pw.setTagTargets(tagTargets);
pw.preparePack(pm, allHeads, Collections.<ObjectId> emptySet());
pw.preparePack(pm, allHeads, none());
if (0 < pw.getObjectCount())
writePack(GC, pw, pm);
}
}

private void packRest(ProgressMonitor pm) throws IOException {
if (nonHeads.isEmpty())
return;
@@ -295,6 +296,23 @@ public class DfsGarbageCollector {
}
}

/**
 * Write the objects reachable from {@code txnHeads} into their own pack.
 * <p>
 * Every object set in {@code newPackObj} is excluded from the writer first
 * (presumably these are the objects of packs written earlier in this run;
 * confirm against {@code writePack}). A pack is written with source
 * {@code GC_TXN} only if the writer ends up with at least one object.
 *
 * @param pm
 *            progress monitor handed to the pack writer.
 * @throws IOException
 *             propagated from preparing or writing the pack.
 */
private void packRefTreeGraph(ProgressMonitor pm) throws IOException {
	if (!txnHeads.isEmpty()) {
		try (PackWriter writer = newPackWriter()) {
			for (ObjectIdSet alreadyPacked : newPackObj) {
				writer.excludeObjects(alreadyPacked);
			}
			writer.preparePack(pm, txnHeads, none());
			if (writer.getObjectCount() > 0) {
				writePack(GC_TXN, writer, pm);
			}
		}
	}
}

/**
 * Shorthand for an empty "have" set when preparing a pack.
 *
 * @return the shared immutable empty set, typed as a set of object ids.
 */
private static Set<ObjectId> none() {
	// The element type is inferred from the declared return type.
	return Collections.emptySet();
}

private void packGarbage(ProgressMonitor pm) throws IOException {
// TODO(sop) This is ugly. The garbage pack needs to be deleted.
PackConfig cfg = new PackConfig(packConfig);

+ 7
- 0
org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsObjDatabase.java View File

@@ -90,6 +90,13 @@ public abstract class DfsObjDatabase extends ObjectDatabase {
*/
GC(1),

/**
* RefTreeGraph pack was created by Git garbage collection.
*
* @see DfsGarbageCollector
*/
GC_TXN(1),

/**
* The pack was created by compacting multiple packs together.
* <p>

+ 38
- 23
org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/GC.java View File

@@ -45,7 +45,6 @@ package org.eclipse.jgit.internal.storage.file;

import static org.eclipse.jgit.internal.storage.pack.PackExt.BITMAP_INDEX;
import static org.eclipse.jgit.internal.storage.pack.PackExt.INDEX;
import static org.eclipse.jgit.lib.RefDatabase.ALL;

import java.io.File;
import java.io.FileOutputStream;
@@ -63,11 +62,9 @@ import java.util.Comparator;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Objects;
import java.util.Set;
import java.util.TreeMap;
@@ -80,6 +77,7 @@ import org.eclipse.jgit.errors.NoWorkTreeException;
import org.eclipse.jgit.internal.JGitText;
import org.eclipse.jgit.internal.storage.pack.PackExt;
import org.eclipse.jgit.internal.storage.pack.PackWriter;
import org.eclipse.jgit.internal.storage.reftree.RefTreeNames;
import org.eclipse.jgit.lib.ConfigConstants;
import org.eclipse.jgit.lib.Constants;
import org.eclipse.jgit.lib.FileMode;
@@ -128,7 +126,7 @@ public class GC {
* difference between the current refs and the refs which existed during
* last {@link #repack()}.
*/
private Map<String, Ref> lastPackedRefs;
private Collection<Ref> lastPackedRefs;

/**
* Holds the starting time of the last repack() execution. This is needed in
@@ -362,17 +360,20 @@ public class GC {
// during last repack(). Only those refs will survive which have been
// added or modified since the last repack. Only these can save existing
// loose refs from being pruned.
Map<String, Ref> newRefs;
Collection<Ref> newRefs;
if (lastPackedRefs == null || lastPackedRefs.isEmpty())
newRefs = getAllRefs();
else {
newRefs = new HashMap<String, Ref>();
for (Iterator<Map.Entry<String, Ref>> i = getAllRefs().entrySet()
.iterator(); i.hasNext();) {
Entry<String, Ref> newEntry = i.next();
Ref old = lastPackedRefs.get(newEntry.getKey());
if (!equals(newEntry.getValue(), old))
newRefs.put(newEntry.getKey(), newEntry.getValue());
Map<String, Ref> last = new HashMap<>();
for (Ref r : lastPackedRefs) {
last.put(r.getName(), r);
}
newRefs = new ArrayList<>();
for (Ref r : getAllRefs()) {
Ref old = last.get(r.getName());
if (!equals(r, old)) {
newRefs.add(r);
}
}
}

@@ -384,10 +385,10 @@ public class GC {
// leave this method.
ObjectWalk w = new ObjectWalk(repo);
try {
for (Ref cr : newRefs.values())
for (Ref cr : newRefs)
w.markStart(w.parseAny(cr.getObjectId()));
if (lastPackedRefs != null)
for (Ref lpr : lastPackedRefs.values())
for (Ref lpr : lastPackedRefs)
w.markUninteresting(w.parseAny(lpr.getObjectId()));
removeReferenced(deletionCandidates, w);
} finally {
@@ -405,11 +406,11 @@ public class GC {
// additional reflog entries not handled during last repack()
ObjectWalk w = new ObjectWalk(repo);
try {
for (Ref ar : getAllRefs().values())
for (Ref ar : getAllRefs())
for (ObjectId id : listRefLogObjects(ar, lastRepackTime))
w.markStart(w.parseAny(id));
if (lastPackedRefs != null)
for (Ref lpr : lastPackedRefs.values())
for (Ref lpr : lastPackedRefs)
w.markUninteresting(w.parseAny(lpr.getObjectId()));
removeReferenced(deletionCandidates, w);
} finally {
@@ -530,19 +531,23 @@ public class GC {
Collection<PackFile> toBeDeleted = repo.getObjectDatabase().getPacks();

long time = System.currentTimeMillis();
Map<String, Ref> refsBefore = getAllRefs();
Collection<Ref> refsBefore = getAllRefs();

Set<ObjectId> allHeads = new HashSet<ObjectId>();
Set<ObjectId> nonHeads = new HashSet<ObjectId>();
Set<ObjectId> txnHeads = new HashSet<ObjectId>();
Set<ObjectId> tagTargets = new HashSet<ObjectId>();
Set<ObjectId> indexObjects = listNonHEADIndexObjects();
RefDatabase refdb = repo.getRefDatabase();

for (Ref ref : refsBefore.values()) {
for (Ref ref : refsBefore) {
nonHeads.addAll(listRefLogObjects(ref, 0));
if (ref.isSymbolic() || ref.getObjectId() == null)
continue;
if (ref.getName().startsWith(Constants.R_HEADS))
allHeads.add(ref.getObjectId());
else if (RefTreeNames.isRefTree(refdb, ref.getName()))
txnHeads.add(ref.getObjectId());
else
nonHeads.add(ref.getObjectId());
if (ref.getPeeledObjectId() != null)
@@ -572,6 +577,11 @@ public class GC {
if (rest != null)
ret.add(rest);
}
if (!txnHeads.isEmpty()) {
PackFile txn = writePack(txnHeads, null, null, excluded);
if (txn != null)
ret.add(txn);
}
try {
deleteOldPacks(toBeDeleted, ret);
} catch (ParseException e) {
@@ -624,11 +634,16 @@ public class GC {
* @return a collection of all known refs, including the additional refs
* @throws IOException
*/
private Map<String, Ref> getAllRefs() throws IOException {
Map<String, Ref> ret = repo.getRefDatabase().getRefs(ALL);
for (Ref ref : repo.getRefDatabase().getAdditionalRefs())
ret.put(ref.getName(), ref);
return ret;
private Collection<Ref> getAllRefs() throws IOException {
Collection<Ref> refs = RefTreeNames.allRefs(repo.getRefDatabase());
List<Ref> addl = repo.getRefDatabase().getAdditionalRefs();
if (!addl.isEmpty()) {
List<Ref> all = new ArrayList<>(refs.size() + addl.size());
all.addAll(refs);
all.addAll(addl);
return all;
}
return refs;
}

/**

+ 124
- 0
org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/reftree/RefTreeNames.java View File

@@ -0,0 +1,124 @@
/*
* Copyright (C) 2016, Google Inc.
* and other copyright owners as documented in the project's IP log.
*
* This program and the accompanying materials are made available
* under the terms of the Eclipse Distribution License v1.0 which
* accompanies this distribution, is reproduced below, and is
* available at http://www.eclipse.org/org/documents/edl-v10.php
*
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials provided
* with the distribution.
*
* - Neither the name of the Eclipse Foundation, Inc. nor the
* names of its contributors may be used to endorse or promote
* products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

package org.eclipse.jgit.internal.storage.reftree;

import static org.eclipse.jgit.lib.RefDatabase.ALL;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;

import org.eclipse.jgit.lib.Ref;
import org.eclipse.jgit.lib.RefDatabase;

/** Helpers that recognize and enumerate the special reference names of RefTrees. */
public class RefTreeNames {
	/**
	 * Sub-namespace, relative to {@link RefTreeDatabase#getTxnNamespace()},
	 * that holds user data.
	 * <p>
	 * References below this subspace (e.g. {@code "refs/txn/stage/"}) may
	 * name commit objects belonging to the ordinary user portion of the
	 * repository (e.g. {@code "refs/heads/"}). The garbage collector should
	 * pack those alongside other user content, not with the RefTree, so
	 * {@link #isRefTree(RefDatabase, String)} does not treat them as RefTrees.
	 */
	private static final String STAGE = "stage/"; //$NON-NLS-1$

	/**
	 * Determine if the reference is likely to be a RefTree.
	 * <p>
	 * Only a {@link RefTreeDatabase} can yield {@code true}: the name must
	 * either equal the database's committed transaction reference, or lie
	 * inside its transaction namespace but outside the staging subspace.
	 *
	 * @param refdb
	 *            database instance.
	 * @param ref
	 *            reference name.
	 * @return {@code true} if the reference is a RefTree.
	 */
	public static boolean isRefTree(RefDatabase refdb, String ref) {
		if (!(refdb instanceof RefTreeDatabase)) {
			return false;
		}

		RefTreeDatabase treeDb = (RefTreeDatabase) refdb;
		if (ref.equals(treeDb.getTxnCommitted())) {
			return true;
		}

		String txnNamespace = treeDb.getTxnNamespace();
		if (txnNamespace == null || !ref.startsWith(txnNamespace)) {
			return false;
		}
		return !ref.startsWith(txnNamespace + STAGE);
	}

	/**
	 * Snapshot all references from a RefTreeDatabase and its bootstrap.
	 * <p>
	 * For any other kind of database only its own references are returned.
	 * When the bootstrap is included, the returned collection may hold
	 * several {@link Ref} objects that share the same name.
	 *
	 * @param refdb
	 *            database instance.
	 * @return all known references.
	 * @throws IOException
	 *             references cannot be enumerated.
	 */
	public static Collection<Ref> allRefs(RefDatabase refdb)
			throws IOException {
		Collection<Ref> primary = refdb.getRefs(ALL).values();
		if (refdb instanceof RefTreeDatabase) {
			RefTreeDatabase treeDb = (RefTreeDatabase) refdb;
			Collection<Ref> fromBootstrap = treeDb.getBootstrap()
					.getRefs(ALL).values();

			// Plain concatenation: duplicate names are kept on purpose.
			List<Ref> merged = new ArrayList<>(
					primary.size() + fromBootstrap.size());
			merged.addAll(primary);
			merged.addAll(fromBootstrap);
			return merged;
		}
		return primary;
	}

	/** Static utility holder; never instantiated. */
	private RefTreeNames() {
	}
}

Loading…
Cancel
Save