From f5fe2dca3cb9f57891e1a4b18832fcc158d0c490 Mon Sep 17 00:00:00 2001 From: "Shawn O. Pearce" Date: Wed, 26 Jan 2011 14:27:32 -0800 Subject: [PATCH] Teach PackWriter how to reuse an existing object list Counting the objects needed for packing is the most expensive part of an UploadPack request that has no uninteresting objects (otherwise known as an initial clone). During this phase the PackWriter is enumerating the entire set of objects in this repository, so they can be sent to the client for their new clone. Allow the ObjectReader (and therefore the underlying storage system) to keep a cached list of all reachable objects from a small number of points in the project's history. If one of those points is reached during enumeration of the commit graph, most objects are obtained from the cached list instead of direct traversal. PackWriter uses the list by discarding the current object lists and restarting a traversal from all refs but marking the object list name as uninteresting. This allows PackWriter to enumerate all objects that are more recent than the list creation, or that were on side branches that the list does not include. However, ObjectWalk tags all of the trees and commits within the list commit as UNINTERESTING, which would normally cause PackWriter to construct a thin pack that excludes these objects. To avoid that, addObject() was refactored to allow this list-based enumeration to always include an object, even if it has been tagged UNINTERESTING by the ObjectWalk. This implies the list-based enumeration may only be used for initial clones, where all objects are being sent. The UNINTERESTING labeling occurs because StartGenerator always enables the BoundaryGenerator if the walker is an ObjectWalk and a commit was marked UNINTERESTING, even if RevSort.BOUNDARY was not enabled. This is the default reasonable behavior for an ObjectWalk, but isn't desired here in PackWriter with the list-based enumeration. Rather than trying to change all of this behavior, PackWriter works around it. Because the list name commit's immediate files and trees were all enumerated before the list enumeration itself starts (and are also within the list itself) PackWriter runs the risk of adding the same objects to its ObjectIdSubclassMap twice. Since this breaks the internal map data structure (and also may cause the object to transmit twice), PackWriter needs to use a new "added" RevFlag to track whether or not an object has been put into the outgoing list yet. Change-Id: Ie99ed4d969a6bb20cc2528ac6b8fb91043cee071 Signed-off-by: Shawn O. Pearce --- .../org/eclipse/jgit/lib/ObjectReader.java | 32 ++++ .../jgit/revwalk/ObjectListIterator.java | 140 ++++++++++++++++++ .../eclipse/jgit/storage/pack/PackWriter.java | 120 ++++++++++++--- 3 files changed, 271 insertions(+), 21 deletions(-) create mode 100644 org.eclipse.jgit/src/org/eclipse/jgit/revwalk/ObjectListIterator.java diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/lib/ObjectReader.java b/org.eclipse.jgit/src/org/eclipse/jgit/lib/ObjectReader.java index cd3706bbe3..85864dbbf6 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/lib/ObjectReader.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/lib/ObjectReader.java @@ -46,11 +46,14 @@ package org.eclipse.jgit.lib; import java.io.IOException; import java.util.ArrayList; import java.util.Collection; +import java.util.Collections; import java.util.Iterator; import java.util.List; +import java.util.Set; import org.eclipse.jgit.errors.IncorrectObjectTypeException; import org.eclipse.jgit.errors.MissingObjectException; +import org.eclipse.jgit.revwalk.ObjectListIterator; import org.eclipse.jgit.revwalk.ObjectWalk; import org.eclipse.jgit.revwalk.RevCommit; import org.eclipse.jgit.revwalk.RevWalk; @@ -427,6 +430,35 @@ public abstract class ObjectReader { // Do nothing by default, most readers don't want or need advice. } + /** + * Obtain the available pre-computed object reachability lists. + *

+ * The lists are indexed by commit ObjectId, so the returned set contains + * the commit ObjectIds naming each set. + * + * @return set of commit ObjectIds that identify lists. + */ + public Set getAvailableObjectLists() { + return Collections.emptySet(); + } + + /** + * Open a pre-computed object list for reading. + * + * @param listName + * a commit ObjectId previously returned by + * {@link #getAvailableObjectLists()}. + * @param walker + * the revision pool to use when looking up objects. + * @return the list iterator. + * @throws IOException + * the reader cannot load the precomputed list. + */ + public ObjectListIterator openObjectList(AnyObjectId listName, + ObjectWalk walker) throws IOException { + throw new UnsupportedOperationException(); + } + /** * Release any resources used by this reader. *

diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/ObjectListIterator.java b/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/ObjectListIterator.java new file mode 100644 index 0000000000..ab9cbec12d --- /dev/null +++ b/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/ObjectListIterator.java @@ -0,0 +1,140 @@ +/* + * Copyright (C) 2011, Google Inc. + * and other copyright owners as documented in the project's IP log. + * + * This program and the accompanying materials are made available + * under the terms of the Eclipse Distribution License v1.0 which + * accompanies this distribution, is reproduced below, and is + * available at http://www.eclipse.org/org/documents/edl-v10.php + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * - Neither the name of the Eclipse Foundation, Inc. nor the + * names of its contributors may be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +package org.eclipse.jgit.revwalk; + +import java.io.IOException; + +import org.eclipse.jgit.lib.AnyObjectId; +import org.eclipse.jgit.storage.pack.PackWriter; + +/** + * Iterates through an open object list. + *

+ * A cached object list should be constructed by enumerating from a single + * stable commit back to the beginning of the project, using an ObjectWalk: + * + *

+ * ObjectWalk walk = new ObjectWalk(repository);
+ * walk.markStart(walk.parseCommit(listName));
+ *
+ * RevCommit commit;
+ * while ((commit = walk.next()) != null)
+ * 	list.addCommit(commit);
+ *
+ * RevObject object;
+ * while ((object == walk.nextObject()) != null)
+ * 	list.addObject(object, walk.getPathHasCode());
+ * 
+ *

+ * {@link PackWriter} relies on the list covering a single commit, and going all + * the way back to the root. If a list contains multiple starting commits the + * PackWriter will include all of those objects, even if the client did not ask + * for them, or should not have been given the objects. + */ +public abstract class ObjectListIterator { + private final ObjectWalk walk; + + /** + * Initialize the list iterator. + * + * @param walk + * the revision pool the iterator will use when allocating the + * returned objects. + */ + protected ObjectListIterator(ObjectWalk walk) { + this.walk = walk; + } + + /** + * Lookup an object from the revision pool. + * + * @param id + * the object to allocate. + * @param type + * the type of the object. The type must be accurate, as it is + * used to allocate the proper RevObject instance. + * @return the object. + */ + protected RevObject lookupAny(AnyObjectId id, int type) { + return walk.lookupAny(id, type); + } + + /** + * Pop the next most recent commit. + *

+ * Commits should be returned in descending commit time order, or in + * topological order. Either ordering is acceptable for a list to use. + * + * @return next most recent commit; null if traversal is over. + * @throws IOException + * the list cannot be read. + */ + public abstract RevCommit next() throws IOException; + + /** + * Pop the next most recent object. + *

+ * Only RevTree and RevBlob may be returned from this method, as these are + * the only non-commit types reachable from a RevCommit. Lists may return + * the objects clustered by type, or clustered by order of first-discovery + * when walking from the most recent to the oldest commit. + * + * @return the next object. Null at the end of the list. + * @throws IOException + * the list cannot be read. + */ + public abstract RevObject nextObject() throws IOException; + + /** + * Get the current object's path hash code. + *

+ * The path hash code should be cached from the ObjectWalk. + * + * @return path hash code; any integer may be returned. + */ + public abstract int getPathHashCode(); + + /** Release the resources associated with this iterator. */ + public abstract void release(); +} diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/PackWriter.java b/org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/PackWriter.java index 17c5a12d40..15939de7ac 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/PackWriter.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/PackWriter.java @@ -86,6 +86,7 @@ import org.eclipse.jgit.lib.ProgressMonitor; import org.eclipse.jgit.lib.Repository; import org.eclipse.jgit.lib.ThreadSafeProgressMonitor; import org.eclipse.jgit.revwalk.AsyncRevObjectQueue; +import org.eclipse.jgit.revwalk.ObjectListIterator; import org.eclipse.jgit.revwalk.ObjectWalk; import org.eclipse.jgit.revwalk.RevFlag; import org.eclipse.jgit.revwalk.RevObject; @@ -376,9 +377,8 @@ public class PackWriter { throws IOException { if (countingMonitor == null) countingMonitor = NullProgressMonitor.INSTANCE; - ObjectWalk walker = setUpWalker(interestingObjects, + findObjectsToPack(countingMonitor, interestingObjects, uninterestingObjects); - findObjectsToPack(countingMonitor, walker); } /** @@ -972,11 +972,14 @@ public class PackWriter { out.write(packcsum); } - private ObjectWalk setUpWalker( + private void findObjectsToPack(final ProgressMonitor countingMonitor, final Collection interestingObjects, final Collection uninterestingObjects) throws MissingObjectException, IOException, IncorrectObjectTypeException { + countingMonitor.beginTask(JGitText.get().countingObjects, + ProgressMonitor.UNKNOWN); + List all = new ArrayList(interestingObjects.size()); for (ObjectId id : interestingObjects) all.add(id.copy()); @@ -991,13 +994,18 @@ public class PackWriter { not = Collections.emptySet(); final ObjectWalk walker = new ObjectWalk(reader); + final RevFlag hasObjectList = walker.newFlag("hasObjectList"); + walker.setRetainBody(false); - if (not.isEmpty()) + if (not.isEmpty()) { walker.sort(RevSort.COMMIT_TIME_DESC); - else + for (ObjectId listName : reader.getAvailableObjectLists()) + walker.lookupCommit(listName).add(hasObjectList); + } else { walker.sort(RevSort.TOPO); - if (thin && !not.isEmpty()) - walker.sort(RevSort.BOUNDARY, true); + if (thin) + walker.sort(RevSort.BOUNDARY, true); + } AsyncRevObjectQueue q = walker.parseAny(all, true); try { @@ -1020,27 +1028,93 @@ public class PackWriter { } finally { q.release(); } - return walker; - } - private void findObjectsToPack(final ProgressMonitor countingMonitor, - final ObjectWalk walker) throws MissingObjectException, - IncorrectObjectTypeException, IOException { - countingMonitor.beginTask(JGitText.get().countingObjects, - ProgressMonitor.UNKNOWN); + RevObject listName = null; RevObject o; while ((o = walker.next()) != null) { - addObject(o, 0); + if (o.has(hasObjectList)) { + listName = o; + break; + } + addResultOrBase(o, 0); countingMonitor.update(1); } - while ((o = walker.nextObject()) != null) { - addObject(o, walker.getPathHashCode()); - countingMonitor.update(1); + if (listName != null) { + addByObjectList(listName, countingMonitor, walker, + interestingObjects); + } else { + while ((o = walker.nextObject()) != null) { + addResultOrBase(o, walker.getPathHashCode()); + countingMonitor.update(1); + } } countingMonitor.endTask(); } + private void addByObjectList(RevObject listName, + final ProgressMonitor countingMonitor, final ObjectWalk walker, + final Collection interestingObjects) + throws MissingObjectException, IncorrectObjectTypeException, + IOException { + int restartedProgress = objectsMap.size(); + + for (List list : objectsLists) + list.clear(); + objectsMap.clear(); + + walker.reset(); + walker.markUninteresting(listName); + + for (ObjectId id : interestingObjects) + walker.markStart(walker.lookupOrNull(id)); + + RevFlag added = walker.newFlag("added"); + RevObject o; + while ((o = walker.next()) != null) { + addResult(o, 0); + o.add(added); + if (restartedProgress != 0) + restartedProgress--; + else + countingMonitor.update(1); + } + while ((o = walker.nextObject()) != null) { + addResult(o, walker.getPathHashCode()); + o.add(added); + if (restartedProgress != 0) + restartedProgress--; + else + countingMonitor.update(1); + } + + ObjectListIterator itr = reader.openObjectList(listName, walker); + try { + while ((o = itr.next()) != null) { + if (o.has(added)) + continue; + addResult(o, 0); + o.add(added); + if (restartedProgress != 0) + restartedProgress--; + else + countingMonitor.update(1); + } + while ((o = itr.nextObject()) != null) { + if (o.has(added)) + continue; + addResult(o, itr.getPathHashCode()); + o.add(added); + if (restartedProgress != 0) + restartedProgress--; + else + countingMonitor.update(1); + } + } finally { + itr.release(); + } + } + /** * Include one object to the output file. *

@@ -1055,10 +1129,10 @@ public class PackWriter { */ public void addObject(final RevObject object) throws IncorrectObjectTypeException { - addObject(object, 0); + addResultOrBase(object, 0); } - private void addObject(final RevObject object, final int pathHashCode) + private void addResultOrBase(final RevObject object, final int pathHashCode) throws IncorrectObjectTypeException { if (object.has(RevFlag.UNINTERESTING)) { switch (object.getType()) { @@ -1071,9 +1145,13 @@ public class PackWriter { thin = true; break; } - return; + } else { + addResult(object, pathHashCode); } + } + private void addResult(final RevObject object, final int pathHashCode) + throws IncorrectObjectTypeException { final ObjectToPack otp; if (reuseSupport != null) otp = reuseSupport.newObjectToPack(object); -- 2.39.5