Implement async/batch lookup of object data

An ObjectReader implementation may be very slow for a single object, yet still support bulk queries efficiently by batching multiple small requests into a single larger request. This easily happens when the reader is built on top of a database that is stored on another host, as the network round-trip time starts to dominate the operation cost. RevWalk, ObjectWalk, UploadPack and PackWriter are the first major users of this new bulk interface, with the goal of supporting an efficient way to pack a repository for a fetch/clone client when the source repository is stored in a high-latency storage system. Processing the want/have lists is now done in bulk, to remove the high costs associated with common ancestor negotiation. PackWriter already performs object reuse selection in bulk, but it can now also do the object size lookup and object counting phases with higher efficiency. Actual object reuse, deltification, and final output still perform sequential lookups, making them a bit more expensive. Change-Id: I4c966f84917482598012074c370b9831451404ee Signed-off-by: Shawn O. Pearce <spearce@spearce.org>
author: Shawn O. Pearce <spearce@spearce.org> 2010-07-31 20:08:10 -0700
committer: Shawn O. Pearce <spearce@spearce.org> 2010-08-20 17:41:27 -0700
commit: f048af3fd19547d3692f5df968571ffd7556b688 (patch)
tree: c447126e0503035dd7cf2a57db852e8083470606 /org.eclipse.jgit/src/org/eclipse/jgit/lib/ObjectReader.java
parent: 11a5bef8b1c866fadc9b340a1300c7fdd927a46b (diff)
download: jgit-f048af3fd19547d3692f5df968571ffd7556b688.tar.gz
jgit-f048af3fd19547d3692f5df968571ffd7556b688.zip
1 files changed, 112 insertions, 0 deletions
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/lib/ObjectReader.java b/org.eclipse.jgit/src/org/eclipse/jgit/lib/ObjectReader.java
index e1ee1441d7..d4e866a22b 100644
--- a/org.eclipse.jgit/src/org/eclipse/jgit/lib/ObjectReader.java
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/lib/ObjectReader.java
@@ -45,6 +45,7 @@ package org.eclipse.jgit.lib;
 
 import java.io.IOException;
 import java.util.Collection;
+import java.util.Iterator;
 
 import org.eclipse.jgit.errors.IncorrectObjectTypeException;
 import org.eclipse.jgit.errors.MissingObjectException;
@@ -150,6 +151,60 @@ public abstract class ObjectReader {
 			IOException;
 
 	/**
+	 * Asynchronous object opening; this default walks the ids sequentially.
+	 *
+	 * @param <T>
+	 *            type of identifier being supplied.
+	 * @param objectIds
+	 *            objects to open from the object store. The supplied collection
+	 *            must not be modified until the queue has finished.
+	 * @param reportMissing
+	 *            if true, missing objects are reported by calling failure with a
+	 *            MissingObjectException. This may be more expensive for the
+	 *            implementation to guarantee. If false the implementation may
+	 *            choose to report MissingObjectException, or silently skip over
+	 *            the object with no warning. This default never reads the flag.
+	 * @return queue to read the objects from; in this default an object is
+	 *         opened only when open() is invoked on the returned queue. */
+	public <T extends ObjectId> AsyncObjectLoaderQueue<T> open(
+			Iterable<T> objectIds, final boolean reportMissing) {
+		final Iterator<T> idItr = objectIds.iterator();
+		return new AsyncObjectLoaderQueue<T>() {
+			private T cur; // id most recently taken from idItr; null until next() first succeeds
+
+			public boolean next() throws MissingObjectException, IOException {
+				if (idItr.hasNext()) {
+					cur = idItr.next(); // only advances the cursor; no open() call is made here
+					return true;
+				} else {
+					return false;
+				}
+			}
+
+			public T getCurrent() {
+				return cur;
+			}
+
+			public ObjectId getObjectId() {
+				return cur;
+			}
+
+			public ObjectLoader open() throws IOException {
+				return ObjectReader.this.open(cur, OBJ_ANY); // delegates to the reader's single-object open for the current id
+			}
+
+			public boolean cancel(boolean mayInterruptIfRunning) {
+				return true; // unconditional: this queue starts no background work to cancel
+			}
+
+			public void release() {
+				// Nothing to release: this default queue is purely sequential
+				// and holds no state needing cleanup on early termination.
+			}
+		};
+	}
+
+	/**
 	 * Get only the size of an object.
 	 * <p>
 	 * The default implementation of this method opens an ObjectLoader.
@@ -178,6 +233,63 @@ public abstract class ObjectReader {
 	}
 
 	/**
+	 * Asynchronous object size lookup; this default resolves sizes one by one.
+	 *
+	 * @param <T>
+	 *            type of identifier being supplied.
+	 * @param objectIds
+	 *            objects to get the size of from the object store. The supplied
+	 *            collection must not be modified until the queue has finished.
+	 * @param reportMissing
+	 *            if true, missing objects are reported by calling failure with a
+	 *            MissingObjectException. This may be more expensive for the
+	 *            implementation to guarantee. If false the implementation may
+	 *            choose to report MissingObjectException, or silently skip over
+	 *            the object with no warning. This default never reads the flag.
+	 * @return queue to read object sizes from; in this default each size is
+	 *         looked up during the queue's next() call. */
+	public <T extends ObjectId> AsyncObjectSizeQueue<T> getObjectSize(
+			Iterable<T> objectIds, final boolean reportMissing) {
+		final Iterator<T> idItr = objectIds.iterator();
+		return new AsyncObjectSizeQueue<T>() {
+			private T cur; // id most recently taken from idItr; null until next() first succeeds
+
+			private long sz; // size recorded for cur by the last successful next(); 0 before that
+
+			public boolean next() throws MissingObjectException, IOException {
+				if (idItr.hasNext()) {
+					cur = idItr.next();
+					sz = getObjectSize(cur, OBJ_ANY); // eager single-object lookup; any exception it throws propagates out of next()
+					return true;
+				} else {
+					return false;
+				}
+			}
+
+			public T getCurrent() {
+				return cur;
+			}
+
+			public ObjectId getObjectId() {
+				return cur;
+			}
+
+			public long getSize() {
+				return sz;
+			}
+
+			public boolean cancel(boolean mayInterruptIfRunning) {
+				return true; // unconditional: this queue starts no background work to cancel
+			}
+
+			public void release() {
+				// Nothing to release: this default queue is purely sequential
+				// and holds no state needing cleanup on early termination.
+			}
+		};
+	}
+
+	/**
 	 * Advice from a {@link RevWalk} that a walk is starting from these roots.
 	 *
 	 * @param walk
author	Shawn O. Pearce <spearce@spearce.org>	2010-07-31 20:08:10 -0700
committer	Shawn O. Pearce <spearce@spearce.org>	2010-08-20 17:41:27 -0700
commit	f048af3fd19547d3692f5df968571ffd7556b688 (patch)
tree	c447126e0503035dd7cf2a57db852e8083470606 /org.eclipse.jgit/src/org/eclipse/jgit/lib/ObjectReader.java
parent	11a5bef8b1c866fadc9b340a1300c7fdd927a46b (diff)
download	jgit-f048af3fd19547d3692f5df968571ffd7556b688.tar.gz jgit-f048af3fd19547d3692f5df968571ffd7556b688.zip