aboutsummaryrefslogtreecommitdiffstats
path: root/org.eclipse.jgit/src/org/eclipse/jgit/treewalk
diff options
context:
space:
mode:
authorShawn O. Pearce <spearce@spearce.org>2011-01-30 23:40:08 -0800
committerChris Aniszczyk <caniszczyk@gmail.com>2011-02-01 09:12:06 -0600
commit13bcf05a9ea2d4943faef2c879aac65d37517eb6 (patch)
tree73b76f7e93f568631ff8618b66d78f641bd3cace /org.eclipse.jgit/src/org/eclipse/jgit/treewalk
parent2fbcba41e365752681f635c706d577e605d3336a (diff)
downloadjgit-13bcf05a9ea2d4943faef2c879aac65d37517eb6.tar.gz
jgit-13bcf05a9ea2d4943faef2c879aac65d37517eb6.zip
PackWriter: Make thin packs more efficient
There is no point in pushing all of the files within the edge commits into the delta search when making a thin pack. This floods the delta search window with objects that are unlikely to be useful bases for the objects that will be written out, resulting in lower data compression and higher transfer sizes. Instead observe the path of a tree or blob that is being pushed into the outgoing set, and use that path to locate up to WINDOW ancestor versions from the edge commits. Push only those objects into the edgeObjects set, reducing the number of objects seen by the search window. This allows PackWriter to only look at ancestors for the modified files, rather than all files in the project. Limiting the search to WINDOW size makes sense, because more than WINDOW edge objects will just skip through the window search as none of them need to be delta compressed. To further improve compression, sort edge objects into the front of the window list, rather than randomly throughout. This puts non-edges later in the window and gives them a better chance at finding their base, since they search backwards through the window. These changes make a significant difference in the thin-pack: Before: remote: Counting objects: 144190, done remote: Finding sources: 100% (50275/50275) remote: Getting sizes: 100% (101405/101405) remote: Compressing objects: 100% (7587/7587) Receiving objects: 100% (50275/50275), 24.67 MiB | 9.90 MiB/s, done. Resolving deltas: 100% (40339/40339), completed with 2218 local objects. real 0m30.267s After: remote: Counting objects: 61549, done remote: Finding sources: 100% (50275/50275) remote: Getting sizes: 100% (18862/18862) remote: Compressing objects: 100% (7588/7588) Receiving objects: 100% (50275/50275), 11.04 MiB | 3.51 MiB/s, done. Resolving deltas: 100% (43160/43160), completed with 5014 local objects. real 0m22.170s The resulting pack is 13.63 MiB smaller, even though it contains the same exact objects. 82,543 fewer objects had to have their sizes looked up, which saved about 8s of server CPU time. 2,796 more objects from the client were used as part of the base object set, which contributed to the smaller transfer size. Change-Id: Id01271950432c6960897495b09deab70e33993a9 Signed-off-by: Shawn O. Pearce <spearce@spearce.org> Sigend-off-by: Chris Aniszczyk <caniszczyk@gmail.com>
Diffstat (limited to 'org.eclipse.jgit/src/org/eclipse/jgit/treewalk')
-rw-r--r--org.eclipse.jgit/src/org/eclipse/jgit/treewalk/AbstractTreeIterator.java56
1 files changed, 42 insertions, 14 deletions
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/treewalk/AbstractTreeIterator.java b/org.eclipse.jgit/src/org/eclipse/jgit/treewalk/AbstractTreeIterator.java
index 79b57d1eb6..df3dac3919 100644
--- a/org.eclipse.jgit/src/org/eclipse/jgit/treewalk/AbstractTreeIterator.java
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/treewalk/AbstractTreeIterator.java
@@ -310,29 +310,47 @@ public abstract class AbstractTreeIterator {
}
int pathCompare(final AbstractTreeIterator p, final int pMode) {
- final byte[] a = path;
- final byte[] b = p.path;
- final int aLen = pathLen;
- final int bLen = p.pathLen;
- int cPos;
-
// Its common when we are a subtree for both parents to match;
// when this happens everything in path[0..cPos] is known to
// be equal and does not require evaluation again.
//
- cPos = alreadyMatch(this, p);
+ int cPos = alreadyMatch(this, p);
+ return pathCompare(p.path, cPos, p.pathLen, pMode, cPos);
+ }
+
+ /**
+ * Compare the path of this current entry to a raw buffer.
+ *
+ * @param buf
+ * the raw path buffer.
+ * @param pos
+ * position to start reading the raw buffer.
+ * @param end
+ * one past the end of the raw buffer (length is end - pos).
+ * @param mode
+ * the mode of the path.
+ * @return -1 if this entry sorts first; 0 if the entries are equal; 1 if
+ * p's entry sorts first.
+ */
+ public int pathCompare(byte[] buf, int pos, int end, int mode) {
+ return pathCompare(buf, pos, end, mode, 0);
+ }
+
+ private int pathCompare(byte[] b, int bPos, int bEnd, int bMode, int aPos) {
+ final byte[] a = path;
+ final int aEnd = pathLen;
- for (; cPos < aLen && cPos < bLen; cPos++) {
- final int cmp = (a[cPos] & 0xff) - (b[cPos] & 0xff);
+ for (; aPos < aEnd && bPos < bEnd; aPos++, bPos++) {
+ final int cmp = (a[aPos] & 0xff) - (b[bPos] & 0xff);
if (cmp != 0)
return cmp;
}
- if (cPos < aLen)
- return (a[cPos] & 0xff) - lastPathChar(pMode);
- if (cPos < bLen)
- return lastPathChar(mode) - (b[cPos] & 0xff);
- return lastPathChar(mode) - lastPathChar(pMode);
+ if (aPos < aEnd)
+ return (a[aPos] & 0xff) - lastPathChar(bMode);
+ if (bPos < bEnd)
+ return lastPathChar(mode) - (b[bPos] & 0xff);
+ return lastPathChar(mode) - lastPathChar(bMode);
}
private static int alreadyMatch(AbstractTreeIterator a,
@@ -406,6 +424,16 @@ public abstract class AbstractTreeIterator {
return TreeWalk.pathOf(this);
}
+ /** @return the internal buffer holding the current path. */
+ public byte[] getEntryPathBuffer() {
+ return path;
+ }
+
+ /** @return length of the path in {@link #getEntryPathBuffer()}. */
+ public int getEntryPathLength() {
+ return pathLen;
+ }
+
/**
* Get the current entry's path hash code.
* <p>