diff options
author | Shawn O. Pearce <spearce@spearce.org> | 2011-01-30 23:40:08 -0800 |
---|---|---|
committer | Chris Aniszczyk <caniszczyk@gmail.com> | 2011-02-01 09:12:06 -0600 |
commit | 13bcf05a9ea2d4943faef2c879aac65d37517eb6 (patch) | |
tree | 73b76f7e93f568631ff8618b66d78f641bd3cace /org.eclipse.jgit/src/org/eclipse/jgit/treewalk | |
parent | 2fbcba41e365752681f635c706d577e605d3336a (diff) | |
download | jgit-13bcf05a9ea2d4943faef2c879aac65d37517eb6.tar.gz jgit-13bcf05a9ea2d4943faef2c879aac65d37517eb6.zip |
PackWriter: Make thin packs more efficient
There is no point in pushing all of the files within the edge
commits into the delta search when making a thin pack. This floods
the delta search window with objects that are unlikely to be useful
bases for the objects that will be written out, resulting in lower
data compression and higher transfer sizes.
Instead observe the path of a tree or blob that is being pushed
into the outgoing set, and use that path to locate up to WINDOW
ancestor versions from the edge commits. Push only those objects
into the edgeObjects set, reducing the number of objects seen by the
search window. This allows PackWriter to only look at ancestors
for the modified files, rather than all files in the project.
Limiting the search to WINDOW size makes sense, because more than
WINDOW edge objects will just skip through the window search as
none of them need to be delta compressed.
To further improve compression, sort edge objects into the front
of the window list, rather than randomly throughout. This puts
non-edges later in the window and gives them a better chance at
finding their base, since they search backwards through the window.
These changes make a significant difference in the thin-pack:
Before:
remote: Counting objects: 144190, done
remote: Finding sources: 100% (50275/50275)
remote: Getting sizes: 100% (101405/101405)
remote: Compressing objects: 100% (7587/7587)
Receiving objects: 100% (50275/50275), 24.67 MiB | 9.90 MiB/s, done.
Resolving deltas: 100% (40339/40339), completed with 2218 local objects.
real 0m30.267s
After:
remote: Counting objects: 61549, done
remote: Finding sources: 100% (50275/50275)
remote: Getting sizes: 100% (18862/18862)
remote: Compressing objects: 100% (7588/7588)
Receiving objects: 100% (50275/50275), 11.04 MiB | 3.51 MiB/s, done.
Resolving deltas: 100% (43160/43160), completed with 5014 local objects.
real 0m22.170s
The resulting pack is 13.63 MiB smaller, even though it contains the
same exact objects. 82,543 fewer objects had to have their sizes
looked up, which saved about 8s of server CPU time. 2,796 more
objects from the client were used as part of the base object set,
which contributed to the smaller transfer size.
Change-Id: Id01271950432c6960897495b09deab70e33993a9
Signed-off-by: Shawn O. Pearce <spearce@spearce.org>
Sigend-off-by: Chris Aniszczyk <caniszczyk@gmail.com>
Diffstat (limited to 'org.eclipse.jgit/src/org/eclipse/jgit/treewalk')
-rw-r--r-- | org.eclipse.jgit/src/org/eclipse/jgit/treewalk/AbstractTreeIterator.java | 56 |
1 files changed, 42 insertions, 14 deletions
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/treewalk/AbstractTreeIterator.java b/org.eclipse.jgit/src/org/eclipse/jgit/treewalk/AbstractTreeIterator.java index 79b57d1eb6..df3dac3919 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/treewalk/AbstractTreeIterator.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/treewalk/AbstractTreeIterator.java @@ -310,29 +310,47 @@ public abstract class AbstractTreeIterator { } int pathCompare(final AbstractTreeIterator p, final int pMode) { - final byte[] a = path; - final byte[] b = p.path; - final int aLen = pathLen; - final int bLen = p.pathLen; - int cPos; - // Its common when we are a subtree for both parents to match; // when this happens everything in path[0..cPos] is known to // be equal and does not require evaluation again. // - cPos = alreadyMatch(this, p); + int cPos = alreadyMatch(this, p); + return pathCompare(p.path, cPos, p.pathLen, pMode, cPos); + } + + /** + * Compare the path of this current entry to a raw buffer. + * + * @param buf + * the raw path buffer. + * @param pos + * position to start reading the raw buffer. + * @param end + * one past the end of the raw buffer (length is end - pos). + * @param mode + * the mode of the path. + * @return -1 if this entry sorts first; 0 if the entries are equal; 1 if + * p's entry sorts first. + */ + public int pathCompare(byte[] buf, int pos, int end, int mode) { + return pathCompare(buf, pos, end, mode, 0); + } + + private int pathCompare(byte[] b, int bPos, int bEnd, int bMode, int aPos) { + final byte[] a = path; + final int aEnd = pathLen; - for (; cPos < aLen && cPos < bLen; cPos++) { - final int cmp = (a[cPos] & 0xff) - (b[cPos] & 0xff); + for (; aPos < aEnd && bPos < bEnd; aPos++, bPos++) { + final int cmp = (a[aPos] & 0xff) - (b[bPos] & 0xff); if (cmp != 0) return cmp; } - if (cPos < aLen) - return (a[cPos] & 0xff) - lastPathChar(pMode); - if (cPos < bLen) - return lastPathChar(mode) - (b[cPos] & 0xff); - return lastPathChar(mode) - lastPathChar(pMode); + if (aPos < aEnd) + return (a[aPos] & 0xff) - lastPathChar(bMode); + if (bPos < bEnd) + return lastPathChar(mode) - (b[bPos] & 0xff); + return lastPathChar(mode) - lastPathChar(bMode); } private static int alreadyMatch(AbstractTreeIterator a, @@ -406,6 +424,16 @@ public abstract class AbstractTreeIterator { return TreeWalk.pathOf(this); } + /** @return the internal buffer holding the current path. */ + public byte[] getEntryPathBuffer() { + return path; + } + + /** @return length of the path in {@link #getEntryPathBuffer()}. */ + public int getEntryPathLength() { + return pathLen; + } + /** * Get the current entry's path hash code. * <p> |