diff options
author | Shawn O. Pearce <spearce@spearce.org> | 2010-06-26 15:17:09 -0700 |
---|---|---|
committer | Shawn O. Pearce <spearce@spearce.org> | 2010-06-26 16:13:22 -0700 |
commit | 3a7aec03e07ac853df5a00cbf06e6cd5e4ba2bc2 (patch) | |
tree | 30a238a321c3aa616ed00cb05f3ea7690fd16a25 | |
parent | ece88b99eb2ea6541b667aa066573184c25b6a8b (diff) | |
download | jgit-3a7aec03e07ac853df5a00cbf06e6cd5e4ba2bc2.tar.gz jgit-3a7aec03e07ac853df5a00cbf06e6cd5e4ba2bc2.zip |
Implement zero-copy for single window objects
Objects that fall completely within a single window can be worked
with in a zero-copy fashion, provided that the window is backed by
a normal byte[] and not by a ByteBuffer.
This works for a surprising number of objects. The default window
size is 8 KiB, but most deltas are quite a bit smaller than that.
Objects smaller than 1/2 of the window size have a very good chance
of falling completely within a window's array, which means we can
work with them without copying their data around.
Larger objects, or objects which are unlucky enough to span over a
window boundary, get copied through the temporary buffer. We pay
a tiny penalty to realize we can't use the zero-copy code path,
but its easier than trying to keep track of two adjacent windows.
With this change (as well as everything preceeding it), packing
is actually a bit faster. Some crude benchmarks based on cloning
linux-2.6.git (~324 MiB, 1,624,785 objects) over localhost using
C git client and JGit daemon shows we get better throughput, and
slightly better times:
Total Time | Throughput
(old) (now) | (old) (now)
--------------+---------------------------
2m45s 2m37s | 12.49 MiB/s 21.17 MiB/s
2m42s 2m36s | 16.29 MiB/s 22.63 MiB/s
2m37s 2m31s | 16.07 MiB/s 21.92 MiB/s
Change-Id: I48b2c8d37f08d7bf5e76c5a8020cde4a16ae3396
Signed-off-by: Shawn O. Pearce <spearce@spearce.org>
3 files changed, 65 insertions, 22 deletions
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/lib/ByteArrayWindow.java b/org.eclipse.jgit/src/org/eclipse/jgit/lib/ByteArrayWindow.java index 4f2373d3d2..0c5c818993 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/lib/ByteArrayWindow.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/lib/ByteArrayWindow.java @@ -45,6 +45,9 @@ package org.eclipse.jgit.lib; +import java.io.IOException; +import java.io.OutputStream; +import java.util.zip.CRC32; import java.util.zip.DataFormatException; import java.util.zip.Inflater; @@ -80,4 +83,19 @@ final class ByteArrayWindow extends ByteWindow { o += inf.inflate(b, o, b.length - o); return o; } + + void crc32(CRC32 out, long pos, int cnt) { + out.update(array, (int) (pos - start), cnt); + } + + void write(OutputStream out, long pos, int cnt) throws IOException { + out.write(array, (int) (pos - start), cnt); + } + + void check(Inflater inf, byte[] tmp, long pos, int cnt) + throws DataFormatException { + inf.setInput(array, (int) (pos - start), cnt); + while (inf.inflate(tmp, 0, tmp.length) > 0) + continue; + } } diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/lib/PackFile.java b/org.eclipse.jgit/src/org/eclipse/jgit/lib/PackFile.java index 25835e2ff2..ab25f61889 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/lib/PackFile.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/lib/PackFile.java @@ -338,26 +338,31 @@ public class PackFile implements Iterable<PackIndex.MutableEntry> { final long dataOffset = src.copyOffset + headerCnt; final long dataLength; final long expectedCRC; + final ByteArrayWindow quickCopy; // Verify the object isn't corrupt before sending. If it is, // we report it missing instead. // try { dataLength = findEndOffset(src.copyOffset) - dataOffset; + quickCopy = curs.quickCopy(this, dataOffset, dataLength); if (idx().hasCRC32Support()) { // Index has the CRC32 code cached, validate the object. // expectedCRC = idx().findCRC32(src); - - long pos = dataOffset; - long cnt = dataLength; - while (cnt > 0) { - final int n = (int) Math.min(cnt, buf.length); - readFully(pos, buf, 0, n, curs); - crc1.update(buf, 0, n); - pos += n; - cnt -= n; + if (quickCopy != null) { + quickCopy.crc32(crc1, dataOffset, (int) dataLength); + } else { + long pos = dataOffset; + long cnt = dataLength; + while (cnt > 0) { + final int n = (int) Math.min(cnt, buf.length); + readFully(pos, buf, 0, n, curs); + crc1.update(buf, 0, n); + pos += n; + cnt -= n; + } } if (crc1.getValue() != expectedCRC) { setCorrupt(src.copyOffset); @@ -370,21 +375,25 @@ public class PackFile implements Iterable<PackIndex.MutableEntry> { // now while inflating the raw data to get zlib to tell us // whether or not the data is safe. // - long pos = dataOffset; - long cnt = dataLength; Inflater inf = curs.inflater(); byte[] tmp = new byte[1024]; - while (cnt > 0) { - final int n = (int) Math.min(cnt, buf.length); - readFully(pos, buf, 0, n, curs); - crc1.update(buf, 0, n); - inf.setInput(buf, 0, n); - while (inf.inflate(tmp, 0, tmp.length) > 0) - continue; - pos += n; - cnt -= n; + if (quickCopy != null) { + quickCopy.check(inf, tmp, dataOffset, (int) dataLength); + } else { + long pos = dataOffset; + long cnt = dataLength; + while (cnt > 0) { + final int n = (int) Math.min(cnt, buf.length); + readFully(pos, buf, 0, n, curs); + crc1.update(buf, 0, n); + inf.setInput(buf, 0, n); + while (inf.inflate(tmp, 0, tmp.length) > 0) + continue; + pos += n; + cnt -= n; + } } - if (!inf.finished()) { + if (!inf.finished() || inf.getBytesRead() != dataLength) { setCorrupt(src.copyOffset); throw new EOFException(MessageFormat.format( JGitText.get().shortCompressedStreamAt, @@ -413,7 +422,14 @@ public class PackFile implements Iterable<PackIndex.MutableEntry> { throw gone; } - if (dataLength <= buf.length) { + if (quickCopy != null) { + // The entire object fits into a single byte array window slice, + // and we have it pinned. Write this out without copying. + // + out.writeHeader(src, inflatedLength); + quickCopy.write(out, dataOffset, (int) dataLength); + + } else if (dataLength <= buf.length) { // Tiny optimization: Lots of objects are very small deltas or // deflated commits that are likely to fit in the copy buffer. // diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/lib/WindowCursor.java b/org.eclipse.jgit/src/org/eclipse/jgit/lib/WindowCursor.java index 36095ed5e3..98916efcbd 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/lib/WindowCursor.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/lib/WindowCursor.java @@ -166,6 +166,15 @@ final class WindowCursor extends ObjectReader implements ObjectReuseAsIs { } } + ByteArrayWindow quickCopy(PackFile p, long pos, long cnt) + throws IOException { + pin(p, pos); + if (window instanceof ByteArrayWindow + && window.contains(p, pos + (cnt - 1))) + return (ByteArrayWindow) window; + return null; + } + Inflater inflater() { prepareInflater(); return inf; |