diff options
author | Shawn O. Pearce <spearce@spearce.org> | 2010-07-02 02:19:12 -0700 |
---|---|---|
committer | Shawn O. Pearce <spearce@spearce.org> | 2010-07-02 02:19:12 -0700 |
commit | ad68553be4417ec7ac636c3d823fdddced46ecfb (patch) | |
tree | 456e656c6d46be39df3fccb5e96085b9813b91d6 /org.eclipse.jgit.test | |
parent | ded8f6c72150253c86b9586025da3a3997ccad6c (diff) | |
download | jgit-ad68553be4417ec7ac636c3d823fdddced46ecfb.tar.gz jgit-ad68553be4417ec7ac636c3d823fdddced46ecfb.zip |
Support large delta packed objects as streams
Very large delta instruction streams, or deltas which use very large
base objects, are now streamed through as large objects rather than
being inflated into a byte array.
This isn't the most efficient way to access delta-encoded content, as
we may need to rewind and reprocess the base object when a block was
moved within the file, but it will at least prevent the JVM from
having its heap explode.
When streaming a delta we have an inflater open for each level in the
delta chain, to inflate the instruction set of the delta, as well as
an inflater for the base level object. The base object is buffered,
as is the top level delta requested by the application, but we do not
buffer the intermediate delta streams. This keeps memory usage lower,
so it's closer to 1024 bytes per level in the chain, without having an
adverse impact on raw throughput as the top-level buffer gets pushed
down to the lowest stream that has the next region.
Delta instructions transparently collapse here: if the top level does
not copy a region from its base, the base won't materialize that part
from its own base, etc. This allows us to avoid copying around a lot
of segments which have been deleted from the final version.
Change-Id: I724d45245cebb4bad2deeae7b896fc55b2dd49b3
Signed-off-by: Shawn O. Pearce <spearce@spearce.org>
Diffstat (limited to 'org.eclipse.jgit.test')
-rw-r--r-- | org.eclipse.jgit.test/tst/org/eclipse/jgit/storage/file/PackFileTest.java | 250 | ||||
-rw-r--r-- | org.eclipse.jgit.test/tst/org/eclipse/jgit/storage/pack/DeltaStreamTest.java | 273 |
2 files changed, 523 insertions, 0 deletions
diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/storage/file/PackFileTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/storage/file/PackFileTest.java index 55459ac265..1b6e3bff95 100644 --- a/org.eclipse.jgit.test/tst/org/eclipse/jgit/storage/file/PackFileTest.java +++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/storage/file/PackFileTest.java @@ -43,17 +43,29 @@ package org.eclipse.jgit.storage.file; +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.security.MessageDigest; import java.util.Arrays; +import java.util.zip.Deflater; import org.eclipse.jgit.errors.LargeObjectException; import org.eclipse.jgit.junit.LocalDiskRepositoryTestCase; import org.eclipse.jgit.junit.TestRepository; import org.eclipse.jgit.junit.TestRng; import org.eclipse.jgit.lib.Constants; +import org.eclipse.jgit.lib.NullProgressMonitor; +import org.eclipse.jgit.lib.ObjectId; +import org.eclipse.jgit.lib.ObjectInserter; import org.eclipse.jgit.lib.ObjectLoader; import org.eclipse.jgit.lib.ObjectStream; import org.eclipse.jgit.revwalk.RevBlob; +import org.eclipse.jgit.storage.pack.DeltaEncoder; +import org.eclipse.jgit.transport.IndexPack; import org.eclipse.jgit.util.IO; +import org.eclipse.jgit.util.NB; +import org.eclipse.jgit.util.TemporaryBuffer; public class PackFileTest extends LocalDiskRepositoryTestCase { private TestRng rng; @@ -134,4 +146,242 @@ public class PackFileTest extends LocalDiskRepositoryTestCase { assertEquals("stream at EOF", -1, in.read()); in.close(); } + + public void testDelta_SmallObjectChain() throws Exception { + ObjectInserter.Formatter fmt = new ObjectInserter.Formatter(); + byte[] data0 = new byte[512]; + Arrays.fill(data0, (byte) 0xf3); + ObjectId id0 = fmt.idFor(Constants.OBJ_BLOB, data0); + + TemporaryBuffer.Heap pack = new TemporaryBuffer.Heap(64 * 1024); + packHeader(pack, 4); + objectHeader(pack, Constants.OBJ_BLOB, data0.length); + deflate(pack, data0); + + byte[] data1 = 
clone(0x01, data0); + byte[] delta1 = delta(data0, data1); + ObjectId id1 = fmt.idFor(Constants.OBJ_BLOB, data1); + objectHeader(pack, Constants.OBJ_REF_DELTA, delta1.length); + id0.copyRawTo(pack); + deflate(pack, delta1); + + byte[] data2 = clone(0x02, data1); + byte[] delta2 = delta(data1, data2); + ObjectId id2 = fmt.idFor(Constants.OBJ_BLOB, data2); + objectHeader(pack, Constants.OBJ_REF_DELTA, delta2.length); + id1.copyRawTo(pack); + deflate(pack, delta2); + + byte[] data3 = clone(0x03, data2); + byte[] delta3 = delta(data2, data3); + ObjectId id3 = fmt.idFor(Constants.OBJ_BLOB, data3); + objectHeader(pack, Constants.OBJ_REF_DELTA, delta3.length); + id2.copyRawTo(pack); + deflate(pack, delta3); + + digest(pack); + final byte[] raw = pack.toByteArray(); + IndexPack ip = IndexPack.create(repo, new ByteArrayInputStream(raw)); + ip.setFixThin(true); + ip.index(NullProgressMonitor.INSTANCE); + ip.renameAndOpenPack(); + + assertTrue("has blob", wc.has(id3)); + + ObjectLoader ol = wc.open(id3); + assertNotNull("created loader", ol); + assertEquals(Constants.OBJ_BLOB, ol.getType()); + assertEquals(data3.length, ol.getSize()); + assertFalse("is large", ol.isLarge()); + assertNotNull(ol.getCachedBytes()); + assertTrue(Arrays.equals(data3, ol.getCachedBytes())); + + ObjectStream in = ol.openStream(); + assertNotNull("have stream", in); + assertEquals(Constants.OBJ_BLOB, in.getType()); + assertEquals(data3.length, in.getSize()); + byte[] act = new byte[data3.length]; + IO.readFully(in, act, 0, data3.length); + assertTrue("same content", Arrays.equals(act, data3)); + assertEquals("stream at EOF", -1, in.read()); + in.close(); + } + + public void testDelta_LargeObjectChain() throws Exception { + ObjectInserter.Formatter fmt = new ObjectInserter.Formatter(); + byte[] data0 = new byte[UnpackedObject.LARGE_OBJECT + 5]; + Arrays.fill(data0, (byte) 0xf3); + ObjectId id0 = fmt.idFor(Constants.OBJ_BLOB, data0); + + TemporaryBuffer.Heap pack = new TemporaryBuffer.Heap(64 * 1024); 
+ packHeader(pack, 4); + objectHeader(pack, Constants.OBJ_BLOB, data0.length); + deflate(pack, data0); + + byte[] data1 = clone(0x01, data0); + byte[] delta1 = delta(data0, data1); + ObjectId id1 = fmt.idFor(Constants.OBJ_BLOB, data1); + objectHeader(pack, Constants.OBJ_REF_DELTA, delta1.length); + id0.copyRawTo(pack); + deflate(pack, delta1); + + byte[] data2 = clone(0x02, data1); + byte[] delta2 = delta(data1, data2); + ObjectId id2 = fmt.idFor(Constants.OBJ_BLOB, data2); + objectHeader(pack, Constants.OBJ_REF_DELTA, delta2.length); + id1.copyRawTo(pack); + deflate(pack, delta2); + + byte[] data3 = clone(0x03, data2); + byte[] delta3 = delta(data2, data3); + ObjectId id3 = fmt.idFor(Constants.OBJ_BLOB, data3); + objectHeader(pack, Constants.OBJ_REF_DELTA, delta3.length); + id2.copyRawTo(pack); + deflate(pack, delta3); + + digest(pack); + final byte[] raw = pack.toByteArray(); + IndexPack ip = IndexPack.create(repo, new ByteArrayInputStream(raw)); + ip.setFixThin(true); + ip.index(NullProgressMonitor.INSTANCE); + ip.renameAndOpenPack(); + + assertTrue("has blob", wc.has(id3)); + + ObjectLoader ol = wc.open(id3); + assertNotNull("created loader", ol); + assertEquals(Constants.OBJ_BLOB, ol.getType()); + assertEquals(data3.length, ol.getSize()); + assertTrue("is large", ol.isLarge()); + try { + ol.getCachedBytes(); + fail("Should have thrown LargeObjectException"); + } catch (LargeObjectException tooBig) { + assertEquals(id3.name(), tooBig.getMessage()); + } + + ObjectStream in = ol.openStream(); + assertNotNull("have stream", in); + assertEquals(Constants.OBJ_BLOB, in.getType()); + assertEquals(data3.length, in.getSize()); + byte[] act = new byte[data3.length]; + IO.readFully(in, act, 0, data3.length); + assertTrue("same content", Arrays.equals(act, data3)); + assertEquals("stream at EOF", -1, in.read()); + in.close(); + } + + public void testDelta_LargeInstructionStream() throws Exception { + ObjectInserter.Formatter fmt = new ObjectInserter.Formatter(); + byte[] 
data0 = new byte[32]; + Arrays.fill(data0, (byte) 0xf3); + ObjectId id0 = fmt.idFor(Constants.OBJ_BLOB, data0); + + byte[] data3 = rng.nextBytes(UnpackedObject.LARGE_OBJECT + 5); + ByteArrayOutputStream tmp = new ByteArrayOutputStream(); + DeltaEncoder de = new DeltaEncoder(tmp, data0.length, data3.length); + de.insert(data3, 0, data3.length); + byte[] delta3 = tmp.toByteArray(); + assertTrue(delta3.length > UnpackedObject.LARGE_OBJECT); + + TemporaryBuffer.Heap pack = new TemporaryBuffer.Heap(64 * 1024); + packHeader(pack, 2); + objectHeader(pack, Constants.OBJ_BLOB, data0.length); + deflate(pack, data0); + + ObjectId id3 = fmt.idFor(Constants.OBJ_BLOB, data3); + objectHeader(pack, Constants.OBJ_REF_DELTA, delta3.length); + id0.copyRawTo(pack); + deflate(pack, delta3); + + digest(pack); + final byte[] raw = pack.toByteArray(); + IndexPack ip = IndexPack.create(repo, new ByteArrayInputStream(raw)); + ip.setFixThin(true); + ip.index(NullProgressMonitor.INSTANCE); + ip.renameAndOpenPack(); + + assertTrue("has blob", wc.has(id3)); + + ObjectLoader ol = wc.open(id3); + assertNotNull("created loader", ol); + assertEquals(Constants.OBJ_BLOB, ol.getType()); + assertEquals(data3.length, ol.getSize()); + assertTrue("is large", ol.isLarge()); + try { + ol.getCachedBytes(); + fail("Should have thrown LargeObjectException"); + } catch (LargeObjectException tooBig) { + assertEquals(id3.name(), tooBig.getMessage()); + } + + ObjectStream in = ol.openStream(); + assertNotNull("have stream", in); + assertEquals(Constants.OBJ_BLOB, in.getType()); + assertEquals(data3.length, in.getSize()); + byte[] act = new byte[data3.length]; + IO.readFully(in, act, 0, data3.length); + assertTrue("same content", Arrays.equals(act, data3)); + assertEquals("stream at EOF", -1, in.read()); + in.close(); + } + + private byte[] clone(int first, byte[] base) { + byte[] r = new byte[base.length]; + System.arraycopy(base, 1, r, 1, r.length - 1); + r[0] = (byte) first; + return r; + } + + private byte[] 
delta(byte[] base, byte[] dest) throws IOException { + ByteArrayOutputStream tmp = new ByteArrayOutputStream(); + DeltaEncoder de = new DeltaEncoder(tmp, base.length, dest.length); + de.insert(dest, 0, 1); + de.copy(1, base.length - 1); + return tmp.toByteArray(); + } + + private void packHeader(TemporaryBuffer.Heap pack, int cnt) + throws IOException { + final byte[] hdr = new byte[8]; + NB.encodeInt32(hdr, 0, 2); + NB.encodeInt32(hdr, 4, cnt); + pack.write(Constants.PACK_SIGNATURE); + pack.write(hdr, 0, 8); + } + + private void objectHeader(TemporaryBuffer.Heap pack, int type, int sz) + throws IOException { + byte[] buf = new byte[8]; + int nextLength = sz >>> 4; + buf[0] = (byte) ((nextLength > 0 ? 0x80 : 0x00) | (type << 4) | (sz & 0x0F)); + sz = nextLength; + int n = 1; + while (sz > 0) { + nextLength >>>= 7; + buf[n++] = (byte) ((nextLength > 0 ? 0x80 : 0x00) | (sz & 0x7F)); + sz = nextLength; + } + pack.write(buf, 0, n); + } + + private void deflate(TemporaryBuffer.Heap pack, final byte[] content) + throws IOException { + final Deflater deflater = new Deflater(); + final byte[] buf = new byte[128]; + deflater.setInput(content, 0, content.length); + deflater.finish(); + do { + final int n = deflater.deflate(buf, 0, buf.length); + if (n > 0) + pack.write(buf, 0, n); + } while (!deflater.finished()); + deflater.end(); + } + + private void digest(TemporaryBuffer.Heap buf) throws IOException { + MessageDigest md = Constants.newMessageDigest(); + md.update(buf.toByteArray()); + buf.write(md.digest()); + } } diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/storage/pack/DeltaStreamTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/storage/pack/DeltaStreamTest.java new file mode 100644 index 0000000000..9b34ad5e09 --- /dev/null +++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/storage/pack/DeltaStreamTest.java @@ -0,0 +1,273 @@ +/* + * Copyright (C) 2010, Google Inc. + * and other copyright owners as documented in the project's IP log. 
+ * + * This program and the accompanying materials are made available + * under the terms of the Eclipse Distribution License v1.0 which + * accompanies this distribution, is reproduced below, and is + * available at http://www.eclipse.org/org/documents/edl-v10.php + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * - Neither the name of the Eclipse Foundation, Inc. nor the + * names of its contributors may be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +package org.eclipse.jgit.storage.pack; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.util.Arrays; + +import junit.framework.TestCase; + +import org.eclipse.jgit.JGitText; +import org.eclipse.jgit.errors.CorruptObjectException; +import org.eclipse.jgit.junit.TestRng; +import org.eclipse.jgit.lib.Constants; +import org.eclipse.jgit.util.IO; + +public class DeltaStreamTest extends TestCase { + private TestRng rng; + + private ByteArrayOutputStream deltaBuf; + + private DeltaEncoder deltaEnc; + + private byte[] base; + + private byte[] data; + + private int dataPtr; + + private byte[] delta; + + protected void setUp() throws Exception { + super.setUp(); + rng = new TestRng(getName()); + deltaBuf = new ByteArrayOutputStream(); + } + + public void testCopy_SingleOp() throws IOException { + init((1 << 16) + 1, (1 << 8) + 1); + copy(0, data.length); + assertValidState(); + } + + public void testCopy_MaxSize() throws IOException { + int max = (0xff << 16) + (0xff << 8) + 0xff; + init(1 + max, max); + copy(1, max); + assertValidState(); + } + + public void testCopy_64k() throws IOException { + init(0x10000 + 2, 0x10000 + 1); + copy(1, 0x10000); + copy(0x10001, 1); + assertValidState(); + } + + public void testCopy_Gap() throws IOException { + init(256, 8); + copy(4, 4); + copy(128, 4); + assertValidState(); + } + + public void testCopy_OutOfOrder() throws IOException { + init((1 << 16) + 1, (1 << 16) + 1); + copy(1 << 8, 1 << 8); + copy(0, data.length - dataPtr); + assertValidState(); + } + + public void testInsert_SingleOp() throws IOException { + init((1 << 16) + 1, 2); + insert("hi"); + assertValidState(); + } + + public void testInsertAndCopy() throws IOException { + init(8, 512); + insert(new byte[127]); + insert(new byte[127]); + insert(new byte[127]); + insert(new byte[125]); + copy(2, 6); + assertValidState(); + } + + public void testSkip() throws 
IOException { + init(32, 15); + copy(2, 2); + insert("ab"); + insert("cd"); + copy(4, 4); + copy(0, 2); + insert("efg"); + assertValidState(); + + for (int p = 0; p < data.length; p++) { + byte[] act = new byte[data.length]; + System.arraycopy(data, 0, act, 0, p); + DeltaStream in = open(); + IO.skipFully(in, p); + assertEquals(data.length - p, in.read(act, p, data.length - p)); + assertEquals(-1, in.read()); + assertTrue("skipping " + p, Arrays.equals(data, act)); + } + + // Skip all the way to the end should still recognize EOF. + DeltaStream in = open(); + IO.skipFully(in, data.length); + assertEquals(-1, in.read()); + assertEquals(0, in.skip(1)); + + // Skip should not open the base as we move past it, but it + // will open when we need to start copying data from it. + final boolean[] opened = new boolean[1]; + in = new DeltaStream(new ByteArrayInputStream(delta)) { + @Override + protected long getBaseSize() throws IOException { + return base.length; + } + + @Override + protected InputStream openBase() throws IOException { + opened[0] = true; + return new ByteArrayInputStream(base); + } + }; + IO.skipFully(in, 7); + assertFalse("not yet open", opened[0]); + assertEquals(data[7], in.read()); + assertTrue("now open", opened[0]); + } + + public void testIncorrectBaseSize() throws IOException { + init(4, 4); + copy(0, 4); + assertValidState(); + + DeltaStream in = new DeltaStream(new ByteArrayInputStream(delta)) { + @Override + protected long getBaseSize() throws IOException { + return 128; + } + + @Override + protected InputStream openBase() throws IOException { + return new ByteArrayInputStream(base); + } + }; + try { + in.read(new byte[4]); + fail("did not throw an exception"); + } catch (CorruptObjectException e) { + assertEquals(JGitText.get().baseLengthIncorrect, e.getMessage()); + } + + in = new DeltaStream(new ByteArrayInputStream(delta)) { + @Override + protected long getBaseSize() throws IOException { + return 4; + } + + @Override + protected InputStream 
openBase() throws IOException { + return new ByteArrayInputStream(new byte[0]); + } + }; + try { + in.read(new byte[4]); + fail("did not throw an exception"); + } catch (CorruptObjectException e) { + assertEquals(JGitText.get().baseLengthIncorrect, e.getMessage()); + } + } + + private void init(int baseSize, int dataSize) throws IOException { + base = rng.nextBytes(baseSize); + data = new byte[dataSize]; + deltaEnc = new DeltaEncoder(deltaBuf, baseSize, dataSize); + } + + private void copy(int offset, int len) throws IOException { + System.arraycopy(base, offset, data, dataPtr, len); + deltaEnc.copy(offset, len); + assertEquals(deltaBuf.size(), deltaEnc.getSize()); + dataPtr += len; + } + + private void insert(String text) throws IOException { + insert(Constants.encode(text)); + } + + private void insert(byte[] text) throws IOException { + System.arraycopy(text, 0, data, dataPtr, text.length); + deltaEnc.insert(text); + assertEquals(deltaBuf.size(), deltaEnc.getSize()); + dataPtr += text.length; + } + + private void assertValidState() throws IOException { + assertEquals("test filled example result", data.length, dataPtr); + + delta = deltaBuf.toByteArray(); + assertEquals(base.length, BinaryDelta.getBaseSize(delta)); + assertEquals(data.length, BinaryDelta.getResultSize(delta)); + assertTrue(Arrays.equals(data, BinaryDelta.apply(base, delta))); + + byte[] act = new byte[data.length]; + DeltaStream in = open(); + assertEquals(data.length, in.getSize()); + assertEquals(data.length, in.read(act)); + assertEquals(-1, in.read()); + assertTrue(Arrays.equals(data, act)); + } + + private DeltaStream open() throws IOException { + return new DeltaStream(new ByteArrayInputStream(delta)) { + @Override + protected long getBaseSize() throws IOException { + return base.length; + } + + @Override + protected InputStream openBase() throws IOException { + return new ByteArrayInputStream(base); + } + }; + } +} |