| author | Shawn O. Pearce <spearce@spearce.org> | 2010-09-16 17:02:27 -0700 |
|---|---|---|
| committer | Chris Aniszczyk <caniszczyk@gmail.com> | 2010-10-04 14:04:47 -0500 |
| commit | 7ba31474a334cf8f79ecd0c85598550f415b70c5 | |
| tree | f11e95de8177a8f54c3a301803cb0abdc41e8159 | |
| parent | b0bfa8044a31262975a95ab11d8f52cdd387634d | |
Increase core.streamFileThreshold default to 50 MiB
Projects like org.eclipse.mdt contain large XML files about 6 MiB
in size. So does the Android project platform/frameworks/base.
Doing a clone of either project with JGit takes forever to check out
the files into the working directory, because delta decompression
tends to be very expensive as we need to constantly reposition the
base stream for each copy instruction. This can be made worse by
a very bad ordering of offsets, possibly due to an XML editor that
doesn't preserve the order of elements in the file very well.
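To make the cost concrete, here is a minimal, self-contained sketch (illustrative only; every name in it is hypothetical, not JGit code) of a delta base that, like a deflated pack entry, can only be read forward. Any copy instruction whose offset lies before the current position forces a reset and a re-read from the start of the base, which is why badly ordered offsets hurt so much:

    import java.io.ByteArrayInputStream;
    import java.io.IOException;
    import java.io.InputStream;

    /** Models a delta base reachable only through a forward-only stream. */
    class ForwardOnlyBase {
        private final byte[] stored; // stands in for the deflated base
        private InputStream in;
        private long pos;

        ForwardOnlyBase(byte[] stored) {
            this.stored = stored;
            reset();
        }

        private void reset() {
            // A real pack reader restarts zlib inflation here; that repeated
            // work is the expense described above.
            in = new ByteArrayInputStream(stored);
            pos = 0;
        }

        /** Serve one copy instruction: base[offset, offset+len) into dst. */
        void copy(long offset, byte[] dst, int dstOff, int len)
                throws IOException {
            if (offset < pos)
                reset(); // prior offset: re-read the stream from position 0
            while (pos < offset)
                pos += in.skip(offset - pos);
            pos += in.read(dst, dstOff, len);
        }
    }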
Increasing the threshold to the same limit PackWriter uses when
doing delta compression (50 MiB) permits a default-configured
JGit to decompress these XML file objects using the faster
random-access arrays, rather than re-seeking through an inflate
stream, significantly reducing checkout time after a clone.
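The two access paths differ in the API the loader exposes. A sketch of how a consumer such as checkout has to handle both, using only methods that appear in the tests below (ObjectLoader.isLarge(), getCachedBytes(), openStream()):

    import java.io.IOException;
    import java.io.OutputStream;

    import org.eclipse.jgit.lib.ObjectLoader;
    import org.eclipse.jgit.lib.ObjectStream;

    class CopyObject {
        /** Fast in-core path for small objects, streaming for large ones. */
        static void copy(ObjectLoader ol, OutputStream dst) throws IOException {
            if (!ol.isLarge()) {
                dst.write(ol.getCachedBytes()); // random-access byte array
                return;
            }
            ObjectStream in = ol.openStream(); // inflate-as-you-go path
            try {
                byte[] buf = new byte[8192];
                int n;
                while ((n = in.read(buf)) > 0)
                    dst.write(buf, 0, n);
            } finally {
                in.close();
            }
        }
    }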
Since this new limit may be dangerously close to the JVM maximum
heap size, every allocation attempt is now wrapped in a try/catch
so that JGit can degrade by switching to the large object stream
mode when the allocation is refused. It will run slower, but the
operation will still complete.
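Reduced to a minimal sketch, that fallback looks like the helper below (the class and method names are hypothetical; the real logic lives in PackFile.load() and loadDelta() in the diff that follows):

    final class HedgedAlloc {
        /**
         * Try to allocate an in-core buffer of the given size. Returns null
         * when the size exceeds what a Java array can hold or the JVM refuses
         * the allocation; the caller then degrades to the streaming path.
         */
        static byte[] tryAlloc(long size) {
            if (size >= Integer.MAX_VALUE)
                return null; // cannot index an array that large
            try {
                return new byte[(int) size];
            } catch (OutOfMemoryError tooBig) {
                return null; // degrade instead of failing the operation
            }
        }
    }

A caller checks for null and returns the large-object loader instead, mirroring the largeWhole() and largeDelta() helpers introduced below.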
The large stream mode will run very well for big objects that aren't
delta compressed, and is acceptable for delta compressed objects that
are using only forward referencing copy instructions. Copies using
prior offsets are still going to be horrible, and there is nothing
we can do about it except increase core.streamFileThreshold.
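Applications embedding JGit need not wait on the default: the limit can be raised programmatically with the same calls the updated tests use, or in the repository configuration via the core.streamFileThreshold key. A sketch (the 100 MiB value is only an example):

    import org.eclipse.jgit.storage.file.WindowCache;
    import org.eclipse.jgit.storage.file.WindowCacheConfig;

    class RaiseStreamThreshold {
        static void configure() {
            WindowCacheConfig cfg = new WindowCacheConfig();
            // Objects below this size are inflated into one byte array;
            // anything at or above it uses the large-object stream path.
            cfg.setStreamFileThreshold(100 * 1024 * 1024); // 100 MiB
            WindowCache.reconfigure(cfg);
        }
    }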
We might in the future want to consider changing the way the delta
generators work in JGit and native C Git to avoid prior offsets once
an object reaches a certain size, even if that causes the delta
instruction stream to be slightly larger. Unfortunately native
C Git won't want to do that until it's also able to stream objects
rather than malloc them as contiguous blocks.
Change-Id: Ief7a3896afce15073e80d3691bed90c6a3897307
Signed-off-by: Shawn O. Pearce <spearce@spearce.org>
Signed-off-by: Chris Aniszczyk <caniszczyk@gmail.com>
8 files changed, 105 insertions, 109 deletions
diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/storage/file/PackFileTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/storage/file/PackFileTest.java
index 9a4bc0b1ab..74c9f06557 100644
--- a/org.eclipse.jgit.test/tst/org/eclipse/jgit/storage/file/PackFileTest.java
+++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/storage/file/PackFileTest.java
@@ -70,6 +70,8 @@ import org.eclipse.jgit.util.NB;
 import org.eclipse.jgit.util.TemporaryBuffer;
 
 public class PackFileTest extends LocalDiskRepositoryTestCase {
+    private int streamThreshold = 16 * 1024;
+
     private TestRng rng;
 
     private FileRepository repo;
@@ -80,6 +82,11 @@ public class PackFileTest extends LocalDiskRepositoryTestCase {
     protected void setUp() throws Exception {
         super.setUp();
+
+        WindowCacheConfig cfg = new WindowCacheConfig();
+        cfg.setStreamFileThreshold(streamThreshold);
+        WindowCache.reconfigure(cfg);
+
         rng = new TestRng(getName());
         repo = createBareRepository();
         tr = new TestRepository<FileRepository>(repo);
@@ -89,6 +96,7 @@ public class PackFileTest extends LocalDiskRepositoryTestCase {
     protected void tearDown() throws Exception {
         if (wc != null)
             wc.release();
+        WindowCache.reconfigure(new WindowCacheConfig());
         super.tearDown();
     }
 
@@ -120,7 +128,7 @@ public class PackFileTest extends LocalDiskRepositoryTestCase {
     public void testWhole_LargeObject() throws Exception {
         final int type = Constants.OBJ_BLOB;
-        byte[] data = rng.nextBytes(ObjectLoader.STREAM_THRESHOLD + 5);
+        byte[] data = rng.nextBytes(streamThreshold + 5);
         RevBlob id = tr.blob(data);
         tr.branch("master").commit().add("A", id).create();
         tr.packAndPrune();
@@ -213,7 +221,7 @@ public class PackFileTest extends LocalDiskRepositoryTestCase {
     public void testDelta_LargeObjectChain() throws Exception {
         ObjectInserter.Formatter fmt = new ObjectInserter.Formatter();
-        byte[] data0 = new byte[ObjectLoader.STREAM_THRESHOLD + 5];
+        byte[] data0 = new byte[streamThreshold + 5];
         Arrays.fill(data0, (byte) 0xf3);
         ObjectId id0 = fmt.idFor(Constants.OBJ_BLOB, data0);
 
@@ -277,64 +285,6 @@ public class PackFileTest extends LocalDiskRepositoryTestCase {
         in.close();
     }
 
-    public void testDelta_LargeInstructionStream() throws Exception {
-        ObjectInserter.Formatter fmt = new ObjectInserter.Formatter();
-        byte[] data0 = new byte[32];
-        Arrays.fill(data0, (byte) 0xf3);
-        ObjectId id0 = fmt.idFor(Constants.OBJ_BLOB, data0);
-
-        byte[] data3 = rng.nextBytes(ObjectLoader.STREAM_THRESHOLD + 5);
-        ByteArrayOutputStream tmp = new ByteArrayOutputStream();
-        DeltaEncoder de = new DeltaEncoder(tmp, data0.length, data3.length);
-        de.insert(data3, 0, data3.length);
-        byte[] delta3 = tmp.toByteArray();
-        assertTrue(delta3.length > ObjectLoader.STREAM_THRESHOLD);
-
-        TemporaryBuffer.Heap pack = new TemporaryBuffer.Heap(
-                ObjectLoader.STREAM_THRESHOLD + 1024);
-        packHeader(pack, 2);
-        objectHeader(pack, Constants.OBJ_BLOB, data0.length);
-        deflate(pack, data0);
-
-        ObjectId id3 = fmt.idFor(Constants.OBJ_BLOB, data3);
-        objectHeader(pack, Constants.OBJ_REF_DELTA, delta3.length);
-        id0.copyRawTo(pack);
-        deflate(pack, delta3);
-
-        digest(pack);
-        final byte[] raw = pack.toByteArray();
-        IndexPack ip = IndexPack.create(repo, new ByteArrayInputStream(raw));
-        ip.setFixThin(true);
-        ip.index(NullProgressMonitor.INSTANCE);
-        ip.renameAndOpenPack();
-
-        assertTrue("has blob", wc.has(id3));
-
-        ObjectLoader ol = wc.open(id3);
-        assertNotNull("created loader", ol);
-        assertEquals(Constants.OBJ_BLOB, ol.getType());
-        assertEquals(data3.length, ol.getSize());
-        assertTrue("is large", ol.isLarge());
-
-        try {
-            ol.getCachedBytes();
-            fail("Should have thrown LargeObjectException");
-        } catch (LargeObjectException tooBig) {
-            assertEquals(MessageFormat.format(
-                    JGitText.get().largeObjectException, id3.name()), tooBig
-                    .getMessage());
-        }
-
-        ObjectStream in = ol.openStream();
-        assertNotNull("have stream", in);
-        assertEquals(Constants.OBJ_BLOB, in.getType());
-        assertEquals(data3.length, in.getSize());
-        byte[] act = new byte[data3.length];
-        IO.readFully(in, act, 0, data3.length);
-        assertTrue("same content", Arrays.equals(act, data3));
-        assertEquals("stream at EOF", -1, in.read());
-        in.close();
-    }
-
     private byte[] clone(int first, byte[] base) {
         byte[] r = new byte[base.length];
         System.arraycopy(base, 1, r, 1, r.length - 1);
diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/storage/file/UnpackedObjectTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/storage/file/UnpackedObjectTest.java
index ab7445b17b..547565ea82 100644
--- a/org.eclipse.jgit.test/tst/org/eclipse/jgit/storage/file/UnpackedObjectTest.java
+++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/storage/file/UnpackedObjectTest.java
@@ -67,6 +67,8 @@ import org.eclipse.jgit.lib.ObjectStream;
 import org.eclipse.jgit.util.IO;
 
 public class UnpackedObjectTest extends LocalDiskRepositoryTestCase {
+    private int streamThreshold = 16 * 1024;
+
     private TestRng rng;
 
     private FileRepository repo;
@@ -75,6 +77,11 @@
     protected void setUp() throws Exception {
         super.setUp();
+
+        WindowCacheConfig cfg = new WindowCacheConfig();
+        cfg.setStreamFileThreshold(streamThreshold);
+        WindowCache.reconfigure(cfg);
+
         rng = new TestRng(getName());
         repo = createBareRepository();
         wc = (WindowCursor) repo.newObjectReader();
@@ -83,6 +90,7 @@
     protected void tearDown() throws Exception {
         if (wc != null)
             wc.release();
+        WindowCache.reconfigure(new WindowCacheConfig());
         super.tearDown();
     }
 
@@ -113,7 +121,7 @@ public class UnpackedObjectTest extends LocalDiskRepositoryTestCase {
     public void testStandardFormat_LargeObject() throws Exception {
         final int type = Constants.OBJ_BLOB;
-        byte[] data = rng.nextBytes(ObjectLoader.STREAM_THRESHOLD + 5);
+        byte[] data = rng.nextBytes(streamThreshold + 5);
         ObjectId id = new ObjectInserter.Formatter().idFor(type, data);
         write(id, compressStandardFormat(type, data));
 
@@ -268,7 +276,7 @@ public class UnpackedObjectTest extends LocalDiskRepositoryTestCase {
     public void testStandardFormat_LargeObject_CorruptZLibStream()
             throws Exception {
         final int type = Constants.OBJ_BLOB;
-        byte[] data = rng.nextBytes(ObjectLoader.STREAM_THRESHOLD + 5);
+        byte[] data = rng.nextBytes(streamThreshold + 5);
         ObjectId id = new ObjectInserter.Formatter().idFor(type, data);
         byte[] gz = compressStandardFormat(type, data);
         gz[gz.length - 1] = 0;
@@ -305,7 +313,7 @@ public class UnpackedObjectTest extends LocalDiskRepositoryTestCase {
     public void testStandardFormat_LargeObject_TruncatedZLibStream()
             throws Exception {
         final int type = Constants.OBJ_BLOB;
-        byte[] data = rng.nextBytes(ObjectLoader.STREAM_THRESHOLD + 5);
+        byte[] data = rng.nextBytes(streamThreshold + 5);
         ObjectId id = new ObjectInserter.Formatter().idFor(type, data);
         byte[] gz = compressStandardFormat(type, data);
         byte[] tr = new byte[gz.length - 1];
@@ -339,7 +347,7 @@ public class UnpackedObjectTest extends LocalDiskRepositoryTestCase {
     public void testStandardFormat_LargeObject_TrailingGarbage()
             throws Exception {
         final int type = Constants.OBJ_BLOB;
-        byte[] data = rng.nextBytes(ObjectLoader.STREAM_THRESHOLD + 5);
+        byte[] data = rng.nextBytes(streamThreshold + 5);
         ObjectId id = new ObjectInserter.Formatter().idFor(type, data);
         byte[] gz = compressStandardFormat(type, data);
         byte[] tr = new byte[gz.length + 1];
@@ -396,7 +404,7 @@ public class UnpackedObjectTest extends LocalDiskRepositoryTestCase {
     public void testPackFormat_LargeObject() throws Exception {
         final int type = Constants.OBJ_BLOB;
-        byte[] data = rng.nextBytes(ObjectLoader.STREAM_THRESHOLD + 5);
+        byte[] data = rng.nextBytes(streamThreshold + 5);
         ObjectId id = new ObjectInserter.Formatter().idFor(type, data);
         write(id, compressPackFormat(type, data));
diff --git a/org.eclipse.jgit/resources/org/eclipse/jgit/JGitText.properties b/org.eclipse.jgit/resources/org/eclipse/jgit/JGitText.properties
index b6336283c7..db7a952b27 100644
--- a/org.eclipse.jgit/resources/org/eclipse/jgit/JGitText.properties
+++ b/org.eclipse.jgit/resources/org/eclipse/jgit/JGitText.properties
@@ -335,6 +335,7 @@ repositoryState_rebaseOrApplyMailbox=Rebase/Apply mailbox
 repositoryState_rebaseWithMerge=Rebase w/merge
 requiredHashFunctionNotAvailable=Required hash function {0} not available.
 resolvingDeltas=Resolving deltas
+resultLengthIncorrect=result length incorrect
 searchForReuse=Finding sources
 sequenceTooLargeForDiffAlgorithm=Sequence too large for difference algorithm.
 serviceNotPermitted={0} not permitted
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/JGitText.java b/org.eclipse.jgit/src/org/eclipse/jgit/JGitText.java
index bb6ffe1840..dceefa2f4e 100644
--- a/org.eclipse.jgit/src/org/eclipse/jgit/JGitText.java
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/JGitText.java
@@ -395,6 +395,7 @@ public class JGitText extends TranslationBundle {
     /***/ public String repositoryState_rebaseWithMerge;
     /***/ public String requiredHashFunctionNotAvailable;
     /***/ public String resolvingDeltas;
+    /***/ public String resultLengthIncorrect;
     /***/ public String searchForReuse;
     /***/ public String sequenceTooLargeForDiffAlgorithm;
     /***/ public String serviceNotPermitted;
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/lib/ObjectLoader.java b/org.eclipse.jgit/src/org/eclipse/jgit/lib/ObjectLoader.java
index fe4a7fdc0d..b2cc29426e 100644
--- a/org.eclipse.jgit/src/org/eclipse/jgit/lib/ObjectLoader.java
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/lib/ObjectLoader.java
@@ -61,14 +61,6 @@ import org.eclipse.jgit.util.IO;
  */
 public abstract class ObjectLoader {
     /**
-     * Default setting for the large object threshold.
-     * <p>
-     * Objects larger than this size must be accessed as a stream through the
-     * loader's {@link #openStream()} method.
-     */
-    public static final int STREAM_THRESHOLD = 5 * 1024 * 1024;
-
-    /**
      * @return Git in pack object type, see {@link Constants}.
      */
     public abstract int getType();
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/storage/file/PackFile.java b/org.eclipse.jgit/src/org/eclipse/jgit/storage/file/PackFile.java
index 5239111623..3ae0b8f390 100644
--- a/org.eclipse.jgit/src/org/eclipse/jgit/storage/file/PackFile.java
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/storage/file/PackFile.java
@@ -63,6 +63,7 @@ import java.util.zip.Inflater;
 
 import org.eclipse.jgit.JGitText;
 import org.eclipse.jgit.errors.CorruptObjectException;
+import org.eclipse.jgit.errors.LargeObjectException;
 import org.eclipse.jgit.errors.MissingObjectException;
 import org.eclipse.jgit.errors.PackInvalidException;
 import org.eclipse.jgit.errors.PackMismatchException;
@@ -274,12 +275,11 @@ public class PackFile implements Iterable<PackIndex.MutableEntry> {
         return getReverseIdx().findObject(offset);
     }
 
-    private final byte[] decompress(final long position, final long totalSize,
-            final WindowCursor curs) throws IOException, DataFormatException {
-        final byte[] dstbuf = new byte[(int) totalSize];
-        if (curs.inflate(this, position, dstbuf, 0) != totalSize)
+    private final void decompress(final long position, final WindowCursor curs,
+            final byte[] dstbuf, final int dstoff, final int dstsz)
+            throws IOException, DataFormatException {
+        if (curs.inflate(this, position, dstbuf, dstoff) != dstsz)
             throw new EOFException(MessageFormat.format(JGitText.get().shortCompressedStreamAt, position));
-        return dstbuf;
     }
 
     final void copyAsIs(PackOutputStream out, LocalObjectToPack src,
@@ -630,10 +630,16 @@ public class PackFile implements Iterable<PackIndex.MutableEntry> {
         case Constants.OBJ_BLOB:
         case Constants.OBJ_TAG: {
             if (sz < curs.getStreamFileThreshold()) {
-                byte[] data = decompress(pos + p, sz, curs);
+                byte[] data;
+                try {
+                    data = new byte[(int) sz];
+                } catch (OutOfMemoryError tooBig) {
+                    return largeWhole(curs, pos, type, sz, p);
+                }
+                decompress(pos + p, curs, data, 0, data.length);
                 return new ObjectLoader.SmallObject(type, data);
             }
-            return new LargePackedWholeObject(type, sz, pos, p, this, curs.db);
+            return largeWhole(curs, pos, type, sz, p);
         }
 
         case Constants.OBJ_OFS_DELTA: {
@@ -680,44 +686,58 @@ public class PackFile implements Iterable<PackIndex.MutableEntry> {
     private ObjectLoader loadDelta(long posSelf, int hdrLen, long sz,
             long posBase, WindowCursor curs) throws IOException,
             DataFormatException {
-        if (curs.getStreamFileThreshold() <= sz) {
-            // The delta instruction stream itself is pretty big, and
-            // that implies the resulting object is going to be massive.
-            // Use only the large delta format here.
-            //
-            return new LargePackedDeltaObject(posSelf, posBase, hdrLen, //
-                    this, curs.db);
-        }
+        if (Integer.MAX_VALUE <= sz)
+            return largeDelta(posSelf, hdrLen, posBase, curs);
 
-        byte[] data;
+        byte[] base;
         int type;
         DeltaBaseCache.Entry e = DeltaBaseCache.get(this, posBase);
         if (e != null) {
-            data = e.data;
+            base = e.data;
             type = e.type;
         } else {
             ObjectLoader p = load(curs, posBase);
-            if (p.isLarge()) {
-                // The base itself is large. We have to produce a large
-                // delta stream as we don't want to build the whole base.
-                //
-                return new LargePackedDeltaObject(posSelf, posBase, hdrLen,
-                        this, curs.db);
+            try {
+                base = p.getCachedBytes(curs.getStreamFileThreshold());
+            } catch (LargeObjectException tooBig) {
+                return largeDelta(posSelf, hdrLen, posBase, curs);
             }
-            data = p.getCachedBytes();
             type = p.getType();
-            DeltaBaseCache.store(this, posBase, data, type);
+            DeltaBaseCache.store(this, posBase, base, type);
         }
 
-        // At this point we have the base, and its small, and the delta
-        // stream also is small, so the result object cannot be more than
-        // 2x our small size. This occurs if the delta instructions were
-        // "copy entire base, literal insert entire delta". Go with the
-        // faster small object style at this point.
-        //
-        data = BinaryDelta.apply(data, decompress(posSelf + hdrLen, sz, curs));
-        return new ObjectLoader.SmallObject(type, data);
+        final byte[] delta;
+        try {
+            delta = new byte[(int) sz];
+        } catch (OutOfMemoryError tooBig) {
+            return largeDelta(posSelf, hdrLen, posBase, curs);
+        }
+
+        decompress(posSelf + hdrLen, curs, delta, 0, delta.length);
+        sz = BinaryDelta.getResultSize(delta);
+        if (Integer.MAX_VALUE <= sz)
+            return largeDelta(posSelf, hdrLen, posBase, curs);
+
+        final byte[] result;
+        try {
+            result = new byte[(int) sz];
+        } catch (OutOfMemoryError tooBig) {
+            return largeDelta(posSelf, hdrLen, posBase, curs);
+        }
+
+        BinaryDelta.apply(base, delta, result);
+        return new ObjectLoader.SmallObject(type, result);
+    }
+
+    private LargePackedWholeObject largeWhole(final WindowCursor curs,
+            final long pos, final int type, long sz, int p) {
+        return new LargePackedWholeObject(type, sz, pos, p, this, curs.db);
+    }
+
+    private LargePackedDeltaObject largeDelta(long posObj, int hdrLen,
+            long posBase, WindowCursor wc) {
+        return new LargePackedDeltaObject(posObj, posBase, hdrLen, this, wc.db);
     }
 
     byte[] getDeltaHeader(WindowCursor wc, long pos)
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/storage/file/WindowCacheConfig.java b/org.eclipse.jgit/src/org/eclipse/jgit/storage/file/WindowCacheConfig.java
index 95ec6f7c76..c8e0d5584e 100644
--- a/org.eclipse.jgit/src/org/eclipse/jgit/storage/file/WindowCacheConfig.java
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/storage/file/WindowCacheConfig.java
@@ -44,7 +44,7 @@ package org.eclipse.jgit.storage.file;
 
 import org.eclipse.jgit.lib.Config;
-import org.eclipse.jgit.lib.ObjectLoader;
+import org.eclipse.jgit.storage.pack.PackConfig;
 
 /** Configuration parameters for {@link WindowCache}. */
 public class WindowCacheConfig {
@@ -73,7 +73,7 @@ public class WindowCacheConfig {
         packedGitWindowSize = 8 * KB;
         packedGitMMAP = false;
         deltaBaseCacheLimit = 10 * MB;
-        streamFileThreshold = ObjectLoader.STREAM_THRESHOLD;
+        streamFileThreshold = PackConfig.DEFAULT_BIG_FILE_THRESHOLD;
     }
 
     /**
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/BinaryDelta.java b/org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/BinaryDelta.java
index 1d433d78a6..eead163ee8 100644
--- a/org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/BinaryDelta.java
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/BinaryDelta.java
@@ -115,6 +115,25 @@ public class BinaryDelta {
      * @return patched base
      */
    public static final byte[] apply(final byte[] base, final byte[] delta) {
+        return apply(base, delta, null);
+    }
+
+    /**
+     * Apply the changes defined by delta to the data in base, yielding a new
+     * array of bytes.
+     *
+     * @param base
+     *            some byte representing an object of some kind.
+     * @param delta
+     *            a git pack delta defining the transform from one version to
+     *            another.
+     * @param result
+     *            array to store the result into. If null the result will be
+     *            allocated and returned.
+     * @return either {@code result}, or the result array allocated.
+     */
+    public static final byte[] apply(final byte[] base, final byte[] delta,
+            byte[] result) {
         int deltaPtr = 0;
 
         // Length of the base object (a variable length int).
@@ -140,7 +159,12 @@
             shift += 7;
         } while ((c & 0x80) != 0);
 
-        final byte[] result = new byte[resLen];
+        if (result == null)
+            result = new byte[resLen];
+        else if (result.length != resLen)
+            throw new IllegalArgumentException(
+                    JGitText.get().resultLengthIncorrect);
+
         int resultPtr = 0;
         while (deltaPtr < delta.length) {
             final int cmd = delta[deltaPtr++] & 0xff;
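A usage note on the new three-argument BinaryDelta.apply() above: callers can now size the result buffer themselves from BinaryDelta.getResultSize() and hedge the allocation before any inflation work happens. A sketch using only methods visible in this diff (the helper class itself is hypothetical):

    import org.eclipse.jgit.storage.pack.BinaryDelta;

    class ApplyDeltaSketch {
        /** Apply delta to base in core; null means fall back to streaming. */
        static byte[] applyOrNull(byte[] base, byte[] delta) {
            long sz = BinaryDelta.getResultSize(delta);
            if (sz >= Integer.MAX_VALUE)
                return null; // result cannot fit in a Java array
            byte[] result;
            try {
                result = new byte[(int) sz];
            } catch (OutOfMemoryError tooBig) {
                return null; // caller degrades to the large-object path
            }
            return BinaryDelta.apply(base, delta, result);
        }
    }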