author     Shawn O. Pearce <spearce@spearce.org>       2010-09-16 17:02:27 -0700
committer  Chris Aniszczyk <caniszczyk@gmail.com>      2010-10-04 14:04:47 -0500
commit     7ba31474a334cf8f79ecd0c85598550f415b70c5 (patch)
tree       f11e95de8177a8f54c3a301803cb0abdc41e8159
parent     b0bfa8044a31262975a95ab11d8f52cdd387634d (diff)
download   jgit-7ba31474a334cf8f79ecd0c85598550f415b70c5.tar.gz
           jgit-7ba31474a334cf8f79ecd0c85598550f415b70c5.zip
Increase core.streamFileThreshold default to 50 MiB
Projects like org.eclipse.mdt contain large XML files about 6 MiB in size. So does the Android project platform/frameworks/base. Doing a clone of either project with JGit takes forever to check out the files into the working directory, because delta decompression tends to be very expensive as we need to constantly reposition the base stream for each copy instruction. This can be made worse by a very bad ordering of offsets, possibly due to an XML editor that doesn't preserve the order of elements in the file very well.

Increasing the threshold to the same limit PackWriter uses when doing delta compression (50 MiB) permits a default-configured JGit to decompress these XML file objects using the faster random-access arrays, rather than re-seeking through an inflate stream, significantly reducing checkout time after a clone.

Since this new limit may be dangerously close to the JVM maximum heap size, every allocation attempt is now wrapped in a try/catch so that JGit can degrade by switching to the large object stream mode when the allocation is refused. It will run slower, but the operation will still complete.

The large stream mode will run very well for big objects that aren't delta compressed, and is acceptable for delta compressed objects that are using only forward referencing copy instructions. Copies using prior offsets are still going to be horrible, and there is nothing we can do about it except increase core.streamFileThreshold.

We might in the future want to consider changing the way the delta generators work in JGit and native C Git to avoid prior offsets once an object reaches a certain size, even if that causes the delta instruction stream to be slightly larger. Unfortunately native C Git won't want to do that until it's also able to stream objects rather than malloc them as contiguous blocks.

Change-Id: Ief7a3896afce15073e80d3691bed90c6a3897307
Signed-off-by: Shawn O. Pearce <spearce@spearce.org>
Signed-off-by: Chris Aniszczyk <caniszczyk@gmail.com>
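The new default can also be overridden per process through the same WindowCacheConfig API the tests below use to shrink the limit. A minimal sketch, assuming only that API (the 1 MiB value is an arbitrary illustration, not a recommendation):

    import org.eclipse.jgit.storage.file.WindowCache;
    import org.eclipse.jgit.storage.file.WindowCacheConfig;

    public class StreamThresholdSetup {
        public static void main(String[] args) {
            // Lower the in-memory decompression limit to 1 MiB; objects
            // larger than this come back as streaming ObjectLoaders.
            WindowCacheConfig cfg = new WindowCacheConfig();
            cfg.setStreamFileThreshold(1 * 1024 * 1024);
            WindowCache.reconfigure(cfg);
        }
    }

Reconfiguring with a fresh WindowCacheConfig, as the tests' tearDown() does, restores the defaults, including the new 50 MiB stream threshold.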
-rw-r--r--  org.eclipse.jgit.test/tst/org/eclipse/jgit/storage/file/PackFileTest.java        70
-rw-r--r--  org.eclipse.jgit.test/tst/org/eclipse/jgit/storage/file/UnpackedObjectTest.java  18
-rw-r--r--  org.eclipse.jgit/resources/org/eclipse/jgit/JGitText.properties                   1
-rw-r--r--  org.eclipse.jgit/src/org/eclipse/jgit/JGitText.java                               1
-rw-r--r--  org.eclipse.jgit/src/org/eclipse/jgit/lib/ObjectLoader.java                       8
-rw-r--r--  org.eclipse.jgit/src/org/eclipse/jgit/storage/file/PackFile.java                 86
-rw-r--r--  org.eclipse.jgit/src/org/eclipse/jgit/storage/file/WindowCacheConfig.java         4
-rw-r--r--  org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/BinaryDelta.java              26
8 files changed, 105 insertions, 109 deletions
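The large object stream mode described in the commit message is what callers see whenever ObjectLoader.isLarge() reports true or getCachedBytes() throws LargeObjectException. A sketch of a caller that prefers the cached-bytes fast path and degrades to streaming, using only the ObjectLoader/ObjectStream methods exercised by the tests in this patch (the helper class and buffer size are illustrative, not JGit API):

    import java.io.IOException;
    import java.io.OutputStream;

    import org.eclipse.jgit.lib.ObjectLoader;
    import org.eclipse.jgit.lib.ObjectStream;

    public class LoaderCopySketch {
        // Write an object's content to dst: small objects come out of the
        // loader's cached byte array, large ones are pumped through the
        // incremental ObjectStream so the whole blob never sits on the heap.
        static void writeTo(ObjectLoader ol, OutputStream dst) throws IOException {
            if (!ol.isLarge()) {
                dst.write(ol.getCachedBytes());
                return;
            }
            ObjectStream in = ol.openStream();
            try {
                byte[] buf = new byte[8192];
                int n;
                while ((n = in.read(buf)) != -1)
                    dst.write(buf, 0, n);
            } finally {
                in.close();
            }
        }
    }

The testDelta_LargeInstructionStream test removed below exercised the same behaviour from the test's point of view: getCachedBytes() fails with LargeObjectException, while openStream() still delivers the full content.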
diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/storage/file/PackFileTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/storage/file/PackFileTest.java
index 9a4bc0b1ab..74c9f06557 100644
--- a/org.eclipse.jgit.test/tst/org/eclipse/jgit/storage/file/PackFileTest.java
+++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/storage/file/PackFileTest.java
@@ -70,6 +70,8 @@ import org.eclipse.jgit.util.NB;
import org.eclipse.jgit.util.TemporaryBuffer;
public class PackFileTest extends LocalDiskRepositoryTestCase {
+ private int streamThreshold = 16 * 1024;
+
private TestRng rng;
private FileRepository repo;
@@ -80,6 +82,11 @@ public class PackFileTest extends LocalDiskRepositoryTestCase {
protected void setUp() throws Exception {
super.setUp();
+
+ WindowCacheConfig cfg = new WindowCacheConfig();
+ cfg.setStreamFileThreshold(streamThreshold);
+ WindowCache.reconfigure(cfg);
+
rng = new TestRng(getName());
repo = createBareRepository();
tr = new TestRepository<FileRepository>(repo);
@@ -89,6 +96,7 @@ public class PackFileTest extends LocalDiskRepositoryTestCase {
protected void tearDown() throws Exception {
if (wc != null)
wc.release();
+ WindowCache.reconfigure(new WindowCacheConfig());
super.tearDown();
}
@@ -120,7 +128,7 @@ public class PackFileTest extends LocalDiskRepositoryTestCase {
public void testWhole_LargeObject() throws Exception {
final int type = Constants.OBJ_BLOB;
- byte[] data = rng.nextBytes(ObjectLoader.STREAM_THRESHOLD + 5);
+ byte[] data = rng.nextBytes(streamThreshold + 5);
RevBlob id = tr.blob(data);
tr.branch("master").commit().add("A", id).create();
tr.packAndPrune();
@@ -213,7 +221,7 @@ public class PackFileTest extends LocalDiskRepositoryTestCase {
public void testDelta_LargeObjectChain() throws Exception {
ObjectInserter.Formatter fmt = new ObjectInserter.Formatter();
- byte[] data0 = new byte[ObjectLoader.STREAM_THRESHOLD + 5];
+ byte[] data0 = new byte[streamThreshold + 5];
Arrays.fill(data0, (byte) 0xf3);
ObjectId id0 = fmt.idFor(Constants.OBJ_BLOB, data0);
@@ -277,64 +285,6 @@ public class PackFileTest extends LocalDiskRepositoryTestCase {
in.close();
}
- public void testDelta_LargeInstructionStream() throws Exception {
- ObjectInserter.Formatter fmt = new ObjectInserter.Formatter();
- byte[] data0 = new byte[32];
- Arrays.fill(data0, (byte) 0xf3);
- ObjectId id0 = fmt.idFor(Constants.OBJ_BLOB, data0);
-
- byte[] data3 = rng.nextBytes(ObjectLoader.STREAM_THRESHOLD + 5);
- ByteArrayOutputStream tmp = new ByteArrayOutputStream();
- DeltaEncoder de = new DeltaEncoder(tmp, data0.length, data3.length);
- de.insert(data3, 0, data3.length);
- byte[] delta3 = tmp.toByteArray();
- assertTrue(delta3.length > ObjectLoader.STREAM_THRESHOLD);
-
- TemporaryBuffer.Heap pack = new TemporaryBuffer.Heap(
- ObjectLoader.STREAM_THRESHOLD + 1024);
- packHeader(pack, 2);
- objectHeader(pack, Constants.OBJ_BLOB, data0.length);
- deflate(pack, data0);
-
- ObjectId id3 = fmt.idFor(Constants.OBJ_BLOB, data3);
- objectHeader(pack, Constants.OBJ_REF_DELTA, delta3.length);
- id0.copyRawTo(pack);
- deflate(pack, delta3);
-
- digest(pack);
- final byte[] raw = pack.toByteArray();
- IndexPack ip = IndexPack.create(repo, new ByteArrayInputStream(raw));
- ip.setFixThin(true);
- ip.index(NullProgressMonitor.INSTANCE);
- ip.renameAndOpenPack();
-
- assertTrue("has blob", wc.has(id3));
-
- ObjectLoader ol = wc.open(id3);
- assertNotNull("created loader", ol);
- assertEquals(Constants.OBJ_BLOB, ol.getType());
- assertEquals(data3.length, ol.getSize());
- assertTrue("is large", ol.isLarge());
- try {
- ol.getCachedBytes();
- fail("Should have thrown LargeObjectException");
- } catch (LargeObjectException tooBig) {
- assertEquals(MessageFormat.format(
- JGitText.get().largeObjectException, id3.name()), tooBig
- .getMessage());
- }
-
- ObjectStream in = ol.openStream();
- assertNotNull("have stream", in);
- assertEquals(Constants.OBJ_BLOB, in.getType());
- assertEquals(data3.length, in.getSize());
- byte[] act = new byte[data3.length];
- IO.readFully(in, act, 0, data3.length);
- assertTrue("same content", Arrays.equals(act, data3));
- assertEquals("stream at EOF", -1, in.read());
- in.close();
- }
-
private byte[] clone(int first, byte[] base) {
byte[] r = new byte[base.length];
System.arraycopy(base, 1, r, 1, r.length - 1);
diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/storage/file/UnpackedObjectTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/storage/file/UnpackedObjectTest.java
index ab7445b17b..547565ea82 100644
--- a/org.eclipse.jgit.test/tst/org/eclipse/jgit/storage/file/UnpackedObjectTest.java
+++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/storage/file/UnpackedObjectTest.java
@@ -67,6 +67,8 @@ import org.eclipse.jgit.lib.ObjectStream;
import org.eclipse.jgit.util.IO;
public class UnpackedObjectTest extends LocalDiskRepositoryTestCase {
+ private int streamThreshold = 16 * 1024;
+
private TestRng rng;
private FileRepository repo;
@@ -75,6 +77,11 @@ public class UnpackedObjectTest extends LocalDiskRepositoryTestCase {
protected void setUp() throws Exception {
super.setUp();
+
+ WindowCacheConfig cfg = new WindowCacheConfig();
+ cfg.setStreamFileThreshold(streamThreshold);
+ WindowCache.reconfigure(cfg);
+
rng = new TestRng(getName());
repo = createBareRepository();
wc = (WindowCursor) repo.newObjectReader();
@@ -83,6 +90,7 @@ public class UnpackedObjectTest extends LocalDiskRepositoryTestCase {
protected void tearDown() throws Exception {
if (wc != null)
wc.release();
+ WindowCache.reconfigure(new WindowCacheConfig());
super.tearDown();
}
@@ -113,7 +121,7 @@ public class UnpackedObjectTest extends LocalDiskRepositoryTestCase {
public void testStandardFormat_LargeObject() throws Exception {
final int type = Constants.OBJ_BLOB;
- byte[] data = rng.nextBytes(ObjectLoader.STREAM_THRESHOLD + 5);
+ byte[] data = rng.nextBytes(streamThreshold + 5);
ObjectId id = new ObjectInserter.Formatter().idFor(type, data);
write(id, compressStandardFormat(type, data));
@@ -268,7 +276,7 @@ public class UnpackedObjectTest extends LocalDiskRepositoryTestCase {
public void testStandardFormat_LargeObject_CorruptZLibStream()
throws Exception {
final int type = Constants.OBJ_BLOB;
- byte[] data = rng.nextBytes(ObjectLoader.STREAM_THRESHOLD + 5);
+ byte[] data = rng.nextBytes(streamThreshold + 5);
ObjectId id = new ObjectInserter.Formatter().idFor(type, data);
byte[] gz = compressStandardFormat(type, data);
gz[gz.length - 1] = 0;
@@ -305,7 +313,7 @@ public class UnpackedObjectTest extends LocalDiskRepositoryTestCase {
public void testStandardFormat_LargeObject_TruncatedZLibStream()
throws Exception {
final int type = Constants.OBJ_BLOB;
- byte[] data = rng.nextBytes(ObjectLoader.STREAM_THRESHOLD + 5);
+ byte[] data = rng.nextBytes(streamThreshold + 5);
ObjectId id = new ObjectInserter.Formatter().idFor(type, data);
byte[] gz = compressStandardFormat(type, data);
byte[] tr = new byte[gz.length - 1];
@@ -339,7 +347,7 @@ public class UnpackedObjectTest extends LocalDiskRepositoryTestCase {
public void testStandardFormat_LargeObject_TrailingGarbage()
throws Exception {
final int type = Constants.OBJ_BLOB;
- byte[] data = rng.nextBytes(ObjectLoader.STREAM_THRESHOLD + 5);
+ byte[] data = rng.nextBytes(streamThreshold + 5);
ObjectId id = new ObjectInserter.Formatter().idFor(type, data);
byte[] gz = compressStandardFormat(type, data);
byte[] tr = new byte[gz.length + 1];
@@ -396,7 +404,7 @@ public class UnpackedObjectTest extends LocalDiskRepositoryTestCase {
public void testPackFormat_LargeObject() throws Exception {
final int type = Constants.OBJ_BLOB;
- byte[] data = rng.nextBytes(ObjectLoader.STREAM_THRESHOLD + 5);
+ byte[] data = rng.nextBytes(streamThreshold + 5);
ObjectId id = new ObjectInserter.Formatter().idFor(type, data);
write(id, compressPackFormat(type, data));
diff --git a/org.eclipse.jgit/resources/org/eclipse/jgit/JGitText.properties b/org.eclipse.jgit/resources/org/eclipse/jgit/JGitText.properties
index b6336283c7..db7a952b27 100644
--- a/org.eclipse.jgit/resources/org/eclipse/jgit/JGitText.properties
+++ b/org.eclipse.jgit/resources/org/eclipse/jgit/JGitText.properties
@@ -335,6 +335,7 @@ repositoryState_rebaseOrApplyMailbox=Rebase/Apply mailbox
repositoryState_rebaseWithMerge=Rebase w/merge
requiredHashFunctionNotAvailable=Required hash function {0} not available.
resolvingDeltas=Resolving deltas
+resultLengthIncorrect=result length incorrect
searchForReuse=Finding sources
sequenceTooLargeForDiffAlgorithm=Sequence too large for difference algorithm.
serviceNotPermitted={0} not permitted
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/JGitText.java b/org.eclipse.jgit/src/org/eclipse/jgit/JGitText.java
index bb6ffe1840..dceefa2f4e 100644
--- a/org.eclipse.jgit/src/org/eclipse/jgit/JGitText.java
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/JGitText.java
@@ -395,6 +395,7 @@ public class JGitText extends TranslationBundle {
/***/ public String repositoryState_rebaseWithMerge;
/***/ public String requiredHashFunctionNotAvailable;
/***/ public String resolvingDeltas;
+ /***/ public String resultLengthIncorrect;
/***/ public String searchForReuse;
/***/ public String sequenceTooLargeForDiffAlgorithm;
/***/ public String serviceNotPermitted;
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/lib/ObjectLoader.java b/org.eclipse.jgit/src/org/eclipse/jgit/lib/ObjectLoader.java
index fe4a7fdc0d..b2cc29426e 100644
--- a/org.eclipse.jgit/src/org/eclipse/jgit/lib/ObjectLoader.java
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/lib/ObjectLoader.java
@@ -61,14 +61,6 @@ import org.eclipse.jgit.util.IO;
*/
public abstract class ObjectLoader {
/**
- * Default setting for the large object threshold.
- * <p>
- * Objects larger than this size must be accessed as a stream through the
- * loader's {@link #openStream()} method.
- */
- public static final int STREAM_THRESHOLD = 5 * 1024 * 1024;
-
- /**
* @return Git in pack object type, see {@link Constants}.
*/
public abstract int getType();
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/storage/file/PackFile.java b/org.eclipse.jgit/src/org/eclipse/jgit/storage/file/PackFile.java
index 5239111623..3ae0b8f390 100644
--- a/org.eclipse.jgit/src/org/eclipse/jgit/storage/file/PackFile.java
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/storage/file/PackFile.java
@@ -63,6 +63,7 @@ import java.util.zip.Inflater;
import org.eclipse.jgit.JGitText;
import org.eclipse.jgit.errors.CorruptObjectException;
+import org.eclipse.jgit.errors.LargeObjectException;
import org.eclipse.jgit.errors.MissingObjectException;
import org.eclipse.jgit.errors.PackInvalidException;
import org.eclipse.jgit.errors.PackMismatchException;
@@ -274,12 +275,11 @@ public class PackFile implements Iterable<PackIndex.MutableEntry> {
return getReverseIdx().findObject(offset);
}
- private final byte[] decompress(final long position, final long totalSize,
- final WindowCursor curs) throws IOException, DataFormatException {
- final byte[] dstbuf = new byte[(int) totalSize];
- if (curs.inflate(this, position, dstbuf, 0) != totalSize)
+ private final void decompress(final long position, final WindowCursor curs,
+ final byte[] dstbuf, final int dstoff, final int dstsz)
+ throws IOException, DataFormatException {
+ if (curs.inflate(this, position, dstbuf, dstoff) != dstsz)
throw new EOFException(MessageFormat.format(JGitText.get().shortCompressedStreamAt, position));
- return dstbuf;
}
final void copyAsIs(PackOutputStream out, LocalObjectToPack src,
@@ -630,10 +630,16 @@ public class PackFile implements Iterable<PackIndex.MutableEntry> {
case Constants.OBJ_BLOB:
case Constants.OBJ_TAG: {
if (sz < curs.getStreamFileThreshold()) {
- byte[] data = decompress(pos + p, sz, curs);
+ byte[] data;
+ try {
+ data = new byte[(int) sz];
+ } catch (OutOfMemoryError tooBig) {
+ return largeWhole(curs, pos, type, sz, p);
+ }
+ decompress(pos + p, curs, data, 0, data.length);
return new ObjectLoader.SmallObject(type, data);
}
- return new LargePackedWholeObject(type, sz, pos, p, this, curs.db);
+ return largeWhole(curs, pos, type, sz, p);
}
case Constants.OBJ_OFS_DELTA: {
@@ -680,44 +686,58 @@ public class PackFile implements Iterable<PackIndex.MutableEntry> {
private ObjectLoader loadDelta(long posSelf, int hdrLen, long sz,
long posBase, WindowCursor curs) throws IOException,
DataFormatException {
- if (curs.getStreamFileThreshold() <= sz) {
- // The delta instruction stream itself is pretty big, and
- // that implies the resulting object is going to be massive.
- // Use only the large delta format here.
- //
- return new LargePackedDeltaObject(posSelf, posBase, hdrLen, //
- this, curs.db);
- }
+ if (Integer.MAX_VALUE <= sz)
+ return largeDelta(posSelf, hdrLen, posBase, curs);
- byte[] data;
+ byte[] base;
int type;
DeltaBaseCache.Entry e = DeltaBaseCache.get(this, posBase);
if (e != null) {
- data = e.data;
+ base = e.data;
type = e.type;
} else {
ObjectLoader p = load(curs, posBase);
- if (p.isLarge()) {
- // The base itself is large. We have to produce a large
- // delta stream as we don't want to build the whole base.
- //
- return new LargePackedDeltaObject(posSelf, posBase, hdrLen,
- this, curs.db);
+ try {
+ base = p.getCachedBytes(curs.getStreamFileThreshold());
+ } catch (LargeObjectException tooBig) {
+ return largeDelta(posSelf, hdrLen, posBase, curs);
}
- data = p.getCachedBytes();
type = p.getType();
- DeltaBaseCache.store(this, posBase, data, type);
+ DeltaBaseCache.store(this, posBase, base, type);
}
- // At this point we have the base, and its small, and the delta
- // stream also is small, so the result object cannot be more than
- // 2x our small size. This occurs if the delta instructions were
- // "copy entire base, literal insert entire delta". Go with the
- // faster small object style at this point.
- //
- data = BinaryDelta.apply(data, decompress(posSelf + hdrLen, sz, curs));
- return new ObjectLoader.SmallObject(type, data);
+ final byte[] delta;
+ try {
+ delta = new byte[(int) sz];
+ } catch (OutOfMemoryError tooBig) {
+ return largeDelta(posSelf, hdrLen, posBase, curs);
+ }
+
+ decompress(posSelf + hdrLen, curs, delta, 0, delta.length);
+ sz = BinaryDelta.getResultSize(delta);
+ if (Integer.MAX_VALUE <= sz)
+ return largeDelta(posSelf, hdrLen, posBase, curs);
+
+ final byte[] result;
+ try {
+ result = new byte[(int) sz];
+ } catch (OutOfMemoryError tooBig) {
+ return largeDelta(posSelf, hdrLen, posBase, curs);
+ }
+
+ BinaryDelta.apply(base, delta, result);
+ return new ObjectLoader.SmallObject(type, result);
+ }
+
+ private LargePackedWholeObject largeWhole(final WindowCursor curs,
+ final long pos, final int type, long sz, int p) {
+ return new LargePackedWholeObject(type, sz, pos, p, this, curs.db);
+ }
+
+ private LargePackedDeltaObject largeDelta(long posObj, int hdrLen,
+ long posBase, WindowCursor wc) {
+ return new LargePackedDeltaObject(posObj, posBase, hdrLen, this, wc.db);
}
byte[] getDeltaHeader(WindowCursor wc, long pos)
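The PackFile.java hunk above applies the same allocation guard in three places: the whole-object buffer, the delta instruction buffer, and the result buffer. Distilled into a standalone sketch (Result, streaming() and cached() are placeholders here, not JGit API):

    // Try the fast in-memory buffer; if the heap refuses the allocation,
    // degrade to a streaming representation instead of failing the read.
    public class AllocationGuardSketch {
        interface Result { /* either cached bytes or a stream-backed loader */ }

        static Result load(long size) {
            if (Integer.MAX_VALUE <= size)
                return streaming();          // size cannot even index a byte[]
            final byte[] buf;
            try {
                buf = new byte[(int) size];
            } catch (OutOfMemoryError tooBig) {
                return streaming();          // allocation refused: degrade
            }
            // ... inflate the object (or delta) into buf, then wrap it ...
            return cached(buf);
        }

        static Result streaming() { return new Result() { /* stub */ }; }
        static Result cached(byte[] data) { return new Result() { /* stub */ }; }
    }
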
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/storage/file/WindowCacheConfig.java b/org.eclipse.jgit/src/org/eclipse/jgit/storage/file/WindowCacheConfig.java
index 95ec6f7c76..c8e0d5584e 100644
--- a/org.eclipse.jgit/src/org/eclipse/jgit/storage/file/WindowCacheConfig.java
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/storage/file/WindowCacheConfig.java
@@ -44,7 +44,7 @@
package org.eclipse.jgit.storage.file;
import org.eclipse.jgit.lib.Config;
-import org.eclipse.jgit.lib.ObjectLoader;
+import org.eclipse.jgit.storage.pack.PackConfig;
/** Configuration parameters for {@link WindowCache}. */
public class WindowCacheConfig {
@@ -73,7 +73,7 @@ public class WindowCacheConfig {
packedGitWindowSize = 8 * KB;
packedGitMMAP = false;
deltaBaseCacheLimit = 10 * MB;
- streamFileThreshold = ObjectLoader.STREAM_THRESHOLD;
+ streamFileThreshold = PackConfig.DEFAULT_BIG_FILE_THRESHOLD;
}
/**
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/BinaryDelta.java b/org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/BinaryDelta.java
index 1d433d78a6..eead163ee8 100644
--- a/org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/BinaryDelta.java
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/BinaryDelta.java
@@ -115,6 +115,25 @@ public class BinaryDelta {
* @return patched base
*/
public static final byte[] apply(final byte[] base, final byte[] delta) {
+ return apply(base, delta, null);
+ }
+
+ /**
+ * Apply the changes defined by delta to the data in base, yielding a new
+ * array of bytes.
+ *
+ * @param base
+ * some byte representing an object of some kind.
+ * @param delta
+ * a git pack delta defining the transform from one version to
+ * another.
+ * @param result
+ * array to store the result into. If null the result will be
+ * allocated and returned.
+ * @return either {@code result}, or the result array allocated.
+ */
+ public static final byte[] apply(final byte[] base, final byte[] delta,
+ byte[] result) {
int deltaPtr = 0;
// Length of the base object (a variable length int).
@@ -140,7 +159,12 @@ public class BinaryDelta {
shift += 7;
} while ((c & 0x80) != 0);
- final byte[] result = new byte[resLen];
+ if (result == null)
+ result = new byte[resLen];
+ else if (result.length != resLen)
+ throw new IllegalArgumentException(
+ JGitText.get().resultLengthIncorrect);
+
int resultPtr = 0;
while (deltaPtr < delta.length) {
final int cmd = delta[deltaPtr++] & 0xff;