diff options
author | roberto <roberto.tyley@guardian.co.uk> | 2010-12-09 23:40:34 +0000 |
---|---|---|
committer | roberto <roberto.tyley@guardian.co.uk> | 2010-12-09 23:46:47 +0000 |
commit | 941b3d8a8105333ccbc15e3b57574fce6da6f006 (patch) | |
tree | 1696100d15d2655e2e70c5efc8cdac646599cdb5 | |
parent | c181e1ab8ac52ff8ce02f95affc8f79cfbfff323 (diff) | |
download | jgit-941b3d8a8105333ccbc15e3b57574fce6da6f006.tar.gz jgit-941b3d8a8105333ccbc15e3b57574fce6da6f006.zip |
IndexPack: Remove blob-streaming size threshold
Always use streaming (for SHA-checksum & collision detection)
when indexing whole blobs, regardless of their size.
Positives:
* benefits of bugfix #312868 will apply to all runtimes, without
additional conf for mem-constrained JVMs (5MB huge for some)
* no byte array allocation
(re-uses readBuffer instead of allocating new full-size array)
* mildly better overall performance
(given the usual blob-does-not-need-collision-checking case)
* removes unnecessary code
Negative:
* doubles the disk IO for a blob comparision
(comparitively rare occurance)
I perf-tested a range of threshold sizes against a random selection
of packfiles I found on my harddrive, the results are here:
https://spreadsheets.google.com/ccc?key=tLCQElyyd2RKN9QevfvgwGQ&hl=en_GB#gid=1
My interpretation of the results is that the streaming size threshold
isn't beneficial (actually seems to be very slightly detrimental) -so
we should just get rid of it. This tallies with some of the comments
Shawn & I had for the default value of streamFileThreshold in the
review for I862afd4c:
http://egit.eclipse.org/r/#patch,sidebyside,2040,2,org.eclipse.jgit/src/org/eclipse/jgit/transport/IndexPack.java
The perf-test code is here: https://gist.github.com/735402
It's a bit scruffy but basically does 10 runs (in randomised order)
for each threshold size on various packfiles, waiting a second
between each pack-indexing to allow GC to catch up. I know it's not
perfect - proper perf testing is hard to do :-)
-rw-r--r-- | org.eclipse.jgit.test/tst/org/eclipse/jgit/transport/IndexPackTest.java | 1 | ||||
-rw-r--r-- | org.eclipse.jgit/src/org/eclipse/jgit/transport/IndexPack.java | 9 |
2 files changed, 1 insertions, 9 deletions
diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/transport/IndexPackTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/transport/IndexPackTest.java index 6015428d99..65929059ab 100644 --- a/org.eclipse.jgit.test/tst/org/eclipse/jgit/transport/IndexPackTest.java +++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/transport/IndexPackTest.java @@ -166,7 +166,6 @@ public class IndexPackTest extends RepositoryTestCase { final byte[] raw = pack.toByteArray(); IndexPack ip = IndexPack.create(db, new ByteArrayInputStream(raw)); - ip.setStreamFileThreshold(1); ip.index(NullProgressMonitor.INSTANCE); ip.renameAndOpenPack(); } diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/transport/IndexPack.java b/org.eclipse.jgit/src/org/eclipse/jgit/transport/IndexPack.java index 8feb483fe2..bb9edc2669 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/transport/IndexPack.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/transport/IndexPack.java @@ -145,8 +145,6 @@ public class IndexPack { private final Repository repo; - private int streamFileThreshold; - /** * Object database used for loading existing objects */ @@ -244,7 +242,6 @@ public class IndexPack { public IndexPack(final Repository db, final InputStream src, final File dstBase) throws IOException { repo = db; - streamFileThreshold = 5 * (1 << 20); // A reasonable default for now. objectDatabase = db.getObjectDatabase().newCachedDatabase(); in = src; inflater = new InflaterStream(); @@ -268,10 +265,6 @@ public class IndexPack { } } - void setStreamFileThreshold(int sz) { - streamFileThreshold = sz; - } - /** * Set the pack index file format version this instance will create. * @@ -860,7 +853,7 @@ public class IndexPack { objectDigest.update((byte) 0); boolean checkContentLater = false; - if (type == Constants.OBJ_BLOB && sz >= streamFileThreshold) { + if (type == Constants.OBJ_BLOB) { InputStream inf = inflate(Source.INPUT, sz); long cnt = 0; while (cnt < sz) { |