summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorGal Paikin <paiking@google.com>2020-12-07 15:18:34 +0100
committerGal Paikin <paiking@google.com>2021-01-27 02:22:51 -0500
commit31e3cb4375f92e56f27b83c4583523c14a712b2d (patch)
tree81ed497ee77f7862e067d28590e33b34be71459b
parenta6b90b7ec5c238692dc323e25ef927e4433edb1d (diff)
downloadjgit-31e3cb4375f92e56f27b83c4583523c14a712b2d.tar.gz
jgit-31e3cb4375f92e56f27b83c4583523c14a712b2d.zip
Compare getting all refs except specific refs with seek and with filter
There are currently two ways to get all refs except a specific ref, we add two methods that perform both and compare the two different approaches. This change adds two methods that compares the two different approaches of such query: 1. Get all the refs, and then filter by refs that don't start with the prefix (current approach). 2. Get all refs until encountering a ref that is part of the prefix we should exclude, skip using seekPastPrefix, and continue (new approach). This works since the refs are sorted. Specifically in Gerrit, we often have thousands of refs that are not refs/changes, and millions of refs/changes, hence the second approach should be much faster. In Jgit in general it's still expected to provide a better result even if we're skipping a smaller chunk of the refs since the complexity here is O(logn) with a binary search, rather than O(number of skipped refs). We ran this benchmark on a big chunk of chromium/src's reftable. To run it, we first create the reftable: git ls-remote https://chromium.googlesource.com/chromium/src > lsr bazel build org.eclipse.jgit.pgm:jgit && rm -rf /tmp/reftable* && \ ./bazel-bin/org.eclipse.jgit.pgm/jgit debug-benchmark-reftable \ --test write_stack lsr /tmp/reftable Then, we actually test the created reftable. Note that we can't test all of them at once since there are multiple ones, but below is a good example. bazel build org.eclipse.jgit.pgm:jgit && \ ./bazel-bin/org.eclipse.jgit.pgm/jgit debug-benchmark-reftable \ --test get_refs_excluding_ref --ref refs/changes \ lsr /tmp/reftable/000000000001-0000001e0371.ref Result: total time the action took using seek: 36925 usec total time the action took using filter: 874382 usec number of refs that start with prefix: 4266. number of refs that don't start with prefix: 1962695. Similarly for Android's biggest repository, platform/frameworks/base (still only partial result): total time the action took using seek: 9020 usec total time the action took using filter: 143166 usec number of refs that start with prefix: 296. number of refs that don't start with prefix: 60400. In conclusion, it's easy to see an improvement of a factor of 15-20x for large Gerrit repositories! Signed-off-by: Gal Paikin <paiking@google.com> Change-Id: I36d9b63eb259804c774864429cf2c761cd099cc3
-rw-r--r--org.eclipse.jgit.pgm/src/org/eclipse/jgit/pgm/debug/BenchmarkReftable.java54
1 files changed, 53 insertions, 1 deletions
diff --git a/org.eclipse.jgit.pgm/src/org/eclipse/jgit/pgm/debug/BenchmarkReftable.java b/org.eclipse.jgit.pgm/src/org/eclipse/jgit/pgm/debug/BenchmarkReftable.java
index 630fac549e..f23f4cf0ea 100644
--- a/org.eclipse.jgit.pgm/src/org/eclipse/jgit/pgm/debug/BenchmarkReftable.java
+++ b/org.eclipse.jgit.pgm/src/org/eclipse/jgit/pgm/debug/BenchmarkReftable.java
@@ -23,7 +23,9 @@ import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
+import java.util.ArrayList;
import java.util.List;
+import java.util.stream.Collectors;
import org.eclipse.jgit.internal.storage.file.FileReftableStack;
import org.eclipse.jgit.internal.storage.io.BlockSource;
@@ -47,6 +49,7 @@ class BenchmarkReftable extends TextBuiltin {
SEEK_COLD, SEEK_HOT,
BY_ID_COLD, BY_ID_HOT,
WRITE_STACK,
+ GET_REFS_EXCLUDING_REF
}
@Option(name = "--tries")
@@ -91,7 +94,11 @@ class BenchmarkReftable extends TextBuiltin {
case WRITE_STACK:
writeStack();
break;
- }
+ case GET_REFS_EXCLUDING_REF :
+ getRefsExcludingWithSeekPast(ref);
+ getRefsExcludingWithFilter(ref);
+ break;
+ }
}
private void printf(String fmt, Object... args) throws IOException {
@@ -315,4 +322,49 @@ class BenchmarkReftable extends TextBuiltin {
printf("%12s %10d usec %9.1f usec/run %5d runs", "reftable",
tot / 1000, (((double) tot) / tries) / 1000, tries);
}
+
+ @SuppressWarnings({"nls", "boxing"})
+ private void getRefsExcludingWithFilter(String prefix) throws Exception {
+ long startTime = System.nanoTime();
+ List<Ref> allRefs = new ArrayList<>();
+ try (FileInputStream in = new FileInputStream(reftablePath);
+ BlockSource src = BlockSource.from(in);
+ ReftableReader reader = new ReftableReader(src)) {
+ try (RefCursor rc = reader.allRefs()) {
+ while (rc.next()) {
+ allRefs.add(rc.getRef());
+ }
+ }
+ }
+ int total = allRefs.size();
+ allRefs = allRefs.stream().filter(r -> r.getName().startsWith(prefix)).collect(Collectors.toList());
+ int notStartWithPrefix = allRefs.size();
+ int startWithPrefix = total - notStartWithPrefix;
+ long totalTime = System.nanoTime() - startTime;
+ printf("total time the action took using filter: %10d usec", totalTime / 1000);
+ printf("number of refs that start with prefix: %d", startWithPrefix);
+ printf("number of refs that don't start with prefix: %d", notStartWithPrefix);
+ }
+
+ @SuppressWarnings({"nls", "boxing"})
+ private void getRefsExcludingWithSeekPast(String prefix) throws Exception {
+ long start = System.nanoTime();
+ try (FileInputStream in = new FileInputStream(reftablePath);
+ BlockSource src = BlockSource.from(in);
+ ReftableReader reader = new ReftableReader(src)) {
+ try (RefCursor rc = reader.allRefs()) {
+ while (rc.next()) {
+ if (rc.getRef().getName().startsWith(prefix)) {
+ break;
+ }
+ }
+ rc.seekPastPrefix(prefix);
+ while (rc.next()) {
+ rc.getRef();
+ }
+ }
+ }
+ long tot = System.nanoTime() - start;
+ printf("total time the action took using seek: %10d usec", tot / 1000);
+ }
}