From 31e3cb4375f92e56f27b83c4583523c14a712b2d Mon Sep 17 00:00:00 2001 From: Gal Paikin <paiking@google.com> Date: Mon, 7 Dec 2020 15:18:34 +0100 Subject: Compare getting all refs except specific refs with seek and with filter There are currently two ways to get all refs except a specific ref, we add two methods that perform both and compare the two different approaches. This change adds two methods that compares the two different approaches of such query: 1. Get all the refs, and then filter by refs that don't start with the prefix (current approach). 2. Get all refs until encountering a ref that is part of the prefix we should exclude, skip using seekPastPrefix, and continue (new approach). This works since the refs are sorted. Specifically in Gerrit, we often have thousands of refs that are not refs/changes, and millions of refs/changes, hence the second approach should be much faster. In Jgit in general it's still expected to provide a better result even if we're skipping a smaller chunk of the refs since the complexity here is O(logn) with a binary search, rather than O(number of skipped refs). We ran this benchmark on a big chunk of chromium/src's reftable. To run it, we first create the reftable: git ls-remote https://chromium.googlesource.com/chromium/src > lsr bazel build org.eclipse.jgit.pgm:jgit && rm -rf /tmp/reftable* && \ ./bazel-bin/org.eclipse.jgit.pgm/jgit debug-benchmark-reftable \ --test write_stack lsr /tmp/reftable Then, we actually test the created reftable. Note that we can't test all of them at once since there are multiple ones, but below is a good example. bazel build org.eclipse.jgit.pgm:jgit && \ ./bazel-bin/org.eclipse.jgit.pgm/jgit debug-benchmark-reftable \ --test get_refs_excluding_ref --ref refs/changes \ lsr /tmp/reftable/000000000001-0000001e0371.ref Result: total time the action took using seek: 36925 usec total time the action took using filter: 874382 usec number of refs that start with prefix: 4266. number of refs that don't start with prefix: 1962695. Similarly for Android's biggest repository, platform/frameworks/base (still only partial result): total time the action took using seek: 9020 usec total time the action took using filter: 143166 usec number of refs that start with prefix: 296. number of refs that don't start with prefix: 60400. In conclusion, it's easy to see an improvement of a factor of 15-20x for large Gerrit repositories! Signed-off-by: Gal Paikin <paiking@google.com> Change-Id: I36d9b63eb259804c774864429cf2c761cd099cc3 --- .../eclipse/jgit/pgm/debug/BenchmarkReftable.java | 54 +++++++++++++++++++++- 1 file changed, 53 insertions(+), 1 deletion(-) diff --git a/org.eclipse.jgit.pgm/src/org/eclipse/jgit/pgm/debug/BenchmarkReftable.java b/org.eclipse.jgit.pgm/src/org/eclipse/jgit/pgm/debug/BenchmarkReftable.java index 630fac549e..f23f4cf0ea 100644 --- a/org.eclipse.jgit.pgm/src/org/eclipse/jgit/pgm/debug/BenchmarkReftable.java +++ b/org.eclipse.jgit.pgm/src/org/eclipse/jgit/pgm/debug/BenchmarkReftable.java @@ -23,7 +23,9 @@ import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStreamReader; +import java.util.ArrayList; import java.util.List; +import java.util.stream.Collectors; import org.eclipse.jgit.internal.storage.file.FileReftableStack; import org.eclipse.jgit.internal.storage.io.BlockSource; @@ -47,6 +49,7 @@ class BenchmarkReftable extends TextBuiltin { SEEK_COLD, SEEK_HOT, BY_ID_COLD, BY_ID_HOT, WRITE_STACK, + GET_REFS_EXCLUDING_REF } @Option(name = "--tries") @@ -91,7 +94,11 @@ class BenchmarkReftable extends TextBuiltin { case WRITE_STACK: writeStack(); break; - } + case GET_REFS_EXCLUDING_REF : + getRefsExcludingWithSeekPast(ref); + getRefsExcludingWithFilter(ref); + break; + } } private void printf(String fmt, Object... args) throws IOException { @@ -315,4 +322,49 @@ class BenchmarkReftable extends TextBuiltin { printf("%12s %10d usec %9.1f usec/run %5d runs", "reftable", tot / 1000, (((double) tot) / tries) / 1000, tries); } + + @SuppressWarnings({"nls", "boxing"}) + private void getRefsExcludingWithFilter(String prefix) throws Exception { + long startTime = System.nanoTime(); + List<Ref> allRefs = new ArrayList<>(); + try (FileInputStream in = new FileInputStream(reftablePath); + BlockSource src = BlockSource.from(in); + ReftableReader reader = new ReftableReader(src)) { + try (RefCursor rc = reader.allRefs()) { + while (rc.next()) { + allRefs.add(rc.getRef()); + } + } + } + int total = allRefs.size(); + allRefs = allRefs.stream().filter(r -> r.getName().startsWith(prefix)).collect(Collectors.toList()); + int notStartWithPrefix = allRefs.size(); + int startWithPrefix = total - notStartWithPrefix; + long totalTime = System.nanoTime() - startTime; + printf("total time the action took using filter: %10d usec", totalTime / 1000); + printf("number of refs that start with prefix: %d", startWithPrefix); + printf("number of refs that don't start with prefix: %d", notStartWithPrefix); + } + + @SuppressWarnings({"nls", "boxing"}) + private void getRefsExcludingWithSeekPast(String prefix) throws Exception { + long start = System.nanoTime(); + try (FileInputStream in = new FileInputStream(reftablePath); + BlockSource src = BlockSource.from(in); + ReftableReader reader = new ReftableReader(src)) { + try (RefCursor rc = reader.allRefs()) { + while (rc.next()) { + if (rc.getRef().getName().startsWith(prefix)) { + break; + } + } + rc.seekPastPrefix(prefix); + while (rc.next()) { + rc.getRef(); + } + } + } + long tot = System.nanoTime() - start; + printf("total time the action took using seek: %10d usec", tot / 1000); + } } -- cgit v1.2.3