diff options
Diffstat (limited to 'org.eclipse.jgit.benchmarks')
3 files changed, 486 insertions, 13 deletions
diff --git a/org.eclipse.jgit.benchmarks/pom.xml b/org.eclipse.jgit.benchmarks/pom.xml index d8a616f09e..87d2bb31be 100644 --- a/org.eclipse.jgit.benchmarks/pom.xml +++ b/org.eclipse.jgit.benchmarks/pom.xml @@ -16,7 +16,7 @@ <parent> <groupId>org.eclipse.jgit</groupId> <artifactId>org.eclipse.jgit-parent</artifactId> - <version>7.0.0-SNAPSHOT</version> + <version>7.3.0-SNAPSHOT</version> </parent> <artifactId>org.eclipse.jgit.benchmarks</artifactId> @@ -52,6 +52,10 @@ <artifactId>org.eclipse.jgit.junit</artifactId> <version>${project.version}</version> </dependency> + <dependency> + <groupId>junit</groupId> + <artifactId>junit</artifactId> + </dependency> </dependencies> <build> @@ -79,7 +83,6 @@ <plugin> <groupId>org.apache.maven.plugins</groupId> <artifactId>maven-compiler-plugin</artifactId> - <version>${maven-compiler-plugin-version}</version> <configuration> <encoding>UTF-8</encoding> <release>${java.version}</release> diff --git a/org.eclipse.jgit.benchmarks/src/org/eclipse/jgit/benchmarks/GetRefsBenchmark.java b/org.eclipse.jgit.benchmarks/src/org/eclipse/jgit/benchmarks/GetRefsBenchmark.java index 52a881bd11..44e862e7c8 100644 --- a/org.eclipse.jgit.benchmarks/src/org/eclipse/jgit/benchmarks/GetRefsBenchmark.java +++ b/org.eclipse.jgit.benchmarks/src/org/eclipse/jgit/benchmarks/GetRefsBenchmark.java @@ -24,10 +24,12 @@ import java.util.stream.IntStream; import org.eclipse.jgit.api.Git; import org.eclipse.jgit.api.errors.GitAPIException; +import org.eclipse.jgit.internal.storage.file.FileReftableDatabase; import org.eclipse.jgit.internal.storage.file.FileRepository; import org.eclipse.jgit.lib.BatchRefUpdate; import org.eclipse.jgit.lib.ConfigConstants; import org.eclipse.jgit.lib.Constants; +import org.eclipse.jgit.lib.CoreConfig.TrustStat; import org.eclipse.jgit.lib.ObjectId; import org.eclipse.jgit.lib.Repository; import org.eclipse.jgit.lib.RepositoryCache; @@ -38,8 +40,10 @@ import org.eclipse.jgit.revwalk.RevWalk; import org.eclipse.jgit.transport.ReceiveCommand; import org.eclipse.jgit.util.FS; import org.eclipse.jgit.util.FileUtils; +import org.junit.Assume; import org.openjdk.jmh.annotations.Benchmark; import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Fork; import org.openjdk.jmh.annotations.Measurement; import org.openjdk.jmh.annotations.Mode; import org.openjdk.jmh.annotations.OutputTimeUnit; @@ -66,11 +70,14 @@ public class GetRefsBenchmark { @Param({ "true", "false" }) boolean useRefTable; - @Param({ "100", "2500", "10000", "50000" }) + @Param({ "true", "false" }) + boolean autoRefresh; + + @Param({ "100", "1000", "10000", "100000" }) int numBranches; - @Param({ "true", "false" }) - boolean trustFolderStat; + @Param({ "ALWAYS", "AFTER_OPEN", "NEVER" }) + TrustStat trustStat; List<String> branches = new ArrayList<>(numBranches); @@ -81,10 +88,13 @@ public class GetRefsBenchmark { @Setup @SuppressWarnings("boxing") public void setupBenchmark() throws IOException, GitAPIException { + // if we use RefDirectory skip autoRefresh = false + Assume.assumeTrue(useRefTable || autoRefresh); + String firstBranch = "firstbranch"; testDir = Files.createDirectory(Paths.get("testrepos")); - String repoName = "branches-" + numBranches + "-trustFolderStat-" - + trustFolderStat + "-" + refDatabaseType(); + String repoName = "branches-" + numBranches + "-trustStat-" + + trustStat + "-" + refDatabaseType(); Path workDir = testDir.resolve(repoName); Path repoPath = workDir.resolve(".git"); Git git = Git.init().setDirectory(workDir.toFile()).call(); @@ -97,10 +107,13 @@ public class GetRefsBenchmark { ((FileRepository) git.getRepository()).convertRefStorage( ConfigConstants.CONFIG_REF_STORAGE_REFTABLE, false, false); + FileReftableDatabase refdb = (FileReftableDatabase) git + .getRepository().getRefDatabase(); + refdb.setAutoRefresh(autoRefresh); } else { - cfg.setBoolean(ConfigConstants.CONFIG_CORE_SECTION, null, - ConfigConstants.CONFIG_KEY_TRUSTFOLDERSTAT, - trustFolderStat); + cfg.setEnum(ConfigConstants.CONFIG_CORE_SECTION, null, + ConfigConstants.CONFIG_KEY_TRUST_STAT, + trustStat); } cfg.setInt(ConfigConstants.CONFIG_RECEIVE_SECTION, null, "maxCommandBytes", Integer.MAX_VALUE); @@ -112,7 +125,8 @@ public class GetRefsBenchmark { System.out.println("Preparing test"); System.out.println("- repository: \t\t" + repoPath); System.out.println("- refDatabase: \t\t" + refDatabaseType()); - System.out.println("- trustFolderStat: \t" + trustFolderStat); + System.out.println("- autoRefresh: \t\t" + autoRefresh); + System.out.println("- trustStat: \t" + trustStat); System.out.println("- branches: \t\t" + numBranches); BatchRefUpdate u = repo.getRefDatabase().newBatchUpdate(); @@ -152,7 +166,8 @@ public class GetRefsBenchmark { @BenchmarkMode({ Mode.AverageTime }) @OutputTimeUnit(TimeUnit.MICROSECONDS) @Warmup(iterations = 2, time = 100, timeUnit = TimeUnit.MILLISECONDS) - @Measurement(iterations = 2, time = 10, timeUnit = TimeUnit.SECONDS) + @Measurement(iterations = 2, time = 5, timeUnit = TimeUnit.SECONDS) + @Fork(2) public void testGetExactRef(Blackhole blackhole, BenchmarkState state) throws IOException { String branchName = state.branches @@ -164,7 +179,8 @@ public class GetRefsBenchmark { @BenchmarkMode({ Mode.AverageTime }) @OutputTimeUnit(TimeUnit.MICROSECONDS) @Warmup(iterations = 2, time = 100, timeUnit = TimeUnit.MILLISECONDS) - @Measurement(iterations = 2, time = 10, timeUnit = TimeUnit.SECONDS) + @Measurement(iterations = 2, time = 5, timeUnit = TimeUnit.SECONDS) + @Fork(2) public void testGetRefsByPrefix(Blackhole blackhole, BenchmarkState state) throws IOException { String branchPrefix = "refs/heads/branch/" + branchIndex.nextInt(100) diff --git a/org.eclipse.jgit.benchmarks/src/org/eclipse/jgit/benchmarks/RawTextBenchmark.java b/org.eclipse.jgit.benchmarks/src/org/eclipse/jgit/benchmarks/RawTextBenchmark.java new file mode 100644 index 0000000000..19297ebebb --- /dev/null +++ b/org.eclipse.jgit.benchmarks/src/org/eclipse/jgit/benchmarks/RawTextBenchmark.java @@ -0,0 +1,454 @@ +/* + * Copyright (C) 2022, Matthias Sohn <matthias.sohn@sap.com> and others + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Distribution License v. 1.0 which is available at + * https://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + */ +package org.eclipse.jgit.benchmarks; + +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Fork; +import org.openjdk.jmh.annotations.Measurement; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.OutputTimeUnit; +import org.openjdk.jmh.annotations.Param; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.TearDown; +import org.openjdk.jmh.annotations.Warmup; +import org.openjdk.jmh.infra.Blackhole; +import org.openjdk.jmh.runner.Runner; +import org.openjdk.jmh.runner.RunnerException; +import org.openjdk.jmh.runner.options.Options; +import org.openjdk.jmh.runner.options.OptionsBuilder; + +import java.util.concurrent.TimeUnit; + +import static org.eclipse.jgit.diff.RawText.getBufferSize; +import static org.eclipse.jgit.diff.RawText.isBinary; +import static org.eclipse.jgit.diff.RawText.isCrLfText; + +@State(Scope.Thread) +public class RawTextBenchmark { + + @State(Scope.Benchmark) + public static class BenchmarkState { + + @Param({"1", "2", "3", "4", "5", "6"}) + int testIndex; + + @Param({"false", "true"}) + boolean complete; + + byte[] bytes; + + @Setup + public void setupBenchmark() { + switch (testIndex) { + case 1: { + byte[] tmpBytes = "a".repeat(102400).getBytes(); + bytes = tmpBytes; + break; + } + case 2: { + byte[] tmpBytes = "a".repeat(102400).getBytes(); + byte[] tmpBytes2 = new byte[tmpBytes.length + 1]; + System.arraycopy(tmpBytes, 0, tmpBytes2, 0, tmpBytes.length); + tmpBytes2[500] = '\0'; + tmpBytes2[tmpBytes.length] = '\0'; + bytes = tmpBytes2; + break; + } + case 3: { + byte[] tmpBytes = "a".repeat(102400).getBytes(); + byte[] tmpBytes2 = new byte[tmpBytes.length + 1]; + System.arraycopy(tmpBytes, 0, tmpBytes2, 0, tmpBytes.length); + tmpBytes2[500] = '\r'; + tmpBytes2[tmpBytes.length] = '\r'; + bytes = tmpBytes2; + break; + } + case 4: { + byte[] tmpBytes = "a".repeat(102400).getBytes(); + byte[] tmpBytes2 = new byte[tmpBytes.length + 1]; + System.arraycopy(tmpBytes, 0, tmpBytes2, 0, tmpBytes.length); + tmpBytes2[499] = '\r'; + tmpBytes2[500] = '\n'; + tmpBytes2[tmpBytes.length - 1] = '\r'; + tmpBytes2[tmpBytes.length] = '\n'; + bytes = tmpBytes2; + break; + } + case 5: { + byte[] tmpBytes = "a".repeat(102400).getBytes(); + tmpBytes[0] = '\0'; + bytes = tmpBytes; + break; + } + case 6: { + byte[] tmpBytes = "a".repeat(102400).getBytes(); + tmpBytes[0] = '\r'; + bytes = tmpBytes; + break; + } + default: + } + } + + @TearDown + public void teardown() { + } + } + + @Benchmark + @BenchmarkMode({Mode.AverageTime}) + @OutputTimeUnit(TimeUnit.NANOSECONDS) + @Warmup(iterations = 2, time = 5, timeUnit = TimeUnit.SECONDS) + @Measurement(iterations = 2, time = 5, timeUnit = TimeUnit.SECONDS) + @Fork(1) + public void testIsCrLfTextOld(Blackhole blackhole, BenchmarkState state) { + blackhole.consume( + isCrLfTextOld( + state.bytes, + state.bytes.length, + state.complete + ) + ); + } + + @Benchmark + @BenchmarkMode({Mode.AverageTime}) + @OutputTimeUnit(TimeUnit.NANOSECONDS) + @Warmup(iterations = 2, time = 5, timeUnit = TimeUnit.SECONDS) + @Measurement(iterations = 2, time = 5, timeUnit = TimeUnit.SECONDS) + @Fork(1) + public void testIsCrLfTextNewCandidate1(Blackhole blackhole, BenchmarkState state) { + blackhole.consume( + isCrLfTextNewCandidate1( + state.bytes, + state.bytes.length, + state.complete + ) + ); + } + + @Benchmark + @BenchmarkMode({Mode.AverageTime}) + @OutputTimeUnit(TimeUnit.NANOSECONDS) + @Warmup(iterations = 2, time = 5, timeUnit = TimeUnit.SECONDS) + @Measurement(iterations = 2, time = 5, timeUnit = TimeUnit.SECONDS) + @Fork(1) + public void testIsCrLfTextNewCandidate2(Blackhole blackhole, BenchmarkState state) { + blackhole.consume( + isCrLfTextNewCandidate2( + state.bytes, + state.bytes.length, + state.complete + ) + ); + } + + @Benchmark + @BenchmarkMode({Mode.AverageTime}) + @OutputTimeUnit(TimeUnit.NANOSECONDS) + @Warmup(iterations = 2, time = 5, timeUnit = TimeUnit.SECONDS) + @Measurement(iterations = 2, time = 5, timeUnit = TimeUnit.SECONDS) + @Fork(1) + public void testIsCrLfTextNewCandidate3(Blackhole blackhole, BenchmarkState state) { + blackhole.consume( + isCrLfTextNewCandidate3( + state.bytes, + state.bytes.length, + state.complete + ) + ); + } + + @Benchmark + @BenchmarkMode({Mode.AverageTime}) + @OutputTimeUnit(TimeUnit.NANOSECONDS) + @Warmup(iterations = 2, time = 5, timeUnit = TimeUnit.SECONDS) + @Measurement(iterations = 2, time = 5, timeUnit = TimeUnit.SECONDS) + @Fork(1) + public void testIsCrLfTextNew(Blackhole blackhole, BenchmarkState state) { + blackhole.consume( + isCrLfText( + state.bytes, + state.bytes.length, + state.complete + ) + ); + } + + @Benchmark + @BenchmarkMode({Mode.AverageTime}) + @OutputTimeUnit(TimeUnit.NANOSECONDS) + @Warmup(iterations = 2, time = 5, timeUnit = TimeUnit.SECONDS) + @Measurement(iterations = 2, time = 5, timeUnit = TimeUnit.SECONDS) + @Fork(1) + public void testIsBinaryOld(Blackhole blackhole, BenchmarkState state) { + blackhole.consume( + isBinaryOld( + state.bytes, + state.bytes.length, + state.complete + ) + ); + } + + + @Benchmark + @BenchmarkMode({Mode.AverageTime}) + @OutputTimeUnit(TimeUnit.NANOSECONDS) + @Warmup(iterations = 2, time = 5, timeUnit = TimeUnit.SECONDS) + @Measurement(iterations = 2, time = 5, timeUnit = TimeUnit.SECONDS) + @Fork(1) + public void testIsBinaryNew(Blackhole blackhole, BenchmarkState state) { + blackhole.consume( + isBinary( + state.bytes, + state.bytes.length, + state.complete + ) + ); + } + + + /** + * Determine heuristically whether a byte array represents binary (as + * opposed to text) content. + * + * @param raw + * the raw file content. + * @param length + * number of bytes in {@code raw} to evaluate. This should be + * {@code raw.length} unless {@code raw} was over-allocated by + * the caller. + * @param complete + * whether {@code raw} contains the whole data + * @return true if raw is likely to be a binary file, false otherwise + * @since 6.0 + */ + public static boolean isBinaryOld(byte[] raw, int length, boolean complete) { + // Similar heuristic as C Git. Differences: + // - limited buffer size; may be only the beginning of a large blob + // - no counting of printable vs. non-printable bytes < 0x20 and 0x7F + int maxLength = getBufferSize(); + boolean isComplete = complete; + if (length > maxLength) { + // We restrict the length in all cases to getBufferSize() to get + // predictable behavior. Sometimes we load streams, and sometimes we + // have the full data in memory. With streams, we never look at more + // than the first getBufferSize() bytes. If we looked at more when + // we have the full data, different code paths in JGit might come to + // different conclusions. + length = maxLength; + isComplete = false; + } + byte last = 'x'; // Just something inconspicuous. + for (int ptr = 0; ptr < length; ptr++) { + byte curr = raw[ptr]; + if (isBinary(curr, last)) { + return true; + } + last = curr; + } + if (isComplete) { + // Buffer contains everything... + return last == '\r'; // ... so this must be a lone CR + } + return false; + } + + /** + * Determine heuristically whether a byte array represents text content + * using CR-LF as line separator. + * + * @param raw the raw file content. + * @param length number of bytes in {@code raw} to evaluate. + * @param complete whether {@code raw} contains the whole data + * @return {@code true} if raw is likely to be CR-LF delimited text, + * {@code false} otherwise + * @since 6.0 + */ + public static boolean isCrLfTextOld(byte[] raw, int length, boolean complete) { + boolean has_crlf = false; + byte last = 'x'; // Just something inconspicuous + for (int ptr = 0; ptr < length; ptr++) { + byte curr = raw[ptr]; + if (isBinary(curr, last)) { + return false; + } + if (curr == '\n' && last == '\r') { + has_crlf = true; + } + last = curr; + } + if (last == '\r') { + if (complete) { + // Lone CR: it's binary after all. + return false; + } + // Tough call. If the next byte, which we don't have, would be a + // '\n', it'd be a CR-LF text, otherwise it'd be binary. Just decide + // based on what we already scanned; it wasn't binary until now. + } + return has_crlf; + } + + /** + * Determine heuristically whether a byte array represents text content + * using CR-LF as line separator. + * + * @param raw + * the raw file content. + * @param length + * number of bytes in {@code raw} to evaluate. + * @return {@code true} if raw is likely to be CR-LF delimited text, + * {@code false} otherwise + * @param complete + * whether {@code raw} contains the whole data + * @since 6.0 + */ + public static boolean isCrLfTextNewCandidate1(byte[] raw, int length, boolean complete) { + boolean has_crlf = false; + + // first detect empty + if (length <= 0) { + return false; + } + + // next detect '\0' + for (int reversePtr = length - 1; reversePtr >= 0; --reversePtr) { + if (raw[reversePtr] == '\0') { + return false; + } + } + + // if '\r' be last, then if complete then return non-crlf + if (raw[length - 1] == '\r' && complete) { + return false; + } + + for (int ptr = 0; ptr < length - 1; ptr++) { + byte curr = raw[ptr]; + if (curr == '\r') { + byte next = raw[ptr + 1]; + if (next != '\n') { + return false; + } + // else + // we have crlf here + has_crlf = true; + // as next is '\n', it can never be '\r', just skip it from next check + ++ptr; + } + } + + return has_crlf; + } + + /** + * Determine heuristically whether a byte array represents text content + * using CR-LF as line separator. + * + * @param raw + * the raw file content. + * @param length + * number of bytes in {@code raw} to evaluate. + * @return {@code true} if raw is likely to be CR-LF delimited text, + * {@code false} otherwise + * @param complete + * whether {@code raw} contains the whole data + * @since 6.0 + */ + public static boolean isCrLfTextNewCandidate2(byte[] raw, int length, boolean complete) { + boolean has_crlf = false; + + // first detect empty + if (length <= 0) { + return false; + } + + // if '\r' be last, then if complete then return non-crlf + byte last = raw[length - 1]; + if (last == '\0' || last == '\r' && complete) { + return false; + } + + for (int ptr = 0; ptr < length - 1; ptr++) { + byte b = raw[ptr]; + switch (b) { + case '\0': + return false; + case '\r': { + ++ptr; + b = raw[ptr]; + if (b != '\n') { + return false; + } + // else + // we have crlf here + has_crlf = true; + // as next is '\n', it can never be '\r', just skip it from next check + break; + } + default: + // do nothing; + break; + } + } + + return has_crlf; + } + + /** + * Determine heuristically whether a byte array represents text content + * using CR-LF as line separator. + * + * @param raw + * the raw file content. + * @param length + * number of bytes in {@code raw} to evaluate. + * @return {@code true} if raw is likely to be CR-LF delimited text, + * {@code false} otherwise + * @param complete + * whether {@code raw} contains the whole data + * @since 6.0 + */ + public static boolean isCrLfTextNewCandidate3(byte[] raw, int length, boolean complete) { + boolean has_crlf = false; + + int ptr = -1; + byte current; + while (ptr < length - 2) { + current = raw[++ptr]; + if ('\0' == current || '\r' == current && (raw[++ptr] != '\n' || !(has_crlf = true))) { + return false; + } + } + + if (ptr == length - 2) { + // if '\r' be last, then if isComplete then return binary + current = raw[++ptr]; + if('\0' == current || '\r' == current && complete){ + return false; + } + } + + return has_crlf; + } + + + public static void main(String[] args) throws RunnerException { + Options opt = new OptionsBuilder() + .include(RawTextBenchmark.class.getSimpleName()) + .forks(1).jvmArgs("-ea").build(); + new Runner(opt).run(); + } +} |