aboutsummaryrefslogtreecommitdiffstats
path: root/org.eclipse.jgit.benchmarks
diff options
context:
space:
mode:
Diffstat (limited to 'org.eclipse.jgit.benchmarks')
-rw-r--r--org.eclipse.jgit.benchmarks/pom.xml7
-rw-r--r--org.eclipse.jgit.benchmarks/src/org/eclipse/jgit/benchmarks/GetRefsBenchmark.java38
-rw-r--r--org.eclipse.jgit.benchmarks/src/org/eclipse/jgit/benchmarks/RawTextBenchmark.java454
3 files changed, 486 insertions, 13 deletions
diff --git a/org.eclipse.jgit.benchmarks/pom.xml b/org.eclipse.jgit.benchmarks/pom.xml
index d8a616f09e..87d2bb31be 100644
--- a/org.eclipse.jgit.benchmarks/pom.xml
+++ b/org.eclipse.jgit.benchmarks/pom.xml
@@ -16,7 +16,7 @@
<parent>
<groupId>org.eclipse.jgit</groupId>
<artifactId>org.eclipse.jgit-parent</artifactId>
- <version>7.0.0-SNAPSHOT</version>
+ <version>7.3.0-SNAPSHOT</version>
</parent>
<artifactId>org.eclipse.jgit.benchmarks</artifactId>
@@ -52,6 +52,10 @@
<artifactId>org.eclipse.jgit.junit</artifactId>
<version>${project.version}</version>
</dependency>
+ <dependency>
+ <groupId>junit</groupId>
+ <artifactId>junit</artifactId>
+ </dependency>
</dependencies>
<build>
@@ -79,7 +83,6 @@
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
- <version>${maven-compiler-plugin-version}</version>
<configuration>
<encoding>UTF-8</encoding>
<release>${java.version}</release>
diff --git a/org.eclipse.jgit.benchmarks/src/org/eclipse/jgit/benchmarks/GetRefsBenchmark.java b/org.eclipse.jgit.benchmarks/src/org/eclipse/jgit/benchmarks/GetRefsBenchmark.java
index 52a881bd11..44e862e7c8 100644
--- a/org.eclipse.jgit.benchmarks/src/org/eclipse/jgit/benchmarks/GetRefsBenchmark.java
+++ b/org.eclipse.jgit.benchmarks/src/org/eclipse/jgit/benchmarks/GetRefsBenchmark.java
@@ -24,10 +24,12 @@ import java.util.stream.IntStream;
import org.eclipse.jgit.api.Git;
import org.eclipse.jgit.api.errors.GitAPIException;
+import org.eclipse.jgit.internal.storage.file.FileReftableDatabase;
import org.eclipse.jgit.internal.storage.file.FileRepository;
import org.eclipse.jgit.lib.BatchRefUpdate;
import org.eclipse.jgit.lib.ConfigConstants;
import org.eclipse.jgit.lib.Constants;
+import org.eclipse.jgit.lib.CoreConfig.TrustStat;
import org.eclipse.jgit.lib.ObjectId;
import org.eclipse.jgit.lib.Repository;
import org.eclipse.jgit.lib.RepositoryCache;
@@ -38,8 +40,10 @@ import org.eclipse.jgit.revwalk.RevWalk;
import org.eclipse.jgit.transport.ReceiveCommand;
import org.eclipse.jgit.util.FS;
import org.eclipse.jgit.util.FileUtils;
+import org.junit.Assume;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode;
+import org.openjdk.jmh.annotations.Fork;
import org.openjdk.jmh.annotations.Measurement;
import org.openjdk.jmh.annotations.Mode;
import org.openjdk.jmh.annotations.OutputTimeUnit;
@@ -66,11 +70,14 @@ public class GetRefsBenchmark {
@Param({ "true", "false" })
boolean useRefTable;
- @Param({ "100", "2500", "10000", "50000" })
+ @Param({ "true", "false" })
+ boolean autoRefresh;
+
+ @Param({ "100", "1000", "10000", "100000" })
int numBranches;
- @Param({ "true", "false" })
- boolean trustFolderStat;
+ @Param({ "ALWAYS", "AFTER_OPEN", "NEVER" })
+ TrustStat trustStat;
List<String> branches = new ArrayList<>(numBranches);
@@ -81,10 +88,13 @@ public class GetRefsBenchmark {
@Setup
@SuppressWarnings("boxing")
public void setupBenchmark() throws IOException, GitAPIException {
+ // if we use RefDirectory skip autoRefresh = false
+ Assume.assumeTrue(useRefTable || autoRefresh);
+
String firstBranch = "firstbranch";
testDir = Files.createDirectory(Paths.get("testrepos"));
- String repoName = "branches-" + numBranches + "-trustFolderStat-"
- + trustFolderStat + "-" + refDatabaseType();
+ String repoName = "branches-" + numBranches + "-trustStat-"
+ + trustStat + "-" + refDatabaseType();
Path workDir = testDir.resolve(repoName);
Path repoPath = workDir.resolve(".git");
Git git = Git.init().setDirectory(workDir.toFile()).call();
@@ -97,10 +107,13 @@ public class GetRefsBenchmark {
((FileRepository) git.getRepository()).convertRefStorage(
ConfigConstants.CONFIG_REF_STORAGE_REFTABLE, false,
false);
+ FileReftableDatabase refdb = (FileReftableDatabase) git
+ .getRepository().getRefDatabase();
+ refdb.setAutoRefresh(autoRefresh);
} else {
- cfg.setBoolean(ConfigConstants.CONFIG_CORE_SECTION, null,
- ConfigConstants.CONFIG_KEY_TRUSTFOLDERSTAT,
- trustFolderStat);
+ cfg.setEnum(ConfigConstants.CONFIG_CORE_SECTION, null,
+ ConfigConstants.CONFIG_KEY_TRUST_STAT,
+ trustStat);
}
cfg.setInt(ConfigConstants.CONFIG_RECEIVE_SECTION, null,
"maxCommandBytes", Integer.MAX_VALUE);
@@ -112,7 +125,8 @@ public class GetRefsBenchmark {
System.out.println("Preparing test");
System.out.println("- repository: \t\t" + repoPath);
System.out.println("- refDatabase: \t\t" + refDatabaseType());
- System.out.println("- trustFolderStat: \t" + trustFolderStat);
+ System.out.println("- autoRefresh: \t\t" + autoRefresh);
+ System.out.println("- trustStat: \t" + trustStat);
System.out.println("- branches: \t\t" + numBranches);
BatchRefUpdate u = repo.getRefDatabase().newBatchUpdate();
@@ -152,7 +166,8 @@ public class GetRefsBenchmark {
@BenchmarkMode({ Mode.AverageTime })
@OutputTimeUnit(TimeUnit.MICROSECONDS)
@Warmup(iterations = 2, time = 100, timeUnit = TimeUnit.MILLISECONDS)
- @Measurement(iterations = 2, time = 10, timeUnit = TimeUnit.SECONDS)
+ @Measurement(iterations = 2, time = 5, timeUnit = TimeUnit.SECONDS)
+ @Fork(2)
public void testGetExactRef(Blackhole blackhole, BenchmarkState state)
throws IOException {
String branchName = state.branches
@@ -164,7 +179,8 @@ public class GetRefsBenchmark {
@BenchmarkMode({ Mode.AverageTime })
@OutputTimeUnit(TimeUnit.MICROSECONDS)
@Warmup(iterations = 2, time = 100, timeUnit = TimeUnit.MILLISECONDS)
- @Measurement(iterations = 2, time = 10, timeUnit = TimeUnit.SECONDS)
+ @Measurement(iterations = 2, time = 5, timeUnit = TimeUnit.SECONDS)
+ @Fork(2)
public void testGetRefsByPrefix(Blackhole blackhole, BenchmarkState state)
throws IOException {
String branchPrefix = "refs/heads/branch/" + branchIndex.nextInt(100)
diff --git a/org.eclipse.jgit.benchmarks/src/org/eclipse/jgit/benchmarks/RawTextBenchmark.java b/org.eclipse.jgit.benchmarks/src/org/eclipse/jgit/benchmarks/RawTextBenchmark.java
new file mode 100644
index 0000000000..19297ebebb
--- /dev/null
+++ b/org.eclipse.jgit.benchmarks/src/org/eclipse/jgit/benchmarks/RawTextBenchmark.java
@@ -0,0 +1,454 @@
+/*
+ * Copyright (C) 2022, Matthias Sohn <matthias.sohn@sap.com> and others
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Distribution License v. 1.0 which is available at
+ * https://www.eclipse.org/org/documents/edl-v10.php.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+package org.eclipse.jgit.benchmarks;
+
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.BenchmarkMode;
+import org.openjdk.jmh.annotations.Fork;
+import org.openjdk.jmh.annotations.Measurement;
+import org.openjdk.jmh.annotations.Mode;
+import org.openjdk.jmh.annotations.OutputTimeUnit;
+import org.openjdk.jmh.annotations.Param;
+import org.openjdk.jmh.annotations.Scope;
+import org.openjdk.jmh.annotations.Setup;
+import org.openjdk.jmh.annotations.State;
+import org.openjdk.jmh.annotations.TearDown;
+import org.openjdk.jmh.annotations.Warmup;
+import org.openjdk.jmh.infra.Blackhole;
+import org.openjdk.jmh.runner.Runner;
+import org.openjdk.jmh.runner.RunnerException;
+import org.openjdk.jmh.runner.options.Options;
+import org.openjdk.jmh.runner.options.OptionsBuilder;
+
+import java.util.concurrent.TimeUnit;
+
+import static org.eclipse.jgit.diff.RawText.getBufferSize;
+import static org.eclipse.jgit.diff.RawText.isBinary;
+import static org.eclipse.jgit.diff.RawText.isCrLfText;
+
+@State(Scope.Thread)
+public class RawTextBenchmark {
+
+ @State(Scope.Benchmark)
+ public static class BenchmarkState {
+
+ @Param({"1", "2", "3", "4", "5", "6"})
+ int testIndex;
+
+ @Param({"false", "true"})
+ boolean complete;
+
+ byte[] bytes;
+
+ @Setup
+ public void setupBenchmark() {
+ switch (testIndex) {
+ case 1: {
+ byte[] tmpBytes = "a".repeat(102400).getBytes();
+ bytes = tmpBytes;
+ break;
+ }
+ case 2: {
+ byte[] tmpBytes = "a".repeat(102400).getBytes();
+ byte[] tmpBytes2 = new byte[tmpBytes.length + 1];
+ System.arraycopy(tmpBytes, 0, tmpBytes2, 0, tmpBytes.length);
+ tmpBytes2[500] = '\0';
+ tmpBytes2[tmpBytes.length] = '\0';
+ bytes = tmpBytes2;
+ break;
+ }
+ case 3: {
+ byte[] tmpBytes = "a".repeat(102400).getBytes();
+ byte[] tmpBytes2 = new byte[tmpBytes.length + 1];
+ System.arraycopy(tmpBytes, 0, tmpBytes2, 0, tmpBytes.length);
+ tmpBytes2[500] = '\r';
+ tmpBytes2[tmpBytes.length] = '\r';
+ bytes = tmpBytes2;
+ break;
+ }
+ case 4: {
+ byte[] tmpBytes = "a".repeat(102400).getBytes();
+ byte[] tmpBytes2 = new byte[tmpBytes.length + 1];
+ System.arraycopy(tmpBytes, 0, tmpBytes2, 0, tmpBytes.length);
+ tmpBytes2[499] = '\r';
+ tmpBytes2[500] = '\n';
+ tmpBytes2[tmpBytes.length - 1] = '\r';
+ tmpBytes2[tmpBytes.length] = '\n';
+ bytes = tmpBytes2;
+ break;
+ }
+ case 5: {
+ byte[] tmpBytes = "a".repeat(102400).getBytes();
+ tmpBytes[0] = '\0';
+ bytes = tmpBytes;
+ break;
+ }
+ case 6: {
+ byte[] tmpBytes = "a".repeat(102400).getBytes();
+ tmpBytes[0] = '\r';
+ bytes = tmpBytes;
+ break;
+ }
+ default:
+ }
+ }
+
+ @TearDown
+ public void teardown() {
+ }
+ }
+
+ @Benchmark
+ @BenchmarkMode({Mode.AverageTime})
+ @OutputTimeUnit(TimeUnit.NANOSECONDS)
+ @Warmup(iterations = 2, time = 5, timeUnit = TimeUnit.SECONDS)
+ @Measurement(iterations = 2, time = 5, timeUnit = TimeUnit.SECONDS)
+ @Fork(1)
+ public void testIsCrLfTextOld(Blackhole blackhole, BenchmarkState state) {
+ blackhole.consume(
+ isCrLfTextOld(
+ state.bytes,
+ state.bytes.length,
+ state.complete
+ )
+ );
+ }
+
+ @Benchmark
+ @BenchmarkMode({Mode.AverageTime})
+ @OutputTimeUnit(TimeUnit.NANOSECONDS)
+ @Warmup(iterations = 2, time = 5, timeUnit = TimeUnit.SECONDS)
+ @Measurement(iterations = 2, time = 5, timeUnit = TimeUnit.SECONDS)
+ @Fork(1)
+ public void testIsCrLfTextNewCandidate1(Blackhole blackhole, BenchmarkState state) {
+ blackhole.consume(
+ isCrLfTextNewCandidate1(
+ state.bytes,
+ state.bytes.length,
+ state.complete
+ )
+ );
+ }
+
+ @Benchmark
+ @BenchmarkMode({Mode.AverageTime})
+ @OutputTimeUnit(TimeUnit.NANOSECONDS)
+ @Warmup(iterations = 2, time = 5, timeUnit = TimeUnit.SECONDS)
+ @Measurement(iterations = 2, time = 5, timeUnit = TimeUnit.SECONDS)
+ @Fork(1)
+ public void testIsCrLfTextNewCandidate2(Blackhole blackhole, BenchmarkState state) {
+ blackhole.consume(
+ isCrLfTextNewCandidate2(
+ state.bytes,
+ state.bytes.length,
+ state.complete
+ )
+ );
+ }
+
+ @Benchmark
+ @BenchmarkMode({Mode.AverageTime})
+ @OutputTimeUnit(TimeUnit.NANOSECONDS)
+ @Warmup(iterations = 2, time = 5, timeUnit = TimeUnit.SECONDS)
+ @Measurement(iterations = 2, time = 5, timeUnit = TimeUnit.SECONDS)
+ @Fork(1)
+ public void testIsCrLfTextNewCandidate3(Blackhole blackhole, BenchmarkState state) {
+ blackhole.consume(
+ isCrLfTextNewCandidate3(
+ state.bytes,
+ state.bytes.length,
+ state.complete
+ )
+ );
+ }
+
+ @Benchmark
+ @BenchmarkMode({Mode.AverageTime})
+ @OutputTimeUnit(TimeUnit.NANOSECONDS)
+ @Warmup(iterations = 2, time = 5, timeUnit = TimeUnit.SECONDS)
+ @Measurement(iterations = 2, time = 5, timeUnit = TimeUnit.SECONDS)
+ @Fork(1)
+ public void testIsCrLfTextNew(Blackhole blackhole, BenchmarkState state) {
+ blackhole.consume(
+ isCrLfText(
+ state.bytes,
+ state.bytes.length,
+ state.complete
+ )
+ );
+ }
+
+ @Benchmark
+ @BenchmarkMode({Mode.AverageTime})
+ @OutputTimeUnit(TimeUnit.NANOSECONDS)
+ @Warmup(iterations = 2, time = 5, timeUnit = TimeUnit.SECONDS)
+ @Measurement(iterations = 2, time = 5, timeUnit = TimeUnit.SECONDS)
+ @Fork(1)
+ public void testIsBinaryOld(Blackhole blackhole, BenchmarkState state) {
+ blackhole.consume(
+ isBinaryOld(
+ state.bytes,
+ state.bytes.length,
+ state.complete
+ )
+ );
+ }
+
+
+ @Benchmark
+ @BenchmarkMode({Mode.AverageTime})
+ @OutputTimeUnit(TimeUnit.NANOSECONDS)
+ @Warmup(iterations = 2, time = 5, timeUnit = TimeUnit.SECONDS)
+ @Measurement(iterations = 2, time = 5, timeUnit = TimeUnit.SECONDS)
+ @Fork(1)
+ public void testIsBinaryNew(Blackhole blackhole, BenchmarkState state) {
+ blackhole.consume(
+ isBinary(
+ state.bytes,
+ state.bytes.length,
+ state.complete
+ )
+ );
+ }
+
+
+ /**
+ * Determine heuristically whether a byte array represents binary (as
+ * opposed to text) content.
+ *
+ * @param raw
+ * the raw file content.
+ * @param length
+ * number of bytes in {@code raw} to evaluate. This should be
+ * {@code raw.length} unless {@code raw} was over-allocated by
+ * the caller.
+ * @param complete
+ * whether {@code raw} contains the whole data
+ * @return true if raw is likely to be a binary file, false otherwise
+ * @since 6.0
+ */
+ public static boolean isBinaryOld(byte[] raw, int length, boolean complete) {
+ // Similar heuristic as C Git. Differences:
+ // - limited buffer size; may be only the beginning of a large blob
+ // - no counting of printable vs. non-printable bytes < 0x20 and 0x7F
+ int maxLength = getBufferSize();
+ boolean isComplete = complete;
+ if (length > maxLength) {
+ // We restrict the length in all cases to getBufferSize() to get
+ // predictable behavior. Sometimes we load streams, and sometimes we
+ // have the full data in memory. With streams, we never look at more
+ // than the first getBufferSize() bytes. If we looked at more when
+ // we have the full data, different code paths in JGit might come to
+ // different conclusions.
+ length = maxLength;
+ isComplete = false;
+ }
+ byte last = 'x'; // Just something inconspicuous.
+ for (int ptr = 0; ptr < length; ptr++) {
+ byte curr = raw[ptr];
+ if (isBinary(curr, last)) {
+ return true;
+ }
+ last = curr;
+ }
+ if (isComplete) {
+ // Buffer contains everything...
+ return last == '\r'; // ... so this must be a lone CR
+ }
+ return false;
+ }
+
+ /**
+ * Determine heuristically whether a byte array represents text content
+ * using CR-LF as line separator.
+ *
+ * @param raw the raw file content.
+ * @param length number of bytes in {@code raw} to evaluate.
+ * @param complete whether {@code raw} contains the whole data
+ * @return {@code true} if raw is likely to be CR-LF delimited text,
+ * {@code false} otherwise
+ * @since 6.0
+ */
+ public static boolean isCrLfTextOld(byte[] raw, int length, boolean complete) {
+ boolean has_crlf = false;
+ byte last = 'x'; // Just something inconspicuous
+ for (int ptr = 0; ptr < length; ptr++) {
+ byte curr = raw[ptr];
+ if (isBinary(curr, last)) {
+ return false;
+ }
+ if (curr == '\n' && last == '\r') {
+ has_crlf = true;
+ }
+ last = curr;
+ }
+ if (last == '\r') {
+ if (complete) {
+ // Lone CR: it's binary after all.
+ return false;
+ }
+ // Tough call. If the next byte, which we don't have, would be a
+ // '\n', it'd be a CR-LF text, otherwise it'd be binary. Just decide
+ // based on what we already scanned; it wasn't binary until now.
+ }
+ return has_crlf;
+ }
+
+ /**
+ * Determine heuristically whether a byte array represents text content
+ * using CR-LF as line separator.
+ *
+ * @param raw
+ * the raw file content.
+ * @param length
+ * number of bytes in {@code raw} to evaluate.
+ * @return {@code true} if raw is likely to be CR-LF delimited text,
+ * {@code false} otherwise
+ * @param complete
+ * whether {@code raw} contains the whole data
+ * @since 6.0
+ */
+ public static boolean isCrLfTextNewCandidate1(byte[] raw, int length, boolean complete) {
+ boolean has_crlf = false;
+
+ // first detect empty
+ if (length <= 0) {
+ return false;
+ }
+
+ // next detect '\0'
+ for (int reversePtr = length - 1; reversePtr >= 0; --reversePtr) {
+ if (raw[reversePtr] == '\0') {
+ return false;
+ }
+ }
+
+ // if '\r' be last, then if complete then return non-crlf
+ if (raw[length - 1] == '\r' && complete) {
+ return false;
+ }
+
+ for (int ptr = 0; ptr < length - 1; ptr++) {
+ byte curr = raw[ptr];
+ if (curr == '\r') {
+ byte next = raw[ptr + 1];
+ if (next != '\n') {
+ return false;
+ }
+ // else
+ // we have crlf here
+ has_crlf = true;
+ // as next is '\n', it can never be '\r', just skip it from next check
+ ++ptr;
+ }
+ }
+
+ return has_crlf;
+ }
+
+ /**
+ * Determine heuristically whether a byte array represents text content
+ * using CR-LF as line separator.
+ *
+ * @param raw
+ * the raw file content.
+ * @param length
+ * number of bytes in {@code raw} to evaluate.
+ * @return {@code true} if raw is likely to be CR-LF delimited text,
+ * {@code false} otherwise
+ * @param complete
+ * whether {@code raw} contains the whole data
+ * @since 6.0
+ */
+ public static boolean isCrLfTextNewCandidate2(byte[] raw, int length, boolean complete) {
+ boolean has_crlf = false;
+
+ // first detect empty
+ if (length <= 0) {
+ return false;
+ }
+
+ // if '\r' be last, then if complete then return non-crlf
+ byte last = raw[length - 1];
+ if (last == '\0' || last == '\r' && complete) {
+ return false;
+ }
+
+ for (int ptr = 0; ptr < length - 1; ptr++) {
+ byte b = raw[ptr];
+ switch (b) {
+ case '\0':
+ return false;
+ case '\r': {
+ ++ptr;
+ b = raw[ptr];
+ if (b != '\n') {
+ return false;
+ }
+ // else
+ // we have crlf here
+ has_crlf = true;
+ // as next is '\n', it can never be '\r', just skip it from next check
+ break;
+ }
+ default:
+ // do nothing;
+ break;
+ }
+ }
+
+ return has_crlf;
+ }
+
+ /**
+ * Determine heuristically whether a byte array represents text content
+ * using CR-LF as line separator.
+ *
+ * @param raw
+ * the raw file content.
+ * @param length
+ * number of bytes in {@code raw} to evaluate.
+ * @return {@code true} if raw is likely to be CR-LF delimited text,
+ * {@code false} otherwise
+ * @param complete
+ * whether {@code raw} contains the whole data
+ * @since 6.0
+ */
+ public static boolean isCrLfTextNewCandidate3(byte[] raw, int length, boolean complete) {
+ boolean has_crlf = false;
+
+ int ptr = -1;
+ byte current;
+ while (ptr < length - 2) {
+ current = raw[++ptr];
+ if ('\0' == current || '\r' == current && (raw[++ptr] != '\n' || !(has_crlf = true))) {
+ return false;
+ }
+ }
+
+ if (ptr == length - 2) {
+ // if '\r' be last, then if isComplete then return binary
+ current = raw[++ptr];
+ if('\0' == current || '\r' == current && complete){
+ return false;
+ }
+ }
+
+ return has_crlf;
+ }
+
+
+ public static void main(String[] args) throws RunnerException {
+ Options opt = new OptionsBuilder()
+ .include(RawTextBenchmark.class.getSimpleName())
+ .forks(1).jvmArgs("-ea").build();
+ new Runner(opt).run();
+ }
+}