aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAntonio Barone <syntonyze@gmail.com>2024-04-23 17:51:22 +0200
committerAntonio Barone <syntonyze@gmail.com>2024-04-23 18:16:00 +0200
commitb4ec08db341116ced37925fd38e7668625c4af8c (patch)
treebfa04e7c794926c8969270a8c2f37fb2300a6099
parent5c1c006307cac302b07d42ed3172a3cb3e630dcc (diff)
downloadjgit-master%private.tar.gz
jgit-master%private.zip
Introduce `quickMatchSearchForReuse` config.master%private
During the search for reuse phase jgit attempts to find the best object representation by scanning all existing packfiles. This ensures that the best delta representation is selected from all the available ones, effectively saving space when repacking objects in packfiles. In a scenario where the number of packfiles is large however (i.e. thousands of packfiles), this thorough search to find the best object representation could be quite time consuming, up to the point where it becomes unacceptable, leading to a "Search for reuse phase" to last several minutes. Introduce the possibility to trade-off between efficiency and the quality of object representation chosen during the search for reuse phase, by introducing the `quickMatchSearchForReuse` config. When enabled, the search stops scanning upon finding the first matching object representation, potentially sacrificing finding the best representation for efficiency. To increase the chance to find the best (or at least a very good) object representation, when `quickMatchSearchForReuse` is enabled, the packfiles are scanned from the most recent to the oldest, giving priority to the ones having a bitmap, so that the most recent repacks are traversed first. We tested this on a ~3Gb repository having ~4M objects spanning across 4k packfiles on a Mac M3 with 36 GB ram: The search for reuse phase during a GC operation goes down from ~3 minutes to ~20 seconds. Change-Id: Ib44efded20c2e09a44b6234412a2cf94e7cd4b0f
-rw-r--r--Documentation/config-options.md1
-rw-r--r--org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/file/PackWriterTest.java78
-rw-r--r--org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/Pack.java11
-rw-r--r--org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/PackDirectory.java20
-rw-r--r--org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/pack/PackWriter.java13
-rw-r--r--org.eclipse.jgit/src/org/eclipse/jgit/lib/ConfigConstants.java7
-rw-r--r--org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/PackConfig.java46
7 files changed, 162 insertions, 14 deletions
diff --git a/Documentation/config-options.md b/Documentation/config-options.md
index 78930e6267..aa726d2698 100644
--- a/Documentation/config-options.md
+++ b/Documentation/config-options.md
@@ -129,6 +129,7 @@ Proxy configuration uses the standard Java mechanisms via class `java.net.ProxyS
| `pack.waitPreventRacyPack` | `false` | &#x20DE; | Whether we wait before opening a newly written pack to prevent its lastModified timestamp could be racy. |
| `pack.window` | `10` | &#x2705; | Number of objects to try when looking for a delta base per thread searching for deltas. |
| `pack.windowMemory` | `0` (unlimited) | &#x2705; | Maximum number of bytes to put into the delta search window. |
+| `pack.quickMatchSearchForReuse` | `false` | &#x2705; | Search for reuse stops at the first match for efficiency over optimization. |
## __repack__ options
diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/file/PackWriterTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/file/PackWriterTest.java
index 24a81b6715..d21d214cb1 100644
--- a/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/file/PackWriterTest.java
+++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/file/PackWriterTest.java
@@ -28,6 +28,7 @@ import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
+import java.text.ParseException;
import java.time.Duration;
import java.util.ArrayList;
import java.util.Arrays;
@@ -35,8 +36,10 @@ import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
+import java.util.concurrent.ExecutionException;
import org.eclipse.jgit.api.Git;
+import org.eclipse.jgit.api.errors.GitAPIException;
import org.eclipse.jgit.errors.MissingObjectException;
import org.eclipse.jgit.internal.storage.file.PackIndex.MutableEntry;
import org.eclipse.jgit.internal.storage.pack.PackExt;
@@ -702,12 +705,12 @@ public class PackWriterTest extends SampleDataRepositoryTestCase {
}
@Test
- public void testTotalPackFilesScanWhenSearchForReuseTimeoutNotSet()
+ public void testSelectRepresentationShouldScanAllObjectsWhenQuickMatchIsNotSet()
throws Exception {
- FileRepository fileRepository = setUpRepoWithMultiplePackfiles();
+ FileRepository fileRepository = setUpMultipleRepresentations();
+ config.setQuickMatchSearchForReuse(false);
PackWriter mockedPackWriter = Mockito
.spy(new PackWriter(config, fileRepository.newObjectReader()));
-
doNothing().when(mockedPackWriter).select(any(), any());
try (FileOutputStream packOS = new FileOutputStream(
@@ -716,6 +719,38 @@ public class PackWriterTest extends SampleDataRepositoryTestCase {
NullProgressMonitor.INSTANCE, packOS);
}
+ int allRepresentations = Math.toIntExact(fileRepository.getObjectDatabase().getApproximateObjectCount());
+ verify(mockedPackWriter, times(allRepresentations)).select(any(), any());
+ }
+
+ @Test
+ public void testSelectRepresentationShouldStopAtTheFirstMatchWhenQuickMatchIsSet()
+ throws Exception {
+ FileRepository fileRepository = setUpMultipleRepresentations();
+ addRepoToClose(fileRepository);
+ config.setQuickMatchSearchForReuse(true);
+ PackWriter mockedPackWriter = Mockito
+ .spy(new PackWriter(config, fileRepository.newObjectReader()));
+ doNothing().when(mockedPackWriter).select(any(), any());
+
+ writePack(fileRepository, mockedPackWriter);
+
+ int expectedRepresentationSelections = 3; // C1, T1, C2
+ verify(mockedPackWriter, times(expectedRepresentationSelections)).select(any(), any());
+ }
+
+
+ @Test
+ public void testTotalPackFilesScanWhenSearchForReuseTimeoutNotSet()
+ throws Exception {
+ FileRepository fileRepository = setUpRepoWithMultiplePackfiles();
+ PackWriter mockedPackWriter = Mockito
+ .spy(new PackWriter(config, fileRepository.newObjectReader()));
+
+ doNothing().when(mockedPackWriter).select(any(), any());
+
+ writePack(fileRepository, mockedPackWriter);
+
long numberOfPackFiles = new GC(fileRepository)
.getStatistics().numberOfPackFiles;
int expectedSelectCalls =
@@ -738,11 +773,7 @@ public class PackWriterTest extends SampleDataRepositoryTestCase {
doNothing().when(mockedPackWriter).select(any(), any());
- try (FileOutputStream packOS = new FileOutputStream(
- getPackFileToWrite(fileRepository, mockedPackWriter))) {
- mockedPackWriter.writePack(NullProgressMonitor.INSTANCE,
- NullProgressMonitor.INSTANCE, packOS);
- }
+ writePack(fileRepository, mockedPackWriter);
long numberOfPackFiles = new GC(fileRepository)
.getStatistics().numberOfPackFiles;
@@ -767,11 +798,7 @@ public class PackWriterTest extends SampleDataRepositoryTestCase {
doNothing().when(mockedPackWriter).select(any(), any());
- try (FileOutputStream packOS = new FileOutputStream(
- getPackFileToWrite(fileRepository, mockedPackWriter))) {
- mockedPackWriter.writePack(NullProgressMonitor.INSTANCE,
- NullProgressMonitor.INSTANCE, packOS);
- }
+ writePack(fileRepository, mockedPackWriter);
int expectedSelectCalls = 3; // Objects in packfiles
verify(mockedPackWriter, times(expectedSelectCalls)).select(any(),
@@ -811,6 +838,31 @@ public class PackWriterTest extends SampleDataRepositoryTestCase {
return fileRepository;
}
+ private FileRepository setUpMultipleRepresentations() throws IOException, GitAPIException, ParseException, ExecutionException, InterruptedException {
+ FileRepository fileRepository = createWorkRepository();
+ addRepoToClose(fileRepository);
+
+ try (Git git = new Git(fileRepository)) {
+ // Creates 2 objects (C1 = commit, T1 = tree) and pack them in P1 (containing, C1, T1(
+ git.commit().setMessage("First commit").call();
+ GC gc = new GC(fileRepository);
+ gc.gc().get();
+
+ // Creates 1 object (C2 commit) and pack it in P2 (containing C1, T1, C2)
+ git.commit().setMessage("Second commit").call();
+ gc.gc().get();
+ }
+ return fileRepository;
+ }
+
+ private void writePack(FileRepository fileRepository, PackWriter packWriter) throws IOException {
+ try (FileOutputStream packOS = new FileOutputStream(
+ getPackFileToWrite(fileRepository, packWriter))) {
+ packWriter.writePack(NullProgressMonitor.INSTANCE,
+ NullProgressMonitor.INSTANCE, packOS);
+ }
+ }
+
private PackFile getPackFileToWrite(FileRepository fileRepository,
PackWriter mockedPackWriter) throws IOException {
File packdir = fileRepository.getObjectDatabase().getPackDirectory();
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/Pack.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/Pack.java
index f87329ccc2..dd59731716 100644
--- a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/Pack.java
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/Pack.java
@@ -85,6 +85,17 @@ public class Pack implements Iterable<PackIndex.MutableEntry> {
public static final Comparator<Pack> SORT = (a, b) -> b.packLastModified
.compareTo(a.packLastModified);
+ /**
+ * Sorts PackFiles to be most recently created to least recently created giving priority to the ones having bitmaps.
+ */
+ public static final Comparator<Pack> SORT_WITH_BITMAP_FIRST = Comparator.comparing((Pack pack) -> {
+ try {
+ return pack.getBitmapIndex() != null ? 0 : 1;
+ } catch (IOException e) {
+ return 1;
+ }
+ }).thenComparing(Pack.SORT);
+
private boolean useStrongRefs;
private final PackFile packFile;
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/PackDirectory.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/PackDirectory.java
index 8221cff442..42cd24bf3c 100644
--- a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/PackDirectory.java
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/PackDirectory.java
@@ -280,13 +280,17 @@ class PackDirectory {
PackList pList = packList.get();
int retries = 0;
SEARCH: for (;;) {
- for (Pack p : pList.packs) {
+ Pack[] sortedPacks = packer.getQuickMatchSearchForReuse() ? pList.getPacksSortedByBitmapFirst() : pList.packs;
+ for (Pack p : sortedPacks) {
try {
LocalObjectRepresentation rep = p.representation(curs, otp);
p.resetTransientErrorCount();
if (rep != null) {
packer.select(otp, rep);
packer.checkSearchForReuseTimeout();
+ if(packer.getQuickMatchSearchForReuse()) {
+ break SEARCH;
+ }
}
} catch (SearchForReuseTimeout e) {
break SEARCH;
@@ -568,9 +572,23 @@ class PackDirectory {
/** All known packs, sorted by {@link Pack#SORT}. */
final Pack[] packs;
+ private Pack[] packsSortedByBitmapFirst;
+
PackList(FileSnapshot monitor, Pack[] packs) {
this.snapshot = monitor;
this.packs = packs;
}
+
+ /**
+ * Get the list of packfiles, sorted by {@link Pack#SORT_WITH_BITMAP_FIRST}.
+ *
+ * @return the ordered array of {@link Pack}
+ */
+ public Pack[] getPacksSortedByBitmapFirst() {
+ if(packsSortedByBitmapFirst == null) {
+ this.packsSortedByBitmapFirst = Arrays.stream(packs).sorted(Pack.SORT_WITH_BITMAP_FIRST).toArray(Pack[]::new);
+ }
+ return this.packsSortedByBitmapFirst;
+ }
}
}
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/pack/PackWriter.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/pack/PackWriter.java
index 4350f97915..863c83b250 100644
--- a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/pack/PackWriter.java
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/pack/PackWriter.java
@@ -275,6 +275,8 @@ public class PackWriter implements AutoCloseable {
private long searchForReuseStartTimeEpoc;
+ private final boolean quickMatchSearchForReuse;
+
private int depth;
private Collection<? extends ObjectId> unshallowObjects;
@@ -370,6 +372,7 @@ public class PackWriter implements AutoCloseable {
deltaBaseAsOffset = config.isDeltaBaseAsOffset();
reuseDeltas = config.isReuseDeltas();
searchForReuseTimeout = config.getSearchForReuseTimeout();
+ quickMatchSearchForReuse = config.getQuickMatchSearchForReuse();
reuseValidate = true; // be paranoid by default
stats = statsAccumulator != null ? statsAccumulator
: new PackStatistics.Accumulator();
@@ -702,6 +705,16 @@ public class PackWriter implements AutoCloseable {
}
/**
+ * Whether the search for reuse phase should stop at the first object representation.
+ *
+ * @return {@code true} if the first-match search for reuse is enabled, {@code false} otherwise.
+ * @since 6.9.1
+ */
+ public boolean getQuickMatchSearchForReuse() {
+ return quickMatchSearchForReuse;
+ }
+
+ /**
* Returns objects number in a pack file that was created by this writer.
*
* @return number of objects in pack.
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/lib/ConfigConstants.java b/org.eclipse.jgit/src/org/eclipse/jgit/lib/ConfigConstants.java
index 0edf3c5ad0..b9b8507e72 100644
--- a/org.eclipse.jgit/src/org/eclipse/jgit/lib/ConfigConstants.java
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/lib/ConfigConstants.java
@@ -843,6 +843,13 @@ public final class ConfigConstants {
public static final String CONFIG_KEY_SINGLE_PACK = "singlepack";
/**
+ * The "pack.quickMatchSearchForReuse" key
+ * @since 6.9.1
+ */
+ public static final String CONFIG_KEY_QUICK_MATCH_SEARCH_FOR_REUSE = "quickmatchsearchforreuse";
+
+
+ /**
* The "pack.threads" key
* @since 5.8
*/
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/PackConfig.java b/org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/PackConfig.java
index 8373d6809a..3cd99e234a 100644
--- a/org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/PackConfig.java
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/PackConfig.java
@@ -33,6 +33,7 @@ import static org.eclipse.jgit.lib.ConfigConstants.CONFIG_KEY_MIN_SIZE_PREVENT_R
import static org.eclipse.jgit.lib.ConfigConstants.CONFIG_KEY_PACK_KEPT_OBJECTS;
import static org.eclipse.jgit.lib.ConfigConstants.CONFIG_KEY_PRESERVE_OLD_PACKS;
import static org.eclipse.jgit.lib.ConfigConstants.CONFIG_KEY_PRUNE_PRESERVED;
+import static org.eclipse.jgit.lib.ConfigConstants.CONFIG_KEY_QUICK_MATCH_SEARCH_FOR_REUSE;
import static org.eclipse.jgit.lib.ConfigConstants.CONFIG_KEY_REUSE_DELTAS;
import static org.eclipse.jgit.lib.ConfigConstants.CONFIG_KEY_REUSE_OBJECTS;
import static org.eclipse.jgit.lib.ConfigConstants.CONFIG_KEY_SEARCH_FOR_REUSE_TIMEOUT;
@@ -354,6 +355,8 @@ public class PackConfig {
private boolean singlePack;
+ private boolean quickMatchSearchForReuse;
+
private int minBytesForObjSizeIndex = DEFAULT_MIN_BYTES_FOR_OBJ_SIZE_INDEX;
/**
@@ -426,6 +429,7 @@ public class PackConfig {
this.singlePack = cfg.singlePack;
this.searchForReuseTimeout = cfg.searchForReuseTimeout;
this.minBytesForObjSizeIndex = cfg.minBytesForObjSizeIndex;
+ this.quickMatchSearchForReuse = cfg.quickMatchSearchForReuse;
}
/**
@@ -690,6 +694,44 @@ public class PackConfig {
}
/**
+ * Whether the search for reuse phase should stop at the first object representation.
+ * <p>
+ * When enabled, the search prioritizes packfiles with bitmaps and stops scanning upon finding the first
+ * matching object representation, potentially sacrificing finding the best representation for efficiency.
+ * </p>
+ * <p>
+ * This configuration influences the trade-off between efficiency and the quality
+ * of object representation chosen during the search for reuse phase.
+ * </p>
+ *
+ * @return {@code true} if the first-match search for reuse is enabled, {@code false} otherwise.
+ * @since 6.9.1
+ */
+ public boolean getQuickMatchSearchForReuse() {
+ return quickMatchSearchForReuse;
+ }
+
+ /**
+ * Set whether the search for reuse phase should stop at the first object representation.
+ * <p>
+ * When enabled, the search prioritizes packfiles with bitmaps and stops scanning upon finding the first
+ * matching object representation, potentially sacrificing finding the best representation for efficiency.
+ * </p>
+ * <p>
+ * This configuration influences the trade-off between efficiency and the quality
+ * of object representation chosen during the search for reuse phase.
+ * </p>
+ *
+ * @param quickMatchSearchForReuse
+ * true to stop finding the first matching object representation in search for reuse
+ *
+ * @since 6.9.1
+ */
+ public void setQuickMatchSearchForReuse(boolean quickMatchSearchForReuse) {
+ this.quickMatchSearchForReuse = quickMatchSearchForReuse;
+ }
+
+ /**
* Get the number of objects to try when looking for a delta base.
*
* This limit is per thread, if 4 threads are used the actual memory used
@@ -1434,6 +1476,9 @@ public class PackConfig {
setSinglePack(rc.getBoolean(CONFIG_PACK_SECTION,
CONFIG_KEY_SINGLE_PACK,
getSinglePack()));
+ setQuickMatchSearchForReuse(rc.getBoolean(CONFIG_PACK_SECTION,
+ CONFIG_KEY_QUICK_MATCH_SEARCH_FOR_REUSE,
+ getQuickMatchSearchForReuse()));
setWriteReverseIndex(rc.getBoolean(CONFIG_PACK_SECTION,
CONFIG_KEY_WRITE_REVERSE_INDEX, isWriteReverseIndex()));
boolean buildBitmapsFromConfig = rc.getBoolean(CONFIG_PACK_SECTION,
@@ -1522,6 +1567,7 @@ public class PackConfig {
b.append(", singlePack=").append(getSinglePack()); //$NON-NLS-1$
b.append(", minBytesForObjSizeIndex=") //$NON-NLS-1$
.append(getMinBytesForObjSizeIndex());
+ b.append(", quickMatchSearchForReuse=").append(getQuickMatchSearchForReuse()); //$NON-NLS-1$
return b.toString();
}
}