diff options
author | Antonio Barone <syntonyze@gmail.com> | 2024-04-23 17:51:22 +0200 |
---|---|---|
committer | Antonio Barone <syntonyze@gmail.com> | 2024-04-23 18:16:00 +0200 |
commit | b4ec08db341116ced37925fd38e7668625c4af8c (patch) | |
tree | bfa04e7c794926c8969270a8c2f37fb2300a6099 | |
parent | 5c1c006307cac302b07d42ed3172a3cb3e630dcc (diff) | |
download | jgit-master%private.tar.gz jgit-master%private.zip |
Introduce `quickMatchSearchForReuse` config.master%private
During the search for reuse phase jgit attempts to find the best object
representation by scanning all existing packfiles.
This ensures that the best delta representation is selected from all the
available ones, effectively saving space when repacking objects in
packfiles.
In a scenario where the number of packfiles is large however (i.e.
thousands of packfiles), this thorough search to find the best object
representation could be quite time consuming, up to the point where it
becomes unacceptable, leading to a "Search for reuse phase" to last
several minutes.
Introduce the possibility to trade-off between efficiency and the
quality of object representation chosen during the search for reuse
phase, by introducing the `quickMatchSearchForReuse` config.
When enabled, the search stops scanning upon finding the first matching
object representation, potentially sacrificing finding the best
representation for efficiency.
To increase the chance to find the best (or at least a very good) object
representation, when `quickMatchSearchForReuse` is enabled, the
packfiles are scanned from the most recent to the oldest, giving
priority to the ones having a bitmap, so that the most recent repacks
are traversed first.
We tested this on a ~3Gb repository having ~4M objects spanning across
4k packfiles on a Mac M3 with 36 GB ram: The search for reuse phase
during a GC operation goes down from ~3 minutes to ~20 seconds.
Change-Id: Ib44efded20c2e09a44b6234412a2cf94e7cd4b0f
7 files changed, 162 insertions, 14 deletions
diff --git a/Documentation/config-options.md b/Documentation/config-options.md index 78930e6267..aa726d2698 100644 --- a/Documentation/config-options.md +++ b/Documentation/config-options.md @@ -129,6 +129,7 @@ Proxy configuration uses the standard Java mechanisms via class `java.net.ProxyS | `pack.waitPreventRacyPack` | `false` | ⃞ | Whether we wait before opening a newly written pack to prevent its lastModified timestamp could be racy. | | `pack.window` | `10` | ✅ | Number of objects to try when looking for a delta base per thread searching for deltas. | | `pack.windowMemory` | `0` (unlimited) | ✅ | Maximum number of bytes to put into the delta search window. | +| `pack.quickMatchSearchForReuse` | `false` | ✅ | Search for reuse stops at the first match for efficiency over optimization. | ## __repack__ options diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/file/PackWriterTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/file/PackWriterTest.java index 24a81b6715..d21d214cb1 100644 --- a/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/file/PackWriterTest.java +++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/file/PackWriterTest.java @@ -28,6 +28,7 @@ import java.io.ByteArrayOutputStream; import java.io.File; import java.io.FileOutputStream; import java.io.IOException; +import java.text.ParseException; import java.time.Duration; import java.util.ArrayList; import java.util.Arrays; @@ -35,8 +36,10 @@ import java.util.Collections; import java.util.HashSet; import java.util.List; import java.util.Set; +import java.util.concurrent.ExecutionException; import org.eclipse.jgit.api.Git; +import org.eclipse.jgit.api.errors.GitAPIException; import org.eclipse.jgit.errors.MissingObjectException; import org.eclipse.jgit.internal.storage.file.PackIndex.MutableEntry; import org.eclipse.jgit.internal.storage.pack.PackExt; @@ -702,12 +705,12 @@ public class PackWriterTest extends SampleDataRepositoryTestCase { } @Test - public void testTotalPackFilesScanWhenSearchForReuseTimeoutNotSet() + public void testSelectRepresentationShouldScanAllObjectsWhenQuickMatchIsNotSet() throws Exception { - FileRepository fileRepository = setUpRepoWithMultiplePackfiles(); + FileRepository fileRepository = setUpMultipleRepresentations(); + config.setQuickMatchSearchForReuse(false); PackWriter mockedPackWriter = Mockito .spy(new PackWriter(config, fileRepository.newObjectReader())); - doNothing().when(mockedPackWriter).select(any(), any()); try (FileOutputStream packOS = new FileOutputStream( @@ -716,6 +719,38 @@ public class PackWriterTest extends SampleDataRepositoryTestCase { NullProgressMonitor.INSTANCE, packOS); } + int allRepresentations = Math.toIntExact(fileRepository.getObjectDatabase().getApproximateObjectCount()); + verify(mockedPackWriter, times(allRepresentations)).select(any(), any()); + } + + @Test + public void testSelectRepresentationShouldStopAtTheFirstMatchWhenQuickMatchIsSet() + throws Exception { + FileRepository fileRepository = setUpMultipleRepresentations(); + addRepoToClose(fileRepository); + config.setQuickMatchSearchForReuse(true); + PackWriter mockedPackWriter = Mockito + .spy(new PackWriter(config, fileRepository.newObjectReader())); + doNothing().when(mockedPackWriter).select(any(), any()); + + writePack(fileRepository, mockedPackWriter); + + int expectedRepresentationSelections = 3; // C1, T1, C2 + verify(mockedPackWriter, times(expectedRepresentationSelections)).select(any(), any()); + } + + + @Test + public void testTotalPackFilesScanWhenSearchForReuseTimeoutNotSet() + throws Exception { + FileRepository fileRepository = setUpRepoWithMultiplePackfiles(); + PackWriter mockedPackWriter = Mockito + .spy(new PackWriter(config, fileRepository.newObjectReader())); + + doNothing().when(mockedPackWriter).select(any(), any()); + + writePack(fileRepository, mockedPackWriter); + long numberOfPackFiles = new GC(fileRepository) .getStatistics().numberOfPackFiles; int expectedSelectCalls = @@ -738,11 +773,7 @@ public class PackWriterTest extends SampleDataRepositoryTestCase { doNothing().when(mockedPackWriter).select(any(), any()); - try (FileOutputStream packOS = new FileOutputStream( - getPackFileToWrite(fileRepository, mockedPackWriter))) { - mockedPackWriter.writePack(NullProgressMonitor.INSTANCE, - NullProgressMonitor.INSTANCE, packOS); - } + writePack(fileRepository, mockedPackWriter); long numberOfPackFiles = new GC(fileRepository) .getStatistics().numberOfPackFiles; @@ -767,11 +798,7 @@ public class PackWriterTest extends SampleDataRepositoryTestCase { doNothing().when(mockedPackWriter).select(any(), any()); - try (FileOutputStream packOS = new FileOutputStream( - getPackFileToWrite(fileRepository, mockedPackWriter))) { - mockedPackWriter.writePack(NullProgressMonitor.INSTANCE, - NullProgressMonitor.INSTANCE, packOS); - } + writePack(fileRepository, mockedPackWriter); int expectedSelectCalls = 3; // Objects in packfiles verify(mockedPackWriter, times(expectedSelectCalls)).select(any(), @@ -811,6 +838,31 @@ public class PackWriterTest extends SampleDataRepositoryTestCase { return fileRepository; } + private FileRepository setUpMultipleRepresentations() throws IOException, GitAPIException, ParseException, ExecutionException, InterruptedException { + FileRepository fileRepository = createWorkRepository(); + addRepoToClose(fileRepository); + + try (Git git = new Git(fileRepository)) { + // Creates 2 objects (C1 = commit, T1 = tree) and pack them in P1 (containing, C1, T1( + git.commit().setMessage("First commit").call(); + GC gc = new GC(fileRepository); + gc.gc().get(); + + // Creates 1 object (C2 commit) and pack it in P2 (containing C1, T1, C2) + git.commit().setMessage("Second commit").call(); + gc.gc().get(); + } + return fileRepository; + } + + private void writePack(FileRepository fileRepository, PackWriter packWriter) throws IOException { + try (FileOutputStream packOS = new FileOutputStream( + getPackFileToWrite(fileRepository, packWriter))) { + packWriter.writePack(NullProgressMonitor.INSTANCE, + NullProgressMonitor.INSTANCE, packOS); + } + } + private PackFile getPackFileToWrite(FileRepository fileRepository, PackWriter mockedPackWriter) throws IOException { File packdir = fileRepository.getObjectDatabase().getPackDirectory(); diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/Pack.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/Pack.java index f87329ccc2..dd59731716 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/Pack.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/Pack.java @@ -85,6 +85,17 @@ public class Pack implements Iterable<PackIndex.MutableEntry> { public static final Comparator<Pack> SORT = (a, b) -> b.packLastModified .compareTo(a.packLastModified); + /** + * Sorts PackFiles to be most recently created to least recently created giving priority to the ones having bitmaps. + */ + public static final Comparator<Pack> SORT_WITH_BITMAP_FIRST = Comparator.comparing((Pack pack) -> { + try { + return pack.getBitmapIndex() != null ? 0 : 1; + } catch (IOException e) { + return 1; + } + }).thenComparing(Pack.SORT); + private boolean useStrongRefs; private final PackFile packFile; diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/PackDirectory.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/PackDirectory.java index 8221cff442..42cd24bf3c 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/PackDirectory.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/PackDirectory.java @@ -280,13 +280,17 @@ class PackDirectory { PackList pList = packList.get(); int retries = 0; SEARCH: for (;;) { - for (Pack p : pList.packs) { + Pack[] sortedPacks = packer.getQuickMatchSearchForReuse() ? pList.getPacksSortedByBitmapFirst() : pList.packs; + for (Pack p : sortedPacks) { try { LocalObjectRepresentation rep = p.representation(curs, otp); p.resetTransientErrorCount(); if (rep != null) { packer.select(otp, rep); packer.checkSearchForReuseTimeout(); + if(packer.getQuickMatchSearchForReuse()) { + break SEARCH; + } } } catch (SearchForReuseTimeout e) { break SEARCH; @@ -568,9 +572,23 @@ class PackDirectory { /** All known packs, sorted by {@link Pack#SORT}. */ final Pack[] packs; + private Pack[] packsSortedByBitmapFirst; + PackList(FileSnapshot monitor, Pack[] packs) { this.snapshot = monitor; this.packs = packs; } + + /** + * Get the list of packfiles, sorted by {@link Pack#SORT_WITH_BITMAP_FIRST}. + * + * @return the ordered array of {@link Pack} + */ + public Pack[] getPacksSortedByBitmapFirst() { + if(packsSortedByBitmapFirst == null) { + this.packsSortedByBitmapFirst = Arrays.stream(packs).sorted(Pack.SORT_WITH_BITMAP_FIRST).toArray(Pack[]::new); + } + return this.packsSortedByBitmapFirst; + } } } diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/pack/PackWriter.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/pack/PackWriter.java index 4350f97915..863c83b250 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/pack/PackWriter.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/pack/PackWriter.java @@ -275,6 +275,8 @@ public class PackWriter implements AutoCloseable { private long searchForReuseStartTimeEpoc; + private final boolean quickMatchSearchForReuse; + private int depth; private Collection<? extends ObjectId> unshallowObjects; @@ -370,6 +372,7 @@ public class PackWriter implements AutoCloseable { deltaBaseAsOffset = config.isDeltaBaseAsOffset(); reuseDeltas = config.isReuseDeltas(); searchForReuseTimeout = config.getSearchForReuseTimeout(); + quickMatchSearchForReuse = config.getQuickMatchSearchForReuse(); reuseValidate = true; // be paranoid by default stats = statsAccumulator != null ? statsAccumulator : new PackStatistics.Accumulator(); @@ -702,6 +705,16 @@ public class PackWriter implements AutoCloseable { } /** + * Whether the search for reuse phase should stop at the first object representation. + * + * @return {@code true} if the first-match search for reuse is enabled, {@code false} otherwise. + * @since 6.9.1 + */ + public boolean getQuickMatchSearchForReuse() { + return quickMatchSearchForReuse; + } + + /** * Returns objects number in a pack file that was created by this writer. * * @return number of objects in pack. diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/lib/ConfigConstants.java b/org.eclipse.jgit/src/org/eclipse/jgit/lib/ConfigConstants.java index 0edf3c5ad0..b9b8507e72 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/lib/ConfigConstants.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/lib/ConfigConstants.java @@ -843,6 +843,13 @@ public final class ConfigConstants { public static final String CONFIG_KEY_SINGLE_PACK = "singlepack"; /** + * The "pack.quickMatchSearchForReuse" key + * @since 6.9.1 + */ + public static final String CONFIG_KEY_QUICK_MATCH_SEARCH_FOR_REUSE = "quickmatchsearchforreuse"; + + + /** * The "pack.threads" key * @since 5.8 */ diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/PackConfig.java b/org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/PackConfig.java index 8373d6809a..3cd99e234a 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/PackConfig.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/PackConfig.java @@ -33,6 +33,7 @@ import static org.eclipse.jgit.lib.ConfigConstants.CONFIG_KEY_MIN_SIZE_PREVENT_R import static org.eclipse.jgit.lib.ConfigConstants.CONFIG_KEY_PACK_KEPT_OBJECTS; import static org.eclipse.jgit.lib.ConfigConstants.CONFIG_KEY_PRESERVE_OLD_PACKS; import static org.eclipse.jgit.lib.ConfigConstants.CONFIG_KEY_PRUNE_PRESERVED; +import static org.eclipse.jgit.lib.ConfigConstants.CONFIG_KEY_QUICK_MATCH_SEARCH_FOR_REUSE; import static org.eclipse.jgit.lib.ConfigConstants.CONFIG_KEY_REUSE_DELTAS; import static org.eclipse.jgit.lib.ConfigConstants.CONFIG_KEY_REUSE_OBJECTS; import static org.eclipse.jgit.lib.ConfigConstants.CONFIG_KEY_SEARCH_FOR_REUSE_TIMEOUT; @@ -354,6 +355,8 @@ public class PackConfig { private boolean singlePack; + private boolean quickMatchSearchForReuse; + private int minBytesForObjSizeIndex = DEFAULT_MIN_BYTES_FOR_OBJ_SIZE_INDEX; /** @@ -426,6 +429,7 @@ public class PackConfig { this.singlePack = cfg.singlePack; this.searchForReuseTimeout = cfg.searchForReuseTimeout; this.minBytesForObjSizeIndex = cfg.minBytesForObjSizeIndex; + this.quickMatchSearchForReuse = cfg.quickMatchSearchForReuse; } /** @@ -690,6 +694,44 @@ public class PackConfig { } /** + * Whether the search for reuse phase should stop at the first object representation. + * <p> + * When enabled, the search prioritizes packfiles with bitmaps and stops scanning upon finding the first + * matching object representation, potentially sacrificing finding the best representation for efficiency. + * </p> + * <p> + * This configuration influences the trade-off between efficiency and the quality + * of object representation chosen during the search for reuse phase. + * </p> + * + * @return {@code true} if the first-match search for reuse is enabled, {@code false} otherwise. + * @since 6.9.1 + */ + public boolean getQuickMatchSearchForReuse() { + return quickMatchSearchForReuse; + } + + /** + * Set whether the search for reuse phase should stop at the first object representation. + * <p> + * When enabled, the search prioritizes packfiles with bitmaps and stops scanning upon finding the first + * matching object representation, potentially sacrificing finding the best representation for efficiency. + * </p> + * <p> + * This configuration influences the trade-off between efficiency and the quality + * of object representation chosen during the search for reuse phase. + * </p> + * + * @param quickMatchSearchForReuse + * true to stop finding the first matching object representation in search for reuse + * + * @since 6.9.1 + */ + public void setQuickMatchSearchForReuse(boolean quickMatchSearchForReuse) { + this.quickMatchSearchForReuse = quickMatchSearchForReuse; + } + + /** * Get the number of objects to try when looking for a delta base. * * This limit is per thread, if 4 threads are used the actual memory used @@ -1434,6 +1476,9 @@ public class PackConfig { setSinglePack(rc.getBoolean(CONFIG_PACK_SECTION, CONFIG_KEY_SINGLE_PACK, getSinglePack())); + setQuickMatchSearchForReuse(rc.getBoolean(CONFIG_PACK_SECTION, + CONFIG_KEY_QUICK_MATCH_SEARCH_FOR_REUSE, + getQuickMatchSearchForReuse())); setWriteReverseIndex(rc.getBoolean(CONFIG_PACK_SECTION, CONFIG_KEY_WRITE_REVERSE_INDEX, isWriteReverseIndex())); boolean buildBitmapsFromConfig = rc.getBoolean(CONFIG_PACK_SECTION, @@ -1522,6 +1567,7 @@ public class PackConfig { b.append(", singlePack=").append(getSinglePack()); //$NON-NLS-1$ b.append(", minBytesForObjSizeIndex=") //$NON-NLS-1$ .append(getMinBytesForObjSizeIndex()); + b.append(", quickMatchSearchForReuse=").append(getQuickMatchSearchForReuse()); //$NON-NLS-1$ return b.toString(); } } |