summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLuca Milanesio <luca.milanesio@gmail.com>2022-12-20 21:50:19 +0000
committerLuca Milanesio <luca.milanesio@gmail.com>2023-01-31 17:14:09 -0500
commitad977f157242d4d6b5ea4c45b2aa0c15d20b58ae (patch)
tree41e6682c5e7fb9b9d48787a378628e0ca42fae4b
parente4529cd39c42872e9b4f80d38659f9de37956634 (diff)
downloadjgit-ad977f157242d4d6b5ea4c45b2aa0c15d20b58ae.tar.gz
jgit-ad977f157242d4d6b5ea4c45b2aa0c15d20b58ae.zip
Allow the exclusion of refs prefixes from bitmap
When running a GC.repack() against a repository with over one thousand refs/heads and tens of millions of ObjectIds, the calculation of all bitmaps associated with all the refs would result in an unreasonably big file that would take up to several hours to compute. Test scenario: repo with 2500 heads / 10M obj, Intel Xeon E5-2680 2.5GHz. Before this change: 20 mins. After this change and 2300 heads excluded: 10 mins (90s for bitmap). Having such a large bitmap file is also slow in the runtime processing and has negligible or even negative benefits, because the time lost in reading and decompressing the bitmap in memory would not be compensated by the time saved by using it. It is key to preserve the bitmaps for those refs that are mostly used in clone/fetch and give the ability to exclude some refs prefixes that are known to be less frequently accessed, even though they may actually be actively written. Example: Gerrit sandbox branches may even be actively used and selected automatically because their commits are very recent; however, they may bloat the bitmap, making it ineffective. A mono-repo with tens of thousands of developers may have a relatively small number of active branches where the CI/CD jobs are continuously fetching/cloning the code. However, because Gerrit allows the use of sandbox branches, the total number of refs/heads may be even tens to hundreds of thousands. Change-Id: I466dcde69fa008e7f7785735c977f6e150e3b644 Signed-off-by: Luca Milanesio <luca.milanesio@gmail.com>
-rw-r--r--Documentation/config-options.md1
-rw-r--r--org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/pack/GcCommitSelectionTest.java18
-rw-r--r--org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/GC.java23
-rw-r--r--org.eclipse.jgit/src/org/eclipse/jgit/lib/ConfigConstants.java6
-rw-r--r--org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/PackConfig.java38
5 files changed, 82 insertions, 4 deletions
diff --git a/Documentation/config-options.md b/Documentation/config-options.md
index 19bcc33523..b4a0c1d98c 100644
--- a/Documentation/config-options.md
+++ b/Documentation/config-options.md
@@ -86,6 +86,7 @@ Proxy configuration uses the standard Java mechanisms via class `java.net.ProxyS
| `pack.bitmapContiguousCommitCount` | `100` | &#x20DE; | Count of most recent commits for which to build bitmaps. |
| `pack.bitmapDistantCommitSpan` | `5000` | &#x20DE; | Span of commits when building bitmaps for distant history. |
| `pack.bitmapExcessiveBranchCount` | `100` | &#x20DE; | The count of branches deemed "excessive". If the count of branches in a repository exceeds this number and bitmaps are enabled, "inactive" branches will have fewer bitmaps than "active" branches. |
+| `pack.bitmapExcludedRefsPrefixes` | | &#x20DE; | The refs prefixes to be excluded when building bitmaps. May be specified more than once to exclude multiple prefixes. |
| `pack.bitmapInactiveBranchAgeInDays` | `90` | &#x20DE; | Age in days that marks a branch as "inactive" for bitmap creation. |
| `pack.bitmapRecentCommitCount` | `20000` | &#x20DE; | Count at which to switch from `bitmapRecentCommitSpan` to `bitmapDistantCommitSpan`. |
| `pack.bitmapRecentCommitSpan` | `100` | &#x20DE; | Span of commits when building bitmaps for recent history. |
diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/pack/GcCommitSelectionTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/pack/GcCommitSelectionTest.java
index 55dfa697bc..190ac8b640 100644
--- a/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/pack/GcCommitSelectionTest.java
+++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/pack/GcCommitSelectionTest.java
@@ -221,6 +221,24 @@ public class GcCommitSelectionTest extends GcTestCase {
}
@Test
+ public void testBitmapsForExcludedBranches() throws Exception {
+ // Two branches with one commit each; "other" is excluded by prefix,
+ // so exactly one bitmap is expected after gc.
+ createNewCommitOnNewBranch("main");
+ createNewCommitOnNewBranch("other");
+ String[] excludedPrefixes = { "refs/heads/other" };
+ PackConfig cfg = new PackConfig();
+ cfg.setBitmapExcludedRefsPrefixes(excludedPrefixes);
+ gc.setPackConfig(cfg);
+ gc.gc();
+ assertEquals(1, gc.getStatistics().numberOfBitmaps);
+ }
+
+ // Creates refs/heads/<branchName> with a single commit that adds
+ // "some-filename.txt"; the commit message doubles as the file content.
+ private void createNewCommitOnNewBranch(String branchName) throws Exception {
+ String message = "New branch " + branchName;
+ tr.branch("refs/heads/" + branchName).commit().message(message)
+ .add("some-filename.txt", message).create();
+ }
+
+ @Test
public void testSelectionOrderingWithChains() throws Exception {
/*-
* Create a history like this, where 'N' is the number of seconds from
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/GC.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/GC.java
index a14bb411ff..9e97659499 100644
--- a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/GC.java
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/GC.java
@@ -796,6 +796,10 @@ public class GC {
Set<ObjectId> tagTargets = new HashSet<>();
Set<ObjectId> indexObjects = listNonHEADIndexObjects();
+ Set<ObjectId> refsToExcludeFromBitmap = repo.getRefDatabase()
+ .getRefsByPrefix(pconfig.getBitmapExcludedRefsPrefixes())
+ .stream().map(Ref::getObjectId).collect(Collectors.toSet());
+
for (Ref ref : refsBefore) {
checkCancelled();
nonHeads.addAll(listRefLogObjects(ref, 0));
@@ -840,7 +844,7 @@ public class GC {
Pack heads = null;
if (!allHeadsAndTags.isEmpty()) {
heads = writePack(allHeadsAndTags, PackWriter.NONE, allTags,
- tagTargets, excluded);
+ refsToExcludeFromBitmap, tagTargets, excluded);
if (heads != null) {
ret.add(heads);
excluded.add(0, heads.getIndex());
@@ -848,13 +852,13 @@ public class GC {
}
if (!nonHeads.isEmpty()) {
Pack rest = writePack(nonHeads, allHeadsAndTags, PackWriter.NONE,
- tagTargets, excluded);
+ PackWriter.NONE, tagTargets, excluded);
if (rest != null)
ret.add(rest);
}
if (!txnHeads.isEmpty()) {
Pack txn = writePack(txnHeads, PackWriter.NONE, PackWriter.NONE,
- null, excluded);
+ PackWriter.NONE, null, excluded);
if (txn != null)
ret.add(txn);
}
@@ -1123,6 +1127,7 @@ public class GC {
private Pack writePack(@NonNull Set<? extends ObjectId> want,
@NonNull Set<? extends ObjectId> have, @NonNull Set<ObjectId> tags,
+ @NonNull Set<ObjectId> excludedRefsTips,
Set<ObjectId> tagTargets, List<ObjectIdSet> excludeObjects)
throws IOException {
checkCancelled();
@@ -1154,7 +1159,8 @@ public class GC {
if (excludeObjects != null)
for (ObjectIdSet idx : excludeObjects)
pw.excludeObjects(idx);
- pw.preparePack(pm, want, have, PackWriter.NONE, tags);
+ pw.preparePack(pm, want, have, PackWriter.NONE,
+ union(tags, excludedRefsTips));
if (pw.getObjectCount() == 0)
return null;
checkCancelled();
@@ -1267,6 +1273,15 @@ public class GC {
}
}
+ // Builds a new set holding every id from both arguments; neither input
+ // set is modified.
+ private Set<? extends ObjectId> union(Set<ObjectId> tags,
+ Set<ObjectId> excludedRefsHeadsTips) {
+ int combinedSize = tags.size() + excludedRefsHeadsTips.size();
+ HashSet<ObjectId> merged = new HashSet<>(combinedSize);
+ merged.addAll(tags);
+ merged.addAll(excludedRefsHeadsTips);
+ return merged;
+ }
+
private void checkCancelled() throws CancelledException {
if (pm.isCancelled() || Thread.currentThread().isInterrupted()) {
throw new CancelledException(JGitText.get().operationCanceled);
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/lib/ConfigConstants.java b/org.eclipse.jgit/src/org/eclipse/jgit/lib/ConfigConstants.java
index 924328d8a6..6f76326bc9 100644
--- a/org.eclipse.jgit/src/org/eclipse/jgit/lib/ConfigConstants.java
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/lib/ConfigConstants.java
@@ -621,6 +621,12 @@ public final class ConfigConstants {
public static final String CONFIG_KEY_BITMAP_EXCESSIVE_BRANCH_COUNT = "bitmapexcessivebranchcount";
/**
+ * The "pack.bitmapExcludedRefsPrefixes" key
+ * @since 5.13.2
+ */
+ public static final String CONFIG_KEY_BITMAP_EXCLUDED_REFS_PREFIXES = "bitmapexcludedrefsprefixes";
+
+ /**
* The "pack.bitmapInactiveBranchAgeInDays" key
* @since 5.8
*/
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/PackConfig.java b/org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/PackConfig.java
index 6aa8be642d..a10f6cf88a 100644
--- a/org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/PackConfig.java
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/PackConfig.java
@@ -16,6 +16,7 @@ import static org.eclipse.jgit.lib.ConfigConstants.CONFIG_KEY_BIGFILE_THRESHOLD;
import static org.eclipse.jgit.lib.ConfigConstants.CONFIG_KEY_BITMAP_CONTIGUOUS_COMMIT_COUNT;
import static org.eclipse.jgit.lib.ConfigConstants.CONFIG_KEY_BITMAP_DISTANT_COMMIT_SPAN;
import static org.eclipse.jgit.lib.ConfigConstants.CONFIG_KEY_BITMAP_EXCESSIVE_BRANCH_COUNT;
+import static org.eclipse.jgit.lib.ConfigConstants.CONFIG_KEY_BITMAP_EXCLUDED_REFS_PREFIXES;
import static org.eclipse.jgit.lib.ConfigConstants.CONFIG_KEY_BITMAP_INACTIVE_BRANCH_AGE_INDAYS;
import static org.eclipse.jgit.lib.ConfigConstants.CONFIG_KEY_BITMAP_RECENT_COMMIT_COUNT;
import static org.eclipse.jgit.lib.ConfigConstants.CONFIG_KEY_BUILD_BITMAPS;
@@ -226,6 +227,14 @@ public class PackConfig {
public static final int DEFAULT_BITMAP_INACTIVE_BRANCH_AGE_IN_DAYS = 90;
/**
+ * Default refs prefixes excluded from the calculation of pack bitmaps.
+ *
+ * @see #setBitmapExcludedRefsPrefixes(String[])
+ * @since 5.13.2
+ */
+ public static final String[] DEFAULT_BITMAP_EXCLUDED_REFS_PREFIXES = new String[0];
+
+ /**
* Default max time to spend during the search for reuse phase. This
* optimization is disabled by default: {@value}
*
@@ -285,6 +294,8 @@ public class PackConfig {
private int bitmapInactiveBranchAgeInDays = DEFAULT_BITMAP_INACTIVE_BRANCH_AGE_IN_DAYS;
+ private String[] bitmapExcludedRefsPrefixes = DEFAULT_BITMAP_EXCLUDED_REFS_PREFIXES;
+
private Duration searchForReuseTimeout = DEFAULT_SEARCH_FOR_REUSE_TIMEOUT;
private boolean cutDeltaChains;
@@ -1145,6 +1156,27 @@ public class PackConfig {
}
/**
+ * Return the refs prefixes that are excluded when building bitmaps.
+ *
+ * @return the configured prefixes, exactly as stored (no copy is made).
+ * @since 5.13.2
+ */
+ public String[] getBitmapExcludedRefsPrefixes() {
+ return this.bitmapExcludedRefsPrefixes;
+ }
+
+ /**
+ * Set the refs prefixes excluded from the Bitmap.
+ *
+ * @param excludedRefsPrefixes
+ * the refs prefixes excluded from the Bitmap; {@code null} is
+ * treated as "no exclusions" and resets the value to
+ * {@link #DEFAULT_BITMAP_EXCLUDED_REFS_PREFIXES}.
+ * @since 5.13.2
+ */
+ public void setBitmapExcludedRefsPrefixes(String[] excludedRefsPrefixes) {
+ // GC hands this array straight to RefDatabase#getRefsByPrefix, so a
+ // null must never be stored here.
+ bitmapExcludedRefsPrefixes = excludedRefsPrefixes != null
+ ? excludedRefsPrefixes
+ : DEFAULT_BITMAP_EXCLUDED_REFS_PREFIXES;
+ }
+
+ /**
* Set the max time to spend during the search for reuse phase.
*
* @param timeout
@@ -1220,6 +1252,12 @@ public class PackConfig {
setBitmapInactiveBranchAgeInDays(rc.getInt(CONFIG_PACK_SECTION,
CONFIG_KEY_BITMAP_INACTIVE_BRANCH_AGE_INDAYS,
getBitmapInactiveBranchAgeInDays()));
+ String[] excludedRefsPrefixesArray = rc.getStringList(CONFIG_PACK_SECTION,
+ null,
+ CONFIG_KEY_BITMAP_EXCLUDED_REFS_PREFIXES);
+ if(excludedRefsPrefixesArray.length > 0) {
+ setBitmapExcludedRefsPrefixes(excludedRefsPrefixesArray);
+ }
setSearchForReuseTimeout(Duration.ofSeconds(rc.getTimeUnit(
CONFIG_PACK_SECTION, null,
CONFIG_KEY_SEARCH_FOR_REUSE_TIMEOUT,