Browse Source

searchForReuse might impact performance in large repositories

The search for reuse phase for *all* the objects scans *all*
the packfiles, looking for the best candidate to serve back to the
client.

This can lead to an expensive operation when the number of
packfiles and objects is high.

Add parameter "pack.searchForReuseTimeout" to limit the time spent
on this search.

Change-Id: I54f5cddb6796fdc93ad9585c2ab4b44854fa6c48
changes/55/181355/20
Fabio Ponciroli 1 year ago
parent
commit
6976a30f44

+ 1
- 0
Documentation/config-options.md View File

@@ -102,6 +102,7 @@ Proxy configuration uses the standard Java mechanisms via class `java.net.ProxyS
| `prunePreserved`, only via API of PackConfig | `false` | ⃞ | Whether to remove preserved pack files in a preserved directory. |
| `pack.reuseDeltas` | `true` |⃞ | Whether to reuse deltas existing in repository. |
| `pack.reuseObjects` | `true` | ⃞ | Whether to reuse existing objects representation in repository. |
| `pack.searchForReuseTimeout` | | ⃞ | Search for reuse phase timeout. Expressed as a `Duration`, i.e.: `50sec`. |
| `pack.singlePack` | `false` | ⃞ | Whether all of `refs/*` should be packed in a single pack. |
| `pack.threads` | `0` (auto-detect number of processors) | ✅ | Number of threads to use for delta compression. |
| `pack.waitPreventRacyPack` | `false` | ⃞ | Whether we wait before opening a newly written pack to prevent its lastModified timestamp could be racy. |

+ 133
- 0
org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/file/PackWriterTest.java View File

@@ -18,6 +18,10 @@ import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
import static org.mockito.ArgumentMatchers.any;
import static org.mockito.Mockito.doNothing;
import static org.mockito.Mockito.times;
import static org.mockito.Mockito.verify;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
@@ -25,6 +29,7 @@ import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.text.ParseException;
import java.time.Duration;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
@@ -32,6 +37,7 @@ import java.util.HashSet;
import java.util.List;
import java.util.Set;

import org.eclipse.jgit.api.Git;
import org.eclipse.jgit.errors.MissingObjectException;
import org.eclipse.jgit.internal.storage.file.PackIndex.MutableEntry;
import org.eclipse.jgit.internal.storage.pack.PackExt;
@@ -43,6 +49,7 @@ import org.eclipse.jgit.lib.NullProgressMonitor;
import org.eclipse.jgit.lib.ObjectId;
import org.eclipse.jgit.lib.ObjectIdSet;
import org.eclipse.jgit.lib.ObjectInserter;
import org.eclipse.jgit.lib.Ref;
import org.eclipse.jgit.lib.Repository;
import org.eclipse.jgit.lib.Sets;
import org.eclipse.jgit.revwalk.DepthWalk;
@@ -58,6 +65,7 @@ import org.eclipse.jgit.transport.PackParser;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import org.mockito.Mockito;

public class PackWriterTest extends SampleDataRepositoryTestCase {

@@ -626,6 +634,131 @@ public class PackWriterTest extends SampleDataRepositoryTestCase {
}
}

@Test
public void testTotalPackFilesScanWhenSearchForReuseTimeoutNotSet()
throws Exception {
FileRepository fileRepository = setUpRepoWithMultiplePackfiles();
PackWriter mockedPackWriter = Mockito
.spy(new PackWriter(config, fileRepository.newObjectReader()));

doNothing().when(mockedPackWriter).select(any(), any());

try (FileOutputStream packOS = new FileOutputStream(
getPackFileToWrite(fileRepository, mockedPackWriter))) {
mockedPackWriter.writePack(NullProgressMonitor.INSTANCE,
NullProgressMonitor.INSTANCE, packOS);
}

long numberOfPackFiles = new GC(fileRepository)
.getStatistics().numberOfPackFiles;
int expectedSelectCalls =
// Objects contained in multiple packfiles * number of packfiles
2 * (int) numberOfPackFiles +
// Objects in single packfile
1;
verify(mockedPackWriter, times(expectedSelectCalls)).select(any(),
any());
}

@Test
public void testTotalPackFilesScanWhenSkippingSearchForReuseTimeoutCheck()
throws Exception {
FileRepository fileRepository = setUpRepoWithMultiplePackfiles();
PackConfig packConfig = new PackConfig();
packConfig.setSearchForReuseTimeout(Duration.ofSeconds(-1));
PackWriter mockedPackWriter = Mockito.spy(
new PackWriter(packConfig, fileRepository.newObjectReader()));

doNothing().when(mockedPackWriter).select(any(), any());

try (FileOutputStream packOS = new FileOutputStream(
getPackFileToWrite(fileRepository, mockedPackWriter))) {
mockedPackWriter.writePack(NullProgressMonitor.INSTANCE,
NullProgressMonitor.INSTANCE, packOS);
}

long numberOfPackFiles = new GC(fileRepository)
.getStatistics().numberOfPackFiles;
int expectedSelectCalls =
// Objects contained in multiple packfiles * number of packfiles
2 * (int) numberOfPackFiles +
// Objects contained in single packfile
1;
verify(mockedPackWriter, times(expectedSelectCalls)).select(any(),
any());
}

@Test
public void testPartialPackFilesScanWhenDoingSearchForReuseTimeoutCheck()
throws Exception {
FileRepository fileRepository = setUpRepoWithMultiplePackfiles();
PackConfig packConfig = new PackConfig();
packConfig.setSearchForReuseTimeout(Duration.ofSeconds(-1));
PackWriter mockedPackWriter = Mockito.spy(
new PackWriter(packConfig, fileRepository.newObjectReader()));
mockedPackWriter.enableSearchForReuseTimeout();

doNothing().when(mockedPackWriter).select(any(), any());

try (FileOutputStream packOS = new FileOutputStream(
getPackFileToWrite(fileRepository, mockedPackWriter))) {
mockedPackWriter.writePack(NullProgressMonitor.INSTANCE,
NullProgressMonitor.INSTANCE, packOS);
}

int expectedSelectCalls = 3; // Objects in packfiles
verify(mockedPackWriter, times(expectedSelectCalls)).select(any(),
any());
}

/**
* Creates objects and packfiles in the following order:
* <ul>
* <li>Creates 2 objects (C1 = commit, T1 = tree)
* <li>Creates packfile P1 (containing C1, T1)
* <li>Creates 1 object (C2 commit)
* <li>Creates packfile P2 (containing C1, T1, C2)
* <li>Create 1 object (C3 commit)
* </ul>
*
* @throws Exception
*/
private FileRepository setUpRepoWithMultiplePackfiles() throws Exception {
FileRepository fileRepository = createWorkRepository();
try (Git git = new Git(fileRepository)) {
// Creates 2 objects (C1 = commit, T1 = tree)
git.commit().setMessage("First commit").call();
GC gc = new GC(fileRepository);
gc.setPackExpireAgeMillis(Long.MAX_VALUE);
gc.setExpireAgeMillis(Long.MAX_VALUE);
// Creates packfile P1 (containing C1, T1)
gc.gc();
// Creates 1 object (C2 commit)
git.commit().setMessage("Second commit").call();
// Creates packfile P2 (containing C1, T1, C2)
gc.gc();
// Create 1 object (C3 commit)
git.commit().setMessage("Third commit").call();
}
return fileRepository;
}

private PackFile getPackFileToWrite(FileRepository fileRepository,
PackWriter mockedPackWriter) throws IOException {
File packdir = fileRepository.getObjectDatabase().getPackDirectory();
PackFile packFile = new PackFile(packdir,
mockedPackWriter.computeName(), PackExt.PACK);

Set<ObjectId> all = new HashSet<>();
for (Ref r : fileRepository.getRefDatabase().getRefs()) {
all.add(r.getObjectId());
}

mockedPackWriter.preparePack(NullProgressMonitor.INSTANCE, all,
PackWriter.NONE);
return packFile;
}

private FileRepository setupRepoForShallowFetch() throws Exception {
FileRepository repo = createBareRepository();
// TestRepository will close the repo, but we need to return an open

+ 8
- 0
org.eclipse.jgit/.settings/.api_filters View File

@@ -1,5 +1,13 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<component id="org.eclipse.jgit" version="2">
<resource path="src/org/eclipse/jgit/storage/pack/PackConfig.java" type="org.eclipse.jgit.storage.pack.PackConfig">
<filter id="336658481">
<message_arguments>
<message_argument value="org.eclipse.jgit.storage.pack.PackConfig"/>
<message_argument value="DEFAULT_SEARCH_FOR_REUSE_TIMEOUT"/>
</message_arguments>
</filter>
</resource>
<resource path="src/org/eclipse/jgit/transport/SshConstants.java" type="org.eclipse.jgit.transport.SshConstants">
<filter id="1142947843">
<message_arguments>

+ 1
- 0
org.eclipse.jgit/resources/org/eclipse/jgit/internal/JGitText.properties View File

@@ -632,6 +632,7 @@ s3ActionWriting=Writing
searchForReachableBranches=Finding reachable branches
saveFileStoreAttributesFailed=Saving measured FileStore attributes to user config failed
searchForReuse=Finding sources
searchForReuseTimeout=Search for reuse timed out after {0} seconds
searchForSizes=Getting sizes
secondsAgo={0} seconds ago
selectingCommits=Selecting commits

+ 42
- 0
org.eclipse.jgit/src/org/eclipse/jgit/errors/SearchForReuseTimeout.java View File

@@ -0,0 +1,42 @@
/*
* Copyright (C) 2021, Fabio Ponciroli <ponch@gerritforge.com>
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Distribution License v. 1.0 which is available at
* https://www.eclipse.org/org/documents/edl-v10.php.
*
* SPDX-License-Identifier: BSD-3-Clause
*/

package org.eclipse.jgit.errors;

import org.eclipse.jgit.internal.JGitText;

import java.io.IOException;
import java.text.MessageFormat;
import java.time.Duration;

/**
* Thrown when the search for reuse phase times out.
*
* @since 5.13
*/
public class SearchForReuseTimeout extends IOException {
private static final long serialVersionUID = 1L;

/**
* Construct a search for reuse timeout error.
*
* @param timeout
* time exceeded during the search for reuse phase.
*/
public SearchForReuseTimeout(Duration timeout) {
super(MessageFormat.format(JGitText.get().searchForReuseTimeout,
Long.valueOf(timeout.getSeconds())));
}

@Override
public synchronized Throwable fillInStackTrace() {
return this;
}
}

+ 1
- 0
org.eclipse.jgit/src/org/eclipse/jgit/internal/JGitText.java View File

@@ -660,6 +660,7 @@ public class JGitText extends TranslationBundle {
/***/ public String saveFileStoreAttributesFailed;
/***/ public String searchForReachableBranches;
/***/ public String searchForReuse;
/***/ public String searchForReuseTimeout;
/***/ public String searchForSizes;
/***/ public String secondsAgo;
/***/ public String selectingCommits;

+ 4
- 0
org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/PackDirectory.java View File

@@ -33,6 +33,7 @@ import org.eclipse.jgit.annotations.Nullable;
import org.eclipse.jgit.errors.CorruptObjectException;
import org.eclipse.jgit.errors.PackInvalidException;
import org.eclipse.jgit.errors.PackMismatchException;
import org.eclipse.jgit.errors.SearchForReuseTimeout;
import org.eclipse.jgit.internal.JGitText;
import org.eclipse.jgit.internal.storage.pack.ObjectToPack;
import org.eclipse.jgit.internal.storage.pack.PackExt;
@@ -264,7 +265,10 @@ class PackDirectory {
p.resetTransientErrorCount();
if (rep != null) {
packer.select(otp, rep);
packer.checkSearchForReuseTimeout();
}
} catch (SearchForReuseTimeout e) {
break SEARCH;
} catch (PackMismatchException e) {
// Pack was modified; refresh the entire pack list.
//

+ 44
- 0
org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/pack/PackWriter.java View File

@@ -25,6 +25,7 @@ import java.io.OutputStream;
import java.lang.ref.WeakReference;
import java.security.MessageDigest;
import java.text.MessageFormat;
import java.time.Duration;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
@@ -54,6 +55,7 @@ import org.eclipse.jgit.errors.CorruptObjectException;
import org.eclipse.jgit.errors.IncorrectObjectTypeException;
import org.eclipse.jgit.errors.LargeObjectException;
import org.eclipse.jgit.errors.MissingObjectException;
import org.eclipse.jgit.errors.SearchForReuseTimeout;
import org.eclipse.jgit.errors.StoredObjectRepresentationNotAvailableException;
import org.eclipse.jgit.internal.JGitText;
import org.eclipse.jgit.internal.storage.file.PackBitmapIndexBuilder;
@@ -262,6 +264,12 @@ public class PackWriter implements AutoCloseable {

private boolean indexDisabled;

private boolean checkSearchForReuseTimeout = false;

private final Duration searchForReuseTimeout;

private long searchForReuseStartTimeEpoc;

private int depth;

private Collection<? extends ObjectId> unshallowObjects;
@@ -356,6 +364,7 @@ public class PackWriter implements AutoCloseable {

deltaBaseAsOffset = config.isDeltaBaseAsOffset();
reuseDeltas = config.isReuseDeltas();
searchForReuseTimeout = config.getSearchForReuseTimeout();
reuseValidate = true; // be paranoid by default
stats = statsAccumulator != null ? statsAccumulator
: new PackStatistics.Accumulator();
@@ -404,6 +413,24 @@ public class PackWriter implements AutoCloseable {
return deltaBaseAsOffset;
}

/**
* Check whether the search for reuse phase is taking too long. This could
* be the case when the number of objects and pack files is high and the
* system is under pressure. If that's the case and
* checkSearchForReuseTimeout is true abort the search.
*
* @throws SearchForReuseTimeout
* if the search for reuse is taking too long.
*/
public void checkSearchForReuseTimeout() throws SearchForReuseTimeout {
if (checkSearchForReuseTimeout
&& Duration.ofMillis(System.currentTimeMillis()
- searchForReuseStartTimeEpoc)
.compareTo(searchForReuseTimeout) > 0) {
throw new SearchForReuseTimeout(searchForReuseTimeout);
}
}

/**
* Set writer delta base format. Delta base can be written as an offset in a
* pack file (new approach reducing file size) or as an object id (legacy
@@ -419,6 +446,22 @@ public class PackWriter implements AutoCloseable {
this.deltaBaseAsOffset = deltaBaseAsOffset;
}

/**
* Set the writer to check for long search for reuse, exceeding the timeout.
* Selecting an object representation can be an expensive operation. It is
* possible to set a max search for reuse time (see
* PackConfig#CONFIG_KEY_SEARCH_FOR_REUSE_TIMEOUT for more details).
*
* However some operations, i.e.: GC, need to find the best candidate
* regardless how much time the operation will need to finish.
*
* This method enables the search for reuse timeout check, otherwise
* disabled.
*/
public void enableSearchForReuseTimeout() {
this.checkSearchForReuseTimeout = true;
}

/**
* Check if the writer will reuse commits that are already stored as deltas.
*
@@ -1306,6 +1349,7 @@ public class PackWriter implements AutoCloseable {
cnt += objectsLists[OBJ_TAG].size();

long start = System.currentTimeMillis();
searchForReuseStartTimeEpoc = start;
beginPhase(PackingPhase.FINDING_SOURCES, monitor, cnt);
if (cnt <= 4096) {
// For small object counts, do everything as one list.

+ 8
- 0
org.eclipse.jgit/src/org/eclipse/jgit/lib/ConfigConstants.java View File

@@ -736,4 +736,12 @@ public final class ConfigConstants {
* @since 5.11
*/
public static final String CONFIG_KEY_DEFAULT_BRANCH = "defaultbranch";

/**
* The "pack.searchForReuseTimeout" key
*
* @since 5.13
*/
public static final String CONFIG_KEY_SEARCH_FOR_REUSE_TIMEOUT = "searchforreusetimeout";

}

+ 47
- 0
org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/PackConfig.java View File

@@ -29,6 +29,7 @@ import static org.eclipse.jgit.lib.ConfigConstants.CONFIG_KEY_INDEXVERSION;
import static org.eclipse.jgit.lib.ConfigConstants.CONFIG_KEY_MIN_SIZE_PREVENT_RACYPACK;
import static org.eclipse.jgit.lib.ConfigConstants.CONFIG_KEY_REUSE_DELTAS;
import static org.eclipse.jgit.lib.ConfigConstants.CONFIG_KEY_REUSE_OBJECTS;
import static org.eclipse.jgit.lib.ConfigConstants.CONFIG_KEY_SEARCH_FOR_REUSE_TIMEOUT;
import static org.eclipse.jgit.lib.ConfigConstants.CONFIG_KEY_SINGLE_PACK;
import static org.eclipse.jgit.lib.ConfigConstants.CONFIG_KEY_THREADS;
import static org.eclipse.jgit.lib.ConfigConstants.CONFIG_KEY_WAIT_PREVENT_RACYPACK;
@@ -36,7 +37,9 @@ import static org.eclipse.jgit.lib.ConfigConstants.CONFIG_KEY_WINDOW;
import static org.eclipse.jgit.lib.ConfigConstants.CONFIG_KEY_WINDOW_MEMORY;
import static org.eclipse.jgit.lib.ConfigConstants.CONFIG_PACK_SECTION;

import java.time.Duration;
import java.util.concurrent.Executor;
import java.util.concurrent.TimeUnit;
import java.util.zip.Deflater;

import org.eclipse.jgit.internal.storage.file.PackIndexWriter;
@@ -222,6 +225,16 @@ public class PackConfig {
*/
public static final int DEFAULT_BITMAP_INACTIVE_BRANCH_AGE_IN_DAYS = 90;

/**
* Default max time to spend during the search for reuse phase. This
* optimization is disabled by default: {@value}
*
* @see #setSearchForReuseTimeout(Duration)
* @since 5.13
*/
public static final Duration DEFAULT_SEARCH_FOR_REUSE_TIMEOUT = Duration
.ofSeconds(Integer.MAX_VALUE);

private int compressionLevel = Deflater.DEFAULT_COMPRESSION;

private boolean reuseDeltas = DEFAULT_REUSE_DELTAS;
@@ -272,6 +285,8 @@ public class PackConfig {

private int bitmapInactiveBranchAgeInDays = DEFAULT_BITMAP_INACTIVE_BRANCH_AGE_IN_DAYS;

private Duration searchForReuseTimeout = DEFAULT_SEARCH_FOR_REUSE_TIMEOUT;

private boolean cutDeltaChains;

private boolean singlePack;
@@ -342,6 +357,7 @@ public class PackConfig {
this.bitmapInactiveBranchAgeInDays = cfg.bitmapInactiveBranchAgeInDays;
this.cutDeltaChains = cfg.cutDeltaChains;
this.singlePack = cfg.singlePack;
this.searchForReuseTimeout = cfg.searchForReuseTimeout;
}

/**
@@ -1103,6 +1119,18 @@ public class PackConfig {
return bitmapInactiveBranchAgeInDays;
}

/**
* Get the max time to spend during the search for reuse phase.
*
* Default setting: {@value #DEFAULT_SEARCH_FOR_REUSE_TIMEOUT}
*
* @return the maximum time to spend during the search for reuse phase.
* @since 5.13
*/
public Duration getSearchForReuseTimeout() {
return searchForReuseTimeout;
}

/**
* Set the age in days that marks a branch as "inactive".
*
@@ -1116,6 +1144,19 @@ public class PackConfig {
bitmapInactiveBranchAgeInDays = ageInDays;
}

/**
* Set the max time to spend during the search for reuse phase.
*
* @param timeout
* max time allowed during the search for reuse phase
*
* Default setting: {@value #DEFAULT_SEARCH_FOR_REUSE_TIMEOUT}
* @since 5.13
*/
public void setSearchForReuseTimeout(Duration timeout) {
searchForReuseTimeout = timeout;
}

/**
* Update properties by setting fields from the configuration.
*
@@ -1179,6 +1220,10 @@ public class PackConfig {
setBitmapInactiveBranchAgeInDays(rc.getInt(CONFIG_PACK_SECTION,
CONFIG_KEY_BITMAP_INACTIVE_BRANCH_AGE_INDAYS,
getBitmapInactiveBranchAgeInDays()));
setSearchForReuseTimeout(Duration.ofSeconds(rc.getTimeUnit(
CONFIG_PACK_SECTION, null,
CONFIG_KEY_SEARCH_FOR_REUSE_TIMEOUT,
getSearchForReuseTimeout().getSeconds(), TimeUnit.SECONDS)));
setWaitPreventRacyPack(rc.getBoolean(CONFIG_PACK_SECTION,
CONFIG_KEY_WAIT_PREVENT_RACYPACK, isWaitPreventRacyPack()));
setMinSizePreventRacyPack(rc.getLong(CONFIG_PACK_SECTION,
@@ -1216,6 +1261,8 @@ public class PackConfig {
.append(getBitmapExcessiveBranchCount());
b.append(", bitmapInactiveBranchAge=") //$NON-NLS-1$
.append(getBitmapInactiveBranchAgeInDays());
b.append(", searchForReuseTimeout") //$NON-NLS-1$
.append(getSearchForReuseTimeout());
b.append(", singlePack=").append(getSinglePack()); //$NON-NLS-1$
return b.toString();
}

+ 1
- 0
org.eclipse.jgit/src/org/eclipse/jgit/transport/UploadPack.java View File

@@ -2359,6 +2359,7 @@ public class UploadPack {
GitProtocolConstants.SECTION_PACKFILE + '\n');
}
}
pw.enableSearchForReuseTimeout();
pw.writePack(pm, NullProgressMonitor.INSTANCE, packOut);

if (msgOut != NullOutputStream.INSTANCE) {

Loading…
Cancel
Save