summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid Turner <dturner@twosigma.com>2017-02-08 15:07:18 -0500
committerMatthias Sohn <matthias.sohn@sap.com>2017-06-06 01:18:29 +0200
commit6b1e3c58b16651dc72ec79a614d507e014d18393 (patch)
tree4e3d3fb306a70c46cc402a451a79c17e32181a70
parentf17ec3928c45d7f5170e92b4e0e1f7390de5fff2 (diff)
downloadjgit-6b1e3c58b16651dc72ec79a614d507e014d18393.tar.gz
jgit-6b1e3c58b16651dc72ec79a614d507e014d18393.zip
Run auto GC in the background
When running an automatic GC on a FileRepository, when the caller passes a NullProgressMonitor, run the GC in a background thread. Use a thread pool of size 1 to limit the number of background threads spawned for background gc in the same application. In the next minor release we can make the thread pool configurable. In some cases, the auto GC limit is lower than the true number of unreachable loose objects, so auto GC will run after every (e.g) fetch operation. This leads to the appearance of poor fetch performance. Since these GCs will never make progress (until either the objects become referenced, or the two week timeout expires), blocking on them simply reduces throughput. In the event that an auto GC would make progress, it's still OK if it runs in the background. The progress will still happen. This matches the behavior of regular git. Git (and now jgit) uses the lock file for gc.log to prevent simultaneous runs of background gc. Further, it writes errors to gc.log, and won't run background gc if that file is present and recent. If gc.log is too old (according to the config gc.logexpiry), it will be ignored. Change-Id: I3870cadb4a0a6763feff252e6eaef99f4aa8d0df Signed-off-by: David Turner <dturner@twosigma.com> Signed-off-by: Matthias Sohn <matthias.sohn@sap.com>
-rw-r--r--org.eclipse.jgit.junit/src/org/eclipse/jgit/junit/LocalDiskRepositoryTestCase.java7
-rw-r--r--org.eclipse.jgit.pgm.test/tst/org/eclipse/jgit/pgm/ConfigTest.java1
-rw-r--r--org.eclipse.jgit/.settings/.api_filters8
-rw-r--r--org.eclipse.jgit/resources/org/eclipse/jgit/internal/JGitText.properties2
-rw-r--r--org.eclipse.jgit/src/org/eclipse/jgit/internal/JGitText.java2
-rw-r--r--org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/FileRepository.java6
-rw-r--r--org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/GC.java91
-rw-r--r--org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/GcLog.java191
-rw-r--r--org.eclipse.jgit/src/org/eclipse/jgit/lib/ConfigConstants.java14
9 files changed, 320 insertions, 2 deletions
diff --git a/org.eclipse.jgit.junit/src/org/eclipse/jgit/junit/LocalDiskRepositoryTestCase.java b/org.eclipse.jgit.junit/src/org/eclipse/jgit/junit/LocalDiskRepositoryTestCase.java
index 933faf9bfa..6ace9fc122 100644
--- a/org.eclipse.jgit.junit/src/org/eclipse/jgit/junit/LocalDiskRepositoryTestCase.java
+++ b/org.eclipse.jgit.junit/src/org/eclipse/jgit/junit/LocalDiskRepositoryTestCase.java
@@ -62,6 +62,7 @@ import java.util.TreeSet;
import org.eclipse.jgit.dircache.DirCache;
import org.eclipse.jgit.dircache.DirCacheEntry;
import org.eclipse.jgit.internal.storage.file.FileRepository;
+import org.eclipse.jgit.lib.ConfigConstants;
import org.eclipse.jgit.lib.Constants;
import org.eclipse.jgit.lib.ObjectId;
import org.eclipse.jgit.lib.PersonIdent;
@@ -121,6 +122,12 @@ public abstract class LocalDiskRepositoryTestCase {
mockSystemReader = new MockSystemReader();
mockSystemReader.userGitConfig = new FileBasedConfig(new File(tmp,
"usergitconfig"), FS.DETECTED);
+ // We have to set autoDetach to false for tests, because tests expect to be able
+ // to clean up by recursively removing the repository, and background GC might be
+ // in the middle of writing or deleting files, which would disrupt this.
+ mockSystemReader.userGitConfig.setBoolean(ConfigConstants.CONFIG_GC_SECTION,
+ null, ConfigConstants.CONFIG_KEY_AUTODETACH, false);
+ mockSystemReader.userGitConfig.save();
ceilTestDirectories(getCeilings());
SystemReader.setInstance(mockSystemReader);
diff --git a/org.eclipse.jgit.pgm.test/tst/org/eclipse/jgit/pgm/ConfigTest.java b/org.eclipse.jgit.pgm.test/tst/org/eclipse/jgit/pgm/ConfigTest.java
index c43accdb6f..0ce645139d 100644
--- a/org.eclipse.jgit.pgm.test/tst/org/eclipse/jgit/pgm/ConfigTest.java
+++ b/org.eclipse.jgit.pgm.test/tst/org/eclipse/jgit/pgm/ConfigTest.java
@@ -74,6 +74,7 @@ public class ConfigTest extends CLIRepositoryTestCase {
String[] output = execute("git config --list");
List<String> expect = new ArrayList<>();
+ expect.add("gc.autoDetach=false");
expect.add("core.filemode=" + !isWindows);
expect.add("core.logallrefupdates=true");
if (isMac)
diff --git a/org.eclipse.jgit/.settings/.api_filters b/org.eclipse.jgit/.settings/.api_filters
index 6441232363..822f1215d5 100644
--- a/org.eclipse.jgit/.settings/.api_filters
+++ b/org.eclipse.jgit/.settings/.api_filters
@@ -1,5 +1,13 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<component id="org.eclipse.jgit" version="2">
+ <resource path="META-INF/MANIFEST.MF">
+ <filter id="924844039">
+ <message_arguments>
+ <message_argument value="4.7.1"/>
+ <message_argument value="4.7.0"/>
+ </message_arguments>
+ </filter>
+ </resource>
<resource path="src/org/eclipse/jgit/api/ArchiveCommand.java" type="org.eclipse.jgit.api.ArchiveCommand$Format">
<filter comment="OSGi semver allows to break implementors in minor releases" id="403804204">
<message_arguments>
diff --git a/org.eclipse.jgit/resources/org/eclipse/jgit/internal/JGitText.properties b/org.eclipse.jgit/resources/org/eclipse/jgit/internal/JGitText.properties
index 4ac399f9ec..03065da8ad 100644
--- a/org.eclipse.jgit/resources/org/eclipse/jgit/internal/JGitText.properties
+++ b/org.eclipse.jgit/resources/org/eclipse/jgit/internal/JGitText.properties
@@ -299,6 +299,8 @@ flagNotFromThis={0} not from this.
flagsAlreadyCreated={0} flags already created.
funnyRefname=funny refname
gcFailed=Garbage collection failed.
+gcLogExists=A previous GC run reported an error: ''{0}''. Automatic gc will fail until ''{1}'' is removed.
+gcTooManyUnpruned=Too many loose, unpruneable objects after garbage collection. Consider adjusting gc.auto or gc.pruneExpire.
gitmodulesNotFound=.gitmodules not found in tree.
headRequiredToStash=HEAD required to stash local changes
hoursAgo={0} hours ago
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/JGitText.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/JGitText.java
index a81c93a013..32bfb7e579 100644
--- a/org.eclipse.jgit/src/org/eclipse/jgit/internal/JGitText.java
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/JGitText.java
@@ -358,6 +358,8 @@ public class JGitText extends TranslationBundle {
/***/ public String flagsAlreadyCreated;
/***/ public String funnyRefname;
/***/ public String gcFailed;
+ /***/ public String gcLogExists;
+ /***/ public String gcTooManyUnpruned;
/***/ public String gitmodulesNotFound;
/***/ public String headRequiredToStash;
/***/ public String hoursAgo;
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/FileRepository.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/FileRepository.java
index ba52251a4c..ef9aa37d06 100644
--- a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/FileRepository.java
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/FileRepository.java
@@ -619,12 +619,18 @@ public class FileRepository extends Repository {
}
+ private boolean shouldAutoDetach() {
+ return getConfig().getBoolean(ConfigConstants.CONFIG_GC_SECTION,
+ ConfigConstants.CONFIG_KEY_AUTODETACH, true);
+ }
+
@Override
public void autoGC(ProgressMonitor monitor) {
GC gc = new GC(this);
gc.setPackConfig(new PackConfig(this));
gc.setProgressMonitor(monitor);
gc.setAuto(true);
+ gc.setBackground(shouldAutoDetach());
try {
gc.gc();
} catch (ParseException | IOException e) {
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/GC.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/GC.java
index c68e5f7f3d..e1b309da64 100644
--- a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/GC.java
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/GC.java
@@ -50,6 +50,8 @@ import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
+import java.io.PrintWriter;
+import java.io.StringWriter;
import java.nio.channels.Channels;
import java.nio.channels.FileChannel;
import java.nio.file.DirectoryStream;
@@ -73,11 +75,17 @@ import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.TreeMap;
+import java.util.concurrent.Callable;
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.Future;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.eclipse.jgit.annotations.NonNull;
+import org.eclipse.jgit.api.errors.JGitInternalException;
import org.eclipse.jgit.dircache.DirCacheIterator;
import org.eclipse.jgit.errors.CancelledException;
import org.eclipse.jgit.errors.CorruptObjectException;
@@ -143,6 +151,8 @@ public class GC {
private static final int DEFAULT_AUTOLIMIT = 6700;
+ private static ExecutorService executor = Executors.newFixedThreadPool(1);
+
private final FileRepository repo;
private ProgressMonitor pm;
@@ -178,6 +188,11 @@ public class GC {
private boolean automatic;
/**
+ * Whether to run gc in a background thread
+ */
+ private boolean background;
+
+ /**
* Creates a new garbage collector with default values. An expirationTime of
* two weeks and <code>null</code> as progress monitor will be used.
*
@@ -202,13 +217,73 @@ public class GC {
* first check whether any housekeeping is required; if not, it exits
* without performing any work.
*
+ * If {@link #setBackground(boolean)} was set to {@code true}
+ * {@code collectGarbage} will start the gc in the background, and then
+ * return immediately. In this case, errors will not be reported except in
+ * gc.log.
+ *
* @return the collection of {@link PackFile}'s which are newly created
* @throws IOException
* @throws ParseException
* If the configuration parameter "gc.pruneexpire" couldn't be
* parsed
*/
+ // TODO(ms): in 5.0 change signature and return Future<Collection<PackFile>>
public Collection<PackFile> gc() throws IOException, ParseException {
+ final GcLog gcLog = background ? new GcLog(repo) : null;
+ if (gcLog != null && !gcLog.lock(background)) {
+ // there is already a background gc running
+ return Collections.emptyList();
+ }
+
+ Callable<Collection<PackFile>> gcTask = () -> {
+ try {
+ Collection<PackFile> newPacks = doGc();
+ if (automatic && tooManyLooseObjects() && gcLog != null) {
+ String message = JGitText.get().gcTooManyUnpruned;
+ gcLog.write(message);
+ gcLog.commit();
+ }
+ return newPacks;
+ } catch (IOException | ParseException e) {
+ if (background) {
+ if (gcLog == null) {
+ // Lacking a log, there's no way to report this.
+ return Collections.emptyList();
+ }
+ try {
+ gcLog.write(e.getMessage());
+ StringWriter sw = new StringWriter();
+ e.printStackTrace(new PrintWriter(sw));
+ gcLog.write(sw.toString());
+ gcLog.commit();
+ } catch (IOException e2) {
+ e2.addSuppressed(e);
+ LOG.error(e2.getMessage(), e2);
+ }
+ } else {
+ throw new JGitInternalException(e.getMessage(), e);
+ }
+ } finally {
+ if (gcLog != null) {
+ gcLog.unlock();
+ }
+ }
+ return Collections.emptyList();
+ };
+ Future<Collection<PackFile>> result = executor.submit(gcTask);
+ if (background) {
+ // TODO(ms): in 5.0 change signature and return the Future
+ return Collections.emptyList();
+ }
+ try {
+ return result.get();
+ } catch (InterruptedException | ExecutionException e) {
+ throw new IOException(e);
+ }
+ }
+
+ private Collection<PackFile> doGc() throws IOException, ParseException {
if (automatic && !needGc()) {
return Collections.emptyList();
}
@@ -1348,6 +1423,14 @@ public class GC {
this.automatic = auto;
}
+ /**
+ * @param background
+ * whether to run the gc in a background thread.
+ */
+ void setBackground(boolean background) {
+ this.background = background;
+ }
+
private boolean needGc() {
if (tooManyPacks()) {
addRepackAllOption();
@@ -1386,8 +1469,7 @@ public class GC {
* @return {@code true} if number of loose objects > gc.auto (default 6700)
*/
boolean tooManyLooseObjects() {
- int auto = repo.getConfig().getInt(ConfigConstants.CONFIG_GC_SECTION,
- ConfigConstants.CONFIG_KEY_AUTO, DEFAULT_AUTOLIMIT);
+ int auto = getLooseObjectLimit();
if (auto <= 0) {
return false;
}
@@ -1419,4 +1501,9 @@ public class GC {
}
return false;
}
+
+ private int getLooseObjectLimit() {
+ return repo.getConfig().getInt(ConfigConstants.CONFIG_GC_SECTION,
+ ConfigConstants.CONFIG_KEY_AUTO, DEFAULT_AUTOLIMIT);
+ }
}
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/GcLog.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/GcLog.java
new file mode 100644
index 0000000000..9ea77cc7db
--- /dev/null
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/GcLog.java
@@ -0,0 +1,191 @@
+/*
+ * Copyright (C) 2017 Two Sigma Open Source
+ * and other copyright owners as documented in the project's IP log.
+ *
+ * This program and the accompanying materials are made available
+ * under the terms of the Eclipse Distribution License v1.0 which
+ * accompanies this distribution, is reproduced below, and is
+ * available at http://www.eclipse.org/org/documents/edl-v10.php
+ *
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials provided
+ * with the distribution.
+ *
+ * - Neither the name of the Eclipse Foundation, Inc. nor the
+ * names of its contributors may be used to endorse or promote
+ * products derived from this software without specific prior
+ * written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
+ * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+package org.eclipse.jgit.internal.storage.file;
+
+import org.eclipse.jgit.api.errors.JGitInternalException;
+import org.eclipse.jgit.internal.JGitText;
+import org.eclipse.jgit.lib.ConfigConstants;
+import org.eclipse.jgit.util.GitDateParser;
+import org.eclipse.jgit.util.SystemReader;
+
+import static java.nio.charset.StandardCharsets.UTF_8;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.NoSuchFileException;
+import java.nio.file.attribute.FileTime;
+import java.text.MessageFormat;
+import java.text.ParseException;
+import java.time.Instant;
+
+/**
+ * This class manages the gc.log file for a {@link FileRepository}.
+ */
+class GcLog {
+ private final FileRepository repo;
+
+ private final File logFile;
+
+ private final LockFile lock;
+
+ private Instant gcLogExpire;
+
+ private static final String LOG_EXPIRY_DEFAULT = "1.day.ago"; //$NON-NLS-1$
+
+ private boolean nonEmpty = false;
+
+ /**
+ * Construct a GcLog object for a {@link FileRepository}
+ *
+ * @param repo
+ * the repository
+ */
+ GcLog(FileRepository repo) {
+ this.repo = repo;
+ logFile = new File(repo.getDirectory(), "gc.log"); //$NON-NLS-1$
+ lock = new LockFile(logFile);
+ }
+
+ private Instant getLogExpiry() throws ParseException {
+ if (gcLogExpire == null) {
+ String logExpiryStr = repo.getConfig().getString(
+ ConfigConstants.CONFIG_GC_SECTION, null,
+ ConfigConstants.CONFIG_KEY_LOGEXPIRY);
+ if (logExpiryStr == null) {
+ logExpiryStr = LOG_EXPIRY_DEFAULT;
+ }
+ gcLogExpire = GitDateParser.parse(logExpiryStr, null,
+ SystemReader.getInstance().getLocale()).toInstant();
+ }
+ return gcLogExpire;
+ }
+
+ private boolean autoGcBlockedByOldLockFile(boolean background) {
+ try {
+ FileTime lastModified = Files.getLastModifiedTime(logFile.toPath());
+ if (lastModified.toInstant().compareTo(getLogExpiry()) > 0) {
+ // There is an existing log file, which is too recent to ignore
+ if (!background) {
+ try (BufferedReader reader = Files
+ .newBufferedReader(logFile.toPath())) {
+ char[] buf = new char[1000];
+ int len = reader.read(buf, 0, 1000);
+ String oldError = new String(buf, 0, len);
+
+ throw new JGitInternalException(MessageFormat.format(
+ JGitText.get().gcLogExists, oldError, logFile));
+ }
+ }
+ return true;
+ }
+ } catch (NoSuchFileException e) {
+ // No existing log file, OK.
+ } catch (IOException | ParseException e) {
+ throw new JGitInternalException(e.getMessage(), e);
+ }
+ return false;
+ }
+
+ /**
+ * Lock the GC log file for updates
+ *
+ * @param background
+ * If true, and if gc.log already exists, unlock and return false
+ * @return {@code true} if we hold the lock
+ */
+ boolean lock(boolean background) {
+ try {
+ if (!lock.lock()) {
+ return false;
+ }
+ } catch (IOException e) {
+ throw new JGitInternalException(e.getMessage(), e);
+ }
+ if (autoGcBlockedByOldLockFile(background)) {
+ lock.unlock();
+ return false;
+ }
+ return true;
+ }
+
+ /**
+ * Unlock (roll back) the GC log lock
+ */
+ void unlock() {
+ lock.unlock();
+ }
+
+ /**
+ * Commit changes to the gc log, if there have been any writes. Otherwise,
+ * just unlock and delete the existing file (if any)
+ *
+ * @return true if committing (or unlocking/deleting) succeeds.
+ */
+ boolean commit() {
+ if (nonEmpty) {
+ return lock.commit();
+ } else {
+ logFile.delete();
+ lock.unlock();
+ return true;
+ }
+ }
+
+ /**
+ * Write to the pending gc log. Content will be committed upon a call to
+ * commit()
+ *
+ * @param content
+ * The content to write
+ * @throws IOException
+ */
+ void write(String content) throws IOException {
+ if (content.length() > 0) {
+ nonEmpty = true;
+ }
+ lock.write(content.getBytes(UTF_8));
+ }
+}
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/lib/ConfigConstants.java b/org.eclipse.jgit/src/org/eclipse/jgit/lib/ConfigConstants.java
index 74fc7067a1..189361b709 100644
--- a/org.eclipse.jgit/src/org/eclipse/jgit/lib/ConfigConstants.java
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/lib/ConfigConstants.java
@@ -291,6 +291,20 @@ public class ConfigConstants {
public static final String CONFIG_KEY_PRUNEPACKEXPIRE = "prunepackexpire";
/**
+ * The "logexpiry" key
+ *
+ * @since 4.7
+ */
+ public static final String CONFIG_KEY_LOGEXPIRY = "logExpiry";
+
+ /**
+ * The "autodetach" key
+ *
+ * @since 4.7
+ */
+ public static final String CONFIG_KEY_AUTODETACH = "autoDetach";
+
+ /**
* The "aggressiveDepth" key
* @since 3.6
*/