From 64a404803eaccc88d7d57567c5cd86b88c342bec Mon Sep 17 00:00:00 2001 From: Matthias Sohn Date: Wed, 13 Apr 2016 14:54:09 +0200 Subject: [PATCH] Implement auto gc With the auto option, gc checks whether any housekeeping is required; if not, it exits without performing any work. Some JGit commands run gc --auto after performing operations that could create many loose objects. Housekeeping is required if there are too many loose objects or too many packs in the repository. If the number of loose objects exceeds the value of the gc.auto option jgit's GC consolidates all existing packs into a single pack (equivalent to -A option), whereas git-core would combine all loose objects into a single pack using repack -d -l. Setting the value of gc.auto to 0 disables automatic packing of loose objects. If the number of packs exceeds the value of gc.autoPackLimit, then existing packs (except those marked with a .keep file) are consolidated into a single pack by using the -A option of repack. Setting gc.autoPackLimit to 0 disables automatic consolidation of packs. 
Like git the following jgit commands run auto gc: - fetch - merge - rebase - receive-pack The auto gc for receive-pack can be suppressed by setting the config option receive.autogc = false Change-Id: I68a2a051b39ec2c53cb7c4b8f6c596ba65eeba5d Signed-off-by: Matthias Sohn --- .../org/eclipse/jgit/api/MergeCommand.java | 1 + .../org/eclipse/jgit/api/RebaseCommand.java | 1 + .../internal/storage/file/FileRepository.java | 19 ++- .../jgit/internal/storage/file/GC.java | 139 ++++++++++++++++++ .../org/eclipse/jgit/lib/ConfigConstants.java | 24 +++ .../src/org/eclipse/jgit/lib/Repository.java | 19 +++ .../eclipse/jgit/transport/ReceivePack.java | 12 ++ .../org/eclipse/jgit/transport/Transport.java | 3 + 8 files changed, 217 insertions(+), 1 deletion(-) diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/api/MergeCommand.java b/org.eclipse.jgit/src/org/eclipse/jgit/api/MergeCommand.java index 38b10971f4..ced1863719 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/api/MergeCommand.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/api/MergeCommand.java @@ -381,6 +381,7 @@ public class MergeCommand extends GitCommand { .call().getId(); } mergeStatus = MergeStatus.MERGED; + getRepository().autoGC(monitor); } if (commit && squash) { msg = JGitText.get().squashCommitNotUpdatingHEAD; diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/api/RebaseCommand.java b/org.eclipse.jgit/src/org/eclipse/jgit/api/RebaseCommand.java index 2d6a76b390..c5c0cfb821 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/api/RebaseCommand.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/api/RebaseCommand.java @@ -693,6 +693,7 @@ public class RebaseCommand extends GitCommand { String headName = rebaseState.readFile(HEAD_NAME); updateHead(headName, finalHead, upstreamCommit); boolean stashConflicts = autoStashApply(); + getRepository().autoGC(monitor); FileUtils.delete(rebaseState.getDir(), FileUtils.RECURSIVE); if (stashConflicts) return RebaseResult.STASH_APPLY_CONFLICTS_RESULT; diff --git 
a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/FileRepository.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/FileRepository.java index 53fd37e534..5b93399a76 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/FileRepository.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/FileRepository.java @@ -53,9 +53,11 @@ import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.IOException; import java.text.MessageFormat; +import java.text.ParseException; import java.util.HashSet; import java.util.Set; +import org.eclipse.jgit.api.errors.JGitInternalException; import org.eclipse.jgit.attributes.AttributesNode; import org.eclipse.jgit.attributes.AttributesNodeProvider; import org.eclipse.jgit.errors.ConfigInvalidException; @@ -63,15 +65,16 @@ import org.eclipse.jgit.events.ConfigChangedEvent; import org.eclipse.jgit.events.ConfigChangedListener; import org.eclipse.jgit.events.IndexChangedEvent; import org.eclipse.jgit.internal.JGitText; -import org.eclipse.jgit.internal.storage.reftree.RefTreeDatabase; import org.eclipse.jgit.internal.storage.file.ObjectDirectory.AlternateHandle; import org.eclipse.jgit.internal.storage.file.ObjectDirectory.AlternateRepository; +import org.eclipse.jgit.internal.storage.reftree.RefTreeDatabase; import org.eclipse.jgit.lib.BaseRepositoryBuilder; import org.eclipse.jgit.lib.ConfigConstants; import org.eclipse.jgit.lib.Constants; import org.eclipse.jgit.lib.CoreConfig.HideDotFiles; import org.eclipse.jgit.lib.CoreConfig.SymLinks; import org.eclipse.jgit.lib.ObjectId; +import org.eclipse.jgit.lib.ProgressMonitor; import org.eclipse.jgit.lib.Ref; import org.eclipse.jgit.lib.RefDatabase; import org.eclipse.jgit.lib.RefUpdate; @@ -79,6 +82,7 @@ import org.eclipse.jgit.lib.ReflogReader; import org.eclipse.jgit.lib.Repository; import org.eclipse.jgit.storage.file.FileBasedConfig; import org.eclipse.jgit.storage.file.FileRepositoryBuilder; +import 
org.eclipse.jgit.storage.pack.PackConfig; import org.eclipse.jgit.util.FS; import org.eclipse.jgit.util.FileUtils; import org.eclipse.jgit.util.StringUtils; @@ -555,4 +559,17 @@ public class FileRepository extends Repository { } + @Override + public void autoGC(ProgressMonitor monitor) { + GC gc = new GC(this); + gc.setPackConfig(new PackConfig(this)); + gc.setProgressMonitor(monitor); + gc.setAuto(true); + try { + gc.gc(); + } catch (ParseException | IOException e) { + throw new JGitInternalException(JGitText.get().gcFailed, e); + } + } + } diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/GC.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/GC.java index 2e8da8fc9b..f55e15f5f9 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/GC.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/GC.java @@ -52,6 +52,9 @@ import java.io.IOException; import java.io.OutputStream; import java.nio.channels.Channels; import java.nio.channels.FileChannel; +import java.nio.file.DirectoryStream; +import java.nio.file.Files; +import java.nio.file.Path; import java.nio.file.StandardCopyOption; import java.text.MessageFormat; import java.text.ParseException; @@ -62,12 +65,14 @@ import java.util.Comparator; import java.util.Date; import java.util.HashMap; import java.util.HashSet; +import java.util.Iterator; import java.util.LinkedList; import java.util.List; import java.util.Map; import java.util.Objects; import java.util.Set; import java.util.TreeMap; +import java.util.regex.Pattern; import org.eclipse.jgit.annotations.NonNull; import org.eclipse.jgit.dircache.DirCacheIterator; @@ -100,6 +105,8 @@ import org.eclipse.jgit.treewalk.filter.TreeFilter; import org.eclipse.jgit.util.FileUtils; import org.eclipse.jgit.util.GitDateParser; import org.eclipse.jgit.util.SystemReader; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** * A garbage collector for git {@link FileRepository}. 
Instances of this class @@ -109,10 +116,20 @@ import org.eclipse.jgit.util.SystemReader; * adapted to FileRepositories. */ public class GC { + private final static Logger LOG = LoggerFactory + .getLogger(GC.class); + private static final String PRUNE_EXPIRE_DEFAULT = "2.weeks.ago"; //$NON-NLS-1$ private static final String PRUNE_PACK_EXPIRE_DEFAULT = "1.hour.ago"; //$NON-NLS-1$ + private static final Pattern PATTERN_LOOSE_OBJECT = Pattern + .compile("[0-9a-fA-F]{38}"); //$NON-NLS-1$ + + private static final int DEFAULT_AUTOPACKLIMIT = 50; + + private static final int DEFAULT_AUTOLIMIT = 6700; + private final FileRepository repo; private ProgressMonitor pm; @@ -142,6 +159,11 @@ public class GC { */ private long lastRepackTime; + /** + * Whether gc should do automatic housekeeping + */ + private boolean automatic; + /** * Creates a new garbage collector with default values. An expirationTime of * two weeks and null as progress monitor will be used. @@ -163,6 +185,10 @@ public class GC { *
  <li>prune all loose objects which are now reachable by packs</li>
  • * * + * If {@link #setAuto(boolean)} was set to {@code true} {@code gc} will + * first check whether any housekeeping is required; if not, it exits + * without performing any work. + * * @return the collection of {@link PackFile}'s which are newly created * @throws IOException * @throws ParseException @@ -170,6 +196,9 @@ public class GC { * parsed */ public Collection gc() throws IOException, ParseException { + if (automatic && !needGc()) { + return Collections.emptyList(); + } pm.start(6 /* tasks */); packRefs(); // TODO: implement reflog_expire(pm, repo); @@ -1076,4 +1105,114 @@ public class GC { this.packExpire = packExpire; packExpireAgeMillis = -1; } + + /** + * Set the {@code gc --auto} option. + * + * With this option, gc checks whether any housekeeping is required; if not, + * it exits without performing any work. Some JGit commands run + * {@code gc --auto} after performing operations that could create many + * loose objects. + *

    <p> + * Housekeeping is required if there are too many loose objects or too many + * packs in the repository. If the number of loose objects exceeds the value + * of the gc.auto option JGit GC consolidates all existing packs into a + * single pack (equivalent to {@code -A} option), whereas git-core would + * combine all loose objects into a single pack using {@code repack -d -l}. + * Setting the value of {@code gc.auto} to 0 disables automatic packing of + * loose objects. + *

    <p> + * If the number of packs exceeds the value of {@code gc.autoPackLimit}, + * then existing packs (except those marked with a .keep file) are + * consolidated into a single pack by using the {@code -A} option of repack. + * Setting {@code gc.autoPackLimit} to 0 disables automatic consolidation of + * packs. + *

    <p> + * Like git the following jgit commands run auto gc: + * <ul> + * <li>fetch</li> + * <li>merge</li> + * <li>rebase</li> + * <li>receive-pack</li> + * </ul>
    + * The auto gc for receive-pack can be suppressed by setting the config + * option {@code receive.autogc = false} + * + * @param auto + * defines whether gc should do automatic housekeeping + * @since 4.6 + */ + public void setAuto(boolean auto) { + this.automatic = auto; + } + + private boolean needGc() { + if (tooManyPacks()) { + addRepackAllOption(); + } else if (!tooManyLooseObjects()) { + return false; + } + // TODO run pre-auto-gc hook, if it fails return false + return true; + } + + private void addRepackAllOption() { + // TODO: if JGit GC is enhanced to support repack's option -l this + // method needs to be implemented + } + + /** + * @return {@code true} if number of packs > gc.autopacklimit (default 50) + */ + private boolean tooManyPacks() { + int autopacklimit = repo.getConfig().getInt( + ConfigConstants.CONFIG_GC_SECTION, + ConfigConstants.CONFIG_KEY_AUTOPACKLIMIT, + DEFAULT_AUTOPACKLIMIT); + if (autopacklimit <= 0) { + return false; + } + // JGit always creates two packfiles, one for the objects reachable from + // branches, and another one for the rest + return repo.getObjectDatabase().getPacks().size() > (autopacklimit + 1); + } + + /** + * Quickly estimate number of loose objects, SHA1 is distributed evenly so + * counting objects in one directory (bucket 17) is sufficient + * + * @return {@code true} if number of loose objects > gc.auto (default 6700) + */ + private boolean tooManyLooseObjects() { + int auto = repo.getConfig().getInt(ConfigConstants.CONFIG_GC_SECTION, + ConfigConstants.CONFIG_KEY_AUTO, DEFAULT_AUTOLIMIT); + if (auto <= 0) { + return false; + } + int n = 0; + int threshold = (auto + 255) / 256; + Path dir = repo.getObjectsDirectory().toPath().resolve("17"); //$NON-NLS-1$ + if (!Files.exists(dir)) { + return false; + } + try (DirectoryStream<Path> stream = Files.newDirectoryStream(dir, + new DirectoryStream.Filter<Path>() { + + public boolean accept(Path file) throws IOException { + return Files.isRegularFile(file) && PATTERN_LOOSE_OBJECT
+ .matcher(file.getFileName().toString()) + .matches(); + } + })) { + Iterator<Path> iter = stream.iterator(); + for (; iter.hasNext(); iter.next()) { // advance so each entry is counted exactly once + if (++n > threshold) { + return true; + } + } + } catch (IOException e) { + LOG.error(e.getMessage(), e); + } + return false; + } } diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/lib/ConfigConstants.java b/org.eclipse.jgit/src/org/eclipse/jgit/lib/ConfigConstants.java index 9a1f565d71..4d3e118011 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/lib/ConfigConstants.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/lib/ConfigConstants.java @@ -65,6 +65,12 @@ public class ConfigConstants { /** The "dfs" section */ public static final String CONFIG_DFS_SECTION = "dfs"; + /** + * The "receive" section + * @since 4.6 + */ + public static final String CONFIG_RECEIVE_SECTION = "receive"; + /** The "user" section */ public static final String CONFIG_USER_SECTION = "user"; @@ -107,6 +113,24 @@ public class ConfigConstants { /** The "autocrlf" key */ public static final String CONFIG_KEY_AUTOCRLF = "autocrlf"; + /** + * The "auto" key + * @since 4.6 + */ + public static final String CONFIG_KEY_AUTO = "auto"; + + /** + * The "autogc" key + * @since 4.6 + */ + public static final String CONFIG_KEY_AUTOGC = "autogc"; + + /** + * The "autopacklimit" key + * @since 4.6 + */ + public static final String CONFIG_KEY_AUTOPACKLIMIT = "autopacklimit"; + /** * The "eol" key * diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/lib/Repository.java b/org.eclipse.jgit/src/org/eclipse/jgit/lib/Repository.java index e9ff504a42..1909037eeb 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/lib/Repository.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/lib/Repository.java @@ -80,6 +80,7 @@ import org.eclipse.jgit.events.IndexChangedListener; import org.eclipse.jgit.events.ListenerList; import org.eclipse.jgit.events.RepositoryEvent; import org.eclipse.jgit.internal.JGitText; +import org.eclipse.jgit.internal.storage.file.GC; import 
org.eclipse.jgit.revwalk.RevBlob; import org.eclipse.jgit.revwalk.RevCommit; import org.eclipse.jgit.revwalk.RevObject; @@ -1836,4 +1837,22 @@ public abstract class Repository implements AutoCloseable { return getConfig() .getSubsections(ConfigConstants.CONFIG_REMOTE_SECTION); } + + /** + * Check whether any housekeeping is required; if yes, run garbage + * collection; if not, exit without performing any work. Some JGit commands + * run autoGC after performing operations that could create many loose + * objects. + *
<p>
    + * Currently this option is supported for repositories of type + * {@code FileRepository} only. See {@link GC#setAuto(boolean)} for + * configuration details. + * + * @param monitor + * to report progress + * @since 4.6 + */ + public void autoGC(ProgressMonitor monitor) { + // default does nothing + } } diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/transport/ReceivePack.java b/org.eclipse.jgit/src/org/eclipse/jgit/transport/ReceivePack.java index cc20d50a7f..393e25a2a8 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/transport/ReceivePack.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/transport/ReceivePack.java @@ -56,7 +56,9 @@ import java.util.List; import org.eclipse.jgit.annotations.Nullable; import org.eclipse.jgit.errors.UnpackException; +import org.eclipse.jgit.lib.ConfigConstants; import org.eclipse.jgit.lib.Constants; +import org.eclipse.jgit.lib.NullProgressMonitor; import org.eclipse.jgit.lib.Repository; import org.eclipse.jgit.transport.ReceiveCommand.Result; import org.eclipse.jgit.transport.RefAdvertiser.PacketLineOutRefAdvertiser; @@ -307,9 +309,19 @@ public class ReceivePack extends BaseReceivePack { throw new UnpackException(unpackError); } postReceive.onPostReceive(this, filterCommands(Result.OK)); + autoGc(); } } + private void autoGc() { + Repository repo = getRepository(); + if (!repo.getConfig().getBoolean(ConfigConstants.CONFIG_RECEIVE_SECTION, + ConfigConstants.CONFIG_KEY_AUTOGC, true)) { + return; + } + repo.autoGC(NullProgressMonitor.INSTANCE); + } + @Override protected String getLockMessageProcessName() { return "jgit receive-pack"; //$NON-NLS-1$ diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/transport/Transport.java b/org.eclipse.jgit/src/org/eclipse/jgit/transport/Transport.java index bc4843a8af..df860695df 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/transport/Transport.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/transport/Transport.java @@ -1199,6 +1199,9 @@ public abstract class Transport implements 
AutoCloseable { final FetchResult result = new FetchResult(); new FetchProcess(this, toFetch).execute(monitor, result); + + local.autoGC(monitor); + return result; } -- 2.39.5