diff options
author | Demetr Starshov <dstarshov@google.com> | 2020-05-06 17:53:36 -0700 |
---|---|---|
committer | Demetr Starshov <dstarshov@google.com> | 2020-05-08 17:57:20 -0700 |
commit | 9075beefb1bcde3eea9ccee7e34a74a0f61e7ea2 (patch) | |
tree | a60d2e121ec7bec35c970d7647ab6cf12fcddc8d /org.eclipse.jgit/src/org/eclipse/jgit/internal/transport | |
parent | 519cb1e91b06fbc82b7e87431ac7485bf3c9d91b (diff) | |
download | jgit-9075beefb1bcde3eea9ccee7e34a74a0f61e7ea2.tar.gz jgit-9075beefb1bcde3eea9ccee7e34a74a0f61e7ea2.zip |
ReceivePack: adding IterativeConnectivityChecker
Introduce an IterativeConnectivityChecker which runs a connectivity
check with a filtered set of references, and falls back to using the
full set of advertised references.
During the first check attempt it uses the following references:
- References that are ancestors of the incoming commits (e.g., pushing
a commit onto an existing branch or pushing a new branch based on
another branch)
- An additional list of references we know the client can be interested in
(e.g., the list of open changes for Gerrit)
We tested it inside Google and it improves connectivity checks for certain
topologies. For example, connectivity counts for
chromium.googlesource.com/chromium/src:
percentile_50: 1923 (was: 22777)
percentile_90: 23272 (was: 353003)
percentile_99: 345522 (was: 353435)
This saved ~2 seconds on every push to this repository.
Signed-off-by: Demetr Starshov <dstarshov@google.com>
Change-Id: I6543c2e10ed04622ca795b195665133e690d3b10
Diffstat (limited to 'org.eclipse.jgit/src/org/eclipse/jgit/internal/transport')
-rw-r--r-- | org.eclipse.jgit/src/org/eclipse/jgit/internal/transport/connectivity/IterativeConnectivityChecker.java | 152 |
1 files changed, 152 insertions, 0 deletions
/*
 * Copyright (c) 2019, Google LLC and others
 *
 * This program and the accompanying materials are made available under the
 * terms of the Eclipse Distribution License v. 1.0 which is available at
 * http://www.eclipse.org/org/documents/edl-v10.php.
 *
 * SPDX-License-Identifier: BSD-3-Clause
 */

package org.eclipse.jgit.internal.transport.connectivity;

import java.io.IOException;
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Queue;
import java.util.Set;
import java.util.stream.Stream;

import org.eclipse.jgit.errors.MissingObjectException;
import org.eclipse.jgit.lib.ObjectId;
import org.eclipse.jgit.lib.ProgressMonitor;
import org.eclipse.jgit.revwalk.RevCommit;
import org.eclipse.jgit.revwalk.RevObject;
import org.eclipse.jgit.revwalk.RevWalk;
import org.eclipse.jgit.transport.ConnectivityChecker;
import org.eclipse.jgit.transport.ReceiveCommand;

/**
 * Implementation of connectivity checker which tries to do the check with a
 * smaller set of references first and, if that attempt fails with a
 * {@link MissingObjectException}, falls back to a check against all
 * advertised references.
 * <p>
 * This is useful for big repos with an enormous number of references.
 */
public class IterativeConnectivityChecker implements ConnectivityChecker {
	/**
	 * Budget for the ancestor walk done per new ref: once the number of
	 * counted grandparent links reaches this value, commits that are not
	 * advertised are no longer expanded.
	 */
	private static final int MAXIMUM_PARENTS_TO_CHECK = 128;

	/** Checker that performs the actual connectivity checks. */
	private final ConnectivityChecker delegate;

	/** Extra haves that are always added to the reduced set. */
	private Set<ObjectId> forcedHaves = Collections.emptySet();

	/**
	 * @param delegate
	 *            Delegate checker which will be called for actual checks.
	 */
	public IterativeConnectivityChecker(ConnectivityChecker delegate) {
		this.delegate = delegate;
	}

	/**
	 * Runs the delegate first against a reduced set of haves and, if that
	 * set is empty or the attempt throws a {@link MissingObjectException},
	 * runs it again against the full set of advertised haves.
	 * <p>
	 * The reduced set consists of the command object ids that are already
	 * advertised, the advertised ancestors found for the remaining ids, and
	 * the forced haves.
	 *
	 * @param connectivityCheckInfo
	 *            Source of the receive commands and of the walk used to
	 *            parse objects; passed through to the delegate.
	 * @param advertisedHaves
	 *            Full set of advertised haves, used for filtering and for
	 *            the fallback check.
	 * @param pm
	 *            Progress monitor passed through to the delegate.
	 * @throws MissingObjectException
	 *             if the fallback check reports a missing object.
	 * @throws IOException
	 *             if reading objects fails for another reason.
	 */
	@Override
	public void checkConnectivity(ConnectivityCheckInfo connectivityCheckInfo,
			Set<ObjectId> advertisedHaves, ProgressMonitor pm)
			throws MissingObjectException, IOException {
		try {
			Set<ObjectId> newRefs = new HashSet<>();
			Set<ObjectId> expectedParents = new HashSet<>();

			// Ids we already advertise can serve as haves directly; the
			// rest are new tips whose ancestry has to be inspected.
			getAllObjectIds(connectivityCheckInfo.getCommands())
					.forEach(oid -> {
						if (advertisedHaves.contains(oid)) {
							expectedParents.add(oid);
						} else {
							newRefs.add(oid);
						}
					});
			if (!newRefs.isEmpty()) {
				expectedParents.addAll(extractAdvertisedParentCommits(newRefs,
						advertisedHaves, connectivityCheckInfo.getWalk()));
			}

			expectedParents.addAll(forcedHaves);

			if (!expectedParents.isEmpty()) {
				delegate.checkConnectivity(connectivityCheckInfo,
						expectedParents, pm);
				return;
			}
		} catch (MissingObjectException e) {
			// This is fine, retry with all haves.
		}
		delegate.checkConnectivity(connectivityCheckInfo, advertisedHaves, pm);
	}

	/**
	 * Collects the object ids referenced by the given commands: old and new
	 * id for {@code UPDATE} and {@code UPDATE_NONFASTFORWARD}, new id for
	 * {@code CREATE}, nothing for any other command type.
	 *
	 * @param commands
	 *            Commands to inspect.
	 * @return stream of the referenced object ids.
	 */
	private static Stream<ObjectId> getAllObjectIds(
			List<ReceiveCommand> commands) {
		return commands.stream().flatMap(cmd -> {
			if (cmd.getType() == ReceiveCommand.Type.UPDATE || cmd
					.getType() == ReceiveCommand.Type.UPDATE_NONFASTFORWARD) {
				return Stream.of(cmd.getOldId(), cmd.getNewId());
			} else if (cmd.getType() == ReceiveCommand.Type.CREATE) {
				return Stream.of(cmd.getNewId());
			}
			return Stream.of();
		});
	}

	/**
	 * Sets additional haves that client can depend on (e.g. gerrit changes).
	 * <p>
	 * The given set is wrapped in an unmodifiable view, not copied.
	 *
	 * @param forcedHaves
	 *            Haves server expects client to depend on.
	 */
	public void setForcedHaves(Set<ObjectId> forcedHaves) {
		this.forcedHaves = Collections.unmodifiableSet(forcedHaves);
	}

	/**
	 * Walks the ancestry of every new ref that is a commit and collects the
	 * ancestors that are contained in the advertised haves.
	 *
	 * @param newRefs
	 *            Object ids from the commands that are not advertised.
	 * @param advertisedHaves
	 *            Full set of advertised haves.
	 * @param rw
	 *            Walk used to parse the objects.
	 * @return ids of the advertised ancestors that were found.
	 * @throws MissingObjectException
	 *             if an object or one of the visited parents cannot be
	 *             found.
	 * @throws IOException
	 *             if an object cannot be read.
	 */
	private static Set<ObjectId> extractAdvertisedParentCommits(
			Set<ObjectId> newRefs, Set<ObjectId> advertisedHaves, RevWalk rw)
			throws MissingObjectException, IOException {
		Set<ObjectId> advertisedParents = new HashSet<>();
		for (ObjectId newRef : newRefs) {
			RevObject object = rw.parseAny(newRef);
			if (!(object instanceof RevCommit)) {
				// Only commits have parents to follow.
				continue;
			}
			int numberOfParentsToCheck = 0;
			Queue<RevCommit> parents = new ArrayDeque<>(
					MAXIMUM_PARENTS_TO_CHECK);
			parents.addAll(
					parseParents(((RevCommit) object).getParents(), rw));
			// Looking through a chain of ancestors handles the case where a
			// series of commits is sent in a single push for a new branch.
			while (!parents.isEmpty()) {
				RevCommit parentCommit = parents.poll();
				if (advertisedHaves.contains(parentCommit.getId())) {
					advertisedParents.add(parentCommit.getId());
				} else if (numberOfParentsToCheck < MAXIMUM_PARENTS_TO_CHECK) {
					RevCommit[] grandParents = parentCommit.getParents();
					numberOfParentsToCheck += grandParents.length;
					parents.addAll(parseParents(grandParents, rw));
				}
			}
		}
		return advertisedParents;
	}

	/**
	 * Parses the given parent commits with the walk.
	 * <p>
	 * Checked exceptions are propagated unchanged rather than wrapped in a
	 * {@link RuntimeException}, so that a missing parent reaches the
	 * {@link MissingObjectException} handler in
	 * {@link #checkConnectivity(ConnectivityCheckInfo, Set, ProgressMonitor)}
	 * and triggers the fallback to the full set of haves.
	 *
	 * @param parents
	 *            Commits to parse.
	 * @param rw
	 *            Walk used for parsing.
	 * @return the parsed commits, in the order given.
	 * @throws MissingObjectException
	 *             if a parent cannot be found.
	 * @throws IOException
	 *             if a parent cannot be read.
	 */
	private static List<RevCommit> parseParents(RevCommit[] parents,
			RevWalk rw) throws MissingObjectException, IOException {
		List<RevCommit> parsed = new ArrayList<>(parents.length);
		for (RevCommit parent : parents) {
			parsed.add(rw.parseCommit(parent));
		}
		return parsed;
	}
}