From ae26fa19b7945960fc6e8bf2ab64ec128836ea6d Mon Sep 17 00:00:00 2001 From: Ivan Frade Date: Wed, 1 Apr 2020 22:10:09 -0700 Subject: [PATCH] UploadPack: Extract walk-based reachability check Preparing the code to optimize the bitmap-based object reachability checker. We are mirroring first the commit reachability checker structure (interface + 2 implementations). Move the walk-base reachability checker to its own class. This class is public at the moment. Later ObjectWalk will return an interface and this implementation will be package-private. Change-Id: Ifac70094e1af137291c3607d95e689992f814b26 Signed-off-by: Ivan Frade --- .../PedestrianObjectReachabilityTest.java | 157 ++++++++++++++++++ .../PedestrianObjectReachabilityChecker.java | 103 ++++++++++++ .../eclipse/jgit/transport/UploadPack.java | 45 ++--- 3 files changed, 272 insertions(+), 33 deletions(-) create mode 100644 org.eclipse.jgit.test/tst/org/eclipse/jgit/revwalk/PedestrianObjectReachabilityTest.java create mode 100644 org.eclipse.jgit/src/org/eclipse/jgit/revwalk/PedestrianObjectReachabilityChecker.java diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/revwalk/PedestrianObjectReachabilityTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/revwalk/PedestrianObjectReachabilityTest.java new file mode 100644 index 0000000000..f8fc0c9719 --- /dev/null +++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/revwalk/PedestrianObjectReachabilityTest.java @@ -0,0 +1,157 @@ +/* + * Copyright (C) 2020, Google LLC and others + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Distribution License v. 1.0 which is available at + * https://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + */ +package org.eclipse.jgit.revwalk; + +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +import java.util.Arrays; +import java.util.Optional; +import java.util.stream.Stream; + +import org.eclipse.jgit.internal.storage.file.FileRepository; +import org.eclipse.jgit.junit.LocalDiskRepositoryTestCase; +import org.eclipse.jgit.junit.TestRepository; +import org.eclipse.jgit.junit.TestRepository.CommitBuilder; +import org.junit.Before; +import org.junit.Test; + +public class PedestrianObjectReachabilityTest + extends LocalDiskRepositoryTestCase { + + PedestrianObjectReachabilityChecker getChecker( + TestRepository repository) + throws Exception { + return new PedestrianObjectReachabilityChecker( + repository.getRevWalk().toObjectWalkWithSameObjects()); + } + + private TestRepository repo; + + private AddressableRevCommit baseCommit; + + private AddressableRevCommit branchACommit; + + private AddressableRevCommit branchBCommit; + + private AddressableRevCommit mergeCommit; + + /** {@inheritDoc} */ + @Override + @Before + public void setUp() throws Exception { + super.setUp(); + FileRepository db = createWorkRepository(); + repo = new TestRepository<>(db); + prepareRepo(); + } + + @Test + public void blob_in_base_reachable_from_branches() throws Exception { + PedestrianObjectReachabilityChecker checker = getChecker(repo); + + RevObject baseBlob = baseCommit.blob; + assertReachable("reachable from one branch", checker.areAllReachable( + Arrays.asList(baseBlob), Stream.of(branchACommit.commit))); + assertReachable("reachable from another branch", + checker.areAllReachable( + Arrays.asList(baseBlob), Stream.of(branchBCommit.commit))); + } + + @Test + public void blob_reachable_from_owning_commit() throws Exception { + PedestrianObjectReachabilityChecker checker = getChecker(repo); + + RevObject branchABlob = branchACommit.blob; + assertReachable("reachable from itself", + checker.areAllReachable(Arrays.asList(branchABlob), + Stream.of(branchACommit.commit))); + } + + @Test + public void blob_in_branch_reachable_from_merge() throws Exception { + PedestrianObjectReachabilityChecker checker = getChecker(repo); + + RevObject branchABlob = branchACommit.blob; + assertReachable("reachable from merge", checker.areAllReachable( + Arrays.asList(branchABlob), Stream.of(mergeCommit.commit))); + } + + @Test + public void blob_unreachable_from_earlier_commit() throws Exception { + PedestrianObjectReachabilityChecker checker = getChecker(repo); + + RevObject branchABlob = branchACommit.blob; + assertUnreachable("unreachable from earlier commit", + checker.areAllReachable(Arrays.asList(branchABlob), + Stream.of(baseCommit.commit))); + } + + @Test + public void blob_unreachable_from_parallel_branch() throws Exception { + PedestrianObjectReachabilityChecker checker = getChecker(repo); + + RevObject branchABlob = branchACommit.blob; + assertUnreachable("unreachable from another branch", + checker.areAllReachable(Arrays.asList(branchABlob), + Stream.of(branchBCommit.commit))); + } + + private void prepareRepo() + throws Exception { + baseCommit = createCommit("base"); + branchACommit = createCommit("branchA", baseCommit); + branchBCommit = createCommit("branchB", baseCommit); + mergeCommit = createCommit("merge", branchACommit, branchBCommit); + + // Bitmaps are generated from references + // TODO(ifrade): These are not needed now, but will be when we use these + // tests with the bitmap implementation. + repo.update("refs/heads/a", branchACommit.commit); + repo.update("refs/heads/b", branchBCommit.commit); + repo.update("refs/heads/merge", mergeCommit.commit); + } + + private AddressableRevCommit createCommit(String blobPath, + AddressableRevCommit... parents) throws Exception { + RevBlob blob = repo.blob(blobPath + " content"); + CommitBuilder commitBuilder = repo.commit(); + for (int i = 0; i < parents.length; i++) { + commitBuilder.parent(parents[i].commit); + } + commitBuilder.add(blobPath, blob); + + RevCommit commit = commitBuilder.create(); + return new AddressableRevCommit(commit, blob); + } + + // Pair of commit and blob inside it + static class AddressableRevCommit { + RevCommit commit; + + RevBlob blob; + + AddressableRevCommit(RevCommit commit, RevBlob blob) { + this.commit = commit; + this.blob = blob; + } + } + + private static void assertReachable(String msg, + Optional result) { + assertFalse(msg, result.isPresent()); + } + + private static void assertUnreachable(String msg, + Optional result) { + assertTrue(msg, result.isPresent()); + } + +} diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/PedestrianObjectReachabilityChecker.java b/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/PedestrianObjectReachabilityChecker.java new file mode 100644 index 0000000000..6e027ceafb --- /dev/null +++ b/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/PedestrianObjectReachabilityChecker.java @@ -0,0 +1,103 @@ +/* + * Copyright (C) 2020, Google LLC and others + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Distribution License v. 1.0 which is available at + * https://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + */ +package org.eclipse.jgit.revwalk; + +import java.io.IOException; +import java.util.Collection; +import java.util.Iterator; +import java.util.Optional; +import java.util.stream.Stream; + +import org.eclipse.jgit.errors.IncorrectObjectTypeException; +import org.eclipse.jgit.errors.MissingObjectException; +import org.eclipse.jgit.lib.AnyObjectId; + +/** + * Checks if all objects are reachable from certain starting points doing a + * walk. + * + * This is an expensive check that browses commits, trees, blobs and tags. For + * reachability just between commits see {@link ReachabilityChecker} + * implementations. + * + * TODO(ifrade): This class won't be public when the interface is introduced. + * Skipping the @since. + */ +public class PedestrianObjectReachabilityChecker { + private ObjectWalk walk; + + /** + * New instance of the reachability checker using a existing walk. + * + * @param walk + * ObjectWalk instance to reuse. Caller retains ownership. + */ + public PedestrianObjectReachabilityChecker(ObjectWalk walk) { + this.walk = walk; + } + + /** + * Checks that all targets are reachable from the starters. + * + * @param targets + * objects we want to reach from the starters + * @param starters + * objects known to be reachable to the caller + * @return Optional with an unreachable target if there is any (there could + * be more than one). Empty optional means all targets are + * reachable. + * @throws MissingObjectException + * An object was missing. This should not happen as the caller + * checked this while doing + * {@link RevWalk#parseAny(AnyObjectId)} to convert ObjectIds to + * RevObjects. + * @throws IncorrectObjectTypeException + * Incorrect object type. As with missing objects, this should + * not happen if the caller used + * {@link RevWalk#parseAny(AnyObjectId)}. + * @throws IOException + * Cannot access underlying storage + */ + public Optional areAllReachable(Collection targets, + Stream starters) throws MissingObjectException, + IncorrectObjectTypeException, IOException { + walk.reset(); + walk.sort(RevSort.TOPO); + for (RevObject target : targets) { + walk.markStart(target); + } + + Iterator iterator = starters.iterator(); + while (iterator.hasNext()) { + RevObject o = iterator.next(); + walk.markUninteresting(o); + + RevObject peeled = walk.peel(o); + if (peeled instanceof RevCommit) { + // By default, for performance reasons, ObjectWalk does not mark + // a tree as uninteresting when we mark a commit. Mark it + // ourselves so that we can determine reachability exactly. + walk.markUninteresting(((RevCommit) peeled).getTree()); + } + } + + RevCommit commit = walk.next(); + if (commit != null) { + return Optional.of(commit); + } + + RevObject object = walk.nextObject(); + if (object != null) { + return Optional.of(object); + } + + return Optional.empty(); + } +} diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/transport/UploadPack.java b/org.eclipse.jgit/src/org/eclipse/jgit/transport/UploadPack.java index bb826d8810..0552909c51 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/transport/UploadPack.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/transport/UploadPack.java @@ -80,12 +80,12 @@ import org.eclipse.jgit.revwalk.AsyncRevObjectQueue; import org.eclipse.jgit.revwalk.BitmapWalker; import org.eclipse.jgit.revwalk.DepthWalk; import org.eclipse.jgit.revwalk.ObjectWalk; +import org.eclipse.jgit.revwalk.PedestrianObjectReachabilityChecker; import org.eclipse.jgit.revwalk.ReachabilityChecker; import org.eclipse.jgit.revwalk.RevCommit; import org.eclipse.jgit.revwalk.RevFlag; import org.eclipse.jgit.revwalk.RevFlagSet; import org.eclipse.jgit.revwalk.RevObject; -import org.eclipse.jgit.revwalk.RevSort; import org.eclipse.jgit.revwalk.RevTag; import org.eclipse.jgit.revwalk.RevWalk; import org.eclipse.jgit.revwalk.filter.CommitTimeRevFilter; @@ -1910,36 +1910,6 @@ public class UploadPack { } } - private static void checkReachabilityByWalkingObjects(ObjectWalk walk, - List wants, Set reachableFrom) throws IOException { - - walk.sort(RevSort.TOPO); - for (RevObject want : wants) { - walk.markStart(want); - } - for (ObjectId have : reachableFrom) { - RevObject o = walk.parseAny(have); - walk.markUninteresting(o); - - RevObject peeled = walk.peel(o); - if (peeled instanceof RevCommit) { - // By default, for performance reasons, ObjectWalk does not mark a - // tree as uninteresting when we mark a commit. Mark it ourselves so - // that we can determine reachability exactly. - walk.markUninteresting(((RevCommit) peeled).getTree()); - } - } - - RevCommit commit = walk.next(); - if (commit != null) { - throw new WantNotValidException(commit); - } - RevObject object = walk.nextObject(); - if (object != null) { - throw new WantNotValidException(object); - } - } - private static void checkNotAdvertisedWants(UploadPack up, List notAdvertisedWants, Collection visibleRefs) throws IOException { @@ -1967,8 +1937,17 @@ public class UploadPack { // operator is willing to pay the cost of these // reachability checks. try (ObjectWalk objWalk = walk.toObjectWalkWithSameObjects()) { - checkReachabilityByWalkingObjects(objWalk, - wantsAsObjs, reachableFrom); + List havesAsObjs = objectIdsToRevObjects( + objWalk, reachableFrom); + PedestrianObjectReachabilityChecker reachabilityChecker = new PedestrianObjectReachabilityChecker( + objWalk); + Optional unreachable = reachabilityChecker + .areAllReachable(wantsAsObjs, + havesAsObjs.stream()); + if (unreachable.isPresent()) { + throw new WantNotValidException( + unreachable.get()); + } } return; } -- 2.39.5