/* * Copyright (C) 2007, Robin Rosenberg <robin.rosenberg@dewire.com> * Copyright (C) 2008, Shawn O. Pearce <spearce@spearce.org> * Copyright (C) 2014, Gustaf Lundh <gustaf.lundh@sonymobile.com> and others * * This program and the accompanying materials are made available under the * terms of the Eclipse Distribution License v. 1.0 which is available at * https://www.eclipse.org/org/documents/edl-v10.php. * * SPDX-License-Identifier: BSD-3-Clause */ package org.eclipse.jgit.revwalk; import static org.eclipse.jgit.internal.storage.commitgraph.CommitGraph.EMPTY; import java.io.IOException; import java.text.MessageFormat; import java.util.ArrayList; import java.util.Collection; import java.util.EnumSet; import java.util.Iterator; import java.util.List; import org.eclipse.jgit.annotations.NonNull; import org.eclipse.jgit.annotations.Nullable; import org.eclipse.jgit.errors.CorruptObjectException; import org.eclipse.jgit.errors.IncorrectObjectTypeException; import org.eclipse.jgit.errors.LargeObjectException; import org.eclipse.jgit.errors.MissingObjectException; import org.eclipse.jgit.errors.RevWalkException; import org.eclipse.jgit.internal.JGitText; import org.eclipse.jgit.lib.AnyObjectId; import org.eclipse.jgit.lib.AsyncObjectLoaderQueue; import org.eclipse.jgit.internal.storage.commitgraph.CommitGraph; import org.eclipse.jgit.lib.Constants; import org.eclipse.jgit.lib.MutableObjectId; import org.eclipse.jgit.lib.NullProgressMonitor; import org.eclipse.jgit.lib.ObjectId; import org.eclipse.jgit.lib.ObjectIdOwnerMap; import org.eclipse.jgit.lib.ObjectLoader; import org.eclipse.jgit.lib.ObjectReader; import org.eclipse.jgit.lib.ProgressMonitor; import org.eclipse.jgit.lib.Ref; import org.eclipse.jgit.lib.Repository; import org.eclipse.jgit.revwalk.filter.RevFilter; import org.eclipse.jgit.treewalk.filter.TreeFilter; import org.eclipse.jgit.util.References; /** * Walks a commit graph and produces the matching commits in order. 
* <p>
 * A RevWalk instance can only be used once to generate results. Running a
 * second time requires creating a new RevWalk instance, or invoking
 * {@link #reset()} before starting again. Resetting an existing instance may be
 * faster for some applications as commit body parsing can be avoided on the
 * later invocations.
 * <p>
 * RevWalk instances are not thread-safe. Applications must either restrict
 * usage of a RevWalk instance to a single thread, or implement their own
 * synchronization at a higher level.
 * <p>
 * Multiple simultaneous RevWalk instances per
 * {@link org.eclipse.jgit.lib.Repository} are permitted, even from concurrent
 * threads. Equality of {@link org.eclipse.jgit.revwalk.RevCommit}s from two
 * different RevWalk instances is never true, even if their
 * {@link org.eclipse.jgit.lib.ObjectId}s are equal (and thus they describe the
 * same commit).
 * <p>
 * The offered iterator is over the list of RevCommits described by the
 * configuration of this instance. Applications should restrict themselves to
 * using either the provided Iterator or {@link #next()}, but never use both on
 * the same RevWalk at the same time. The Iterator may buffer RevCommits, while
 * {@link #next()} does not.
 */
public class RevWalk implements Iterable<RevCommit>, AutoCloseable {
    /**
     * Two to the twentieth power (1,048,576). NOTE(review): presumably one
     * mebibyte expressed in bytes; the use site is outside this view, confirm
     * there.
     */
    private static final int MB = 1 << 20;

    /**
     * Set on objects whose important header data has been loaded.
     * <p>
     * For a RevCommit this indicates we have pulled apart the tree and parent
     * references from the raw bytes available in the repository and translated
     * those to our own local RevTree and RevCommit instances. The raw buffer is
     * also available for message and other header filtering.
     * <p>
     * For a RevTag this indicates we have pulled apart the tag references to
     * find out who the tag refers to, and what that object's type is.
     */
    static final int PARSED = 1 << 0;

    /**
     * Set on RevCommit instances added to our {@link #pending} queue.
* <p>
     * We use this flag to avoid adding the same commit instance twice to our
     * queue, especially if we reached it by more than one path.
     */
    static final int SEEN = 1 << 1;

    /**
     * Set on RevCommit instances the caller does not want output.
     * <p>
     * We flag commits as uninteresting if the caller does not want commits
     * reachable from a commit given to {@link #markUninteresting(RevCommit)}.
     * This flag is always carried into the commit's parents and is a key part
     * of the "rev-list B --not A" feature; A is marked UNINTERESTING.
     */
    static final int UNINTERESTING = 1 << 2;

    /**
     * Set on a RevCommit that can collapse out of the history.
     * <p>
     * If the {@link #treeFilter} concluded that this commit matches its
     * parents for all of the paths that the filter is interested in then we
     * mark the commit REWRITE. Later we can rewrite the parents of a REWRITE
     * child to remove chains of REWRITE commits before we produce the child to
     * the application.
     *
     * @see RewriteGenerator
     */
    static final int REWRITE = 1 << 3;

    /**
     * Temporary mark for use within generators or filters.
     * <p>
     * This mark is only for local use within a single scope. If someone sets
     * the mark they must unset it before any other code can see the mark.
     */
    static final int TEMP_MARK = 1 << 4;

    /**
     * Temporary mark for use within {@link TopoSortGenerator}.
     * <p>
     * This mark indicates the commit could not produce when it wanted to, as at
     * least one child was behind it. Commits with this flag are delayed until
     * all children have been output first.
     */
    static final int TOPO_DELAY = 1 << 5;

    /**
     * Temporary mark for use within {@link TopoNonIntermixSortGenerator}.
     * <p>
     * This mark indicates the commit has been queued for emission in
     * {@link TopoSortGenerator} and can be produced. This mark is removed when
     * the commit has been produced.
     */
    static final int TOPO_QUEUED = 1 << 6;

    /**
     * Set on a RevCommit when a {@link TreeRevFilter} has been applied.
* <p>
     * This flag is processed by the {@link RewriteGenerator} to check if a
     * {@link TreeRevFilter} has been applied.
     *
     * @see TreeRevFilter
     * @see RewriteGenerator
     */
    static final int TREE_REV_FILTER_APPLIED = 1 << 7;

    /**
     * Number of flag bits we keep internal for our own use. See above flags.
     */
    static final int RESERVED_FLAGS = 8;

    /** Mask of every flag bit above the {@link #RESERVED_FLAGS} low bits. */
    private static final int APP_FLAGS = -1 & ~((1 << RESERVED_FLAGS) - 1);

    /** Reader objects are loaded through; see {@link #getObjectReader()}. */
    final ObjectReader reader;

    /** Whether {@link #close()} must also close {@link #reader}. */
    private final boolean closeReader;

    // NOTE(review): looks like a reusable scratch id; usage is outside this
    // view.
    final MutableObjectId idBuffer;

    ObjectIdOwnerMap<RevObject> objects;

    /**
     * Starts out as {@link #APP_FLAGS}. The merge checks in this class pass
     * {@code ~freeFlags & APP_FLAGS} to reset as the flags to keep.
     */
    int freeFlags = APP_FLAGS;

    private int delayFreeFlags;

    private int retainOnReset;

    // Presumably the flags copied from a commit onto its parents; the
    // UNINTERESTING documentation says that flag is always carried. Confirm
    // against carryFlagsImpl, which is outside this view.
    int carryFlags = UNINTERESTING;

    /** Commits registered through {@link #markStart(RevCommit)}. */
    final ArrayList<RevCommit> roots;

    /** Queue starting commits are added to; initially a DateRevQueue. */
    AbstractRevQueue queue;

    /** Source {@link #next()} pulls from; initially a StartGenerator. */
    Generator pending;

    /** Enabled sort strategies; initially only {@link RevSort#NONE}. */
    private final EnumSet<RevSort> sorting;

    /** Current commit filter; initially {@link RevFilter#ALL}. */
    private RevFilter filter;

    /** Current tree filter; initially {@link TreeFilter#ALL}. */
    private TreeFilter treeFilter;

    private CommitGraph commitGraph;

    /** See {@link #isRetainBody()}. */
    private boolean retainBody = true;

    /** See {@link #setRewriteParents(boolean)}. */
    private boolean rewriteParents = true;

    /** See {@link #isFirstParent()}. */
    private boolean firstParent;

    boolean shallowCommitsInitialized;

    /**
     * Early-exit policy of the private getMergedInto implementation: stop at
     * the first ref that contains the commit, stop at the first ref that does
     * not, or examine every ref.
     */
    private enum GetMergedIntoStrategy {
        RETURN_ON_FIRST_FOUND,
        RETURN_ON_FIRST_NOT_FOUND,
        EVALUATE_ALL
    }

    /**
     * Create a new revision walker for a given repository.
     *
     * @param repo
     *            the repository the walker will obtain data from. An
     *            ObjectReader will be created by the walker, and will be closed
     *            when the walker is closed.
     */
    public RevWalk(Repository repo) {
        this(repo.newObjectReader(), true);
    }

    /**
     * Create a new revision walker for a given repository.
     *
     * @param or
     *            the reader the walker will obtain data from. The reader is not
     *            closed when the walker is closed (but is closed by
     *            {@link #dispose()}).
*/ public RevWalk(ObjectReader or) { this(or, false); } RevWalk(ObjectReader or, boolean closeReader) { reader = or; idBuffer = new MutableObjectId(); objects = new ObjectIdOwnerMap<>(); roots = new ArrayList<>(); queue = new DateRevQueue(false); pending = new StartGenerator(this); sorting = EnumSet.of(RevSort.NONE); filter = RevFilter.ALL; treeFilter = TreeFilter.ALL; this.closeReader = closeReader; commitGraph = null; } /** * Get the reader this walker is using to load objects. * * @return the reader this walker is using to load objects. */ public ObjectReader getObjectReader() { return reader; } /** * Get a reachability checker for commits over this revwalk. * * @return the most efficient reachability checker for this repository. * @throws IOException * if it cannot open any of the underlying indices. * * @since 5.4 * @deprecated use {@code ObjectReader#createReachabilityChecker(RevWalk)} * instead. */ @Deprecated public final ReachabilityChecker createReachabilityChecker() throws IOException { return reader.createReachabilityChecker(this); } /** * {@inheritDoc} * <p> * Release any resources used by this walker's reader. * <p> * A walker that has been released can be used again, but may need to be * released after the subsequent usage. * * @since 4.0 */ @Override public void close() { if (closeReader) { reader.close(); } } /** * Mark a commit to start graph traversal from. * <p> * Callers are encouraged to use {@link #parseCommit(AnyObjectId)} to obtain * the commit reference, rather than {@link #lookupCommit(AnyObjectId)}, as * this method requires the commit to be parsed before it can be added as a * root for the traversal. * <p> * The method will automatically parse an unparsed commit, but error * handling may be more difficult for the application to explain why a * RevCommit is not actually a commit. The object pool of this walker would * also be 'poisoned' by the non-commit RevCommit. * * @param c * the commit to start traversing from. 
The commit passed must be * from this same revision walker. * @throws org.eclipse.jgit.errors.MissingObjectException * the commit supplied is not available from the object * database. This usually indicates the supplied commit is * invalid, but the reference was constructed during an earlier * invocation to {@link #lookupCommit(AnyObjectId)}. * @throws org.eclipse.jgit.errors.IncorrectObjectTypeException * the object was not parsed yet and it was discovered during * parsing that it is not actually a commit. This usually * indicates the caller supplied a non-commit SHA-1 to * {@link #lookupCommit(AnyObjectId)}. * @throws java.io.IOException * a pack file or loose object could not be read. */ public void markStart(RevCommit c) throws MissingObjectException, IncorrectObjectTypeException, IOException { if ((c.flags & SEEN) != 0) return; if ((c.flags & PARSED) == 0) c.parseHeaders(this); c.flags |= SEEN; roots.add(c); queue.add(c); } /** * Mark commits to start graph traversal from. * * @param list * commits to start traversing from. The commits passed must be * from this same revision walker. * @throws org.eclipse.jgit.errors.MissingObjectException * one of the commits supplied is not available from the object * database. This usually indicates the supplied commit is * invalid, but the reference was constructed during an earlier * invocation to {@link #lookupCommit(AnyObjectId)}. * @throws org.eclipse.jgit.errors.IncorrectObjectTypeException * the object was not parsed yet and it was discovered during * parsing that it is not actually a commit. This usually * indicates the caller supplied a non-commit SHA-1 to * {@link #lookupCommit(AnyObjectId)}. * @throws java.io.IOException * a pack file or loose object could not be read. */ public void markStart(Collection<RevCommit> list) throws MissingObjectException, IncorrectObjectTypeException, IOException { for (RevCommit c : list) markStart(c); } /** * Mark a commit to not produce in the output. 
* <p> * Uninteresting commits denote not just themselves but also their entire * ancestry chain, back until the merge base of an uninteresting commit and * an otherwise interesting commit. * <p> * Callers are encouraged to use {@link #parseCommit(AnyObjectId)} to obtain * the commit reference, rather than {@link #lookupCommit(AnyObjectId)}, as * this method requires the commit to be parsed before it can be added as a * root for the traversal. * <p> * The method will automatically parse an unparsed commit, but error * handling may be more difficult for the application to explain why a * RevCommit is not actually a commit. The object pool of this walker would * also be 'poisoned' by the non-commit RevCommit. * * @param c * the commit to start traversing from. The commit passed must be * from this same revision walker. * @throws org.eclipse.jgit.errors.MissingObjectException * the commit supplied is not available from the object * database. This usually indicates the supplied commit is * invalid, but the reference was constructed during an earlier * invocation to {@link #lookupCommit(AnyObjectId)}. * @throws org.eclipse.jgit.errors.IncorrectObjectTypeException * the object was not parsed yet and it was discovered during * parsing that it is not actually a commit. This usually * indicates the caller supplied a non-commit SHA-1 to * {@link #lookupCommit(AnyObjectId)}. * @throws java.io.IOException * a pack file or loose object could not be read. */ public void markUninteresting(RevCommit c) throws MissingObjectException, IncorrectObjectTypeException, IOException { c.flags |= UNINTERESTING; carryFlagsImpl(c); markStart(c); } /** * Determine if a commit is reachable from another commit. * <p> * A commit <code>base</code> is an ancestor of <code>tip</code> if we can * find a path of commits that leads from <code>tip</code> and ends at * <code>base</code>. 
* <p> * This utility function resets the walker, inserts the two supplied * commits, and then executes a walk until an answer can be obtained. * Currently allocated RevFlags that have been added to RevCommit instances * will be retained through the reset. * * @param base * commit the caller thinks is reachable from <code>tip</code>. * @param tip * commit to start iteration from, and which is most likely a * descendant (child) of <code>base</code>. * @return true if there is a path directly from <code>tip</code> to * <code>base</code> (and thus <code>base</code> is fully merged * into <code>tip</code>); false otherwise. * @throws org.eclipse.jgit.errors.MissingObjectException * one or more of the next commit's parents are not available * from the object database, but were thought to be candidates * for traversal. This usually indicates a broken link. * @throws org.eclipse.jgit.errors.IncorrectObjectTypeException * one or more of the next commit's parents are not actually * commit objects. * @throws java.io.IOException * a pack file or loose object could not be read. */ public boolean isMergedInto(RevCommit base, RevCommit tip) throws MissingObjectException, IncorrectObjectTypeException, IOException { final RevFilter oldRF = filter; final TreeFilter oldTF = treeFilter; try { finishDelayedFreeFlags(); reset(~freeFlags & APP_FLAGS); filter = RevFilter.MERGE_BASE; treeFilter = TreeFilter.ALL; markStart(tip); markStart(base); RevCommit mergeBase; while ((mergeBase = next()) != null) { if (References.isSameObject(mergeBase, base)) { return true; } } return false; } finally { filter = oldRF; treeFilter = oldTF; } } /** * Determine the Refs into which a commit is merged. * <p> * A commit is merged into a ref if we can find a path of commits that leads * from that specific ref and ends at <code>commit</code>. * <p> * * @param commit * commit the caller thinks is reachable from <code>refs</code>. 
* @param refs * refs to start iteration from, and which is most likely a * descendant (child) of <code>commit</code>. * @return list of refs that are reachable from <code>commit</code>. * @throws java.io.IOException * a pack file or loose object could not be read. * @since 5.12 */ public List<Ref> getMergedInto(RevCommit commit, Collection<Ref> refs) throws IOException { return getMergedInto(commit, refs, NullProgressMonitor.INSTANCE); } /** * Determine the Refs into which a commit is merged. * <p> * A commit is merged into a ref if we can find a path of commits that leads * from that specific ref and ends at <code>commit</code>. * <p> * * @param commit * commit the caller thinks is reachable from <code>refs</code>. * @param refs * refs to start iteration from, and which is most likely a * descendant (child) of <code>commit</code>. * @param monitor * the callback for progress and cancellation * @return list of refs that are reachable from <code>commit</code>. * @throws java.io.IOException * a pack file or loose object could not be read. * @since 5.12 */ public List<Ref> getMergedInto(RevCommit commit, Collection<Ref> refs, ProgressMonitor monitor) throws IOException { return getMergedInto(commit, refs, GetMergedIntoStrategy.EVALUATE_ALL, monitor); } /** * Determine if a <code>commit</code> is merged into any of the given * <code>refs</code>. * * @param commit * commit the caller thinks is reachable from <code>refs</code>. * @param refs * refs to start iteration from, and which is most likely a * descendant (child) of <code>commit</code>. * @return true if commit is merged into any of the refs; false otherwise. * @throws java.io.IOException * a pack file or loose object could not be read. 
* @since 5.12 */ public boolean isMergedIntoAny(RevCommit commit, Collection<Ref> refs) throws IOException { return getMergedInto(commit, refs, GetMergedIntoStrategy.RETURN_ON_FIRST_FOUND, NullProgressMonitor.INSTANCE).size() > 0; } /** * Determine if a <code>commit</code> is merged into all of the given * <code>refs</code>. * * @param commit * commit the caller thinks is reachable from <code>refs</code>. * @param refs * refs to start iteration from, and which is most likely a * descendant (child) of <code>commit</code>. * @return true if commit is merged into all of the refs; false otherwise. * @throws java.io.IOException * a pack file or loose object could not be read. * @since 5.12 */ public boolean isMergedIntoAll(RevCommit commit, Collection<Ref> refs) throws IOException { return getMergedInto(commit, refs, GetMergedIntoStrategy.RETURN_ON_FIRST_NOT_FOUND, NullProgressMonitor.INSTANCE).size() == refs.size(); } private List<Ref> getMergedInto(RevCommit needle, Collection<Ref> haystacks, Enum returnStrategy, ProgressMonitor monitor) throws IOException { List<Ref> result = new ArrayList<>(); List<RevCommit> uninteresting = new ArrayList<>(); List<RevCommit> marked = new ArrayList<>(); RevFilter oldRF = filter; TreeFilter oldTF = treeFilter; try { finishDelayedFreeFlags(); reset(~freeFlags & APP_FLAGS); filter = RevFilter.ALL; treeFilter = TreeFilter.ALL; // Make sure commit is parsed from commit-graph if ((needle.flags & PARSED) == 0) { needle.parseHeaders(this); } int cutoff = needle.getGeneration(); for (Ref r : haystacks) { if (monitor.isCancelled()) { return result; } monitor.update(1); RevObject o = peel(parseAny(r.getObjectId())); if (!(o instanceof RevCommit)) { continue; } RevCommit c = (RevCommit) o; reset(UNINTERESTING | TEMP_MARK); markStart(c); boolean commitFound = false; RevCommit next; while ((next = next()) != null) { if (next.getGeneration() < cutoff) { markUninteresting(next); uninteresting.add(next); } if (References.isSameObject(next, needle) 
|| (next.flags & TEMP_MARK) != 0) { result.add(r); if (returnStrategy == GetMergedIntoStrategy.RETURN_ON_FIRST_FOUND) { return result; } commitFound = true; c.flags |= TEMP_MARK; marked.add(c); break; } } if (!commitFound) { markUninteresting(c); uninteresting.add(c); if (returnStrategy == GetMergedIntoStrategy.RETURN_ON_FIRST_NOT_FOUND) { return result; } } } } finally { roots.addAll(uninteresting); filter = oldRF; treeFilter = oldTF; for (RevCommit c : marked) { c.flags &= ~TEMP_MARK; } } return result; } /** * Pop the next most recent commit. * * @return next most recent commit; null if traversal is over. * @throws org.eclipse.jgit.errors.MissingObjectException * one or more of the next commit's parents are not available * from the object database, but were thought to be candidates * for traversal. This usually indicates a broken link. * @throws org.eclipse.jgit.errors.IncorrectObjectTypeException * one or more of the next commit's parents are not actually * commit objects. * @throws java.io.IOException * a pack file or loose object could not be read. */ public RevCommit next() throws MissingObjectException, IncorrectObjectTypeException, IOException { return pending.next(); } /** * Obtain the sort types applied to the commits returned. * * @return the sorting strategies employed. At least one strategy is always * used, but that strategy may be * {@link org.eclipse.jgit.revwalk.RevSort#NONE}. */ public EnumSet<RevSort> getRevSort() { return sorting.clone(); } /** * Check whether the provided sorting strategy is enabled. * * @param sort * a sorting strategy to look for. * @return true if this strategy is enabled, false otherwise */ public boolean hasRevSort(RevSort sort) { return sorting.contains(sort); } /** * Select a single sorting strategy for the returned commits. * <p> * Disables all sorting strategies, then enables only the single strategy * supplied by the caller. * * @param s * a sorting strategy to enable. 
*/
    public void sort(RevSort s) {
        assertNotStarted();
        sorting.clear();
        sorting.add(s);
    }

    /**
     * Add or remove a sorting strategy for the returned commits.
     * <p>
     * Multiple strategies can be applied at once, in which case some strategies
     * may take precedence over others. As an example,
     * {@link org.eclipse.jgit.revwalk.RevSort#TOPO} must take precedence over
     * {@link org.eclipse.jgit.revwalk.RevSort#COMMIT_TIME_DESC}, otherwise it
     * cannot enforce its ordering.
     * <p>
     * {@link org.eclipse.jgit.revwalk.RevSort#NONE} is dropped as soon as it is
     * not the only strategy, and restored when no strategy is left.
     *
     * @param s
     *            a sorting strategy to enable or disable.
     * @param use
     *            true if this strategy should be used, false if it should be
     *            removed.
     */
    public void sort(RevSort s, boolean use) {
        assertNotStarted();
        if (!use) {
            sorting.remove(s);
        } else {
            sorting.add(s);
        }

        final int enabled = sorting.size();
        if (enabled > 1) {
            sorting.remove(RevSort.NONE);
        } else if (enabled == 0) {
            sorting.add(RevSort.NONE);
        }
    }

    /**
     * Get the currently configured commit filter.
     *
     * @return the current filter. Never null as a filter is always needed.
     */
    @NonNull
    public RevFilter getRevFilter() {
        return this.filter;
    }

    /**
     * Set the commit filter for this walker.
     * <p>
     * Multiple filters may be combined by constructing an arbitrary tree of
     * <code>AndRevFilter</code> or <code>OrRevFilter</code> instances to
     * describe the boolean expression required by the application. Custom
     * filter implementations may also be constructed by applications.
     * <p>
     * Note that filters are not thread-safe and may not be shared by concurrent
     * RevWalk instances. Every RevWalk must be supplied its own unique filter,
     * unless the filter implementation specifically states it is (and always
     * will be) thread-safe. Callers may use
     * {@link org.eclipse.jgit.revwalk.filter.RevFilter#clone()} to create a
     * unique filter tree for this RevWalk instance.
     *
     * @param newFilter
     *            the new filter. If null the special
     *            {@link org.eclipse.jgit.revwalk.filter.RevFilter#ALL} filter
     *            will be used instead, as it matches every commit.
* @see org.eclipse.jgit.revwalk.filter.AndRevFilter * @see org.eclipse.jgit.revwalk.filter.OrRevFilter */ public void setRevFilter(RevFilter newFilter) { assertNotStarted(); filter = newFilter != null ? newFilter : RevFilter.ALL; } /** * Get the tree filter used to simplify commits by modified paths. * * @return the current filter. Never null as a filter is always needed. If * no filter is being applied * {@link org.eclipse.jgit.treewalk.filter.TreeFilter#ALL} is * returned. */ @NonNull public TreeFilter getTreeFilter() { return treeFilter; } /** * Set the tree filter used to simplify commits by modified paths. * <p> * If null or {@link org.eclipse.jgit.treewalk.filter.TreeFilter#ALL} the * path limiter is removed. Commits will not be simplified. * <p> * If non-null and not * {@link org.eclipse.jgit.treewalk.filter.TreeFilter#ALL} then the tree * filter will be installed. Commits will have their ancestry simplified to * hide commits that do not contain tree entries matched by the filter, * unless {@code setRewriteParents(false)} is called. * <p> * Usually callers should be inserting a filter graph including * {@link org.eclipse.jgit.treewalk.filter.TreeFilter#ANY_DIFF} along with * one or more {@link org.eclipse.jgit.treewalk.filter.PathFilter} * instances. * * @param newFilter * new filter. If null the special * {@link org.eclipse.jgit.treewalk.filter.TreeFilter#ALL} filter * will be used instead, as it matches everything. * @see org.eclipse.jgit.treewalk.filter.PathFilter */ public void setTreeFilter(TreeFilter newFilter) { assertNotStarted(); treeFilter = newFilter != null ? newFilter : TreeFilter.ALL; } /** * Set whether to rewrite parent pointers when filtering by modified paths. * <p> * By default, when {@link #setTreeFilter(TreeFilter)} is called with non- * null and non-{@link org.eclipse.jgit.treewalk.filter.TreeFilter#ALL} * filter, commits will have their ancestry simplified and parents rewritten * to hide commits that do not match the filter. 
* <p> * This behavior can be bypassed by passing false to this method. * * @param rewrite * whether to rewrite parents; defaults to true. * @since 3.4 */ public void setRewriteParents(boolean rewrite) { rewriteParents = rewrite; } boolean getRewriteParents() { return rewriteParents; } /** * Should the body of a commit or tag be retained after parsing its headers? * <p> * Usually the body is always retained, but some application code might not * care and would prefer to discard the body of a commit as early as * possible, to reduce memory usage. * <p> * True by default on {@link org.eclipse.jgit.revwalk.RevWalk} and false by * default for {@link org.eclipse.jgit.revwalk.ObjectWalk}. * * @return true if the body should be retained; false it is discarded. */ public boolean isRetainBody() { return retainBody; } /** * Set whether or not the body of a commit or tag is retained. * <p> * If a body of a commit or tag is not retained, the application must call * {@link #parseBody(RevObject)} before the body can be safely accessed * through the type specific access methods. * <p> * True by default on {@link org.eclipse.jgit.revwalk.RevWalk} and false by * default for {@link org.eclipse.jgit.revwalk.ObjectWalk}. * * @param retain * true to retain bodies; false to discard them early. */ public void setRetainBody(boolean retain) { retainBody = retain; } /** * @return whether only first-parent links should be followed when walking. * * @since 5.5 */ public boolean isFirstParent() { return firstParent; } /** * Set whether or not only first parent links should be followed. * <p> * If set, second- and higher-parent links are not traversed at all. * <p> * This must be called prior to {@link #markStart(RevCommit)}. * * @param enable * true to walk only first-parent links.