/* * Copyright (C) 2008, Google Inc. and others * * This program and the accompanying materials are made available under the * terms of the Eclipse Distribution License v. 1.0 which is available at * https://www.eclipse.org/org/documents/edl-v10.php. * * SPDX-License-Identifier: BSD-3-Clause */ package org.eclipse.jgit.treewalk; import java.io.IOException; import org.eclipse.jgit.annotations.Nullable; import org.eclipse.jgit.errors.CorruptObjectException; import org.eclipse.jgit.lib.FileMode; import org.eclipse.jgit.lib.ObjectReader; import org.eclipse.jgit.lib.Repository; /** * Specialized TreeWalk to detect directory-file (D/F) name conflicts. *

* Due to the way a Git tree is organized the standard * {@link org.eclipse.jgit.treewalk.TreeWalk} won't easily find a D/F conflict * when merging two or more trees together. In the standard TreeWalk the file * will be returned first, and then much later the directory will be returned. * This makes it impossible for the application to efficiently detect and handle * the conflict. *

* Using this walk implementation causes the directory to report earlier than * usual, at the same time as the non-directory entry. This permits the * application to handle the D/F conflict in a single step. The directory is * returned only once, so it does not get returned later in the iteration. *

* When a D/F conflict is detected * {@link org.eclipse.jgit.treewalk.TreeWalk#isSubtree()} will return true and * {@link org.eclipse.jgit.treewalk.TreeWalk#enterSubtree()} will recurse into * the subtree, no matter which iterator originally supplied the subtree. *

* Because conflicted directories report early, using this walk implementation * to populate a {@link org.eclipse.jgit.dircache.DirCacheBuilder} may cause the * automatic resorting to run and fix the entry ordering. *

* This walk implementation requires more CPU to implement a look-ahead and a * look-behind to merge a D/F pair together, or to skip a previously reported * directory. In typical Git repositories the look-ahead cost is 0 and the * look-behind doesn't trigger, as users tend not to create trees which contain * both "foo" as a directory and "foo.c" as a file. *

* In the worst-case however several thousand look-ahead steps per walk step may * be necessary, making the overhead quite significant. Since this worst-case * should never happen this walk implementation has made the time/space tradeoff * in favor of more-time/less-space, as that better suits the typical case. */ public class NameConflictTreeWalk extends TreeWalk { private static final int TREE_MODE = FileMode.TREE.getBits(); private boolean fastMinHasMatch; private AbstractTreeIterator dfConflict; /** * Create a new tree walker for a given repository. * * @param repo * the repository the walker will obtain data from. */ public NameConflictTreeWalk(Repository repo) { super(repo); } /** * Create a new tree walker for a given repository. * * @param repo * the repository the walker will obtain data from. * @param or * the reader the walker will obtain tree data from. * @since 4.3 */ public NameConflictTreeWalk(@Nullable Repository repo, ObjectReader or) { super(repo, or); } /** * Create a new tree walker for a given repository. * * @param or * the reader the walker will obtain tree data from. */ public NameConflictTreeWalk(ObjectReader or) { super(or); } @Override AbstractTreeIterator min() throws CorruptObjectException { for (;;) { final AbstractTreeIterator minRef = fastMin(); if (fastMinHasMatch) return minRef; if (isTree(minRef)) { if (skipEntry(minRef)) { for (AbstractTreeIterator t : trees) { if (t.matches == minRef) { t.next(1); t.matches = null; } } continue; } return minRef; } return combineDF(minRef); } } private AbstractTreeIterator fastMin() { fastMinHasMatch = true; int i = 0; AbstractTreeIterator minRef = trees[i]; while (minRef.eof() && ++i < trees.length) minRef = trees[i]; if (minRef.eof()) return minRef; boolean hasConflict = false; minRef.matches = minRef; while (++i < trees.length) { final AbstractTreeIterator t = trees[i]; if (t.eof()) continue; final int cmp = t.pathCompare(minRef); if (cmp < 0) { if (fastMinHasMatch && isTree(minRef) && !isTree(t) && nameEqual(minRef, t)) { // We used to be at a tree, but now we are at a file // with the same name. Allow the file to match the // tree anyway. // t.matches = minRef; hasConflict = true; } else { fastMinHasMatch = false; t.matches = t; minRef = t; } } else if (cmp == 0) { // Exact name/mode match is best. // t.matches = minRef; } else if (fastMinHasMatch && isTree(t) && !isTree(minRef) && !isGitlink(minRef) && nameEqual(t, minRef)) { // The minimum is a file (non-tree) but the next entry // of this iterator is a tree whose name matches our file. // This is a classic D/F conflict and commonly occurs like // this, with no gaps in between the file and directory. // // Use the tree as the minimum instead (see combineDF). // for (int k = 0; k < i; k++) { final AbstractTreeIterator p = trees[k]; if (p.matches == minRef) p.matches = t; } t.matches = t; minRef = t; hasConflict = true; } else fastMinHasMatch = false; } if (hasConflict && fastMinHasMatch && dfConflict == null) dfConflict = minRef; return minRef; } private static boolean nameEqual(final AbstractTreeIterator a, final AbstractTreeIterator b) { return a.pathCompare(b, TREE_MODE) == 0; } private boolean isGitlink(AbstractTreeIterator p) { return FileMode.GITLINK.equals(p.mode); } private static boolean isTree(AbstractTreeIterator p) { return FileMode.TREE.equals(p.mode); } private boolean skipEntry(AbstractTreeIterator minRef) throws CorruptObjectException { // A tree D/F may have been handled earlier. We need to // not report this path if it has already been reported. // for (AbstractTreeIterator t : trees) { if (t.matches == minRef || t.first()) continue; int stepsBack = 0; for (;;) { stepsBack++; t.back(1); final int cmp = t.pathCompare(minRef, 0); if (cmp == 0) { // We have already seen this "$path" before. Skip it. // t.next(stepsBack); return true; } else if (cmp < 0 || t.first()) { // We cannot find "$path" in t; it will never appear. // t.next(stepsBack); break; } } } // We have never seen the current path before. // return false; } private AbstractTreeIterator combineDF(AbstractTreeIterator minRef) throws CorruptObjectException { // Look for a possible D/F conflict forward in the tree(s) // as there may be a "$path/" which matches "$path". Make // such entries match this entry. // AbstractTreeIterator treeMatch = null; for (AbstractTreeIterator t : trees) { if (t.matches == minRef || t.eof()) continue; for (;;) { final int cmp = t.pathCompare(minRef, TREE_MODE); if (cmp < 0) { // The "$path/" may still appear later. // t.matchShift++; t.next(1); if (t.eof()) { t.back(t.matchShift); t.matchShift = 0; break; } } else if (cmp == 0) { // We have a conflict match here. // t.matches = minRef; treeMatch = t; break; } else { // A conflict match is not possible. // if (t.matchShift != 0) { t.back(t.matchShift); t.matchShift = 0; } break; } } } if (treeMatch != null) { // If we do have a conflict use one of the directory // matching iterators instead of the file iterator. // This way isSubtree is true and isRecursive works. // for (AbstractTreeIterator t : trees) if (t.matches == minRef) t.matches = treeMatch; if (dfConflict == null && !isGitlink(minRef)) { dfConflict = treeMatch; } return treeMatch; } return minRef; } @Override void popEntriesEqual() throws CorruptObjectException { final AbstractTreeIterator ch = currentHead; for (AbstractTreeIterator t : trees) { if (t.matches == ch) { if (t.matchShift == 0) t.next(1); else { t.back(t.matchShift); t.matchShift = 0; } t.matches = null; } } if (ch == dfConflict) dfConflict = null; } @Override void skipEntriesEqual() throws CorruptObjectException { final AbstractTreeIterator ch = currentHead; for (AbstractTreeIterator t : trees) { if (t.matches == ch) { if (t.matchShift == 0) t.skip(); else { t.back(t.matchShift); t.matchShift = 0; } t.matches = null; } } if (ch == dfConflict) dfConflict = null; } @Override void stopWalk() throws IOException { if (!needsStopWalk()) { return; } // Name conflicts make aborting early difficult. Multiple paths may // exist between the file and directory versions of a name. To ensure // the directory version is skipped over (as it was previously visited // during the file version step) requires popping up the stack and // finishing out each subtree that the walker dove into. Siblings in // parents do not need to be recursed into, bounding the cost. for (;;) { AbstractTreeIterator t = min(); if (t.eof()) { if (depth > 0) { exitSubtree(); popEntriesEqual(); continue; } return; } currentHead = t; skipEntriesEqual(); } } private boolean needsStopWalk() { for (AbstractTreeIterator t : trees) { if (t.needsStopWalk()) { return true; } } return false; } /** * True if the current entry is covered by a directory/file conflict. * * This means that for some prefix of the current entry's path, this walk * has detected a directory/file conflict. Also true if the current entry * itself is a directory/file conflict. * * Example: If this TreeWalk points to foo/bar/a.txt and this method returns * true then you know that either for path foo or for path foo/bar files and * folders were detected. * * @return true if the current entry is covered by a * directory/file conflict, false otherwise */ public boolean isDirectoryFileConflict() { return dfConflict != null; } }