/*
 * Copyright (C) 2008, Google Inc.
 * and other copyright owners as documented in the project's IP log.
 *
 * This program and the accompanying materials are made available
 * under the terms of the Eclipse Distribution License v1.0 which
 * accompanies this distribution, is reproduced below, and is
 * available at http://www.eclipse.org/org/documents/edl-v10.php
 *
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or
 * without modification, are permitted provided that the following
 * conditions are met:
 *
 * - Redistributions of source code must retain the above copyright
 *   notice, this list of conditions and the following disclaimer.
 *
 * - Redistributions in binary form must reproduce the above
 *   copyright notice, this list of conditions and the following
 *   disclaimer in the documentation and/or other materials provided
 *   with the distribution.
 *
 * - Neither the name of the Eclipse Foundation, Inc. nor the
 *   names of its contributors may be used to endorse or promote
 *   products derived from this software without specific prior
 *   written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
 * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
-
- package org.eclipse.jgit.treewalk;
-
- import java.io.IOException;
-
- import org.eclipse.jgit.annotations.Nullable;
- import org.eclipse.jgit.errors.CorruptObjectException;
- import org.eclipse.jgit.lib.FileMode;
- import org.eclipse.jgit.lib.ObjectReader;
- import org.eclipse.jgit.lib.Repository;
-
- /**
- * Specialized TreeWalk to detect directory-file (D/F) name conflicts.
- * <p>
- * Due to the way a Git tree is organized the standard
- * {@link org.eclipse.jgit.treewalk.TreeWalk} won't easily find a D/F conflict
- * when merging two or more trees together. In the standard TreeWalk the file
- * will be returned first, and then much later the directory will be returned.
- * This makes it impossible for the application to efficiently detect and handle
- * the conflict.
- * <p>
- * Using this walk implementation causes the directory to report earlier than
- * usual, at the same time as the non-directory entry. This permits the
- * application to handle the D/F conflict in a single step. The directory is
- * returned only once, so it does not get returned later in the iteration.
- * <p>
- * When a D/F conflict is detected
- * {@link org.eclipse.jgit.treewalk.TreeWalk#isSubtree()} will return true and
- * {@link org.eclipse.jgit.treewalk.TreeWalk#enterSubtree()} will recurse into
- * the subtree, no matter which iterator originally supplied the subtree.
- * <p>
- * Because conflicted directories report early, using this walk implementation
- * to populate a {@link org.eclipse.jgit.dircache.DirCacheBuilder} may cause the
- * automatic resorting to run and fix the entry ordering.
- * <p>
- * This walk implementation requires more CPU to implement a look-ahead and a
- * look-behind to merge a D/F pair together, or to skip a previously reported
- * directory. In typical Git repositories the look-ahead cost is 0 and the
- * look-behind doesn't trigger, as users tend not to create trees which contain
- * both "foo" as a directory and "foo.c" as a file.
- * <p>
- * In the worst-case however several thousand look-ahead steps per walk step may
- * be necessary, making the overhead quite significant. Since this worst-case
- * should never happen this walk implementation has made the time/space tradeoff
- * in favor of more-time/less-space, as that better suits the typical case.
- */
- public class NameConflictTreeWalk extends TreeWalk {
- private static final int TREE_MODE = FileMode.TREE.getBits();
-
- private boolean fastMinHasMatch;
-
- private AbstractTreeIterator dfConflict;
-
- /**
- * Create a new tree walker for a given repository.
- *
- * @param repo
- * the repository the walker will obtain data from.
- */
- public NameConflictTreeWalk(final Repository repo) {
- super(repo);
- }
-
- /**
- * Create a new tree walker for a given repository.
- *
- * @param repo
- * the repository the walker will obtain data from.
- * @param or
- * the reader the walker will obtain tree data from.
- * @since 4.3
- */
- public NameConflictTreeWalk(@Nullable Repository repo, final ObjectReader or) {
- super(repo, or);
- }
-
- /**
- * Create a new tree walker for a given repository.
- *
- * @param or
- * the reader the walker will obtain tree data from.
- */
- public NameConflictTreeWalk(final ObjectReader or) {
- super(or);
- }
-
- @Override
- AbstractTreeIterator min() throws CorruptObjectException {
- for (;;) {
- final AbstractTreeIterator minRef = fastMin();
- if (fastMinHasMatch)
- return minRef;
-
- if (isTree(minRef)) {
- if (skipEntry(minRef)) {
- for (final AbstractTreeIterator t : trees) {
- if (t.matches == minRef) {
- t.next(1);
- t.matches = null;
- }
- }
- continue;
- }
- return minRef;
- }
-
- return combineDF(minRef);
- }
- }
-
- private AbstractTreeIterator fastMin() {
- fastMinHasMatch = true;
-
- int i = 0;
- AbstractTreeIterator minRef = trees[i];
- while (minRef.eof() && ++i < trees.length)
- minRef = trees[i];
- if (minRef.eof())
- return minRef;
-
- boolean hasConflict = false;
- minRef.matches = minRef;
- while (++i < trees.length) {
- final AbstractTreeIterator t = trees[i];
- if (t.eof())
- continue;
-
- final int cmp = t.pathCompare(minRef);
- if (cmp < 0) {
- if (fastMinHasMatch && isTree(minRef) && !isTree(t)
- && nameEqual(minRef, t)) {
- // We used to be at a tree, but now we are at a file
- // with the same name. Allow the file to match the
- // tree anyway.
- //
- t.matches = minRef;
- hasConflict = true;
- } else {
- fastMinHasMatch = false;
- t.matches = t;
- minRef = t;
- }
- } else if (cmp == 0) {
- // Exact name/mode match is best.
- //
- t.matches = minRef;
- } else if (fastMinHasMatch && isTree(t) && !isTree(minRef)
- && nameEqual(t, minRef)) {
- // The minimum is a file (non-tree) but the next entry
- // of this iterator is a tree whose name matches our file.
- // This is a classic D/F conflict and commonly occurs like
- // this, with no gaps in between the file and directory.
- //
- // Use the tree as the minimum instead (see combineDF).
- //
-
- for (int k = 0; k < i; k++) {
- final AbstractTreeIterator p = trees[k];
- if (p.matches == minRef)
- p.matches = t;
- }
- t.matches = t;
- minRef = t;
- hasConflict = true;
- } else
- fastMinHasMatch = false;
- }
-
- if (hasConflict && fastMinHasMatch && dfConflict == null)
- dfConflict = minRef;
- return minRef;
- }
-
- private static boolean nameEqual(final AbstractTreeIterator a,
- final AbstractTreeIterator b) {
- return a.pathCompare(b, TREE_MODE) == 0;
- }
-
- private static boolean isTree(final AbstractTreeIterator p) {
- return FileMode.TREE.equals(p.mode);
- }
-
- private boolean skipEntry(final AbstractTreeIterator minRef)
- throws CorruptObjectException {
- // A tree D/F may have been handled earlier. We need to
- // not report this path if it has already been reported.
- //
- for (final AbstractTreeIterator t : trees) {
- if (t.matches == minRef || t.first())
- continue;
-
- int stepsBack = 0;
- for (;;) {
- stepsBack++;
- t.back(1);
-
- final int cmp = t.pathCompare(minRef, 0);
- if (cmp == 0) {
- // We have already seen this "$path" before. Skip it.
- //
- t.next(stepsBack);
- return true;
- } else if (cmp < 0 || t.first()) {
- // We cannot find "$path" in t; it will never appear.
- //
- t.next(stepsBack);
- break;
- }
- }
- }
-
- // We have never seen the current path before.
- //
- return false;
- }
-
- private AbstractTreeIterator combineDF(final AbstractTreeIterator minRef)
- throws CorruptObjectException {
- // Look for a possible D/F conflict forward in the tree(s)
- // as there may be a "$path/" which matches "$path". Make
- // such entries match this entry.
- //
- AbstractTreeIterator treeMatch = null;
- for (final AbstractTreeIterator t : trees) {
- if (t.matches == minRef || t.eof())
- continue;
-
- for (;;) {
- final int cmp = t.pathCompare(minRef, TREE_MODE);
- if (cmp < 0) {
- // The "$path/" may still appear later.
- //
- t.matchShift++;
- t.next(1);
- if (t.eof()) {
- t.back(t.matchShift);
- t.matchShift = 0;
- break;
- }
- } else if (cmp == 0) {
- // We have a conflict match here.
- //
- t.matches = minRef;
- treeMatch = t;
- break;
- } else {
- // A conflict match is not possible.
- //
- if (t.matchShift != 0) {
- t.back(t.matchShift);
- t.matchShift = 0;
- }
- break;
- }
- }
- }
-
- if (treeMatch != null) {
- // If we do have a conflict use one of the directory
- // matching iterators instead of the file iterator.
- // This way isSubtree is true and isRecursive works.
- //
- for (final AbstractTreeIterator t : trees)
- if (t.matches == minRef)
- t.matches = treeMatch;
-
- if (dfConflict == null)
- dfConflict = treeMatch;
-
- return treeMatch;
- }
-
- return minRef;
- }
-
- @Override
- void popEntriesEqual() throws CorruptObjectException {
- final AbstractTreeIterator ch = currentHead;
- for (int i = 0; i < trees.length; i++) {
- final AbstractTreeIterator t = trees[i];
- if (t.matches == ch) {
- if (t.matchShift == 0)
- t.next(1);
- else {
- t.back(t.matchShift);
- t.matchShift = 0;
- }
- t.matches = null;
- }
- }
-
- if (ch == dfConflict)
- dfConflict = null;
- }
-
- @Override
- void skipEntriesEqual() throws CorruptObjectException {
- final AbstractTreeIterator ch = currentHead;
- for (int i = 0; i < trees.length; i++) {
- final AbstractTreeIterator t = trees[i];
- if (t.matches == ch) {
- if (t.matchShift == 0)
- t.skip();
- else {
- t.back(t.matchShift);
- t.matchShift = 0;
- }
- t.matches = null;
- }
- }
-
- if (ch == dfConflict)
- dfConflict = null;
- }
-
- @Override
- void stopWalk() throws IOException {
- if (!needsStopWalk()) {
- return;
- }
-
- // Name conflicts make aborting early difficult. Multiple paths may
- // exist between the file and directory versions of a name. To ensure
- // the directory version is skipped over (as it was previously visited
- // during the file version step) requires popping up the stack and
- // finishing out each subtree that the walker dove into. Siblings in
- // parents do not need to be recursed into, bounding the cost.
- for (;;) {
- AbstractTreeIterator t = min();
- if (t.eof()) {
- if (depth > 0) {
- exitSubtree();
- popEntriesEqual();
- continue;
- }
- return;
- }
- currentHead = t;
- skipEntriesEqual();
- }
- }
-
- private boolean needsStopWalk() {
- for (AbstractTreeIterator t : trees) {
- if (t.needsStopWalk()) {
- return true;
- }
- }
- return false;
- }
-
- /**
- * True if the current entry is covered by a directory/file conflict.
- *
- * This means that for some prefix of the current entry's path, this walk
- * has detected a directory/file conflict. Also true if the current entry
- * itself is a directory/file conflict.
- *
- * Example: If this TreeWalk points to foo/bar/a.txt and this method returns
- * true then you know that either for path foo or for path foo/bar files and
- * folders were detected.
- *
- * @return <code>true</code> if the current entry is covered by a
- * directory/file conflict, <code>false</code> otherwise
- */
- public boolean isDirectoryFileConflict() {
- return dfConflict != null;
- }
- }
|