You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

NameConflictTreeWalk.java 12KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414
  1. /*
  2. * Copyright (C) 2008, Google Inc.
  3. * and other copyright owners as documented in the project's IP log.
  4. *
  5. * This program and the accompanying materials are made available
  6. * under the terms of the Eclipse Distribution License v1.0 which
  7. * accompanies this distribution, is reproduced below, and is
  8. * available at http://www.eclipse.org/org/documents/edl-v10.php
  9. *
  10. * All rights reserved.
  11. *
  12. * Redistribution and use in source and binary forms, with or
  13. * without modification, are permitted provided that the following
  14. * conditions are met:
  15. *
  16. * - Redistributions of source code must retain the above copyright
  17. * notice, this list of conditions and the following disclaimer.
  18. *
  19. * - Redistributions in binary form must reproduce the above
  20. * copyright notice, this list of conditions and the following
  21. * disclaimer in the documentation and/or other materials provided
  22. * with the distribution.
  23. *
  24. * - Neither the name of the Eclipse Foundation, Inc. nor the
  25. * names of its contributors may be used to endorse or promote
  26. * products derived from this software without specific prior
  27. * written permission.
  28. *
  29. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
  30. * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
  31. * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  32. * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  33. * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
  34. * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  35. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  36. * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  37. * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  38. * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
  39. * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  40. * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
  41. * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  42. */
  43. package org.eclipse.jgit.treewalk;
  44. import java.io.IOException;
  45. import org.eclipse.jgit.annotations.Nullable;
  46. import org.eclipse.jgit.errors.CorruptObjectException;
  47. import org.eclipse.jgit.lib.FileMode;
  48. import org.eclipse.jgit.lib.ObjectReader;
  49. import org.eclipse.jgit.lib.Repository;
  50. /**
  51. * Specialized TreeWalk to detect directory-file (D/F) name conflicts.
  52. * <p>
  53. * Due to the way a Git tree is organized the standard
  54. * {@link org.eclipse.jgit.treewalk.TreeWalk} won't easily find a D/F conflict
  55. * when merging two or more trees together. In the standard TreeWalk the file
  56. * will be returned first, and then much later the directory will be returned.
  57. * This makes it impossible for the application to efficiently detect and handle
  58. * the conflict.
  59. * <p>
  60. * Using this walk implementation causes the directory to report earlier than
  61. * usual, at the same time as the non-directory entry. This permits the
  62. * application to handle the D/F conflict in a single step. The directory is
  63. * returned only once, so it does not get returned later in the iteration.
  64. * <p>
  65. * When a D/F conflict is detected
  66. * {@link org.eclipse.jgit.treewalk.TreeWalk#isSubtree()} will return true and
  67. * {@link org.eclipse.jgit.treewalk.TreeWalk#enterSubtree()} will recurse into
  68. * the subtree, no matter which iterator originally supplied the subtree.
  69. * <p>
  70. * Because conflicted directories report early, using this walk implementation
  71. * to populate a {@link org.eclipse.jgit.dircache.DirCacheBuilder} may cause the
  72. * automatic resorting to run and fix the entry ordering.
  73. * <p>
  74. * This walk implementation requires more CPU to implement a look-ahead and a
  75. * look-behind to merge a D/F pair together, or to skip a previously reported
  76. * directory. In typical Git repositories the look-ahead cost is 0 and the
  77. * look-behind doesn't trigger, as users tend not to create trees which contain
  78. * both "foo" as a directory and "foo.c" as a file.
  79. * <p>
  80. * In the worst-case however several thousand look-ahead steps per walk step may
  81. * be necessary, making the overhead quite significant. Since this worst-case
  82. * should never happen this walk implementation has made the time/space tradeoff
  83. * in favor of more-time/less-space, as that better suits the typical case.
  84. */
  85. public class NameConflictTreeWalk extends TreeWalk {
  86. private static final int TREE_MODE = FileMode.TREE.getBits();
  87. private boolean fastMinHasMatch;
  88. private AbstractTreeIterator dfConflict;
  89. /**
  90. * Create a new tree walker for a given repository.
  91. *
  92. * @param repo
  93. * the repository the walker will obtain data from.
  94. */
  95. public NameConflictTreeWalk(Repository repo) {
  96. super(repo);
  97. }
  98. /**
  99. * Create a new tree walker for a given repository.
  100. *
  101. * @param repo
  102. * the repository the walker will obtain data from.
  103. * @param or
  104. * the reader the walker will obtain tree data from.
  105. * @since 4.3
  106. */
  107. public NameConflictTreeWalk(@Nullable Repository repo, ObjectReader or) {
  108. super(repo, or);
  109. }
  110. /**
  111. * Create a new tree walker for a given repository.
  112. *
  113. * @param or
  114. * the reader the walker will obtain tree data from.
  115. */
  116. public NameConflictTreeWalk(ObjectReader or) {
  117. super(or);
  118. }
  119. @Override
  120. AbstractTreeIterator min() throws CorruptObjectException {
  121. for (;;) {
  122. final AbstractTreeIterator minRef = fastMin();
  123. if (fastMinHasMatch)
  124. return minRef;
  125. if (isTree(minRef)) {
  126. if (skipEntry(minRef)) {
  127. for (AbstractTreeIterator t : trees) {
  128. if (t.matches == minRef) {
  129. t.next(1);
  130. t.matches = null;
  131. }
  132. }
  133. continue;
  134. }
  135. return minRef;
  136. }
  137. return combineDF(minRef);
  138. }
  139. }
  140. private AbstractTreeIterator fastMin() {
  141. fastMinHasMatch = true;
  142. int i = 0;
  143. AbstractTreeIterator minRef = trees[i];
  144. while (minRef.eof() && ++i < trees.length)
  145. minRef = trees[i];
  146. if (minRef.eof())
  147. return minRef;
  148. boolean hasConflict = false;
  149. minRef.matches = minRef;
  150. while (++i < trees.length) {
  151. final AbstractTreeIterator t = trees[i];
  152. if (t.eof())
  153. continue;
  154. final int cmp = t.pathCompare(minRef);
  155. if (cmp < 0) {
  156. if (fastMinHasMatch && isTree(minRef) && !isTree(t)
  157. && nameEqual(minRef, t)) {
  158. // We used to be at a tree, but now we are at a file
  159. // with the same name. Allow the file to match the
  160. // tree anyway.
  161. //
  162. t.matches = minRef;
  163. hasConflict = true;
  164. } else {
  165. fastMinHasMatch = false;
  166. t.matches = t;
  167. minRef = t;
  168. }
  169. } else if (cmp == 0) {
  170. // Exact name/mode match is best.
  171. //
  172. t.matches = minRef;
  173. } else if (fastMinHasMatch && isTree(t) && !isTree(minRef)
  174. && !isGitlink(minRef) && nameEqual(t, minRef)) {
  175. // The minimum is a file (non-tree) but the next entry
  176. // of this iterator is a tree whose name matches our file.
  177. // This is a classic D/F conflict and commonly occurs like
  178. // this, with no gaps in between the file and directory.
  179. //
  180. // Use the tree as the minimum instead (see combineDF).
  181. //
  182. for (int k = 0; k < i; k++) {
  183. final AbstractTreeIterator p = trees[k];
  184. if (p.matches == minRef)
  185. p.matches = t;
  186. }
  187. t.matches = t;
  188. minRef = t;
  189. hasConflict = true;
  190. } else
  191. fastMinHasMatch = false;
  192. }
  193. if (hasConflict && fastMinHasMatch && dfConflict == null)
  194. dfConflict = minRef;
  195. return minRef;
  196. }
  197. private static boolean nameEqual(final AbstractTreeIterator a,
  198. final AbstractTreeIterator b) {
  199. return a.pathCompare(b, TREE_MODE) == 0;
  200. }
  201. private boolean isGitlink(AbstractTreeIterator p) {
  202. return FileMode.GITLINK.equals(p.mode);
  203. }
  204. private static boolean isTree(AbstractTreeIterator p) {
  205. return FileMode.TREE.equals(p.mode);
  206. }
  207. private boolean skipEntry(AbstractTreeIterator minRef)
  208. throws CorruptObjectException {
  209. // A tree D/F may have been handled earlier. We need to
  210. // not report this path if it has already been reported.
  211. //
  212. for (AbstractTreeIterator t : trees) {
  213. if (t.matches == minRef || t.first())
  214. continue;
  215. int stepsBack = 0;
  216. for (;;) {
  217. stepsBack++;
  218. t.back(1);
  219. final int cmp = t.pathCompare(minRef, 0);
  220. if (cmp == 0) {
  221. // We have already seen this "$path" before. Skip it.
  222. //
  223. t.next(stepsBack);
  224. return true;
  225. } else if (cmp < 0 || t.first()) {
  226. // We cannot find "$path" in t; it will never appear.
  227. //
  228. t.next(stepsBack);
  229. break;
  230. }
  231. }
  232. }
  233. // We have never seen the current path before.
  234. //
  235. return false;
  236. }
  237. private AbstractTreeIterator combineDF(AbstractTreeIterator minRef)
  238. throws CorruptObjectException {
  239. // Look for a possible D/F conflict forward in the tree(s)
  240. // as there may be a "$path/" which matches "$path". Make
  241. // such entries match this entry.
  242. //
  243. AbstractTreeIterator treeMatch = null;
  244. for (AbstractTreeIterator t : trees) {
  245. if (t.matches == minRef || t.eof())
  246. continue;
  247. for (;;) {
  248. final int cmp = t.pathCompare(minRef, TREE_MODE);
  249. if (cmp < 0) {
  250. // The "$path/" may still appear later.
  251. //
  252. t.matchShift++;
  253. t.next(1);
  254. if (t.eof()) {
  255. t.back(t.matchShift);
  256. t.matchShift = 0;
  257. break;
  258. }
  259. } else if (cmp == 0) {
  260. // We have a conflict match here.
  261. //
  262. t.matches = minRef;
  263. treeMatch = t;
  264. break;
  265. } else {
  266. // A conflict match is not possible.
  267. //
  268. if (t.matchShift != 0) {
  269. t.back(t.matchShift);
  270. t.matchShift = 0;
  271. }
  272. break;
  273. }
  274. }
  275. }
  276. if (treeMatch != null) {
  277. // If we do have a conflict use one of the directory
  278. // matching iterators instead of the file iterator.
  279. // This way isSubtree is true and isRecursive works.
  280. //
  281. for (AbstractTreeIterator t : trees)
  282. if (t.matches == minRef)
  283. t.matches = treeMatch;
  284. if (dfConflict == null && !isGitlink(minRef)) {
  285. dfConflict = treeMatch;
  286. }
  287. return treeMatch;
  288. }
  289. return minRef;
  290. }
  291. @Override
  292. void popEntriesEqual() throws CorruptObjectException {
  293. final AbstractTreeIterator ch = currentHead;
  294. for (AbstractTreeIterator t : trees) {
  295. if (t.matches == ch) {
  296. if (t.matchShift == 0)
  297. t.next(1);
  298. else {
  299. t.back(t.matchShift);
  300. t.matchShift = 0;
  301. }
  302. t.matches = null;
  303. }
  304. }
  305. if (ch == dfConflict)
  306. dfConflict = null;
  307. }
  308. @Override
  309. void skipEntriesEqual() throws CorruptObjectException {
  310. final AbstractTreeIterator ch = currentHead;
  311. for (AbstractTreeIterator t : trees) {
  312. if (t.matches == ch) {
  313. if (t.matchShift == 0)
  314. t.skip();
  315. else {
  316. t.back(t.matchShift);
  317. t.matchShift = 0;
  318. }
  319. t.matches = null;
  320. }
  321. }
  322. if (ch == dfConflict)
  323. dfConflict = null;
  324. }
  325. @Override
  326. void stopWalk() throws IOException {
  327. if (!needsStopWalk()) {
  328. return;
  329. }
  330. // Name conflicts make aborting early difficult. Multiple paths may
  331. // exist between the file and directory versions of a name. To ensure
  332. // the directory version is skipped over (as it was previously visited
  333. // during the file version step) requires popping up the stack and
  334. // finishing out each subtree that the walker dove into. Siblings in
  335. // parents do not need to be recursed into, bounding the cost.
  336. for (;;) {
  337. AbstractTreeIterator t = min();
  338. if (t.eof()) {
  339. if (depth > 0) {
  340. exitSubtree();
  341. popEntriesEqual();
  342. continue;
  343. }
  344. return;
  345. }
  346. currentHead = t;
  347. skipEntriesEqual();
  348. }
  349. }
  350. private boolean needsStopWalk() {
  351. for (AbstractTreeIterator t : trees) {
  352. if (t.needsStopWalk()) {
  353. return true;
  354. }
  355. }
  356. return false;
  357. }
  358. /**
  359. * True if the current entry is covered by a directory/file conflict.
  360. *
  361. * This means that for some prefix of the current entry's path, this walk
  362. * has detected a directory/file conflict. Also true if the current entry
  363. * itself is a directory/file conflict.
  364. *
  365. * Example: If this TreeWalk points to foo/bar/a.txt and this method returns
  366. * true then you know that either for path foo or for path foo/bar files and
  367. * folders were detected.
  368. *
  369. * @return <code>true</code> if the current entry is covered by a
  370. * directory/file conflict, <code>false</code> otherwise
  371. */
  372. public boolean isDirectoryFileConflict() {
  373. return dfConflict != null;
  374. }
  375. }