You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

AbstractTreeIterator.java 20KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628
  1. /*
  2. * Copyright (C) 2008-2009, Google Inc.
  3. * Copyright (C) 2007, Robin Rosenberg <robin.rosenberg@dewire.com>
  4. * Copyright (C) 2008, Shawn O. Pearce <spearce@spearce.org>
  5. * and other copyright owners as documented in the project's IP log.
  6. *
  7. * This program and the accompanying materials are made available
  8. * under the terms of the Eclipse Distribution License v1.0 which
  9. * accompanies this distribution, is reproduced below, and is
  10. * available at http://www.eclipse.org/org/documents/edl-v10.php
  11. *
  12. * All rights reserved.
  13. *
  14. * Redistribution and use in source and binary forms, with or
  15. * without modification, are permitted provided that the following
  16. * conditions are met:
  17. *
  18. * - Redistributions of source code must retain the above copyright
  19. * notice, this list of conditions and the following disclaimer.
  20. *
  21. * - Redistributions in binary form must reproduce the above
  22. * copyright notice, this list of conditions and the following
  23. * disclaimer in the documentation and/or other materials provided
  24. * with the distribution.
  25. *
  26. * - Neither the name of the Eclipse Foundation, Inc. nor the
  27. * names of its contributors may be used to endorse or promote
  28. * products derived from this software without specific prior
  29. * written permission.
  30. *
  31. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
  32. * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
  33. * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  34. * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  35. * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
  36. * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  37. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  38. * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  39. * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  40. * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
  41. * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  42. * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
  43. * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  44. */
  45. package org.eclipse.jgit.treewalk;
  46. import java.io.IOException;
  47. import java.nio.ByteBuffer;
  48. import java.nio.CharBuffer;
  49. import org.eclipse.jgit.errors.CorruptObjectException;
  50. import org.eclipse.jgit.errors.IncorrectObjectTypeException;
  51. import org.eclipse.jgit.lib.Constants;
  52. import org.eclipse.jgit.lib.FileMode;
  53. import org.eclipse.jgit.lib.MutableObjectId;
  54. import org.eclipse.jgit.lib.ObjectId;
  55. import org.eclipse.jgit.lib.ObjectReader;
  56. import org.eclipse.jgit.treewalk.filter.TreeFilter;
  57. /**
  58. * Walks a Git tree (directory) in Git sort order.
  59. * <p>
  60. * A new iterator instance should be positioned on the first entry, or at eof.
  61. * Data for the first entry (if not at eof) should be available immediately.
  62. * <p>
  63. * Implementors must walk a tree in the Git sort order, which has the following
  64. * odd sorting:
  65. * <ol>
  66. * <li>A.c</li>
  67. * <li>A/c</li>
  68. * <li>A0c</li>
  69. * </ol>
  70. * <p>
  71. * In the second item, <code>A</code> is the name of a subtree and
  72. * <code>c</code> is a file within that subtree. The other two items are files
  73. * in the root level tree.
  74. *
  75. * @see CanonicalTreeParser
  76. */
  77. public abstract class AbstractTreeIterator {
  78. /** Default size for the {@link #path} buffer. */
  79. protected static final int DEFAULT_PATH_SIZE = 128;
  80. /** A dummy object id buffer that matches the zero ObjectId. */
  81. protected static final byte[] zeroid = new byte[Constants.OBJECT_ID_LENGTH];
  82. /** Iterator for the parent tree; null if we are the root iterator. */
  83. final AbstractTreeIterator parent;
  84. /** The iterator this current entry is path equal to. */
  85. AbstractTreeIterator matches;
  86. /**
  87. * Number of entries we moved forward to force a D/F conflict match.
  88. *
  89. * @see NameConflictTreeWalk
  90. */
  91. int matchShift;
  92. /**
  93. * Mode bits for the current entry.
  94. * <p>
  95. * A numerical value from FileMode is usually faster for an iterator to
  96. * obtain from its data source so this is the preferred representation.
  97. *
  98. * @see org.eclipse.jgit.lib.FileMode
  99. */
  100. protected int mode;
  101. /**
  102. * Path buffer for the current entry.
  103. * <p>
  104. * This buffer is pre-allocated at the start of walking and is shared from
  105. * parent iterators down into their subtree iterators. The sharing allows
  106. * the current entry to always be a full path from the root, while each
  107. * subtree only needs to populate the part that is under their control.
  108. */
  109. protected byte[] path;
  110. /**
  111. * Position within {@link #path} this iterator starts writing at.
  112. * <p>
  113. * This is the first offset in {@link #path} that this iterator must
  114. * populate during {@link #next}. At the root level (when {@link #parent}
  115. * is null) this is 0. For a subtree iterator the index before this position
  116. * should have the value '/'.
  117. */
  118. protected final int pathOffset;
  119. /**
  120. * Total length of the current entry's complete path from the root.
  121. * <p>
  122. * This is the number of bytes within {@link #path} that pertain to the
  123. * current entry. Values at this index through the end of the array are
  124. * garbage and may be randomly populated from prior entries.
  125. */
  126. protected int pathLen;
  127. /** Create a new iterator with no parent. */
  128. protected AbstractTreeIterator() {
  129. parent = null;
  130. path = new byte[DEFAULT_PATH_SIZE];
  131. pathOffset = 0;
  132. }
  133. /**
  134. * Create a new iterator with no parent and a prefix.
  135. * <p>
  136. * The prefix path supplied is inserted in front of all paths generated by
  137. * this iterator. It is intended to be used when an iterator is being
  138. * created for a subsection of an overall repository and needs to be
  139. * combined with other iterators that are created to run over the entire
  140. * repository namespace.
  141. *
  142. * @param prefix
  143. * position of this iterator in the repository tree. The value
  144. * may be null or the empty string to indicate the prefix is the
  145. * root of the repository. A trailing slash ('/') is
  146. * automatically appended if the prefix does not end in '/'.
  147. */
  148. protected AbstractTreeIterator(final String prefix) {
  149. parent = null;
  150. if (prefix != null && prefix.length() > 0) {
  151. final ByteBuffer b;
  152. b = Constants.CHARSET.encode(CharBuffer.wrap(prefix));
  153. pathLen = b.limit();
  154. path = new byte[Math.max(DEFAULT_PATH_SIZE, pathLen + 1)];
  155. b.get(path, 0, pathLen);
  156. if (path[pathLen - 1] != '/')
  157. path[pathLen++] = '/';
  158. pathOffset = pathLen;
  159. } else {
  160. path = new byte[DEFAULT_PATH_SIZE];
  161. pathOffset = 0;
  162. }
  163. }
  164. /**
  165. * Create a new iterator with no parent and a prefix.
  166. * <p>
  167. * The prefix path supplied is inserted in front of all paths generated by
  168. * this iterator. It is intended to be used when an iterator is being
  169. * created for a subsection of an overall repository and needs to be
  170. * combined with other iterators that are created to run over the entire
  171. * repository namespace.
  172. *
  173. * @param prefix
  174. * position of this iterator in the repository tree. The value
  175. * may be null or the empty array to indicate the prefix is the
  176. * root of the repository. A trailing slash ('/') is
  177. * automatically appended if the prefix does not end in '/'.
  178. */
  179. protected AbstractTreeIterator(final byte[] prefix) {
  180. parent = null;
  181. if (prefix != null && prefix.length > 0) {
  182. pathLen = prefix.length;
  183. path = new byte[Math.max(DEFAULT_PATH_SIZE, pathLen + 1)];
  184. System.arraycopy(prefix, 0, path, 0, pathLen);
  185. if (path[pathLen - 1] != '/')
  186. path[pathLen++] = '/';
  187. pathOffset = pathLen;
  188. } else {
  189. path = new byte[DEFAULT_PATH_SIZE];
  190. pathOffset = 0;
  191. }
  192. }
  193. /**
  194. * Create an iterator for a subtree of an existing iterator.
  195. *
  196. * @param p
  197. * parent tree iterator.
  198. */
  199. protected AbstractTreeIterator(final AbstractTreeIterator p) {
  200. parent = p;
  201. path = p.path;
  202. pathOffset = p.pathLen + 1;
  203. try {
  204. path[pathOffset - 1] = '/';
  205. } catch (ArrayIndexOutOfBoundsException e) {
  206. growPath(p.pathLen);
  207. path[pathOffset - 1] = '/';
  208. }
  209. }
  210. /**
  211. * Create an iterator for a subtree of an existing iterator.
  212. * <p>
  213. * The caller is responsible for setting up the path of the child iterator.
  214. *
  215. * @param p
  216. * parent tree iterator.
  217. * @param childPath
  218. * path array to be used by the child iterator. This path must
  219. * contain the path from the top of the walk to the first child
  220. * and must end with a '/'.
  221. * @param childPathOffset
  222. * position within <code>childPath</code> where the child can
  223. * insert its data. The value at
  224. * <code>childPath[childPathOffset-1]</code> must be '/'.
  225. */
  226. protected AbstractTreeIterator(final AbstractTreeIterator p,
  227. final byte[] childPath, final int childPathOffset) {
  228. parent = p;
  229. path = childPath;
  230. pathOffset = childPathOffset;
  231. }
  232. /**
  233. * Grow the path buffer larger.
  234. *
  235. * @param len
  236. * number of live bytes in the path buffer. This many bytes will
  237. * be moved into the larger buffer.
  238. */
  239. protected void growPath(final int len) {
  240. setPathCapacity(path.length << 1, len);
  241. }
  242. /**
  243. * Ensure that path is capable to hold at least {@code capacity} bytes
  244. *
  245. * @param capacity
  246. * the amount of bytes to hold
  247. * @param len
  248. * the amount of live bytes in path buffer
  249. */
  250. protected void ensurePathCapacity(final int capacity, final int len) {
  251. if (path.length >= capacity)
  252. return;
  253. final byte[] o = path;
  254. int current = o.length;
  255. int newCapacity = current;
  256. while (newCapacity < capacity && newCapacity > 0)
  257. newCapacity <<= 1;
  258. setPathCapacity(newCapacity, len);
  259. }
  260. /**
  261. * Set path buffer capacity to the specified size
  262. *
  263. * @param capacity
  264. * the new size
  265. * @param len
  266. * the amount of bytes to copy
  267. */
  268. private void setPathCapacity(int capacity, int len) {
  269. final byte[] o = path;
  270. final byte[] n = new byte[capacity];
  271. System.arraycopy(o, 0, n, 0, len);
  272. for (AbstractTreeIterator p = this; p != null && p.path == o; p = p.parent)
  273. p.path = n;
  274. }
  275. /**
  276. * Compare the path of this current entry to another iterator's entry.
  277. *
  278. * @param p
  279. * the other iterator to compare the path against.
  280. * @return -1 if this entry sorts first; 0 if the entries are equal; 1 if
  281. * p's entry sorts first.
  282. */
  283. public int pathCompare(final AbstractTreeIterator p) {
  284. return pathCompare(p, p.mode);
  285. }
  286. int pathCompare(final AbstractTreeIterator p, final int pMode) {
  287. final byte[] a = path;
  288. final byte[] b = p.path;
  289. final int aLen = pathLen;
  290. final int bLen = p.pathLen;
  291. int cPos;
  292. // Its common when we are a subtree for both parents to match;
  293. // when this happens everything in path[0..cPos] is known to
  294. // be equal and does not require evaluation again.
  295. //
  296. cPos = alreadyMatch(this, p);
  297. for (; cPos < aLen && cPos < bLen; cPos++) {
  298. final int cmp = (a[cPos] & 0xff) - (b[cPos] & 0xff);
  299. if (cmp != 0)
  300. return cmp;
  301. }
  302. if (cPos < aLen)
  303. return (a[cPos] & 0xff) - lastPathChar(pMode);
  304. if (cPos < bLen)
  305. return lastPathChar(mode) - (b[cPos] & 0xff);
  306. return lastPathChar(mode) - lastPathChar(pMode);
  307. }
  308. private static int alreadyMatch(AbstractTreeIterator a,
  309. AbstractTreeIterator b) {
  310. for (;;) {
  311. final AbstractTreeIterator ap = a.parent;
  312. final AbstractTreeIterator bp = b.parent;
  313. if (ap == null || bp == null)
  314. return 0;
  315. if (ap.matches == bp.matches)
  316. return a.pathOffset;
  317. a = ap;
  318. b = bp;
  319. }
  320. }
  321. private static int lastPathChar(final int mode) {
  322. return FileMode.TREE.equals(mode) ? '/' : '\0';
  323. }
  324. /**
  325. * Check if the current entry of both iterators has the same id.
  326. * <p>
  327. * This method is faster than {@link #getEntryObjectId()} as it does not
  328. * require copying the bytes out of the buffers. A direct {@link #idBuffer}
  329. * compare operation is performed.
  330. *
  331. * @param otherIterator
  332. * the other iterator to test against.
  333. * @return true if both iterators have the same object id; false otherwise.
  334. */
  335. public boolean idEqual(final AbstractTreeIterator otherIterator) {
  336. return ObjectId.equals(idBuffer(), idOffset(),
  337. otherIterator.idBuffer(), otherIterator.idOffset());
  338. }
  339. /**
  340. * Get the object id of the current entry.
  341. *
  342. * @return an object id for the current entry.
  343. */
  344. public ObjectId getEntryObjectId() {
  345. return ObjectId.fromRaw(idBuffer(), idOffset());
  346. }
  347. /**
  348. * Obtain the ObjectId for the current entry.
  349. *
  350. * @param out
  351. * buffer to copy the object id into.
  352. */
  353. public void getEntryObjectId(final MutableObjectId out) {
  354. out.fromRaw(idBuffer(), idOffset());
  355. }
  356. /** @return the file mode of the current entry. */
  357. public FileMode getEntryFileMode() {
  358. return FileMode.fromBits(mode);
  359. }
  360. /** @return the file mode of the current entry as bits */
  361. public int getEntryRawMode() {
  362. return mode;
  363. }
  364. /** @return path of the current entry, as a string. */
  365. public String getEntryPathString() {
  366. return TreeWalk.pathOf(this);
  367. }
  368. /**
  369. * Get the current entry's path hash code.
  370. * <p>
  371. * This method computes a hash code on the fly for this path, the hash is
  372. * suitable to cluster objects that may have similar paths together.
  373. *
  374. * @return path hash code; any integer may be returned.
  375. */
  376. public int getEntryPathHashCode() {
  377. int hash = 0;
  378. for (int i = Math.max(0, pathLen - 16); i < pathLen; i++) {
  379. byte c = path[i];
  380. if (c != ' ')
  381. hash = (hash >>> 2) + (c << 24);
  382. }
  383. return hash;
  384. }
  385. /**
  386. * Get the byte array buffer object IDs must be copied out of.
  387. * <p>
  388. * The id buffer contains the bytes necessary to construct an ObjectId for
  389. * the current entry of this iterator. The buffer can be the same buffer for
  390. * all entries, or it can be a unique buffer per-entry. Implementations are
  391. * encouraged to expose their private buffer whenever possible to reduce
  392. * garbage generation and copying costs.
  393. *
  394. * @return byte array the implementation stores object IDs within.
  395. * @see #getEntryObjectId()
  396. */
  397. public abstract byte[] idBuffer();
  398. /**
  399. * Get the position within {@link #idBuffer()} of this entry's ObjectId.
  400. *
  401. * @return offset into the array returned by {@link #idBuffer()} where the
  402. * ObjectId must be copied out of.
  403. */
  404. public abstract int idOffset();
  405. /**
  406. * Create a new iterator for the current entry's subtree.
  407. * <p>
  408. * The parent reference of the iterator must be <code>this</code>,
  409. * otherwise the caller would not be able to exit out of the subtree
  410. * iterator correctly and return to continue walking <code>this</code>.
  411. *
  412. * @param reader
  413. * reader to load the tree data from.
  414. * @return a new parser that walks over the current subtree.
  415. * @throws IncorrectObjectTypeException
  416. * the current entry is not actually a tree and cannot be parsed
  417. * as though it were a tree.
  418. * @throws IOException
  419. * a loose object or pack file could not be read.
  420. */
  421. public abstract AbstractTreeIterator createSubtreeIterator(
  422. ObjectReader reader) throws IncorrectObjectTypeException,
  423. IOException;
  424. /**
  425. * Create a new iterator as though the current entry were a subtree.
  426. *
  427. * @return a new empty tree iterator.
  428. */
  429. public EmptyTreeIterator createEmptyTreeIterator() {
  430. return new EmptyTreeIterator(this);
  431. }
  432. /**
  433. * Create a new iterator for the current entry's subtree.
  434. * <p>
  435. * The parent reference of the iterator must be <code>this</code>, otherwise
  436. * the caller would not be able to exit out of the subtree iterator
  437. * correctly and return to continue walking <code>this</code>.
  438. *
  439. * @param reader
  440. * reader to load the tree data from.
  441. * @param idBuffer
  442. * temporary ObjectId buffer for use by this method.
  443. * @return a new parser that walks over the current subtree.
  444. * @throws IncorrectObjectTypeException
  445. * the current entry is not actually a tree and cannot be parsed
  446. * as though it were a tree.
  447. * @throws IOException
  448. * a loose object or pack file could not be read.
  449. */
  450. public AbstractTreeIterator createSubtreeIterator(
  451. final ObjectReader reader, final MutableObjectId idBuffer)
  452. throws IncorrectObjectTypeException, IOException {
  453. return createSubtreeIterator(reader);
  454. }
  455. /**
  456. * Position this iterator on the first entry.
  457. *
  458. * The default implementation of this method uses {@code back(1)} until
  459. * {@code first()} is true. This is most likely not the most efficient
  460. * method of repositioning the iterator to its first entry, so subclasses
  461. * are strongly encouraged to override the method.
  462. *
  463. * @throws CorruptObjectException
  464. * the tree is invalid.
  465. */
  466. public void reset() throws CorruptObjectException {
  467. while (!first())
  468. back(1);
  469. }
  470. /**
  471. * Is this tree iterator positioned on its first entry?
  472. * <p>
  473. * An iterator is positioned on the first entry if <code>back(1)</code>
  474. * would be an invalid request as there is no entry before the current one.
  475. * <p>
  476. * An empty iterator (one with no entries) will be
  477. * <code>first() &amp;&amp; eof()</code>.
  478. *
  479. * @return true if the iterator is positioned on the first entry.
  480. */
  481. public abstract boolean first();
  482. /**
  483. * Is this tree iterator at its EOF point (no more entries)?
  484. * <p>
  485. * An iterator is at EOF if there is no current entry.
  486. *
  487. * @return true if we have walked all entries and have none left.
  488. */
  489. public abstract boolean eof();
  490. /**
  491. * Move to next entry, populating this iterator with the entry data.
  492. * <p>
  493. * The delta indicates how many moves forward should occur. The most common
  494. * delta is 1 to move to the next entry.
  495. * <p>
  496. * Implementations must populate the following members:
  497. * <ul>
  498. * <li>{@link #mode}</li>
  499. * <li>{@link #path} (from {@link #pathOffset} to {@link #pathLen})</li>
  500. * <li>{@link #pathLen}</li>
  501. * </ul>
  502. * as well as any implementation dependent information necessary to
  503. * accurately return data from {@link #idBuffer()} and {@link #idOffset()}
  504. * when demanded.
  505. *
  506. * @param delta
  507. * number of entries to move the iterator by. Must be a positive,
  508. * non-zero integer.
  509. * @throws CorruptObjectException
  510. * the tree is invalid.
  511. */
  512. public abstract void next(int delta) throws CorruptObjectException;
  513. /**
  514. * Move to prior entry, populating this iterator with the entry data.
  515. * <p>
  516. * The delta indicates how many moves backward should occur.The most common
  517. * delta is 1 to move to the prior entry.
  518. * <p>
  519. * Implementations must populate the following members:
  520. * <ul>
  521. * <li>{@link #mode}</li>
  522. * <li>{@link #path} (from {@link #pathOffset} to {@link #pathLen})</li>
  523. * <li>{@link #pathLen}</li>
  524. * </ul>
  525. * as well as any implementation dependent information necessary to
  526. * accurately return data from {@link #idBuffer()} and {@link #idOffset()}
  527. * when demanded.
  528. *
  529. * @param delta
  530. * number of entries to move the iterator by. Must be a positive,
  531. * non-zero integer.
  532. * @throws CorruptObjectException
  533. * the tree is invalid.
  534. */
  535. public abstract void back(int delta) throws CorruptObjectException;
  536. /**
  537. * Advance to the next tree entry, populating this iterator with its data.
  538. * <p>
  539. * This method behaves like <code>seek(1)</code> but is called by
  540. * {@link TreeWalk} only if a {@link TreeFilter} was used and ruled out the
  541. * current entry from the results. In such cases this tree iterator may
  542. * perform special behavior.
  543. *
  544. * @throws CorruptObjectException
  545. * the tree is invalid.
  546. */
  547. public void skip() throws CorruptObjectException {
  548. next(1);
  549. }
  550. /**
  551. * Indicates to the iterator that no more entries will be read.
  552. * <p>
  553. * This is only invoked by TreeWalk when the iteration is aborted early due
  554. * to a {@link org.eclipse.jgit.errors.StopWalkException} being thrown from
  555. * within a TreeFilter.
  556. */
  557. public void stopWalk() {
  558. // Do nothing by default. Most iterators do not care.
  559. }
  560. /**
  561. * @return the length of the name component of the path for the current entry
  562. */
  563. public int getNameLength() {
  564. return pathLen - pathOffset;
  565. }
  566. /**
  567. * Get the name component of the current entry path into the provided buffer.
  568. *
  569. * @param buffer the buffer to get the name into, it is assumed that buffer can hold the name
  570. * @param offset the offset of the name in the buffer
  571. * @see #getNameLength()
  572. */
  573. public void getName(byte[] buffer, int offset) {
  574. System.arraycopy(path, pathOffset, buffer, offset, pathLen - pathOffset);
  575. }
  576. }