You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

BlameGenerator.java 30KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962
  1. /*
  2. * Copyright (C) 2011, Google Inc.
  3. * and other copyright owners as documented in the project's IP log.
  4. *
  5. * This program and the accompanying materials are made available
  6. * under the terms of the Eclipse Distribution License v1.0 which
  7. * accompanies this distribution, is reproduced below, and is
  8. * available at http://www.eclipse.org/org/documents/edl-v10.php
  9. *
  10. * All rights reserved.
  11. *
  12. * Redistribution and use in source and binary forms, with or
  13. * without modification, are permitted provided that the following
  14. * conditions are met:
  15. *
  16. * - Redistributions of source code must retain the above copyright
  17. * notice, this list of conditions and the following disclaimer.
  18. *
  19. * - Redistributions in binary form must reproduce the above
  20. * copyright notice, this list of conditions and the following
  21. * disclaimer in the documentation and/or other materials provided
  22. * with the distribution.
  23. *
  24. * - Neither the name of the Eclipse Foundation, Inc. nor the
  25. * names of its contributors may be used to endorse or promote
  26. * products derived from this software without specific prior
  27. * written permission.
  28. *
  29. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
  30. * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
  31. * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  32. * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  33. * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
  34. * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  35. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  36. * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  37. * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  38. * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
  39. * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  40. * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
  41. * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  42. */
  43. package org.eclipse.jgit.blame;
  44. import static org.eclipse.jgit.lib.Constants.OBJ_BLOB;
  45. import java.io.IOException;
  46. import java.util.Collection;
  47. import java.util.Collections;
  48. import org.eclipse.jgit.blame.Candidate.BlobCandidate;
  49. import org.eclipse.jgit.blame.Candidate.ReverseCandidate;
  50. import org.eclipse.jgit.blame.ReverseWalk.ReverseCommit;
  51. import org.eclipse.jgit.diff.DiffAlgorithm;
  52. import org.eclipse.jgit.diff.DiffEntry;
  53. import org.eclipse.jgit.diff.DiffEntry.ChangeType;
  54. import org.eclipse.jgit.diff.EditList;
  55. import org.eclipse.jgit.diff.HistogramDiff;
  56. import org.eclipse.jgit.diff.RawText;
  57. import org.eclipse.jgit.diff.RawTextComparator;
  58. import org.eclipse.jgit.diff.RenameDetector;
  59. import org.eclipse.jgit.internal.JGitText;
  60. import org.eclipse.jgit.lib.AnyObjectId;
  61. import org.eclipse.jgit.lib.MutableObjectId;
  62. import org.eclipse.jgit.lib.ObjectId;
  63. import org.eclipse.jgit.lib.ObjectLoader;
  64. import org.eclipse.jgit.lib.ObjectReader;
  65. import org.eclipse.jgit.lib.PersonIdent;
  66. import org.eclipse.jgit.lib.Repository;
  67. import org.eclipse.jgit.revwalk.RevCommit;
  68. import org.eclipse.jgit.revwalk.RevFlag;
  69. import org.eclipse.jgit.revwalk.RevWalk;
  70. import org.eclipse.jgit.treewalk.TreeWalk;
  71. import org.eclipse.jgit.treewalk.filter.PathFilter;
  72. import org.eclipse.jgit.treewalk.filter.TreeFilter;
  73. /**
  74. * Generate author information for lines based on introduction to the file.
  75. * <p>
  76. * Applications that want a simple one-shot computation of blame for a file
  77. * should use {@link #computeBlameResult()} to prepare the entire result in one
  78. * method call. This may block for significant time as the history of the
  79. * repository must be traversed until information is gathered for every line.
  80. * <p>
  81. * Applications that want more incremental update behavior may use either the
  82. * raw {@link #next()} streaming approach supported by this class, or construct
  83. * a {@link BlameResult} using {@link BlameResult#create(BlameGenerator)} and
  84. * incrementally construct the result with {@link BlameResult#computeNext()}.
  85. * <p>
  86. * This class is not thread-safe.
  87. * <p>
  88. * An instance of BlameGenerator can only be used once. To blame multiple files
  89. * the application must create a new BlameGenerator.
  90. * <p>
  91. * During blame processing there are two files involved:
  92. * <ul>
  93. * <li>result - The file whose lines are being examined. This is the revision
  94. * the user is trying to view blame/annotation information alongside of.</li>
  95. * <li>source - The file that was blamed with supplying one or more lines of
  96. * data into result. The source may be a different file path (due to copy or
  97. * rename). Source line numbers may differ from result line numbers due to lines
  98. * being added/removed in intermediate revisions.</li>
  99. * </ul>
  100. * <p>
  101. * The blame algorithm is implemented by initially assigning responsibility for
  102. * all lines of the result to the starting commit. A difference against the
  103. * commit's ancestor is computed, and responsibility is passed to the ancestor
  104. * commit for any lines that are common. The starting commit is blamed only for
  105. * the lines that do not appear in the ancestor, if any. The loop repeats using
  106. * the ancestor, until there are no more lines to acquire information on, or the
  107. * file's creation point is discovered in history.
  108. */
  109. public class BlameGenerator {
  /** Repository handed to the constructor; exposed through {@link #getRepository()}. */
  private final Repository repository;
  /** Path filter created from the path handed to the constructor. */
  private final PathFilter resultPath;
  /**
   * Scratch object id. It is compared against candidate blobs and copied with
   * {@code toObjectId()} right after a successful {@code find(...)} call;
   * presumably {@code find} (not visible here) fills it - confirm.
   */
  private final MutableObjectId idBuf;
  /** Revision pool used to acquire commits from. */
  private RevWalk revPool;
  /** Indicates the commit has already been processed. */
  private RevFlag SEEN;
  /** Object reader obtained from {@link #revPool} when the pool is initialized. */
  private ObjectReader reader;
  /** Recursive tree walk created over {@link #reader} when the pool is initialized. */
  private TreeWalk treeWalk;
  /** Diff algorithm used to compare revisions; a {@link HistogramDiff} unless replaced. */
  private DiffAlgorithm diffAlgorithm = new HistogramDiff();
  /** Line comparator passed to the diff algorithm; {@link RawTextComparator#DEFAULT} unless replaced. */
  private RawTextComparator textComparator = RawTextComparator.DEFAULT;
  /** Rename detector; {@code null} when rename following has been disabled. */
  private RenameDetector renameDetector;
  /** Potential candidates, sorted by commit time descending. */
  private Candidate queue;
  /** Number of lines that still need to be discovered. */
  private int remaining;
  /** Blame is currently assigned to this source. */
  private Candidate currentSource;
  /**
   * Build a blame generator bound to one repository and one file path.
   * <p>
   * Rename following is switched on, a forward (non-reverse) revision pool is
   * prepared, and the count of remaining lines starts at {@code -1} until a
   * candidate is pushed.
   *
   * @param repository
   *            repository to access revision data from.
   * @param path
   *            initial path of the file to start scanning.
   */
  public BlameGenerator(Repository repository, String path) {
      this.idBuf = new MutableObjectId();
      this.remaining = -1;
      this.repository = repository;
      this.resultPath = PathFilter.create(path);
      // Both helpers read the repository field, so it must already be assigned.
      this.setFollowFileRenames(true);
      this.initRevPool(false);
  }
  144. private void initRevPool(boolean reverse) {
  145. if (queue != null)
  146. throw new IllegalStateException();
  147. if (revPool != null)
  148. revPool.release();
  149. if (reverse)
  150. revPool = new ReverseWalk(getRepository());
  151. else
  152. revPool = new RevWalk(getRepository());
  153. revPool.setRetainBody(true);
  154. SEEN = revPool.newFlag("SEEN");
  155. reader = revPool.getObjectReader();
  156. treeWalk = new TreeWalk(reader);
  157. treeWalk.setRecursive(true);
  158. }
  159. /** @return repository being scanned for revision history. */
  160. public Repository getRepository() {
  161. return repository;
  162. }
  163. /** @return path file path being processed. */
  164. public String getResultPath() {
  165. return resultPath.getPath();
  166. }
  167. /**
  168. * Difference algorithm to use when comparing revisions.
  169. *
  170. * @param algorithm
  171. * @return {@code this}
  172. */
  173. public BlameGenerator setDiffAlgorithm(DiffAlgorithm algorithm) {
  174. diffAlgorithm = algorithm;
  175. return this;
  176. }
  177. /**
  178. * Text comparator to use when comparing revisions.
  179. *
  180. * @param comparator
  181. * @return {@code this}
  182. */
  183. public BlameGenerator setTextComparator(RawTextComparator comparator) {
  184. textComparator = comparator;
  185. return this;
  186. }
  187. /**
  188. * Enable (or disable) following file renames, on by default.
  189. * <p>
  190. * If true renames are followed using the standard FollowFilter behavior
  191. * used by RevWalk (which matches {@code git log --follow} in the C
  192. * implementation). This is not the same as copy/move detection as
  193. * implemented by the C implementation's of {@code git blame -M -C}.
  194. *
  195. * @param follow
  196. * enable following.
  197. * @return {@code this}
  198. */
  199. public BlameGenerator setFollowFileRenames(boolean follow) {
  200. if (follow)
  201. renameDetector = new RenameDetector(getRepository());
  202. else
  203. renameDetector = null;
  204. return this;
  205. }
  206. /**
  207. * Obtain the RenameDetector if {@code setFollowFileRenames(true)}.
  208. *
  209. * @return the rename detector, allowing the application to configure its
  210. * settings for rename score and breaking behavior.
  211. */
  212. public RenameDetector getRenameDetector() {
  213. return renameDetector;
  214. }
  215. /**
  216. * Push a candidate blob onto the generator's traversal stack.
  217. * <p>
  218. * Candidates should be pushed in history order from oldest-to-newest.
  219. * Applications should push the starting commit first, then the index
  220. * revision (if the index is interesting), and finally the working tree
  221. * copy (if the working tree is interesting).
  222. *
  223. * @param description
  224. * description of the blob revision, such as "Working Tree".
  225. * @param contents
  226. * contents of the file.
  227. * @return {@code this}
  228. * @throws IOException
  229. * the repository cannot be read.
  230. */
  231. public BlameGenerator push(String description, byte[] contents)
  232. throws IOException {
  233. return push(description, new RawText(contents));
  234. }
  235. /**
  236. * Push a candidate blob onto the generator's traversal stack.
  237. * <p>
  238. * Candidates should be pushed in history order from oldest-to-newest.
  239. * Applications should push the starting commit first, then the index
  240. * revision (if the index is interesting), and finally the working tree copy
  241. * (if the working tree is interesting).
  242. *
  243. * @param description
  244. * description of the blob revision, such as "Working Tree".
  245. * @param contents
  246. * contents of the file.
  247. * @return {@code this}
  248. * @throws IOException
  249. * the repository cannot be read.
  250. */
  251. public BlameGenerator push(String description, RawText contents)
  252. throws IOException {
  253. if (description == null)
  254. description = JGitText.get().blameNotCommittedYet;
  255. BlobCandidate c = new BlobCandidate(description, resultPath);
  256. c.sourceText = contents;
  257. c.regionList = new Region(0, 0, contents.size());
  258. remaining = contents.size();
  259. push(c);
  260. return this;
  261. }
  262. /**
  263. * Push a candidate object onto the generator's traversal stack.
  264. * <p>
  265. * Candidates should be pushed in history order from oldest-to-newest.
  266. * Applications should push the starting commit first, then the index
  267. * revision (if the index is interesting), and finally the working tree copy
  268. * (if the working tree is interesting).
  269. *
  270. * @param description
  271. * description of the blob revision, such as "Working Tree".
  272. * @param id
  273. * may be a commit or a blob.
  274. * @return {@code this}
  275. * @throws IOException
  276. * the repository cannot be read.
  277. */
  278. public BlameGenerator push(String description, AnyObjectId id)
  279. throws IOException {
  280. ObjectLoader ldr = reader.open(id);
  281. if (ldr.getType() == OBJ_BLOB) {
  282. if (description == null)
  283. description = JGitText.get().blameNotCommittedYet;
  284. BlobCandidate c = new BlobCandidate(description, resultPath);
  285. c.sourceBlob = id.toObjectId();
  286. c.sourceText = new RawText(ldr.getCachedBytes(Integer.MAX_VALUE));
  287. c.regionList = new Region(0, 0, c.sourceText.size());
  288. remaining = c.sourceText.size();
  289. push(c);
  290. return this;
  291. }
  292. RevCommit commit = revPool.parseCommit(id);
  293. if (!find(commit, resultPath))
  294. return this;
  295. Candidate c = new Candidate(commit, resultPath);
  296. c.sourceBlob = idBuf.toObjectId();
  297. c.loadText(reader);
  298. c.regionList = new Region(0, 0, c.sourceText.size());
  299. remaining = c.sourceText.size();
  300. push(c);
  301. return this;
  302. }
  303. /**
  304. * Configure the generator to compute reverse blame (history of deletes).
  305. * <p>
  306. * This method is expensive as it immediately runs a RevWalk over the
  307. * history spanning the expression {@code start..end} (end being more recent
  308. * than start) and then performs the equivalent operation as
  309. * {@link #push(String, AnyObjectId)} to begin blame traversal from the
  310. * commit named by {@code start} walking forwards through history until
  311. * {@code end} blaming line deletions.
  312. * <p>
  313. * A reverse blame may produce multiple sources for the same result line,
  314. * each of these is a descendant commit that removed the line, typically
  315. * this occurs when the same deletion appears in multiple side branches such
  316. * as due to a cherry-pick. Applications relying on reverse should use
  317. * {@link BlameResult} as it filters these duplicate sources and only
  318. * remembers the first (oldest) deletion.
  319. *
  320. * @param start
  321. * oldest commit to traverse from. The result file will be loaded
  322. * from this commit's tree.
  323. * @param end
  324. * most recent commit to stop traversal at. Usually an active
  325. * branch tip, tag, or HEAD.
  326. * @return {@code this}
  327. * @throws IOException
  328. * the repository cannot be read.
  329. */
  330. public BlameGenerator reverse(AnyObjectId start, AnyObjectId end)
  331. throws IOException {
  332. return reverse(start, Collections.singleton(end.toObjectId()));
  333. }
  334. /**
  335. * Configure the generator to compute reverse blame (history of deletes).
  336. * <p>
  337. * This method is expensive as it immediately runs a RevWalk over the
  338. * history spanning the expression {@code start..end} (end being more recent
  339. * than start) and then performs the equivalent operation as
  340. * {@link #push(String, AnyObjectId)} to begin blame traversal from the
  341. * commit named by {@code start} walking forwards through history until
  342. * {@code end} blaming line deletions.
  343. * <p>
  344. * A reverse blame may produce multiple sources for the same result line,
  345. * each of these is a descendant commit that removed the line, typically
  346. * this occurs when the same deletion appears in multiple side branches such
  347. * as due to a cherry-pick. Applications relying on reverse should use
  348. * {@link BlameResult} as it filters these duplicate sources and only
  349. * remembers the first (oldest) deletion.
  350. *
  351. * @param start
  352. * oldest commit to traverse from. The result file will be loaded
  353. * from this commit's tree.
  354. * @param end
  355. * most recent commits to stop traversal at. Usually an active
  356. * branch tip, tag, or HEAD.
  357. * @return {@code this}
  358. * @throws IOException
  359. * the repository cannot be read.
  360. */
  361. public BlameGenerator reverse(AnyObjectId start,
  362. Collection<? extends ObjectId> end) throws IOException {
  363. initRevPool(true);
  364. ReverseCommit result = (ReverseCommit) revPool.parseCommit(start);
  365. if (!find(result, resultPath))
  366. return this;
  367. revPool.markUninteresting(result);
  368. for (ObjectId id : end)
  369. revPool.markStart(revPool.parseCommit(id));
  370. while (revPool.next() != null) {
  371. // just pump the queue
  372. }
  373. ReverseCandidate c = new ReverseCandidate(result, resultPath);
  374. c.sourceBlob = idBuf.toObjectId();
  375. c.loadText(reader);
  376. c.regionList = new Region(0, 0, c.sourceText.size());
  377. remaining = c.sourceText.size();
  378. push(c);
  379. return this;
  380. }
  381. /**
  382. * Execute the generator in a blocking fashion until all data is ready.
  383. *
  384. * @return the complete result. Null if no file exists for the given path.
  385. * @throws IOException
  386. * the repository cannot be read.
  387. */
  388. public BlameResult computeBlameResult() throws IOException {
  389. try {
  390. BlameResult r = BlameResult.create(this);
  391. if (r != null)
  392. r.computeAll();
  393. return r;
  394. } finally {
  395. release();
  396. }
  397. }
  398. /**
  399. * Step the blame algorithm one iteration.
  400. *
  401. * @return true if the generator has found a region's source. The getSource*
  402. * and {@link #getResultStart()}, {@link #getResultEnd()} methods
  403. * can be used to inspect the region found. False if there are no
  404. * more regions to describe.
  405. * @throws IOException
  406. * repository cannot be read.
  407. */
  408. public boolean next() throws IOException {
  409. // If there is a source still pending, produce the next region.
  410. if (currentSource != null) {
  411. Region r = currentSource.regionList;
  412. Region n = r.next;
  413. remaining -= r.length;
  414. if (n != null) {
  415. currentSource.regionList = n;
  416. return true;
  417. }
  418. if (currentSource.queueNext != null)
  419. return result(currentSource.queueNext);
  420. currentSource = null;
  421. }
  422. // If there are no lines remaining, the entire result is done,
  423. // even if there are revisions still available for the path.
  424. if (remaining == 0)
  425. return done();
  426. for (;;) {
  427. Candidate n = pop();
  428. if (n == null)
  429. return done();
  430. int pCnt = n.getParentCount();
  431. if (pCnt == 1) {
  432. if (processOne(n))
  433. return true;
  434. } else if (1 < pCnt) {
  435. if (processMerge(n))
  436. return true;
  437. } else if (n instanceof ReverseCandidate) {
  438. // Do not generate a tip of a reverse. The region
  439. // survives and should not appear to be deleted.
  440. } else /* if (pCnt == 0) */{
  441. // Root commit, with at least one surviving region.
  442. // Assign the remaining blame here.
  443. return result(n);
  444. }
  445. }
  446. }
  447. private boolean done() {
  448. release();
  449. return false;
  450. }
  451. private boolean result(Candidate n) throws IOException {
  452. if (n.sourceCommit != null)
  453. revPool.parseBody(n.sourceCommit);
  454. currentSource = n;
  455. return true;
  456. }
  457. private boolean reverseResult(Candidate parent, Candidate source)
  458. throws IOException {
  459. // On a reverse blame present the application the parent
  460. // (as this is what did the removals), however the region
  461. // list to enumerate is the source's surviving list.
  462. Candidate res = parent.copy(parent.sourceCommit);
  463. res.regionList = source.regionList;
  464. return result(res);
  465. }
  466. private Candidate pop() {
  467. Candidate n = queue;
  468. if (n != null) {
  469. queue = n.queueNext;
  470. n.queueNext = null;
  471. }
  472. return n;
  473. }
  474. private void push(BlobCandidate toInsert) {
  475. Candidate c = queue;
  476. if (c != null) {
  477. c.regionList = null;
  478. toInsert.parent = c;
  479. }
  480. queue = toInsert;
  481. }
  482. private void push(Candidate toInsert) {
  483. // Mark sources to ensure they get discarded (above) if
  484. // another path to the same commit.
  485. toInsert.add(SEEN);
  486. // Insert into the queue using descending commit time, so
  487. // the most recent commit will pop next.
  488. int time = toInsert.getTime();
  489. Candidate n = queue;
  490. if (n == null || time >= n.getTime()) {
  491. toInsert.queueNext = n;
  492. queue = toInsert;
  493. return;
  494. }
  495. for (Candidate p = n;; p = n) {
  496. n = p.queueNext;
  497. if (n == null || time >= n.getTime()) {
  498. toInsert.queueNext = n;
  499. p.queueNext = toInsert;
  500. return;
  501. }
  502. }
  503. }
  504. private boolean processOne(Candidate n) throws IOException {
  505. RevCommit parent = n.getParent(0);
  506. if (parent == null)
  507. return split(n.getNextCandidate(0), n);
  508. if (parent.has(SEEN))
  509. return false;
  510. revPool.parseHeaders(parent);
  511. if (find(parent, n.sourcePath)) {
  512. if (idBuf.equals(n.sourceBlob)) {
  513. // The common case of the file not being modified in
  514. // a simple string-of-pearls history. Blame parent.
  515. n.sourceCommit = parent;
  516. push(n);
  517. return false;
  518. }
  519. Candidate next = n.create(parent, n.sourcePath);
  520. next.sourceBlob = idBuf.toObjectId();
  521. next.loadText(reader);
  522. return split(next, n);
  523. }
  524. if (n.sourceCommit == null)
  525. return result(n);
  526. DiffEntry r = findRename(parent, n.sourceCommit, n.sourcePath);
  527. if (r == null)
  528. return result(n);
  529. if (0 == r.getOldId().prefixCompare(n.sourceBlob)) {
  530. // A 100% rename without any content change can also
  531. // skip directly to the parent.
  532. n.sourceCommit = parent;
  533. n.sourcePath = PathFilter.create(r.getOldPath());
  534. push(n);
  535. return false;
  536. }
  537. Candidate next = n.create(parent, PathFilter.create(r.getOldPath()));
  538. next.sourceBlob = r.getOldId().toObjectId();
  539. next.renameScore = r.getScore();
  540. next.loadText(reader);
  541. return split(next, n);
  542. }
  543. private boolean split(Candidate parent, Candidate source)
  544. throws IOException {
  545. EditList editList = diffAlgorithm.diff(textComparator,
  546. parent.sourceText, source.sourceText);
  547. if (editList.isEmpty()) {
  548. // Ignoring whitespace (or some other special comparator) can
  549. // cause non-identical blobs to have an empty edit list. In
  550. // a case like this push the parent alone.
  551. parent.regionList = source.regionList;
  552. push(parent);
  553. return false;
  554. }
  555. parent.takeBlame(editList, source);
  556. if (parent.regionList != null)
  557. push(parent);
  558. if (source.regionList != null) {
  559. if (source instanceof ReverseCandidate)
  560. return reverseResult(parent, source);
  561. return result(source);
  562. }
  563. return false;
  564. }
  565. private boolean processMerge(Candidate n) throws IOException {
  566. int pCnt = n.getParentCount();
  567. for (int pIdx = 0; pIdx < pCnt; pIdx++) {
  568. RevCommit parent = n.getParent(pIdx);
  569. if (parent.has(SEEN))
  570. continue;
  571. revPool.parseHeaders(parent);
  572. }
  573. // If any single parent exactly matches the merge, follow only
  574. // that one parent through history.
  575. ObjectId[] ids = null;
  576. for (int pIdx = 0; pIdx < pCnt; pIdx++) {
  577. RevCommit parent = n.getParent(pIdx);
  578. if (parent.has(SEEN))
  579. continue;
  580. if (!find(parent, n.sourcePath))
  581. continue;
  582. if (!(n instanceof ReverseCandidate) && idBuf.equals(n.sourceBlob)) {
  583. n.sourceCommit = parent;
  584. push(n);
  585. return false;
  586. }
  587. if (ids == null)
  588. ids = new ObjectId[pCnt];
  589. ids[pIdx] = idBuf.toObjectId();
  590. }
  591. // If rename detection is enabled, search for any relevant names.
  592. DiffEntry[] renames = null;
  593. if (renameDetector != null) {
  594. renames = new DiffEntry[pCnt];
  595. for (int pIdx = 0; pIdx < pCnt; pIdx++) {
  596. RevCommit parent = n.getParent(pIdx);
  597. if (parent.has(SEEN))
  598. continue;
  599. if (ids != null && ids[pIdx] != null)
  600. continue;
  601. DiffEntry r = findRename(parent, n.sourceCommit, n.sourcePath);
  602. if (r == null)
  603. continue;
  604. if (n instanceof ReverseCandidate) {
  605. if (ids == null)
  606. ids = new ObjectId[pCnt];
  607. ids[pCnt] = r.getOldId().toObjectId();
  608. } else if (0 == r.getOldId().prefixCompare(n.sourceBlob)) {
  609. // A 100% rename without any content change can also
  610. // skip directly to the parent. Note this bypasses an
  611. // earlier parent that had the path (above) but did not
  612. // have an exact content match. For performance reasons
  613. // we choose to follow the one parent over trying to do
  614. // possibly both parents.
  615. n.sourceCommit = parent;
  616. n.sourcePath = PathFilter.create(r.getOldPath());
  617. push(n);
  618. return false;
  619. }
  620. renames[pIdx] = r;
  621. }
  622. }
  623. // Construct the candidate for each parent.
  624. Candidate[] parents = new Candidate[pCnt];
  625. for (int pIdx = 0; pIdx < pCnt; pIdx++) {
  626. RevCommit parent = n.getParent(pIdx);
  627. if (parent.has(SEEN))
  628. continue;
  629. Candidate p;
  630. if (renames != null && renames[pIdx] != null) {
  631. p = n.create(parent,
  632. PathFilter.create(renames[pIdx].getOldPath()));
  633. p.renameScore = renames[pIdx].getScore();
  634. p.sourceBlob = renames[pIdx].getOldId().toObjectId();
  635. } else if (ids != null && ids[pIdx] != null) {
  636. p = n.create(parent, n.sourcePath);
  637. p.sourceBlob = ids[pIdx];
  638. } else {
  639. continue;
  640. }
  641. EditList editList;
  642. if (n instanceof ReverseCandidate
  643. && p.sourceBlob.equals(n.sourceBlob)) {
  644. // This special case happens on ReverseCandidate forks.
  645. p.sourceText = n.sourceText;
  646. editList = new EditList(0);
  647. } else {
  648. p.loadText(reader);
  649. editList = diffAlgorithm.diff(textComparator,
  650. p.sourceText, n.sourceText);
  651. }
  652. if (editList.isEmpty()) {
  653. // Ignoring whitespace (or some other special comparator) can
  654. // cause non-identical blobs to have an empty edit list. In
  655. // a case like this push the parent alone.
  656. if (n instanceof ReverseCandidate) {
  657. parents[pIdx] = p;
  658. continue;
  659. }
  660. p.regionList = n.regionList;
  661. push(p);
  662. return false;
  663. }
  664. p.takeBlame(editList, n);
  665. // Only remember this parent candidate if there is at least
  666. // one region that was blamed on the parent.
  667. if (p.regionList != null) {
  668. // Reverse blame requires inverting the regions. This puts
  669. // the regions the parent deleted from us into the parent,
  670. // and retains the common regions to look at other parents
  671. // for deletions.
  672. if (n instanceof ReverseCandidate) {
  673. Region r = p.regionList;
  674. p.regionList = n.regionList;
  675. n.regionList = r;
  676. }
  677. parents[pIdx] = p;
  678. }
  679. }
  680. if (n instanceof ReverseCandidate) {
  681. // On a reverse blame report all deletions found in the children,
  682. // and pass on to them a copy of our region list.
  683. Candidate resultHead = null;
  684. Candidate resultTail = null;
  685. for (int pIdx = 0; pIdx < pCnt; pIdx++) {
  686. Candidate p = parents[pIdx];
  687. if (p == null)
  688. continue;
  689. if (p.regionList != null) {
  690. Candidate r = p.copy(p.sourceCommit);
  691. if (resultTail != null) {
  692. resultTail.queueNext = r;
  693. resultTail = r;
  694. } else {
  695. resultHead = r;
  696. resultTail = r;
  697. }
  698. }
  699. if (n.regionList != null) {
  700. p.regionList = n.regionList.deepCopy();
  701. push(p);
  702. }
  703. }
  704. if (resultHead != null)
  705. return result(resultHead);
  706. return false;
  707. }
  708. // Push any parents that are still candidates.
  709. for (int pIdx = 0; pIdx < pCnt; pIdx++) {
  710. if (parents[pIdx] != null)
  711. push(parents[pIdx]);
  712. }
  713. if (n.regionList != null)
  714. return result(n);
  715. return false;
  716. }
  717. /**
  718. * Get the revision blamed for the current region.
  719. * <p>
  720. * The source commit may be null if the line was blamed to an uncommitted
  721. * revision, such as the working tree copy, or during a reverse blame if the
  722. * line survives to the end revision (e.g. the branch tip).
  723. *
  724. * @return current revision being blamed.
  725. */
  726. public RevCommit getSourceCommit() {
  727. return currentSource.sourceCommit;
  728. }
  729. /** @return current author being blamed. */
  730. public PersonIdent getSourceAuthor() {
  731. return currentSource.getAuthor();
  732. }
  733. /** @return current committer being blamed. */
  734. public PersonIdent getSourceCommitter() {
  735. RevCommit c = getSourceCommit();
  736. return c != null ? c.getCommitterIdent() : null;
  737. }
  738. /** @return path of the file being blamed. */
  739. public String getSourcePath() {
  740. return currentSource.sourcePath.getPath();
  741. }
  742. /** @return rename score if a rename occurred in {@link #getSourceCommit}. */
  743. public int getRenameScore() {
  744. return currentSource.renameScore;
  745. }
  746. /**
  747. * @return first line of the source data that has been blamed for the
  748. * current region. This is line number of where the region was added
  749. * during {@link #getSourceCommit()} in file
  750. * {@link #getSourcePath()}.
  751. */
  752. public int getSourceStart() {
  753. return currentSource.regionList.sourceStart;
  754. }
  755. /**
  756. * @return one past the range of the source data that has been blamed for
  757. * the current region. This is line number of where the region was
  758. * added during {@link #getSourceCommit()} in file
  759. * {@link #getSourcePath()}.
  760. */
  761. public int getSourceEnd() {
  762. Region r = currentSource.regionList;
  763. return r.sourceStart + r.length;
  764. }
  765. /**
  766. * @return first line of the result that {@link #getSourceCommit()} has been
  767. * blamed for providing. Line numbers use 0 based indexing.
  768. */
  769. public int getResultStart() {
  770. return currentSource.regionList.resultStart;
  771. }
  772. /**
  773. * @return one past the range of the result that {@link #getSourceCommit()}
  774. * has been blamed for providing. Line numbers use 0 based indexing.
  775. * Because a source cannot be blamed for an empty region of the
  776. * result, {@link #getResultEnd()} is always at least one larger
  777. * than {@link #getResultStart()}.
  778. */
  779. public int getResultEnd() {
  780. Region r = currentSource.regionList;
  781. return r.resultStart + r.length;
  782. }
  783. /**
  784. * @return number of lines in the current region being blamed to
  785. * {@link #getSourceCommit()}. This is always the value of the
  786. * expression {@code getResultEnd() - getResultStart()}, but also
  787. * {@code getSourceEnd() - getSourceStart()}.
  788. */
  789. public int getRegionLength() {
  790. return currentSource.regionList.length;
  791. }
  792. /**
  793. * @return complete contents of the source file blamed for the current
  794. * output region. This is the contents of {@link #getSourcePath()}
  795. * within {@link #getSourceCommit()}. The source contents is
  796. * temporarily available as an artifact of the blame algorithm. Most
  797. * applications will want the result contents for display to users.
  798. */
  799. public RawText getSourceContents() {
  800. return currentSource.sourceText;
  801. }
  802. /**
  803. * @return complete file contents of the result file blame is annotating.
  804. * This value is accessible only after being configured and only
  805. * immediately before the first call to {@link #next()}. Returns
  806. * null if the path does not exist.
  807. * @throws IOException
  808. * repository cannot be read.
  809. * @throws IllegalStateException
  810. * {@link #next()} has already been invoked.
  811. */
  812. public RawText getResultContents() throws IOException {
  813. return queue != null ? queue.sourceText : null;
  814. }
  815. /** Release the current blame session. */
  816. public void release() {
  817. revPool.release();
  818. queue = null;
  819. currentSource = null;
  820. }
  821. private boolean find(RevCommit commit, PathFilter path) throws IOException {
  822. treeWalk.setFilter(path);
  823. treeWalk.reset(commit.getTree());
  824. while (treeWalk.next()) {
  825. if (path.isDone(treeWalk)) {
  826. if (treeWalk.getFileMode(0).getObjectType() != OBJ_BLOB)
  827. return false;
  828. treeWalk.getObjectId(idBuf, 0);
  829. return true;
  830. }
  831. if (treeWalk.isSubtree())
  832. treeWalk.enterSubtree();
  833. }
  834. return false;
  835. }
  836. private DiffEntry findRename(RevCommit parent, RevCommit commit,
  837. PathFilter path) throws IOException {
  838. if (renameDetector == null)
  839. return null;
  840. treeWalk.setFilter(TreeFilter.ANY_DIFF);
  841. treeWalk.reset(parent.getTree(), commit.getTree());
  842. renameDetector.reset();
  843. renameDetector.addAll(DiffEntry.scan(treeWalk));
  844. for (DiffEntry ent : renameDetector.compute()) {
  845. if (isRename(ent) && ent.getNewPath().equals(path.getPath()))
  846. return ent;
  847. }
  848. return null;
  849. }
  850. private static boolean isRename(DiffEntry ent) {
  851. return ent.getChangeType() == ChangeType.RENAME
  852. || ent.getChangeType() == ChangeType.COPY;
  853. }
  854. }