You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

BlameGenerator.java 31KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016
  1. /*
  2. * Copyright (C) 2011, Google Inc.
  3. * and other copyright owners as documented in the project's IP log.
  4. *
  5. * This program and the accompanying materials are made available
  6. * under the terms of the Eclipse Distribution License v1.0 which
  7. * accompanies this distribution, is reproduced below, and is
  8. * available at http://www.eclipse.org/org/documents/edl-v10.php
  9. *
  10. * All rights reserved.
  11. *
  12. * Redistribution and use in source and binary forms, with or
  13. * without modification, are permitted provided that the following
  14. * conditions are met:
  15. *
  16. * - Redistributions of source code must retain the above copyright
  17. * notice, this list of conditions and the following disclaimer.
  18. *
  19. * - Redistributions in binary form must reproduce the above
  20. * copyright notice, this list of conditions and the following
  21. * disclaimer in the documentation and/or other materials provided
  22. * with the distribution.
  23. *
  24. * - Neither the name of the Eclipse Foundation, Inc. nor the
  25. * names of its contributors may be used to endorse or promote
  26. * products derived from this software without specific prior
  27. * written permission.
  28. *
  29. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
  30. * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
  31. * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  32. * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  33. * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
  34. * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  35. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  36. * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  37. * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  38. * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
  39. * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  40. * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
  41. * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  42. */
  43. package org.eclipse.jgit.blame;
  44. import static org.eclipse.jgit.lib.Constants.OBJ_BLOB;
  45. import static org.eclipse.jgit.lib.FileMode.TYPE_FILE;
  46. import java.io.IOException;
  47. import java.util.Collection;
  48. import java.util.Collections;
  49. import org.eclipse.jgit.blame.Candidate.BlobCandidate;
  50. import org.eclipse.jgit.blame.Candidate.ReverseCandidate;
  51. import org.eclipse.jgit.blame.ReverseWalk.ReverseCommit;
  52. import org.eclipse.jgit.diff.DiffAlgorithm;
  53. import org.eclipse.jgit.diff.DiffEntry;
  54. import org.eclipse.jgit.diff.DiffEntry.ChangeType;
  55. import org.eclipse.jgit.diff.EditList;
  56. import org.eclipse.jgit.diff.HistogramDiff;
  57. import org.eclipse.jgit.diff.RawText;
  58. import org.eclipse.jgit.diff.RawTextComparator;
  59. import org.eclipse.jgit.diff.RenameDetector;
  60. import org.eclipse.jgit.internal.JGitText;
  61. import org.eclipse.jgit.lib.AnyObjectId;
  62. import org.eclipse.jgit.lib.MutableObjectId;
  63. import org.eclipse.jgit.lib.ObjectId;
  64. import org.eclipse.jgit.lib.ObjectLoader;
  65. import org.eclipse.jgit.lib.ObjectReader;
  66. import org.eclipse.jgit.lib.PersonIdent;
  67. import org.eclipse.jgit.lib.Repository;
  68. import org.eclipse.jgit.revwalk.RevCommit;
  69. import org.eclipse.jgit.revwalk.RevFlag;
  70. import org.eclipse.jgit.revwalk.RevWalk;
  71. import org.eclipse.jgit.treewalk.TreeWalk;
  72. import org.eclipse.jgit.treewalk.filter.AndTreeFilter;
  73. import org.eclipse.jgit.treewalk.filter.PathFilter;
  74. import org.eclipse.jgit.treewalk.filter.TreeFilter;
  75. /**
  76. * Generate author information for lines based on a provided file.
  77. * <p>
  78. * Applications that want a simple one-shot computation of blame for a file
  79. * should use {@link #computeBlameResult()} to prepare the entire result in one
  80. * method call. This may block for significant time as the history of the
  81. * repository must be traversed until information is gathered for every line.
  82. * <p>
  83. * Applications that want more incremental update behavior may use either the
  84. * raw {@link #next()} streaming approach supported by this class, or construct
  85. * a {@link BlameResult} using {@link BlameResult#create(BlameGenerator)} and
  86. * incrementally construct the result with {@link BlameResult#computeNext()}.
  87. * <p>
  88. * This class is not thread-safe.
  89. * <p>
  90. * An instance of BlameGenerator can only be used once. To blame multiple files
  91. * the application must create a new BlameGenerator.
  92. * <p>
  93. * During blame processing there are two files involved:
  94. * <ul>
  95. * <li>result - The file whose lines are being examined. This is the revision
  96. * the user is trying to view blame/annotation information alongside of.</li>
  97. * <li>source - The file that was blamed with supplying one or more lines of
  98. * data into result. The source may be a different file path (due to copy or
  99. * rename). Source line numbers may differ from result line numbers due to lines
  100. * being added/removed in intermediate revisions.</li>
  101. * </ul>
  102. * <p>
  103. * The blame algorithm is implemented by initially assigning responsibility for
  104. * all lines of the result to the starting commit. A difference against the
  105. * commit's ancestor is computed, and responsibility is passed to the ancestor
  106. * commit for any lines that are common. The starting commit is blamed only for
  107. * the lines that do not appear in the ancestor, if any. The loop repeats using
  108. * the ancestor, until there are no more lines to acquire information on, or the
  109. * file's creation point is discovered in history.
  110. */
  111. public class BlameGenerator {
  /** Repository handed to the constructor; returned by {@link #getRepository()}. */
  private final Repository repository;

  /** Path filter created from the path handed to the constructor. */
  private final PathFilter resultPath;

  /**
   * Reusable mutable object id. It is filled by tree walk lookups and then
   * snapshotted with {@code toObjectId()} to record a candidate's blob.
   */
  private final MutableObjectId idBuf;

  /** Revision pool used to acquire commits from. */
  private RevWalk revPool;

  /** Indicates the commit was put into the queue at least once. */
  private RevFlag SEEN;

  /** Object reader obtained from the current revision pool. */
  private ObjectReader reader;

  /** Recursive tree walk built on top of {@link #reader}. */
  private TreeWalk treeWalk;

  /** Algorithm used to diff two revisions of the file; histogram diff by default. */
  private DiffAlgorithm diffAlgorithm = new HistogramDiff();

  /** Line comparator passed to the diff algorithm. */
  private RawTextComparator textComparator = RawTextComparator.DEFAULT;

  /** Rename detector, or null when rename following is disabled. */
  private RenameDetector renameDetector;

  /** Potential candidates, sorted by commit time descending. */
  private Candidate queue;

  /** Number of lines that still need to be discovered. */
  private int remaining;

  /** Blame is currently assigned to this source. */
  private Candidate outCandidate;

  /** Region of {@link #outCandidate} most recently reported by {@link #next()}. */
  private Region outRegion;
  /**
   * Build a blame generator for one file of a repository.
   * <p>
   * Rename following is enabled and a forward (non-reverse) revision pool is
   * prepared. No starting revision is pushed by the constructor.
   *
   * @param repository
   *            repository to access revision data from.
   * @param path
   *            initial path of the file to start scanning (relative to the
   *            repository).
   */
  public BlameGenerator(Repository repository, String path) {
    this.repository = repository;
    this.resultPath = PathFilter.create(path);
    this.idBuf = new MutableObjectId();
    // -1 marks "line count not known yet"; the push/reverse methods set it.
    this.remaining = -1;
    setFollowFileRenames(true);
    initRevPool(false);
  }
  149. private void initRevPool(boolean reverse) {
  150. if (queue != null)
  151. throw new IllegalStateException();
  152. if (revPool != null)
  153. revPool.release();
  154. if (reverse)
  155. revPool = new ReverseWalk(getRepository());
  156. else
  157. revPool = new RevWalk(getRepository());
  158. revPool.setRetainBody(true);
  159. SEEN = revPool.newFlag("SEEN"); //$NON-NLS-1$
  160. reader = revPool.getObjectReader();
  161. treeWalk = new TreeWalk(reader);
  162. treeWalk.setRecursive(true);
  163. }
  164. /** @return repository being scanned for revision history. */
  165. public Repository getRepository() {
  166. return repository;
  167. }
  168. /** @return path file path being processed. */
  169. public String getResultPath() {
  170. return resultPath.getPath();
  171. }
  172. /**
  173. * Difference algorithm to use when comparing revisions.
  174. *
  175. * @param algorithm
  176. * @return {@code this}
  177. */
  178. public BlameGenerator setDiffAlgorithm(DiffAlgorithm algorithm) {
  179. diffAlgorithm = algorithm;
  180. return this;
  181. }
  182. /**
  183. * Text comparator to use when comparing revisions.
  184. *
  185. * @param comparator
  186. * @return {@code this}
  187. */
  188. public BlameGenerator setTextComparator(RawTextComparator comparator) {
  189. textComparator = comparator;
  190. return this;
  191. }
  192. /**
  193. * Enable (or disable) following file renames, on by default.
  194. * <p>
  195. * If true renames are followed using the standard FollowFilter behavior
  196. * used by RevWalk (which matches {@code git log --follow} in the C
  197. * implementation). This is not the same as copy/move detection as
  198. * implemented by the C implementation's of {@code git blame -M -C}.
  199. *
  200. * @param follow
  201. * enable following.
  202. * @return {@code this}
  203. */
  204. public BlameGenerator setFollowFileRenames(boolean follow) {
  205. if (follow)
  206. renameDetector = new RenameDetector(getRepository());
  207. else
  208. renameDetector = null;
  209. return this;
  210. }
  211. /**
  212. * Obtain the RenameDetector if {@code setFollowFileRenames(true)}.
  213. *
  214. * @return the rename detector, allowing the application to configure its
  215. * settings for rename score and breaking behavior.
  216. */
  217. public RenameDetector getRenameDetector() {
  218. return renameDetector;
  219. }
  220. /**
  221. * Push a candidate blob onto the generator's traversal stack.
  222. * <p>
  223. * Candidates should be pushed in history order from oldest-to-newest.
  224. * Applications should push the starting commit first, then the index
  225. * revision (if the index is interesting), and finally the working tree
  226. * copy (if the working tree is interesting).
  227. *
  228. * @param description
  229. * description of the blob revision, such as "Working Tree".
  230. * @param contents
  231. * contents of the file.
  232. * @return {@code this}
  233. * @throws IOException
  234. * the repository cannot be read.
  235. */
  236. public BlameGenerator push(String description, byte[] contents)
  237. throws IOException {
  238. return push(description, new RawText(contents));
  239. }
  240. /**
  241. * Push a candidate blob onto the generator's traversal stack.
  242. * <p>
  243. * Candidates should be pushed in history order from oldest-to-newest.
  244. * Applications should push the starting commit first, then the index
  245. * revision (if the index is interesting), and finally the working tree copy
  246. * (if the working tree is interesting).
  247. *
  248. * @param description
  249. * description of the blob revision, such as "Working Tree".
  250. * @param contents
  251. * contents of the file.
  252. * @return {@code this}
  253. * @throws IOException
  254. * the repository cannot be read.
  255. */
  256. public BlameGenerator push(String description, RawText contents)
  257. throws IOException {
  258. if (description == null)
  259. description = JGitText.get().blameNotCommittedYet;
  260. BlobCandidate c = new BlobCandidate(description, resultPath);
  261. c.sourceText = contents;
  262. c.regionList = new Region(0, 0, contents.size());
  263. remaining = contents.size();
  264. push(c);
  265. return this;
  266. }
  267. /**
  268. * Push a candidate object onto the generator's traversal stack.
  269. * <p>
  270. * Candidates should be pushed in history order from oldest-to-newest.
  271. * Applications should push the starting commit first, then the index
  272. * revision (if the index is interesting), and finally the working tree copy
  273. * (if the working tree is interesting).
  274. *
  275. * @param description
  276. * description of the blob revision, such as "Working Tree".
  277. * @param id
  278. * may be a commit or a blob.
  279. * @return {@code this}
  280. * @throws IOException
  281. * the repository cannot be read.
  282. */
  283. public BlameGenerator push(String description, AnyObjectId id)
  284. throws IOException {
  285. ObjectLoader ldr = reader.open(id);
  286. if (ldr.getType() == OBJ_BLOB) {
  287. if (description == null)
  288. description = JGitText.get().blameNotCommittedYet;
  289. BlobCandidate c = new BlobCandidate(description, resultPath);
  290. c.sourceBlob = id.toObjectId();
  291. c.sourceText = new RawText(ldr.getCachedBytes(Integer.MAX_VALUE));
  292. c.regionList = new Region(0, 0, c.sourceText.size());
  293. remaining = c.sourceText.size();
  294. push(c);
  295. return this;
  296. }
  297. RevCommit commit = revPool.parseCommit(id);
  298. if (!find(commit, resultPath))
  299. return this;
  300. Candidate c = new Candidate(commit, resultPath);
  301. c.sourceBlob = idBuf.toObjectId();
  302. c.loadText(reader);
  303. c.regionList = new Region(0, 0, c.sourceText.size());
  304. remaining = c.sourceText.size();
  305. push(c);
  306. return this;
  307. }
  308. /**
  309. * Configure the generator to compute reverse blame (history of deletes).
  310. * <p>
  311. * This method is expensive as it immediately runs a RevWalk over the
  312. * history spanning the expression {@code start..end} (end being more recent
  313. * than start) and then performs the equivalent operation as
  314. * {@link #push(String, AnyObjectId)} to begin blame traversal from the
  315. * commit named by {@code start} walking forwards through history until
  316. * {@code end} blaming line deletions.
  317. * <p>
  318. * A reverse blame may produce multiple sources for the same result line,
  319. * each of these is a descendant commit that removed the line, typically
  320. * this occurs when the same deletion appears in multiple side branches such
  321. * as due to a cherry-pick. Applications relying on reverse should use
  322. * {@link BlameResult} as it filters these duplicate sources and only
  323. * remembers the first (oldest) deletion.
  324. *
  325. * @param start
  326. * oldest commit to traverse from. The result file will be loaded
  327. * from this commit's tree.
  328. * @param end
  329. * most recent commit to stop traversal at. Usually an active
  330. * branch tip, tag, or HEAD.
  331. * @return {@code this}
  332. * @throws IOException
  333. * the repository cannot be read.
  334. */
  335. public BlameGenerator reverse(AnyObjectId start, AnyObjectId end)
  336. throws IOException {
  337. return reverse(start, Collections.singleton(end.toObjectId()));
  338. }
  339. /**
  340. * Configure the generator to compute reverse blame (history of deletes).
  341. * <p>
  342. * This method is expensive as it immediately runs a RevWalk over the
  343. * history spanning the expression {@code start..end} (end being more recent
  344. * than start) and then performs the equivalent operation as
  345. * {@link #push(String, AnyObjectId)} to begin blame traversal from the
  346. * commit named by {@code start} walking forwards through history until
  347. * {@code end} blaming line deletions.
  348. * <p>
  349. * A reverse blame may produce multiple sources for the same result line,
  350. * each of these is a descendant commit that removed the line, typically
  351. * this occurs when the same deletion appears in multiple side branches such
  352. * as due to a cherry-pick. Applications relying on reverse should use
  353. * {@link BlameResult} as it filters these duplicate sources and only
  354. * remembers the first (oldest) deletion.
  355. *
  356. * @param start
  357. * oldest commit to traverse from. The result file will be loaded
  358. * from this commit's tree.
  359. * @param end
  360. * most recent commits to stop traversal at. Usually an active
  361. * branch tip, tag, or HEAD.
  362. * @return {@code this}
  363. * @throws IOException
  364. * the repository cannot be read.
  365. */
  366. public BlameGenerator reverse(AnyObjectId start,
  367. Collection<? extends ObjectId> end) throws IOException {
  368. initRevPool(true);
  369. ReverseCommit result = (ReverseCommit) revPool.parseCommit(start);
  370. if (!find(result, resultPath))
  371. return this;
  372. revPool.markUninteresting(result);
  373. for (ObjectId id : end)
  374. revPool.markStart(revPool.parseCommit(id));
  375. while (revPool.next() != null) {
  376. // just pump the queue
  377. }
  378. ReverseCandidate c = new ReverseCandidate(result, resultPath);
  379. c.sourceBlob = idBuf.toObjectId();
  380. c.loadText(reader);
  381. c.regionList = new Region(0, 0, c.sourceText.size());
  382. remaining = c.sourceText.size();
  383. push(c);
  384. return this;
  385. }
  386. /**
  387. * Allocate a new RevFlag for use by the caller.
  388. *
  389. * @param name
  390. * unique name of the flag in the blame context.
  391. * @return the newly allocated flag.
  392. * @since 3.4
  393. */
  394. public RevFlag newFlag(String name) {
  395. return revPool.newFlag(name);
  396. }
  397. /**
  398. * Execute the generator in a blocking fashion until all data is ready.
  399. *
  400. * @return the complete result. Null if no file exists for the given path.
  401. * @throws IOException
  402. * the repository cannot be read.
  403. */
  404. public BlameResult computeBlameResult() throws IOException {
  405. try {
  406. BlameResult r = BlameResult.create(this);
  407. if (r != null)
  408. r.computeAll();
  409. return r;
  410. } finally {
  411. release();
  412. }
  413. }
  414. /**
  415. * Step the blame algorithm one iteration.
  416. *
  417. * @return true if the generator has found a region's source. The getSource*
  418. * and {@link #getResultStart()}, {@link #getResultEnd()} methods
  419. * can be used to inspect the region found. False if there are no
  420. * more regions to describe.
  421. * @throws IOException
  422. * repository cannot be read.
  423. */
  424. public boolean next() throws IOException {
  425. // If there is a source still pending, produce the next region.
  426. if (outRegion != null) {
  427. Region r = outRegion;
  428. remaining -= r.length;
  429. if (r.next != null) {
  430. outRegion = r.next;
  431. return true;
  432. }
  433. if (outCandidate.queueNext != null)
  434. return result(outCandidate.queueNext);
  435. outCandidate = null;
  436. outRegion = null;
  437. }
  438. // If there are no lines remaining, the entire result is done,
  439. // even if there are revisions still available for the path.
  440. if (remaining == 0)
  441. return done();
  442. for (;;) {
  443. Candidate n = pop();
  444. if (n == null)
  445. return done();
  446. int pCnt = n.getParentCount();
  447. if (pCnt == 1) {
  448. if (processOne(n))
  449. return true;
  450. } else if (1 < pCnt) {
  451. if (processMerge(n))
  452. return true;
  453. } else if (n instanceof ReverseCandidate) {
  454. // Do not generate a tip of a reverse. The region
  455. // survives and should not appear to be deleted.
  456. } else /* if (pCnt == 0) */{
  457. // Root commit, with at least one surviving region.
  458. // Assign the remaining blame here.
  459. return result(n);
  460. }
  461. }
  462. }
  463. private boolean done() {
  464. release();
  465. return false;
  466. }
  467. private boolean result(Candidate n) throws IOException {
  468. if (n.sourceCommit != null)
  469. revPool.parseBody(n.sourceCommit);
  470. outCandidate = n;
  471. outRegion = n.regionList;
  472. return true;
  473. }
  474. private boolean reverseResult(Candidate parent, Candidate source)
  475. throws IOException {
  476. // On a reverse blame present the application the parent
  477. // (as this is what did the removals), however the region
  478. // list to enumerate is the source's surviving list.
  479. Candidate res = parent.copy(parent.sourceCommit);
  480. res.regionList = source.regionList;
  481. return result(res);
  482. }
  483. private Candidate pop() {
  484. Candidate n = queue;
  485. if (n != null) {
  486. queue = n.queueNext;
  487. n.queueNext = null;
  488. }
  489. return n;
  490. }
  491. private void push(BlobCandidate toInsert) {
  492. Candidate c = queue;
  493. if (c != null) {
  494. c.remove(SEEN); // will be pushed by toInsert
  495. c.regionList = null;
  496. toInsert.parent = c;
  497. }
  498. queue = toInsert;
  499. }
  500. private void push(Candidate toInsert) {
  501. if (toInsert.has(SEEN)) {
  502. // We have already added a Candidate for this commit to the queue,
  503. // this can happen if the commit is a merge base for two or more
  504. // parallel branches that were merged together.
  505. //
  506. // It is likely the candidate was not yet processed. The queue
  507. // sorts descending by commit time and usually descendant commits
  508. // have higher timestamps than the ancestors.
  509. //
  510. // Find the existing candidate and merge the new candidate's
  511. // region list into it.
  512. for (Candidate p = queue; p != null; p = p.queueNext) {
  513. if (p.canMergeRegions(toInsert)) {
  514. p.mergeRegions(toInsert);
  515. return;
  516. }
  517. }
  518. }
  519. toInsert.add(SEEN);
  520. // Insert into the queue using descending commit time, so
  521. // the most recent commit will pop next.
  522. int time = toInsert.getTime();
  523. Candidate n = queue;
  524. if (n == null || time >= n.getTime()) {
  525. toInsert.queueNext = n;
  526. queue = toInsert;
  527. return;
  528. }
  529. for (Candidate p = n;; p = n) {
  530. n = p.queueNext;
  531. if (n == null || time >= n.getTime()) {
  532. toInsert.queueNext = n;
  533. p.queueNext = toInsert;
  534. return;
  535. }
  536. }
  537. }
  538. private boolean processOne(Candidate n) throws IOException {
  539. RevCommit parent = n.getParent(0);
  540. if (parent == null)
  541. return split(n.getNextCandidate(0), n);
  542. revPool.parseHeaders(parent);
  543. if (n.sourceCommit != null && n.recursivePath) {
  544. treeWalk.setFilter(AndTreeFilter.create(n.sourcePath, ID_DIFF));
  545. treeWalk.reset(n.sourceCommit.getTree(), parent.getTree());
  546. if (!treeWalk.next())
  547. return blameEntireRegionOnParent(n, parent);
  548. if (isFile(treeWalk.getRawMode(1))) {
  549. treeWalk.getObjectId(idBuf, 1);
  550. return splitBlameWithParent(n, parent);
  551. }
  552. } else if (find(parent, n.sourcePath)) {
  553. if (idBuf.equals(n.sourceBlob))
  554. return blameEntireRegionOnParent(n, parent);
  555. return splitBlameWithParent(n, parent);
  556. }
  557. if (n.sourceCommit == null)
  558. return result(n);
  559. DiffEntry r = findRename(parent, n.sourceCommit, n.sourcePath);
  560. if (r == null)
  561. return result(n);
  562. if (0 == r.getOldId().prefixCompare(n.sourceBlob)) {
  563. // A 100% rename without any content change can also
  564. // skip directly to the parent.
  565. n.sourceCommit = parent;
  566. n.setSourcePath(PathFilter.create(r.getOldPath()));
  567. push(n);
  568. return false;
  569. }
  570. Candidate next = n.create(parent, PathFilter.create(r.getOldPath()));
  571. next.sourceBlob = r.getOldId().toObjectId();
  572. next.renameScore = r.getScore();
  573. next.loadText(reader);
  574. return split(next, n);
  575. }
  576. private boolean blameEntireRegionOnParent(Candidate n, RevCommit parent) {
  577. // File was not modified, blame parent.
  578. n.sourceCommit = parent;
  579. push(n);
  580. return false;
  581. }
  582. private boolean splitBlameWithParent(Candidate n, RevCommit parent)
  583. throws IOException {
  584. Candidate next = n.create(parent, n.sourcePath);
  585. next.sourceBlob = idBuf.toObjectId();
  586. next.loadText(reader);
  587. return split(next, n);
  588. }
  589. private boolean split(Candidate parent, Candidate source)
  590. throws IOException {
  591. EditList editList = diffAlgorithm.diff(textComparator,
  592. parent.sourceText, source.sourceText);
  593. if (editList.isEmpty()) {
  594. // Ignoring whitespace (or some other special comparator) can
  595. // cause non-identical blobs to have an empty edit list. In
  596. // a case like this push the parent alone.
  597. parent.regionList = source.regionList;
  598. push(parent);
  599. return false;
  600. }
  601. parent.takeBlame(editList, source);
  602. if (parent.regionList != null)
  603. push(parent);
  604. if (source.regionList != null) {
  605. if (source instanceof ReverseCandidate)
  606. return reverseResult(parent, source);
  607. return result(source);
  608. }
  609. return false;
  610. }
  611. private boolean processMerge(Candidate n) throws IOException {
  612. int pCnt = n.getParentCount();
  613. // If any single parent exactly matches the merge, follow only
  614. // that one parent through history.
  615. ObjectId[] ids = null;
  616. for (int pIdx = 0; pIdx < pCnt; pIdx++) {
  617. RevCommit parent = n.getParent(pIdx);
  618. revPool.parseHeaders(parent);
  619. if (!find(parent, n.sourcePath))
  620. continue;
  621. if (!(n instanceof ReverseCandidate) && idBuf.equals(n.sourceBlob)) {
  622. return blameEntireRegionOnParent(n, parent);
  623. }
  624. if (ids == null)
  625. ids = new ObjectId[pCnt];
  626. ids[pIdx] = idBuf.toObjectId();
  627. }
  628. // If rename detection is enabled, search for any relevant names.
  629. DiffEntry[] renames = null;
  630. if (renameDetector != null) {
  631. renames = new DiffEntry[pCnt];
  632. for (int pIdx = 0; pIdx < pCnt; pIdx++) {
  633. RevCommit parent = n.getParent(pIdx);
  634. if (ids != null && ids[pIdx] != null)
  635. continue;
  636. DiffEntry r = findRename(parent, n.sourceCommit, n.sourcePath);
  637. if (r == null)
  638. continue;
  639. if (n instanceof ReverseCandidate) {
  640. if (ids == null)
  641. ids = new ObjectId[pCnt];
  642. ids[pCnt] = r.getOldId().toObjectId();
  643. } else if (0 == r.getOldId().prefixCompare(n.sourceBlob)) {
  644. // A 100% rename without any content change can also
  645. // skip directly to the parent. Note this bypasses an
  646. // earlier parent that had the path (above) but did not
  647. // have an exact content match. For performance reasons
  648. // we choose to follow the one parent over trying to do
  649. // possibly both parents.
  650. n.sourceCommit = parent;
  651. n.setSourcePath(PathFilter.create(r.getOldPath()));
  652. push(n);
  653. return false;
  654. }
  655. renames[pIdx] = r;
  656. }
  657. }
  658. // Construct the candidate for each parent.
  659. Candidate[] parents = new Candidate[pCnt];
  660. for (int pIdx = 0; pIdx < pCnt; pIdx++) {
  661. RevCommit parent = n.getParent(pIdx);
  662. Candidate p;
  663. if (renames != null && renames[pIdx] != null) {
  664. p = n.create(parent,
  665. PathFilter.create(renames[pIdx].getOldPath()));
  666. p.renameScore = renames[pIdx].getScore();
  667. p.sourceBlob = renames[pIdx].getOldId().toObjectId();
  668. } else if (ids != null && ids[pIdx] != null) {
  669. p = n.create(parent, n.sourcePath);
  670. p.sourceBlob = ids[pIdx];
  671. } else {
  672. continue;
  673. }
  674. EditList editList;
  675. if (n instanceof ReverseCandidate
  676. && p.sourceBlob.equals(n.sourceBlob)) {
  677. // This special case happens on ReverseCandidate forks.
  678. p.sourceText = n.sourceText;
  679. editList = new EditList(0);
  680. } else {
  681. p.loadText(reader);
  682. editList = diffAlgorithm.diff(textComparator,
  683. p.sourceText, n.sourceText);
  684. }
  685. if (editList.isEmpty()) {
  686. // Ignoring whitespace (or some other special comparator) can
  687. // cause non-identical blobs to have an empty edit list. In
  688. // a case like this push the parent alone.
  689. if (n instanceof ReverseCandidate) {
  690. parents[pIdx] = p;
  691. continue;
  692. }
  693. p.regionList = n.regionList;
  694. push(p);
  695. return false;
  696. }
  697. p.takeBlame(editList, n);
  698. // Only remember this parent candidate if there is at least
  699. // one region that was blamed on the parent.
  700. if (p.regionList != null) {
  701. // Reverse blame requires inverting the regions. This puts
  702. // the regions the parent deleted from us into the parent,
  703. // and retains the common regions to look at other parents
  704. // for deletions.
  705. if (n instanceof ReverseCandidate) {
  706. Region r = p.regionList;
  707. p.regionList = n.regionList;
  708. n.regionList = r;
  709. }
  710. parents[pIdx] = p;
  711. }
  712. }
  713. if (n instanceof ReverseCandidate) {
  714. // On a reverse blame report all deletions found in the children,
  715. // and pass on to them a copy of our region list.
  716. Candidate resultHead = null;
  717. Candidate resultTail = null;
  718. for (int pIdx = 0; pIdx < pCnt; pIdx++) {
  719. Candidate p = parents[pIdx];
  720. if (p == null)
  721. continue;
  722. if (p.regionList != null) {
  723. Candidate r = p.copy(p.sourceCommit);
  724. if (resultTail != null) {
  725. resultTail.queueNext = r;
  726. resultTail = r;
  727. } else {
  728. resultHead = r;
  729. resultTail = r;
  730. }
  731. }
  732. if (n.regionList != null) {
  733. p.regionList = n.regionList.deepCopy();
  734. push(p);
  735. }
  736. }
  737. if (resultHead != null)
  738. return result(resultHead);
  739. return false;
  740. }
  741. // Push any parents that are still candidates.
  742. for (int pIdx = 0; pIdx < pCnt; pIdx++) {
  743. if (parents[pIdx] != null)
  744. push(parents[pIdx]);
  745. }
  746. if (n.regionList != null)
  747. return result(n);
  748. return false;
  749. }
  750. /**
  751. * Get the revision blamed for the current region.
  752. * <p>
  753. * The source commit may be null if the line was blamed to an uncommitted
  754. * revision, such as the working tree copy, or during a reverse blame if the
  755. * line survives to the end revision (e.g. the branch tip).
  756. *
  757. * @return current revision being blamed.
  758. */
  759. public RevCommit getSourceCommit() {
  760. return outCandidate.sourceCommit;
  761. }
  762. /** @return current author being blamed. */
  763. public PersonIdent getSourceAuthor() {
  764. return outCandidate.getAuthor();
  765. }
  766. /** @return current committer being blamed. */
  767. public PersonIdent getSourceCommitter() {
  768. RevCommit c = getSourceCommit();
  769. return c != null ? c.getCommitterIdent() : null;
  770. }
  771. /** @return path of the file being blamed. */
  772. public String getSourcePath() {
  773. return outCandidate.sourcePath.getPath();
  774. }
  775. /** @return rename score if a rename occurred in {@link #getSourceCommit}. */
  776. public int getRenameScore() {
  777. return outCandidate.renameScore;
  778. }
  779. /**
  780. * @return first line of the source data that has been blamed for the
  781. * current region. This is line number of where the region was added
  782. * during {@link #getSourceCommit()} in file
  783. * {@link #getSourcePath()}.
  784. */
  785. public int getSourceStart() {
  786. return outRegion.sourceStart;
  787. }
  788. /**
  789. * @return one past the range of the source data that has been blamed for
  790. * the current region. This is line number of where the region was
  791. * added during {@link #getSourceCommit()} in file
  792. * {@link #getSourcePath()}.
  793. */
  794. public int getSourceEnd() {
  795. Region r = outRegion;
  796. return r.sourceStart + r.length;
  797. }
  798. /**
  799. * @return first line of the result that {@link #getSourceCommit()} has been
  800. * blamed for providing. Line numbers use 0 based indexing.
  801. */
  802. public int getResultStart() {
  803. return outRegion.resultStart;
  804. }
  805. /**
  806. * @return one past the range of the result that {@link #getSourceCommit()}
  807. * has been blamed for providing. Line numbers use 0 based indexing.
  808. * Because a source cannot be blamed for an empty region of the
  809. * result, {@link #getResultEnd()} is always at least one larger
  810. * than {@link #getResultStart()}.
  811. */
  812. public int getResultEnd() {
  813. Region r = outRegion;
  814. return r.resultStart + r.length;
  815. }
  816. /**
  817. * @return number of lines in the current region being blamed to
  818. * {@link #getSourceCommit()}. This is always the value of the
  819. * expression {@code getResultEnd() - getResultStart()}, but also
  820. * {@code getSourceEnd() - getSourceStart()}.
  821. */
  822. public int getRegionLength() {
  823. return outRegion.length;
  824. }
  825. /**
  826. * @return complete contents of the source file blamed for the current
  827. * output region. This is the contents of {@link #getSourcePath()}
  828. * within {@link #getSourceCommit()}. The source contents is
  829. * temporarily available as an artifact of the blame algorithm. Most
  830. * applications will want the result contents for display to users.
  831. */
  832. public RawText getSourceContents() {
  833. return outCandidate.sourceText;
  834. }
  835. /**
  836. * @return complete file contents of the result file blame is annotating.
  837. * This value is accessible only after being configured and only
  838. * immediately before the first call to {@link #next()}. Returns
  839. * null if the path does not exist.
  840. * @throws IOException
  841. * repository cannot be read.
  842. * @throws IllegalStateException
  843. * {@link #next()} has already been invoked.
  844. */
  845. public RawText getResultContents() throws IOException {
  846. return queue != null ? queue.sourceText : null;
  847. }
  848. /** Release the current blame session. */
  849. public void release() {
  850. revPool.release();
  851. queue = null;
  852. outCandidate = null;
  853. outRegion = null;
  854. }
  855. private boolean find(RevCommit commit, PathFilter path) throws IOException {
  856. treeWalk.setFilter(path);
  857. treeWalk.reset(commit.getTree());
  858. if (treeWalk.next() && isFile(treeWalk.getRawMode(0))) {
  859. treeWalk.getObjectId(idBuf, 0);
  860. return true;
  861. }
  862. return false;
  863. }
  864. private static final boolean isFile(int rawMode) {
  865. return (rawMode & TYPE_FILE) == TYPE_FILE;
  866. }
  867. private DiffEntry findRename(RevCommit parent, RevCommit commit,
  868. PathFilter path) throws IOException {
  869. if (renameDetector == null)
  870. return null;
  871. treeWalk.setFilter(TreeFilter.ANY_DIFF);
  872. treeWalk.reset(parent.getTree(), commit.getTree());
  873. renameDetector.reset();
  874. renameDetector.addAll(DiffEntry.scan(treeWalk));
  875. for (DiffEntry ent : renameDetector.compute()) {
  876. if (isRename(ent) && ent.getNewPath().equals(path.getPath()))
  877. return ent;
  878. }
  879. return null;
  880. }
  881. private static boolean isRename(DiffEntry ent) {
  882. return ent.getChangeType() == ChangeType.RENAME
  883. || ent.getChangeType() == ChangeType.COPY;
  884. }
  885. private static final TreeFilter ID_DIFF = new TreeFilter() {
  886. @Override
  887. public boolean include(TreeWalk tw) {
  888. return !tw.idEqual(0, 1);
  889. }
  890. @Override
  891. public boolean shouldBeRecursive() {
  892. return false;
  893. }
  894. @Override
  895. public TreeFilter clone() {
  896. return this;
  897. }
  898. @Override
  899. public String toString() {
  900. return "ID_DIFF"; //$NON-NLS-1$
  901. }
  902. };
  903. }