You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

RenameDetector.java 23KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771
  1. /*
  2. * Copyright (C) 2010, Google Inc.
  3. * and other copyright owners as documented in the project's IP log.
  4. *
  5. * This program and the accompanying materials are made available
  6. * under the terms of the Eclipse Distribution License v1.0 which
  7. * accompanies this distribution, is reproduced below, and is
  8. * available at http://www.eclipse.org/org/documents/edl-v10.php
  9. *
  10. * All rights reserved.
  11. *
  12. * Redistribution and use in source and binary forms, with or
  13. * without modification, are permitted provided that the following
  14. * conditions are met:
  15. *
  16. * - Redistributions of source code must retain the above copyright
  17. * notice, this list of conditions and the following disclaimer.
  18. *
  19. * - Redistributions in binary form must reproduce the above
  20. * copyright notice, this list of conditions and the following
  21. * disclaimer in the documentation and/or other materials provided
  22. * with the distribution.
  23. *
  24. * - Neither the name of the Eclipse Foundation, Inc. nor the
  25. * names of its contributors may be used to endorse or promote
  26. * products derived from this software without specific prior
  27. * written permission.
  28. *
  29. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
  30. * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
  31. * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  32. * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  33. * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
  34. * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  35. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  36. * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  37. * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  38. * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
  39. * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  40. * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
  41. * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  42. */
  43. package org.eclipse.jgit.diff;
  44. import static org.eclipse.jgit.diff.DiffEntry.Side.NEW;
  45. import static org.eclipse.jgit.diff.DiffEntry.Side.OLD;
  46. import java.io.IOException;
  47. import java.util.ArrayList;
  48. import java.util.Arrays;
  49. import java.util.Collection;
  50. import java.util.Collections;
  51. import java.util.Comparator;
  52. import java.util.HashMap;
  53. import java.util.List;
  54. import org.eclipse.jgit.diff.DiffEntry.ChangeType;
  55. import org.eclipse.jgit.diff.SimilarityIndex.TableFullException;
  56. import org.eclipse.jgit.errors.CancelledException;
  57. import org.eclipse.jgit.internal.JGitText;
  58. import org.eclipse.jgit.lib.AbbreviatedObjectId;
  59. import org.eclipse.jgit.lib.FileMode;
  60. import org.eclipse.jgit.lib.NullProgressMonitor;
  61. import org.eclipse.jgit.lib.ObjectReader;
  62. import org.eclipse.jgit.lib.ProgressMonitor;
  63. import org.eclipse.jgit.lib.Repository;
  64. /**
  65. * Detect and resolve object renames.
  66. */
  67. public class RenameDetector {
  68. private static final int EXACT_RENAME_SCORE = 100;
  69. private static final Comparator<DiffEntry> DIFF_COMPARATOR = new Comparator<DiffEntry>() {
  70. @Override
  71. public int compare(DiffEntry a, DiffEntry b) {
  72. int cmp = nameOf(a).compareTo(nameOf(b));
  73. if (cmp == 0)
  74. cmp = sortOf(a.getChangeType()) - sortOf(b.getChangeType());
  75. return cmp;
  76. }
  77. private String nameOf(DiffEntry ent) {
  78. // Sort by the new name, unless the change is a delete. On
  79. // deletes the new name is /dev/null, so we sort instead by
  80. // the old name.
  81. //
  82. if (ent.changeType == ChangeType.DELETE)
  83. return ent.oldPath;
  84. return ent.newPath;
  85. }
  86. private int sortOf(ChangeType changeType) {
  87. // Sort deletes before adds so that a major type change for
  88. // a file path (such as symlink to regular file) will first
  89. // remove the path, then add it back with the new type.
  90. //
  91. switch (changeType) {
  92. case DELETE:
  93. return 1;
  94. case ADD:
  95. return 2;
  96. default:
  97. return 10;
  98. }
  99. }
  100. };
  101. private List<DiffEntry> entries;
  102. private List<DiffEntry> deleted;
  103. private List<DiffEntry> added;
  104. private boolean done;
  105. private final ObjectReader objectReader;
  106. /** Similarity score required to pair an add/delete as a rename. */
  107. private int renameScore = 60;
  108. /**
  109. * Similarity score required to keep modified file pairs together. Any
  110. * modified file pairs with a similarity score below this will be broken
  111. * apart.
  112. */
  113. private int breakScore = -1;
  114. /** Limit in the number of files to consider for renames. */
  115. private int renameLimit;
  116. /** Set if the number of adds or deletes was over the limit. */
  117. private boolean overRenameLimit;
  118. /**
  119. * Create a new rename detector for the given repository
  120. *
  121. * @param repo
  122. * the repository to use for rename detection
  123. */
  124. public RenameDetector(Repository repo) {
  125. this(repo.newObjectReader(), repo.getConfig().get(DiffConfig.KEY));
  126. }
  127. /**
  128. * Create a new rename detector with a specified reader and diff config.
  129. *
  130. * @param reader
  131. * reader to obtain objects from the repository with.
  132. * @param cfg
  133. * diff config specifying rename detection options.
  134. * @since 3.0
  135. */
  136. public RenameDetector(ObjectReader reader, DiffConfig cfg) {
  137. objectReader = reader.newReader();
  138. renameLimit = cfg.getRenameLimit();
  139. reset();
  140. }
  141. /**
  142. * Get rename score
  143. *
  144. * @return minimum score required to pair an add/delete as a rename. The
  145. * score ranges are within the bounds of (0, 100).
  146. */
  147. public int getRenameScore() {
  148. return renameScore;
  149. }
  150. /**
  151. * Set the minimum score required to pair an add/delete as a rename.
  152. * <p>
  153. * When comparing two files together their score must be greater than or
  154. * equal to the rename score for them to be considered a rename match. The
  155. * score is computed based on content similarity, so a score of 60 implies
  156. * that approximately 60% of the bytes in the files are identical.
  157. *
  158. * @param score
  159. * new rename score, must be within [0, 100].
  160. * @throws java.lang.IllegalArgumentException
  161. * the score was not within [0, 100].
  162. */
  163. public void setRenameScore(int score) {
  164. if (score < 0 || score > 100)
  165. throw new IllegalArgumentException(
  166. JGitText.get().similarityScoreMustBeWithinBounds);
  167. renameScore = score;
  168. }
  169. /**
  170. * Get break score
  171. *
  172. * @return the similarity score required to keep modified file pairs
  173. * together. Any modify pairs that score below this will be broken
  174. * apart into separate add/deletes. Values less than or equal to
  175. * zero indicate that no modifies will be broken apart. Values over
  176. * 100 cause all modify pairs to be broken.
  177. */
  178. public int getBreakScore() {
  179. return breakScore;
  180. }
  181. /**
  182. * Set break score
  183. *
  184. * @param breakScore
  185. * the similarity score required to keep modified file pairs
  186. * together. Any modify pairs that score below this will be
  187. * broken apart into separate add/deletes. Values less than or
  188. * equal to zero indicate that no modifies will be broken apart.
  189. * Values over 100 cause all modify pairs to be broken.
  190. */
  191. public void setBreakScore(int breakScore) {
  192. this.breakScore = breakScore;
  193. }
  194. /**
  195. * Get rename limit
  196. *
  197. * @return limit on number of paths to perform inexact rename detection
  198. */
  199. public int getRenameLimit() {
  200. return renameLimit;
  201. }
  202. /**
  203. * Set the limit on the number of files to perform inexact rename detection.
  204. * <p>
  205. * The rename detector has to build a square matrix of the rename limit on
  206. * each side, then perform that many file compares to determine similarity.
  207. * If 1000 files are added, and 1000 files are deleted, a 1000*1000 matrix
  208. * must be allocated, and 1,000,000 file compares may need to be performed.
  209. *
  210. * @param limit
  211. * new file limit. 0 means no limit; a negative number means no
  212. * inexact rename detection will be performed, only exact rename
  213. * detection.
  214. */
  215. public void setRenameLimit(int limit) {
  216. renameLimit = limit;
  217. }
  218. /**
  219. * Check if the detector is over the rename limit.
  220. * <p>
  221. * This method can be invoked either before or after {@code getEntries} has
  222. * been used to perform rename detection.
  223. *
  224. * @return true if the detector has more file additions or removals than the
  225. * rename limit is currently set to. In such configurations the
  226. * detector will skip expensive computation.
  227. */
  228. public boolean isOverRenameLimit() {
  229. if (done)
  230. return overRenameLimit;
  231. int cnt = Math.max(added.size(), deleted.size());
  232. return getRenameLimit() != 0 && getRenameLimit() < cnt;
  233. }
  234. /**
  235. * Add entries to be considered for rename detection.
  236. *
  237. * @param entriesToAdd
  238. * one or more entries to add.
  239. * @throws java.lang.IllegalStateException
  240. * if {@code getEntries} was already invoked.
  241. */
  242. public void addAll(Collection<DiffEntry> entriesToAdd) {
  243. if (done)
  244. throw new IllegalStateException(JGitText.get().renamesAlreadyFound);
  245. for (DiffEntry entry : entriesToAdd) {
  246. switch (entry.getChangeType()) {
  247. case ADD:
  248. added.add(entry);
  249. break;
  250. case DELETE:
  251. deleted.add(entry);
  252. break;
  253. case MODIFY:
  254. if (sameType(entry.getOldMode(), entry.getNewMode())) {
  255. entries.add(entry);
  256. } else {
  257. List<DiffEntry> tmp = DiffEntry.breakModify(entry);
  258. deleted.add(tmp.get(0));
  259. added.add(tmp.get(1));
  260. }
  261. break;
  262. case COPY:
  263. case RENAME:
  264. default:
  265. entries.add(entry);
  266. }
  267. }
  268. }
  269. /**
  270. * Add an entry to be considered for rename detection.
  271. *
  272. * @param entry
  273. * to add.
  274. * @throws java.lang.IllegalStateException
  275. * if {@code getEntries} was already invoked.
  276. */
  277. public void add(DiffEntry entry) {
  278. addAll(Collections.singletonList(entry));
  279. }
  280. /**
  281. * Detect renames in the current file set.
  282. * <p>
  283. * This convenience function runs without a progress monitor.
  284. *
  285. * @return an unmodifiable list of {@link org.eclipse.jgit.diff.DiffEntry}s
  286. * representing all files that have been changed.
  287. * @throws java.io.IOException
  288. * file contents cannot be read from the repository.
  289. */
  290. public List<DiffEntry> compute() throws IOException {
  291. return compute(NullProgressMonitor.INSTANCE);
  292. }
  293. /**
  294. * Detect renames in the current file set.
  295. *
  296. * @param pm
  297. * report progress during the detection phases.
  298. * @return an unmodifiable list of {@link org.eclipse.jgit.diff.DiffEntry}s
  299. * representing all files that have been changed.
  300. * @throws java.io.IOException
  301. * file contents cannot be read from the repository.
  302. * @throws CancelledException
  303. * if rename detection was cancelled
  304. */
  305. // TODO(ms): use org.eclipse.jgit.api.errors.CanceledException in next major
  306. // version
  307. public List<DiffEntry> compute(ProgressMonitor pm)
  308. throws IOException, CancelledException {
  309. if (!done) {
  310. try {
  311. return compute(objectReader, pm);
  312. } finally {
  313. objectReader.close();
  314. }
  315. }
  316. return Collections.unmodifiableList(entries);
  317. }
  318. /**
  319. * Detect renames in the current file set.
  320. *
  321. * @param reader
  322. * reader to obtain objects from the repository with.
  323. * @param pm
  324. * report progress during the detection phases.
  325. * @return an unmodifiable list of {@link org.eclipse.jgit.diff.DiffEntry}s
  326. * representing all files that have been changed.
  327. * @throws java.io.IOException
  328. * file contents cannot be read from the repository.
  329. * @throws CancelledException
  330. * if rename detection was cancelled
  331. */
  332. // TODO(ms): use org.eclipse.jgit.api.errors.CanceledException in next major
  333. // version
  334. public List<DiffEntry> compute(ObjectReader reader, ProgressMonitor pm)
  335. throws IOException, CancelledException {
  336. final ContentSource cs = ContentSource.create(reader);
  337. return compute(new ContentSource.Pair(cs, cs), pm);
  338. }
  339. /**
  340. * Detect renames in the current file set.
  341. *
  342. * @param reader
  343. * reader to obtain objects from the repository with.
  344. * @param pm
  345. * report progress during the detection phases.
  346. * @return an unmodifiable list of {@link org.eclipse.jgit.diff.DiffEntry}s
  347. * representing all files that have been changed.
  348. * @throws java.io.IOException
  349. * file contents cannot be read from the repository.
  350. * @throws CancelledException
  351. * if rename detection was cancelled
  352. */
  353. // TODO(ms): use org.eclipse.jgit.api.errors.CanceledException in next major
  354. // version
  355. public List<DiffEntry> compute(ContentSource.Pair reader, ProgressMonitor pm)
  356. throws IOException, CancelledException {
  357. if (!done) {
  358. done = true;
  359. if (pm == null)
  360. pm = NullProgressMonitor.INSTANCE;
  361. if (0 < breakScore)
  362. breakModifies(reader, pm);
  363. if (!added.isEmpty() && !deleted.isEmpty())
  364. findExactRenames(pm);
  365. if (!added.isEmpty() && !deleted.isEmpty())
  366. findContentRenames(reader, pm);
  367. if (0 < breakScore && !added.isEmpty() && !deleted.isEmpty())
  368. rejoinModifies(pm);
  369. entries.addAll(added);
  370. added = null;
  371. entries.addAll(deleted);
  372. deleted = null;
  373. Collections.sort(entries, DIFF_COMPARATOR);
  374. }
  375. return Collections.unmodifiableList(entries);
  376. }
  377. /**
  378. * Reset this rename detector for another rename detection pass.
  379. */
  380. public void reset() {
  381. entries = new ArrayList<>();
  382. deleted = new ArrayList<>();
  383. added = new ArrayList<>();
  384. done = false;
  385. }
  386. private void advanceOrCancel(ProgressMonitor pm) throws CancelledException {
  387. if (pm.isCancelled()) {
  388. throw new CancelledException(JGitText.get().renameCancelled);
  389. }
  390. pm.update(1);
  391. }
  392. private void breakModifies(ContentSource.Pair reader, ProgressMonitor pm)
  393. throws IOException, CancelledException {
  394. ArrayList<DiffEntry> newEntries = new ArrayList<>(entries.size());
  395. pm.beginTask(JGitText.get().renamesBreakingModifies, entries.size());
  396. for (int i = 0; i < entries.size(); i++) {
  397. DiffEntry e = entries.get(i);
  398. if (e.getChangeType() == ChangeType.MODIFY) {
  399. int score = calculateModifyScore(reader, e);
  400. if (score < breakScore) {
  401. List<DiffEntry> tmp = DiffEntry.breakModify(e);
  402. DiffEntry del = tmp.get(0);
  403. del.score = score;
  404. deleted.add(del);
  405. added.add(tmp.get(1));
  406. } else {
  407. newEntries.add(e);
  408. }
  409. } else {
  410. newEntries.add(e);
  411. }
  412. advanceOrCancel(pm);
  413. }
  414. entries = newEntries;
  415. }
  416. private void rejoinModifies(ProgressMonitor pm) throws CancelledException {
  417. HashMap<String, DiffEntry> nameMap = new HashMap<>();
  418. ArrayList<DiffEntry> newAdded = new ArrayList<>(added.size());
  419. pm.beginTask(JGitText.get().renamesRejoiningModifies, added.size()
  420. + deleted.size());
  421. for (DiffEntry src : deleted) {
  422. nameMap.put(src.oldPath, src);
  423. advanceOrCancel(pm);
  424. }
  425. for (DiffEntry dst : added) {
  426. DiffEntry src = nameMap.remove(dst.newPath);
  427. if (src != null) {
  428. if (sameType(src.oldMode, dst.newMode)) {
  429. entries.add(DiffEntry.pair(ChangeType.MODIFY, src, dst,
  430. src.score));
  431. } else {
  432. nameMap.put(src.oldPath, src);
  433. newAdded.add(dst);
  434. }
  435. } else {
  436. newAdded.add(dst);
  437. }
  438. advanceOrCancel(pm);
  439. }
  440. added = newAdded;
  441. deleted = new ArrayList<>(nameMap.values());
  442. }
  443. private int calculateModifyScore(ContentSource.Pair reader, DiffEntry d)
  444. throws IOException {
  445. try {
  446. SimilarityIndex src = new SimilarityIndex();
  447. src.hash(reader.open(OLD, d));
  448. src.sort();
  449. SimilarityIndex dst = new SimilarityIndex();
  450. dst.hash(reader.open(NEW, d));
  451. dst.sort();
  452. return src.score(dst, 100);
  453. } catch (TableFullException tableFull) {
  454. // If either table overflowed while being constructed, don't allow
  455. // the pair to be broken. Returning 1 higher than breakScore will
  456. // ensure its not similar, but not quite dissimilar enough to break.
  457. //
  458. overRenameLimit = true;
  459. return breakScore + 1;
  460. }
  461. }
  462. private void findContentRenames(ContentSource.Pair reader,
  463. ProgressMonitor pm)
  464. throws IOException, CancelledException {
  465. int cnt = Math.max(added.size(), deleted.size());
  466. if (getRenameLimit() == 0 || cnt <= getRenameLimit()) {
  467. SimilarityRenameDetector d;
  468. d = new SimilarityRenameDetector(reader, deleted, added);
  469. d.setRenameScore(getRenameScore());
  470. d.compute(pm);
  471. overRenameLimit |= d.isTableOverflow();
  472. deleted = d.getLeftOverSources();
  473. added = d.getLeftOverDestinations();
  474. entries.addAll(d.getMatches());
  475. } else {
  476. overRenameLimit = true;
  477. }
  478. }
  479. @SuppressWarnings("unchecked")
  480. private void findExactRenames(ProgressMonitor pm)
  481. throws CancelledException {
  482. pm.beginTask(JGitText.get().renamesFindingExact, //
  483. added.size() + added.size() + deleted.size()
  484. + added.size() * deleted.size());
  485. HashMap<AbbreviatedObjectId, Object> deletedMap = populateMap(deleted, pm);
  486. HashMap<AbbreviatedObjectId, Object> addedMap = populateMap(added, pm);
  487. ArrayList<DiffEntry> uniqueAdds = new ArrayList<>(added.size());
  488. ArrayList<List<DiffEntry>> nonUniqueAdds = new ArrayList<>();
  489. for (Object o : addedMap.values()) {
  490. if (o instanceof DiffEntry)
  491. uniqueAdds.add((DiffEntry) o);
  492. else
  493. nonUniqueAdds.add((List<DiffEntry>) o);
  494. }
  495. ArrayList<DiffEntry> left = new ArrayList<>(added.size());
  496. for (DiffEntry a : uniqueAdds) {
  497. Object del = deletedMap.get(a.newId);
  498. if (del instanceof DiffEntry) {
  499. // We have one add to one delete: pair them if they are the same
  500. // type
  501. DiffEntry e = (DiffEntry) del;
  502. if (sameType(e.oldMode, a.newMode)) {
  503. e.changeType = ChangeType.RENAME;
  504. entries.add(exactRename(e, a));
  505. } else {
  506. left.add(a);
  507. }
  508. } else if (del != null) {
  509. // We have one add to many deletes: find the delete with the
  510. // same type and closest name to the add, then pair them
  511. List<DiffEntry> list = (List<DiffEntry>) del;
  512. DiffEntry best = bestPathMatch(a, list);
  513. if (best != null) {
  514. best.changeType = ChangeType.RENAME;
  515. entries.add(exactRename(best, a));
  516. } else {
  517. left.add(a);
  518. }
  519. } else {
  520. left.add(a);
  521. }
  522. advanceOrCancel(pm);
  523. }
  524. for (List<DiffEntry> adds : nonUniqueAdds) {
  525. Object o = deletedMap.get(adds.get(0).newId);
  526. if (o instanceof DiffEntry) {
  527. // We have many adds to one delete: find the add with the same
  528. // type and closest name to the delete, then pair them. Mark the
  529. // rest as copies of the delete.
  530. DiffEntry d = (DiffEntry) o;
  531. DiffEntry best = bestPathMatch(d, adds);
  532. if (best != null) {
  533. d.changeType = ChangeType.RENAME;
  534. entries.add(exactRename(d, best));
  535. for (DiffEntry a : adds) {
  536. if (a != best) {
  537. if (sameType(d.oldMode, a.newMode)) {
  538. entries.add(exactCopy(d, a));
  539. } else {
  540. left.add(a);
  541. }
  542. }
  543. }
  544. } else {
  545. left.addAll(adds);
  546. }
  547. } else if (o != null) {
  548. // We have many adds to many deletes: score all the adds against
  549. // all the deletes by path name, take the best matches, pair
  550. // them as renames, then call the rest copies
  551. List<DiffEntry> dels = (List<DiffEntry>) o;
  552. long[] matrix = new long[dels.size() * adds.size()];
  553. int mNext = 0;
  554. for (int delIdx = 0; delIdx < dels.size(); delIdx++) {
  555. String deletedName = dels.get(delIdx).oldPath;
  556. for (int addIdx = 0; addIdx < adds.size(); addIdx++) {
  557. String addedName = adds.get(addIdx).newPath;
  558. int score = SimilarityRenameDetector.nameScore(addedName, deletedName);
  559. matrix[mNext] = SimilarityRenameDetector.encode(score, delIdx, addIdx);
  560. mNext++;
  561. if (pm.isCancelled()) {
  562. throw new CancelledException(
  563. JGitText.get().renameCancelled);
  564. }
  565. }
  566. }
  567. Arrays.sort(matrix);
  568. for (--mNext; mNext >= 0; mNext--) {
  569. long ent = matrix[mNext];
  570. int delIdx = SimilarityRenameDetector.srcFile(ent);
  571. int addIdx = SimilarityRenameDetector.dstFile(ent);
  572. DiffEntry d = dels.get(delIdx);
  573. DiffEntry a = adds.get(addIdx);
  574. if (a == null) {
  575. advanceOrCancel(pm);
  576. continue; // was already matched earlier
  577. }
  578. ChangeType type;
  579. if (d.changeType == ChangeType.DELETE) {
  580. // First use of this source file. Tag it as a rename so we
  581. // later know it is already been used as a rename, other
  582. // matches (if any) will claim themselves as copies instead.
  583. //
  584. d.changeType = ChangeType.RENAME;
  585. type = ChangeType.RENAME;
  586. } else {
  587. type = ChangeType.COPY;
  588. }
  589. entries.add(DiffEntry.pair(type, d, a, 100));
  590. adds.set(addIdx, null); // Claim the destination was matched.
  591. advanceOrCancel(pm);
  592. }
  593. } else {
  594. left.addAll(adds);
  595. }
  596. advanceOrCancel(pm);
  597. }
  598. added = left;
  599. deleted = new ArrayList<>(deletedMap.size());
  600. for (Object o : deletedMap.values()) {
  601. if (o instanceof DiffEntry) {
  602. DiffEntry e = (DiffEntry) o;
  603. if (e.changeType == ChangeType.DELETE)
  604. deleted.add(e);
  605. } else {
  606. List<DiffEntry> list = (List<DiffEntry>) o;
  607. for (DiffEntry e : list) {
  608. if (e.changeType == ChangeType.DELETE)
  609. deleted.add(e);
  610. }
  611. }
  612. }
  613. pm.endTask();
  614. }
  615. /**
  616. * Find the best match by file path for a given DiffEntry from a list of
  617. * DiffEntrys. The returned DiffEntry will be of the same type as <src>. If
  618. * no DiffEntry can be found that has the same type, this method will return
  619. * null.
  620. *
  621. * @param src
  622. * the DiffEntry to try to find a match for
  623. * @param list
  624. * a list of DiffEntrys to search through
  625. * @return the DiffEntry from <list> who's file path best matches <src>
  626. */
  627. private static DiffEntry bestPathMatch(DiffEntry src, List<DiffEntry> list) {
  628. DiffEntry best = null;
  629. int score = -1;
  630. for (DiffEntry d : list) {
  631. if (sameType(mode(d), mode(src))) {
  632. int tmp = SimilarityRenameDetector
  633. .nameScore(path(d), path(src));
  634. if (tmp > score) {
  635. best = d;
  636. score = tmp;
  637. }
  638. }
  639. }
  640. return best;
  641. }
  642. @SuppressWarnings("unchecked")
  643. private HashMap<AbbreviatedObjectId, Object> populateMap(
  644. List<DiffEntry> diffEntries, ProgressMonitor pm)
  645. throws CancelledException {
  646. HashMap<AbbreviatedObjectId, Object> map = new HashMap<>();
  647. for (DiffEntry de : diffEntries) {
  648. Object old = map.put(id(de), de);
  649. if (old instanceof DiffEntry) {
  650. ArrayList<DiffEntry> list = new ArrayList<>(2);
  651. list.add((DiffEntry) old);
  652. list.add(de);
  653. map.put(id(de), list);
  654. } else if (old != null) {
  655. // Must be a list of DiffEntries
  656. ((List<DiffEntry>) old).add(de);
  657. map.put(id(de), old);
  658. }
  659. advanceOrCancel(pm);
  660. }
  661. return map;
  662. }
  663. private static String path(DiffEntry de) {
  664. return de.changeType == ChangeType.DELETE ? de.oldPath : de.newPath;
  665. }
  666. private static FileMode mode(DiffEntry de) {
  667. return de.changeType == ChangeType.DELETE ? de.oldMode : de.newMode;
  668. }
  669. private static AbbreviatedObjectId id(DiffEntry de) {
  670. return de.changeType == ChangeType.DELETE ? de.oldId : de.newId;
  671. }
  672. static boolean sameType(FileMode a, FileMode b) {
  673. // Files have to be of the same type in order to rename them.
  674. // We would never want to rename a file to a gitlink, or a
  675. // symlink to a file.
  676. //
  677. int aType = a.getBits() & FileMode.TYPE_MASK;
  678. int bType = b.getBits() & FileMode.TYPE_MASK;
  679. return aType == bType;
  680. }
  681. private static DiffEntry exactRename(DiffEntry src, DiffEntry dst) {
  682. return DiffEntry.pair(ChangeType.RENAME, src, dst, EXACT_RENAME_SCORE);
  683. }
  684. private static DiffEntry exactCopy(DiffEntry src, DiffEntry dst) {
  685. return DiffEntry.pair(ChangeType.COPY, src, dst, EXACT_RENAME_SCORE);
  686. }
  687. }