You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

RenameDetector.java 23KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794
  1. /*
  2. * Copyright (C) 2010, Google Inc. and others
  3. *
  4. * This program and the accompanying materials are made available under the
  5. * terms of the Eclipse Distribution License v. 1.0 which is available at
  6. * https://www.eclipse.org/org/documents/edl-v10.php.
  7. *
  8. * SPDX-License-Identifier: BSD-3-Clause
  9. */
  10. package org.eclipse.jgit.diff;
  11. import static org.eclipse.jgit.diff.DiffEntry.Side.NEW;
  12. import static org.eclipse.jgit.diff.DiffEntry.Side.OLD;
  13. import static org.eclipse.jgit.storage.pack.PackConfig.DEFAULT_BIG_FILE_THRESHOLD;
  14. import java.io.IOException;
  15. import java.util.ArrayList;
  16. import java.util.Arrays;
  17. import java.util.Collection;
  18. import java.util.Collections;
  19. import java.util.Comparator;
  20. import java.util.HashMap;
  21. import java.util.List;
  22. import org.eclipse.jgit.diff.DiffEntry.ChangeType;
  23. import org.eclipse.jgit.diff.SimilarityIndex.TableFullException;
  24. import org.eclipse.jgit.errors.CancelledException;
  25. import org.eclipse.jgit.internal.JGitText;
  26. import org.eclipse.jgit.lib.AbbreviatedObjectId;
  27. import org.eclipse.jgit.lib.FileMode;
  28. import org.eclipse.jgit.lib.NullProgressMonitor;
  29. import org.eclipse.jgit.lib.ObjectReader;
  30. import org.eclipse.jgit.lib.ProgressMonitor;
  31. import org.eclipse.jgit.lib.Repository;
  32. /**
  33. * Detect and resolve object renames.
  34. */
  35. public class RenameDetector {
  36. private static final int EXACT_RENAME_SCORE = 100;
  37. private static final Comparator<DiffEntry> DIFF_COMPARATOR = new Comparator<DiffEntry>() {
  38. @Override
  39. public int compare(DiffEntry a, DiffEntry b) {
  40. int cmp = nameOf(a).compareTo(nameOf(b));
  41. if (cmp == 0)
  42. cmp = sortOf(a.getChangeType()) - sortOf(b.getChangeType());
  43. return cmp;
  44. }
  45. private String nameOf(DiffEntry ent) {
  46. // Sort by the new name, unless the change is a delete. On
  47. // deletes the new name is /dev/null, so we sort instead by
  48. // the old name.
  49. //
  50. if (ent.changeType == ChangeType.DELETE)
  51. return ent.oldPath;
  52. return ent.newPath;
  53. }
  54. private int sortOf(ChangeType changeType) {
  55. // Sort deletes before adds so that a major type change for
  56. // a file path (such as symlink to regular file) will first
  57. // remove the path, then add it back with the new type.
  58. //
  59. switch (changeType) {
  60. case DELETE:
  61. return 1;
  62. case ADD:
  63. return 2;
  64. default:
  65. return 10;
  66. }
  67. }
  68. };
  69. private List<DiffEntry> entries;
  70. private List<DiffEntry> deleted;
  71. private List<DiffEntry> added;
  72. private boolean done;
  73. private final ObjectReader objectReader;
  74. /** Similarity score required to pair an add/delete as a rename. */
  75. private int renameScore = 60;
  76. /**
  77. * Similarity score required to keep modified file pairs together. Any
  78. * modified file pairs with a similarity score below this will be broken
  79. * apart.
  80. */
  81. private int breakScore = -1;
  82. /** Limit in the number of files to consider for renames. */
  83. private int renameLimit;
  84. /**
  85. * File size threshold (in bytes) for detecting renames. Files larger
  86. * than this size will not be processed for renames.
  87. */
  88. private int bigFileThreshold = DEFAULT_BIG_FILE_THRESHOLD;
  89. /**
  90. * Skip detecting content renames for binary files. Content renames are
  91. * those that are not exact, that is with a slight content modification
  92. * between the two files.
  93. */
  94. private boolean skipContentRenamesForBinaryFiles = false;
  95. /** Set if the number of adds or deletes was over the limit. */
  96. private boolean overRenameLimit;
  97. /**
  98. * Create a new rename detector for the given repository
  99. *
  100. * @param repo
  101. * the repository to use for rename detection
  102. */
  103. public RenameDetector(Repository repo) {
  104. this(repo.newObjectReader(), repo.getConfig().get(DiffConfig.KEY));
  105. }
  106. /**
  107. * Create a new rename detector with a specified reader and diff config.
  108. *
  109. * @param reader
  110. * reader to obtain objects from the repository with.
  111. * @param cfg
  112. * diff config specifying rename detection options.
  113. * @since 3.0
  114. */
  115. public RenameDetector(ObjectReader reader, DiffConfig cfg) {
  116. objectReader = reader.newReader();
  117. renameLimit = cfg.getRenameLimit();
  118. reset();
  119. }
  120. /**
  121. * Get rename score
  122. *
  123. * @return minimum score required to pair an add/delete as a rename. The
  124. * score ranges are within the bounds of (0, 100).
  125. */
  126. public int getRenameScore() {
  127. return renameScore;
  128. }
  129. /**
  130. * Set the minimum score required to pair an add/delete as a rename.
  131. * <p>
  132. * When comparing two files together their score must be greater than or
  133. * equal to the rename score for them to be considered a rename match. The
  134. * score is computed based on content similarity, so a score of 60 implies
  135. * that approximately 60% of the bytes in the files are identical.
  136. *
  137. * @param score
  138. * new rename score, must be within [0, 100].
  139. * @throws java.lang.IllegalArgumentException
  140. * the score was not within [0, 100].
  141. */
  142. public void setRenameScore(int score) {
  143. if (score < 0 || score > 100)
  144. throw new IllegalArgumentException(
  145. JGitText.get().similarityScoreMustBeWithinBounds);
  146. renameScore = score;
  147. }
  148. /**
  149. * Get break score
  150. *
  151. * @return the similarity score required to keep modified file pairs
  152. * together. Any modify pairs that score below this will be broken
  153. * apart into separate add/deletes. Values less than or equal to
  154. * zero indicate that no modifies will be broken apart. Values over
  155. * 100 cause all modify pairs to be broken.
  156. */
  157. public int getBreakScore() {
  158. return breakScore;
  159. }
  160. /**
  161. * Set break score
  162. *
  163. * @param breakScore
  164. * the similarity score required to keep modified file pairs
  165. * together. Any modify pairs that score below this will be
  166. * broken apart into separate add/deletes. Values less than or
  167. * equal to zero indicate that no modifies will be broken apart.
  168. * Values over 100 cause all modify pairs to be broken.
  169. */
  170. public void setBreakScore(int breakScore) {
  171. this.breakScore = breakScore;
  172. }
  173. /**
  174. * Get rename limit
  175. *
  176. * @return limit on number of paths to perform inexact rename detection
  177. */
  178. public int getRenameLimit() {
  179. return renameLimit;
  180. }
  181. /**
  182. * Set the limit on the number of files to perform inexact rename detection.
  183. * <p>
  184. * The rename detector has to build a square matrix of the rename limit on
  185. * each side, then perform that many file compares to determine similarity.
  186. * If 1000 files are added, and 1000 files are deleted, a 1000*1000 matrix
  187. * must be allocated, and 1,000,000 file compares may need to be performed.
  188. *
  189. * @param limit
  190. * new file limit. 0 means no limit; a negative number means no
  191. * inexact rename detection will be performed, only exact rename
  192. * detection.
  193. */
  194. public void setRenameLimit(int limit) {
  195. renameLimit = limit;
  196. }
  197. /**
  198. * Get file size threshold for detecting renames. Files larger
  199. * than this size will not be processed for rename detection.
  200. *
  201. * @return threshold in bytes of the file size.
  202. * @since 5.12
  203. */
  204. public int getBigFileThreshold() { return bigFileThreshold; }
  205. /**
  206. * Set the file size threshold for detecting renames. Files larger than this
  207. * threshold will be skipped during rename detection computation.
  208. *
  209. * @param threshold file size threshold in bytes.
  210. * @since 5.12
  211. */
  212. public void setBigFileThreshold(int threshold) {
  213. this.bigFileThreshold = threshold;
  214. }
  215. /**
  216. * Get skipping detecting content renames for binary files.
  217. *
  218. * @return true if content renames should be skipped for binary files, false otherwise.
  219. * @since 5.12
  220. */
  221. public boolean getSkipContentRenamesForBinaryFiles() {
  222. return skipContentRenamesForBinaryFiles;
  223. }
  224. /**
  225. * Sets skipping detecting content renames for binary files.
  226. *
  227. * @param value true if content renames should be skipped for binary files, false otherwise.
  228. * @since 5.12
  229. */
  230. public void setSkipContentRenamesForBinaryFiles(boolean value) {
  231. this.skipContentRenamesForBinaryFiles = value;
  232. }
  233. /**
  234. * Check if the detector is over the rename limit.
  235. * <p>
  236. * This method can be invoked either before or after {@code getEntries} has
  237. * been used to perform rename detection.
  238. *
  239. * @return true if the detector has more file additions or removals than the
  240. * rename limit is currently set to. In such configurations the
  241. * detector will skip expensive computation.
  242. */
  243. public boolean isOverRenameLimit() {
  244. if (done)
  245. return overRenameLimit;
  246. int cnt = Math.max(added.size(), deleted.size());
  247. return getRenameLimit() != 0 && getRenameLimit() < cnt;
  248. }
  249. /**
  250. * Add entries to be considered for rename detection.
  251. *
  252. * @param entriesToAdd
  253. * one or more entries to add.
  254. * @throws java.lang.IllegalStateException
  255. * if {@code getEntries} was already invoked.
  256. */
  257. public void addAll(Collection<DiffEntry> entriesToAdd) {
  258. if (done)
  259. throw new IllegalStateException(JGitText.get().renamesAlreadyFound);
  260. for (DiffEntry entry : entriesToAdd) {
  261. switch (entry.getChangeType()) {
  262. case ADD:
  263. added.add(entry);
  264. break;
  265. case DELETE:
  266. deleted.add(entry);
  267. break;
  268. case MODIFY:
  269. if (sameType(entry.getOldMode(), entry.getNewMode())) {
  270. entries.add(entry);
  271. } else {
  272. List<DiffEntry> tmp = DiffEntry.breakModify(entry);
  273. deleted.add(tmp.get(0));
  274. added.add(tmp.get(1));
  275. }
  276. break;
  277. case COPY:
  278. case RENAME:
  279. default:
  280. entries.add(entry);
  281. }
  282. }
  283. }
  284. /**
  285. * Add an entry to be considered for rename detection.
  286. *
  287. * @param entry
  288. * to add.
  289. * @throws java.lang.IllegalStateException
  290. * if {@code getEntries} was already invoked.
  291. */
  292. public void add(DiffEntry entry) {
  293. addAll(Collections.singletonList(entry));
  294. }
  295. /**
  296. * Detect renames in the current file set.
  297. * <p>
  298. * This convenience function runs without a progress monitor.
  299. *
  300. * @return an unmodifiable list of {@link org.eclipse.jgit.diff.DiffEntry}s
  301. * representing all files that have been changed.
  302. * @throws java.io.IOException
  303. * file contents cannot be read from the repository.
  304. */
  305. public List<DiffEntry> compute() throws IOException {
  306. return compute(NullProgressMonitor.INSTANCE);
  307. }
  308. /**
  309. * Detect renames in the current file set.
  310. *
  311. * @param pm
  312. * report progress during the detection phases.
  313. * @return an unmodifiable list of {@link org.eclipse.jgit.diff.DiffEntry}s
  314. * representing all files that have been changed.
  315. * @throws java.io.IOException
  316. * file contents cannot be read from the repository.
  317. * @throws CancelledException
  318. * if rename detection was cancelled
  319. */
  320. // TODO(ms): use org.eclipse.jgit.api.errors.CanceledException in next major
  321. // version
  322. public List<DiffEntry> compute(ProgressMonitor pm)
  323. throws IOException, CancelledException {
  324. if (!done) {
  325. try {
  326. return compute(objectReader, pm);
  327. } finally {
  328. objectReader.close();
  329. }
  330. }
  331. return Collections.unmodifiableList(entries);
  332. }
  333. /**
  334. * Detect renames in the current file set.
  335. *
  336. * @param reader
  337. * reader to obtain objects from the repository with.
  338. * @param pm
  339. * report progress during the detection phases.
  340. * @return an unmodifiable list of {@link org.eclipse.jgit.diff.DiffEntry}s
  341. * representing all files that have been changed.
  342. * @throws java.io.IOException
  343. * file contents cannot be read from the repository.
  344. * @throws CancelledException
  345. * if rename detection was cancelled
  346. */
  347. // TODO(ms): use org.eclipse.jgit.api.errors.CanceledException in next major
  348. // version
  349. public List<DiffEntry> compute(ObjectReader reader, ProgressMonitor pm)
  350. throws IOException, CancelledException {
  351. final ContentSource cs = ContentSource.create(reader);
  352. return compute(new ContentSource.Pair(cs, cs), pm);
  353. }
  354. /**
  355. * Detect renames in the current file set.
  356. *
  357. * @param reader
  358. * reader to obtain objects from the repository with.
  359. * @param pm
  360. * report progress during the detection phases.
  361. * @return an unmodifiable list of {@link org.eclipse.jgit.diff.DiffEntry}s
  362. * representing all files that have been changed.
  363. * @throws java.io.IOException
  364. * file contents cannot be read from the repository.
  365. * @throws CancelledException
  366. * if rename detection was cancelled
  367. */
  368. // TODO(ms): use org.eclipse.jgit.api.errors.CanceledException in next major
  369. // version
  370. public List<DiffEntry> compute(ContentSource.Pair reader, ProgressMonitor pm)
  371. throws IOException, CancelledException {
  372. if (!done) {
  373. done = true;
  374. if (pm == null)
  375. pm = NullProgressMonitor.INSTANCE;
  376. if (0 < breakScore)
  377. breakModifies(reader, pm);
  378. if (!added.isEmpty() && !deleted.isEmpty())
  379. findExactRenames(pm);
  380. if (!added.isEmpty() && !deleted.isEmpty())
  381. findContentRenames(reader, pm);
  382. if (0 < breakScore && !added.isEmpty() && !deleted.isEmpty())
  383. rejoinModifies(pm);
  384. entries.addAll(added);
  385. added = null;
  386. entries.addAll(deleted);
  387. deleted = null;
  388. Collections.sort(entries, DIFF_COMPARATOR);
  389. }
  390. return Collections.unmodifiableList(entries);
  391. }
  392. /**
  393. * Reset this rename detector for another rename detection pass.
  394. */
  395. public void reset() {
  396. entries = new ArrayList<>();
  397. deleted = new ArrayList<>();
  398. added = new ArrayList<>();
  399. done = false;
  400. }
  401. private void advanceOrCancel(ProgressMonitor pm) throws CancelledException {
  402. if (pm.isCancelled()) {
  403. throw new CancelledException(JGitText.get().renameCancelled);
  404. }
  405. pm.update(1);
  406. }
  407. private void breakModifies(ContentSource.Pair reader, ProgressMonitor pm)
  408. throws IOException, CancelledException {
  409. ArrayList<DiffEntry> newEntries = new ArrayList<>(entries.size());
  410. pm.beginTask(JGitText.get().renamesBreakingModifies, entries.size());
  411. for (int i = 0; i < entries.size(); i++) {
  412. DiffEntry e = entries.get(i);
  413. if (e.getChangeType() == ChangeType.MODIFY) {
  414. int score = calculateModifyScore(reader, e);
  415. if (score < breakScore) {
  416. List<DiffEntry> tmp = DiffEntry.breakModify(e);
  417. DiffEntry del = tmp.get(0);
  418. del.score = score;
  419. deleted.add(del);
  420. added.add(tmp.get(1));
  421. } else {
  422. newEntries.add(e);
  423. }
  424. } else {
  425. newEntries.add(e);
  426. }
  427. advanceOrCancel(pm);
  428. }
  429. entries = newEntries;
  430. }
  431. private void rejoinModifies(ProgressMonitor pm) throws CancelledException {
  432. HashMap<String, DiffEntry> nameMap = new HashMap<>();
  433. ArrayList<DiffEntry> newAdded = new ArrayList<>(added.size());
  434. pm.beginTask(JGitText.get().renamesRejoiningModifies, added.size()
  435. + deleted.size());
  436. for (DiffEntry src : deleted) {
  437. nameMap.put(src.oldPath, src);
  438. advanceOrCancel(pm);
  439. }
  440. for (DiffEntry dst : added) {
  441. DiffEntry src = nameMap.remove(dst.newPath);
  442. if (src != null) {
  443. if (sameType(src.oldMode, dst.newMode)) {
  444. entries.add(DiffEntry.pair(ChangeType.MODIFY, src, dst,
  445. src.score));
  446. } else {
  447. nameMap.put(src.oldPath, src);
  448. newAdded.add(dst);
  449. }
  450. } else {
  451. newAdded.add(dst);
  452. }
  453. advanceOrCancel(pm);
  454. }
  455. added = newAdded;
  456. deleted = new ArrayList<>(nameMap.values());
  457. }
  458. private int calculateModifyScore(ContentSource.Pair reader, DiffEntry d)
  459. throws IOException {
  460. try {
  461. SimilarityIndex src = new SimilarityIndex();
  462. src.hash(reader.open(OLD, d));
  463. src.sort();
  464. SimilarityIndex dst = new SimilarityIndex();
  465. dst.hash(reader.open(NEW, d));
  466. dst.sort();
  467. return src.score(dst, 100);
  468. } catch (TableFullException tableFull) {
  469. // If either table overflowed while being constructed, don't allow
  470. // the pair to be broken. Returning 1 higher than breakScore will
  471. // ensure its not similar, but not quite dissimilar enough to break.
  472. //
  473. overRenameLimit = true;
  474. return breakScore + 1;
  475. }
  476. }
  477. private void findContentRenames(ContentSource.Pair reader,
  478. ProgressMonitor pm)
  479. throws IOException, CancelledException {
  480. int cnt = Math.max(added.size(), deleted.size());
  481. if (getRenameLimit() == 0 || cnt <= getRenameLimit()) {
  482. SimilarityRenameDetector d;
  483. d = new SimilarityRenameDetector(reader, deleted, added);
  484. d.setRenameScore(getRenameScore());
  485. d.setBigFileThreshold(getBigFileThreshold());
  486. d.setSkipBinaryFiles(getSkipContentRenamesForBinaryFiles());
  487. d.compute(pm);
  488. overRenameLimit |= d.isTableOverflow();
  489. deleted = d.getLeftOverSources();
  490. added = d.getLeftOverDestinations();
  491. entries.addAll(d.getMatches());
  492. } else {
  493. overRenameLimit = true;
  494. }
  495. }
  496. @SuppressWarnings("unchecked")
  497. private void findExactRenames(ProgressMonitor pm)
  498. throws CancelledException {
  499. pm.beginTask(JGitText.get().renamesFindingExact, //
  500. added.size() + added.size() + deleted.size()
  501. + added.size() * deleted.size());
  502. HashMap<AbbreviatedObjectId, Object> deletedMap = populateMap(deleted, pm);
  503. HashMap<AbbreviatedObjectId, Object> addedMap = populateMap(added, pm);
  504. ArrayList<DiffEntry> uniqueAdds = new ArrayList<>(added.size());
  505. ArrayList<List<DiffEntry>> nonUniqueAdds = new ArrayList<>();
  506. for (Object o : addedMap.values()) {
  507. if (o instanceof DiffEntry)
  508. uniqueAdds.add((DiffEntry) o);
  509. else
  510. nonUniqueAdds.add((List<DiffEntry>) o);
  511. }
  512. ArrayList<DiffEntry> left = new ArrayList<>(added.size());
  513. for (DiffEntry a : uniqueAdds) {
  514. Object del = deletedMap.get(a.newId);
  515. if (del instanceof DiffEntry) {
  516. // We have one add to one delete: pair them if they are the same
  517. // type
  518. DiffEntry e = (DiffEntry) del;
  519. if (sameType(e.oldMode, a.newMode)) {
  520. e.changeType = ChangeType.RENAME;
  521. entries.add(exactRename(e, a));
  522. } else {
  523. left.add(a);
  524. }
  525. } else if (del != null) {
  526. // We have one add to many deletes: find the delete with the
  527. // same type and closest name to the add, then pair them
  528. List<DiffEntry> list = (List<DiffEntry>) del;
  529. DiffEntry best = bestPathMatch(a, list);
  530. if (best != null) {
  531. best.changeType = ChangeType.RENAME;
  532. entries.add(exactRename(best, a));
  533. } else {
  534. left.add(a);
  535. }
  536. } else {
  537. left.add(a);
  538. }
  539. advanceOrCancel(pm);
  540. }
  541. for (List<DiffEntry> adds : nonUniqueAdds) {
  542. Object o = deletedMap.get(adds.get(0).newId);
  543. if (o instanceof DiffEntry) {
  544. // We have many adds to one delete: find the add with the same
  545. // type and closest name to the delete, then pair them. Mark the
  546. // rest as copies of the delete.
  547. DiffEntry d = (DiffEntry) o;
  548. DiffEntry best = bestPathMatch(d, adds);
  549. if (best != null) {
  550. d.changeType = ChangeType.RENAME;
  551. entries.add(exactRename(d, best));
  552. for (DiffEntry a : adds) {
  553. if (a != best) {
  554. if (sameType(d.oldMode, a.newMode)) {
  555. entries.add(exactCopy(d, a));
  556. } else {
  557. left.add(a);
  558. }
  559. }
  560. }
  561. } else {
  562. left.addAll(adds);
  563. }
  564. } else if (o != null) {
  565. // We have many adds to many deletes: score all the adds against
  566. // all the deletes by path name, take the best matches, pair
  567. // them as renames, then call the rest copies
  568. List<DiffEntry> dels = (List<DiffEntry>) o;
  569. long[] matrix = new long[dels.size() * adds.size()];
  570. int mNext = 0;
  571. for (int delIdx = 0; delIdx < dels.size(); delIdx++) {
  572. String deletedName = dels.get(delIdx).oldPath;
  573. for (int addIdx = 0; addIdx < adds.size(); addIdx++) {
  574. String addedName = adds.get(addIdx).newPath;
  575. int score = SimilarityRenameDetector.nameScore(addedName, deletedName);
  576. matrix[mNext] = SimilarityRenameDetector.encode(score, delIdx, addIdx);
  577. mNext++;
  578. if (pm.isCancelled()) {
  579. throw new CancelledException(
  580. JGitText.get().renameCancelled);
  581. }
  582. }
  583. }
  584. Arrays.sort(matrix);
  585. for (--mNext; mNext >= 0; mNext--) {
  586. long ent = matrix[mNext];
  587. int delIdx = SimilarityRenameDetector.srcFile(ent);
  588. int addIdx = SimilarityRenameDetector.dstFile(ent);
  589. DiffEntry d = dels.get(delIdx);
  590. DiffEntry a = adds.get(addIdx);
  591. if (a == null) {
  592. advanceOrCancel(pm);
  593. continue; // was already matched earlier
  594. }
  595. ChangeType type;
  596. if (d.changeType == ChangeType.DELETE) {
  597. // First use of this source file. Tag it as a rename so we
  598. // later know it is already been used as a rename, other
  599. // matches (if any) will claim themselves as copies instead.
  600. //
  601. d.changeType = ChangeType.RENAME;
  602. type = ChangeType.RENAME;
  603. } else {
  604. type = ChangeType.COPY;
  605. }
  606. entries.add(DiffEntry.pair(type, d, a, 100));
  607. adds.set(addIdx, null); // Claim the destination was matched.
  608. advanceOrCancel(pm);
  609. }
  610. } else {
  611. left.addAll(adds);
  612. }
  613. advanceOrCancel(pm);
  614. }
  615. added = left;
  616. deleted = new ArrayList<>(deletedMap.size());
  617. for (Object o : deletedMap.values()) {
  618. if (o instanceof DiffEntry) {
  619. DiffEntry e = (DiffEntry) o;
  620. if (e.changeType == ChangeType.DELETE)
  621. deleted.add(e);
  622. } else {
  623. List<DiffEntry> list = (List<DiffEntry>) o;
  624. for (DiffEntry e : list) {
  625. if (e.changeType == ChangeType.DELETE)
  626. deleted.add(e);
  627. }
  628. }
  629. }
  630. pm.endTask();
  631. }
  632. /**
  633. * Find the best match by file path for a given DiffEntry from a list of
  634. * DiffEntrys. The returned DiffEntry will be of the same type as <src>. If
  635. * no DiffEntry can be found that has the same type, this method will return
  636. * null.
  637. *
  638. * @param src
  639. * the DiffEntry to try to find a match for
  640. * @param list
  641. * a list of DiffEntrys to search through
  642. * @return the DiffEntry from <list> who's file path best matches <src>
  643. */
  644. private static DiffEntry bestPathMatch(DiffEntry src, List<DiffEntry> list) {
  645. DiffEntry best = null;
  646. int score = -1;
  647. for (DiffEntry d : list) {
  648. if (sameType(mode(d), mode(src))) {
  649. int tmp = SimilarityRenameDetector
  650. .nameScore(path(d), path(src));
  651. if (tmp > score) {
  652. best = d;
  653. score = tmp;
  654. }
  655. }
  656. }
  657. return best;
  658. }
  659. @SuppressWarnings("unchecked")
  660. private HashMap<AbbreviatedObjectId, Object> populateMap(
  661. List<DiffEntry> diffEntries, ProgressMonitor pm)
  662. throws CancelledException {
  663. HashMap<AbbreviatedObjectId, Object> map = new HashMap<>();
  664. for (DiffEntry de : diffEntries) {
  665. Object old = map.put(id(de), de);
  666. if (old instanceof DiffEntry) {
  667. ArrayList<DiffEntry> list = new ArrayList<>(2);
  668. list.add((DiffEntry) old);
  669. list.add(de);
  670. map.put(id(de), list);
  671. } else if (old != null) {
  672. // Must be a list of DiffEntries
  673. ((List<DiffEntry>) old).add(de);
  674. map.put(id(de), old);
  675. }
  676. advanceOrCancel(pm);
  677. }
  678. return map;
  679. }
  680. private static String path(DiffEntry de) {
  681. return de.changeType == ChangeType.DELETE ? de.oldPath : de.newPath;
  682. }
  683. private static FileMode mode(DiffEntry de) {
  684. return de.changeType == ChangeType.DELETE ? de.oldMode : de.newMode;
  685. }
  686. private static AbbreviatedObjectId id(DiffEntry de) {
  687. return de.changeType == ChangeType.DELETE ? de.oldId : de.newId;
  688. }
  689. static boolean sameType(FileMode a, FileMode b) {
  690. // Files have to be of the same type in order to rename them.
  691. // We would never want to rename a file to a gitlink, or a
  692. // symlink to a file.
  693. //
  694. int aType = a.getBits() & FileMode.TYPE_MASK;
  695. int bType = b.getBits() & FileMode.TYPE_MASK;
  696. return aType == bType;
  697. }
  698. private static DiffEntry exactRename(DiffEntry src, DiffEntry dst) {
  699. return DiffEntry.pair(ChangeType.RENAME, src, dst, EXACT_RENAME_SCORE);
  700. }
  701. private static DiffEntry exactCopy(DiffEntry src, DiffEntry dst) {
  702. return DiffEntry.pair(ChangeType.COPY, src, dst, EXACT_RENAME_SCORE);
  703. }
  704. }