You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

RenameDetector.java 12KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408
  1. /*
  2. * Copyright (C) 2010, Google Inc.
  3. * and other copyright owners as documented in the project's IP log.
  4. *
  5. * This program and the accompanying materials are made available
  6. * under the terms of the Eclipse Distribution License v1.0 which
  7. * accompanies this distribution, is reproduced below, and is
  8. * available at http://www.eclipse.org/org/documents/edl-v10.php
  9. *
  10. * All rights reserved.
  11. *
  12. * Redistribution and use in source and binary forms, with or
  13. * without modification, are permitted provided that the following
  14. * conditions are met:
  15. *
  16. * - Redistributions of source code must retain the above copyright
  17. * notice, this list of conditions and the following disclaimer.
  18. *
  19. * - Redistributions in binary form must reproduce the above
  20. * copyright notice, this list of conditions and the following
  21. * disclaimer in the documentation and/or other materials provided
  22. * with the distribution.
  23. *
  24. * - Neither the name of the Eclipse Foundation, Inc. nor the
  25. * names of its contributors may be used to endorse or promote
  26. * products derived from this software without specific prior
  27. * written permission.
  28. *
  29. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
  30. * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
  31. * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  32. * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  33. * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
  34. * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  35. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  36. * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  37. * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  38. * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
  39. * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  40. * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
  41. * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  42. */
  43. package org.eclipse.jgit.diff;
  44. import java.io.IOException;
  45. import java.util.ArrayList;
  46. import java.util.Collection;
  47. import java.util.Collections;
  48. import java.util.Comparator;
  49. import java.util.HashMap;
  50. import java.util.List;
  51. import org.eclipse.jgit.JGitText;
  52. import org.eclipse.jgit.diff.DiffEntry.ChangeType;
  53. import org.eclipse.jgit.lib.AbbreviatedObjectId;
  54. import org.eclipse.jgit.lib.Config;
  55. import org.eclipse.jgit.lib.FileMode;
  56. import org.eclipse.jgit.lib.NullProgressMonitor;
  57. import org.eclipse.jgit.lib.ProgressMonitor;
  58. import org.eclipse.jgit.lib.Repository;
  59. /** Detect and resolve object renames. */
  60. public class RenameDetector {
  61. private static final int EXACT_RENAME_SCORE = 100;
  62. private static final Comparator<DiffEntry> DIFF_COMPARATOR = new Comparator<DiffEntry>() {
  63. public int compare(DiffEntry a, DiffEntry b) {
  64. int cmp = nameOf(a).compareTo(nameOf(b));
  65. if (cmp == 0)
  66. cmp = sortOf(a.getChangeType()) - sortOf(b.getChangeType());
  67. return cmp;
  68. }
  69. private String nameOf(DiffEntry ent) {
  70. // Sort by the new name, unless the change is a delete. On
  71. // deletes the new name is /dev/null, so we sort instead by
  72. // the old name.
  73. //
  74. if (ent.changeType == ChangeType.DELETE)
  75. return ent.oldName;
  76. return ent.newName;
  77. }
  78. private int sortOf(ChangeType changeType) {
  79. // Sort deletes before adds so that a major type change for
  80. // a file path (such as symlink to regular file) will first
  81. // remove the path, then add it back with the new type.
  82. //
  83. switch (changeType) {
  84. case DELETE:
  85. return 1;
  86. case ADD:
  87. return 2;
  88. default:
  89. return 10;
  90. }
  91. }
  92. };
  93. private final List<DiffEntry> entries = new ArrayList<DiffEntry>();
  94. private List<DiffEntry> deleted = new ArrayList<DiffEntry>();
  95. private List<DiffEntry> added = new ArrayList<DiffEntry>();
  96. private boolean done;
  97. private final Repository repo;
  98. /** Similarity score required to pair an add/delete as a rename. */
  99. private int renameScore = 60;
  100. /** Limit in the number of files to consider for renames. */
  101. private int renameLimit;
  102. /** Set if the number of adds or deletes was over the limit. */
  103. private boolean overRenameLimit;
  104. /**
  105. * Create a new rename detector for the given repository
  106. *
  107. * @param repo
  108. * the repository to use for rename detection
  109. */
  110. public RenameDetector(Repository repo) {
  111. this.repo = repo;
  112. Config cfg = repo.getConfig();
  113. renameLimit = cfg.getInt("diff", "renamelimit", 200);
  114. }
  115. /**
  116. * @return minimum score required to pair an add/delete as a rename. The
  117. * score ranges are within the bounds of (0, 100).
  118. */
  119. public int getRenameScore() {
  120. return renameScore;
  121. }
  122. /**
  123. * Set the minimum score required to pair an add/delete as a rename.
  124. * <p>
  125. * When comparing two files together their score must be greater than or
  126. * equal to the rename score for them to be considered a rename match. The
  127. * score is computed based on content similarity, so a score of 60 implies
  128. * that approximately 60% of the bytes in the files are identical.
  129. *
  130. * @param score
  131. * new rename score, must be within (0, 100).
  132. */
  133. public void setRenameScore(int score) {
  134. if (score < 0 || score > 100)
  135. throw new IllegalArgumentException(
  136. JGitText.get().similarityScoreMustBeWithinBounds);
  137. renameScore = score;
  138. }
  139. /** @return limit on number of paths to perform inexact rename detection. */
  140. public int getRenameLimit() {
  141. return renameLimit;
  142. }
  143. /**
  144. * Set the limit on the number of files to perform inexact rename detection.
  145. * <p>
  146. * The rename detector has to build a square matrix of the rename limit on
  147. * each side, then perform that many file compares to determine similarity.
  148. * If 1000 files are added, and 1000 files are deleted, a 1000*1000 matrix
  149. * must be allocated, and 1,000,000 file compares may need to be performed.
  150. *
  151. * @param limit
  152. * new file limit.
  153. */
  154. public void setRenameLimit(int limit) {
  155. renameLimit = limit;
  156. }
  157. /**
  158. * Check if the detector is over the rename limit.
  159. * <p>
  160. * This method can be invoked either before or after {@code getEntries} has
  161. * been used to perform rename detection.
  162. *
  163. * @return true if the detector has more file additions or removals than the
  164. * rename limit is currently set to. In such configurations the
  165. * detector will skip expensive computation.
  166. */
  167. public boolean isOverRenameLimit() {
  168. if (done)
  169. return overRenameLimit;
  170. int cnt = Math.max(added.size(), deleted.size());
  171. return getRenameLimit() != 0 && getRenameLimit() < cnt;
  172. }
  173. /**
  174. * Add entries to be considered for rename detection.
  175. *
  176. * @param entriesToAdd
  177. * one or more entries to add.
  178. * @throws IllegalStateException
  179. * if {@code getEntries} was already invoked.
  180. */
  181. public void addAll(Collection<DiffEntry> entriesToAdd) {
  182. if (done)
  183. throw new IllegalStateException(JGitText.get().renamesAlreadyFound);
  184. for (DiffEntry entry : entriesToAdd) {
  185. switch (entry.getChangeType()) {
  186. case ADD:
  187. added.add(entry);
  188. break;
  189. case DELETE:
  190. deleted.add(entry);
  191. break;
  192. case MODIFY:
  193. if (sameType(entry.getOldMode(), entry.getNewMode()))
  194. entries.add(entry);
  195. else
  196. entries.addAll(DiffEntry.breakModify(entry));
  197. break;
  198. case COPY:
  199. case RENAME:
  200. default:
  201. entriesToAdd.add(entry);
  202. }
  203. }
  204. }
  205. /**
  206. * Add an entry to be considered for rename detection.
  207. *
  208. * @param entry
  209. * to add.
  210. * @throws IllegalStateException
  211. * if {@code getEntries} was already invoked.
  212. */
  213. public void add(DiffEntry entry) {
  214. addAll(Collections.singletonList(entry));
  215. }
  216. /**
  217. * Detect renames in the current file set.
  218. * <p>
  219. * This convenience function runs without a progress monitor.
  220. *
  221. * @return an unmodifiable list of {@link DiffEntry}s representing all files
  222. * that have been changed.
  223. * @throws IOException
  224. * file contents cannot be read from the repository.
  225. */
  226. public List<DiffEntry> compute() throws IOException {
  227. return compute(NullProgressMonitor.INSTANCE);
  228. }
  229. /**
  230. * Detect renames in the current file set.
  231. *
  232. * @param pm
  233. * report progress during the detection phases.
  234. * @return an unmodifiable list of {@link DiffEntry}s representing all files
  235. * that have been changed.
  236. * @throws IOException
  237. * file contents cannot be read from the repository.
  238. */
  239. public List<DiffEntry> compute(ProgressMonitor pm) throws IOException {
  240. if (!done) {
  241. done = true;
  242. if (pm == null)
  243. pm = NullProgressMonitor.INSTANCE;
  244. findExactRenames(pm);
  245. findContentRenames(pm);
  246. entries.addAll(added);
  247. added = null;
  248. entries.addAll(deleted);
  249. deleted = null;
  250. Collections.sort(entries, DIFF_COMPARATOR);
  251. }
  252. return Collections.unmodifiableList(entries);
  253. }
  254. private void findContentRenames(ProgressMonitor pm) throws IOException {
  255. int cnt = Math.max(added.size(), deleted.size());
  256. if (cnt == 0)
  257. return;
  258. if (getRenameLimit() == 0 || cnt <= getRenameLimit()) {
  259. SimilarityRenameDetector d;
  260. d = new SimilarityRenameDetector(repo, deleted, added);
  261. d.setRenameScore(getRenameScore());
  262. d.compute(pm);
  263. deleted = d.getLeftOverSources();
  264. added = d.getLeftOverDestinations();
  265. entries.addAll(d.getMatches());
  266. } else {
  267. overRenameLimit = true;
  268. }
  269. }
  270. @SuppressWarnings("unchecked")
  271. private void findExactRenames(ProgressMonitor pm) {
  272. if (added.isEmpty() || deleted.isEmpty())
  273. return;
  274. pm.beginTask(JGitText.get().renamesFindingExact, //
  275. added.size() + deleted.size());
  276. HashMap<AbbreviatedObjectId, Object> map = new HashMap<AbbreviatedObjectId, Object>();
  277. for (DiffEntry del : deleted) {
  278. Object old = map.put(del.oldId, del);
  279. if (old instanceof DiffEntry) {
  280. ArrayList<DiffEntry> list = new ArrayList<DiffEntry>(2);
  281. list.add((DiffEntry) old);
  282. list.add(del);
  283. map.put(del.oldId, list);
  284. } else if (old != null) {
  285. // Must be a list of DiffEntries
  286. ((List) old).add(del);
  287. map.put(del.oldId, old);
  288. }
  289. pm.update(1);
  290. }
  291. ArrayList<DiffEntry> left = new ArrayList<DiffEntry>(added.size());
  292. for (DiffEntry dst : added) {
  293. Object del = map.get(dst.newId);
  294. if (del instanceof DiffEntry) {
  295. DiffEntry e = (DiffEntry) del;
  296. if (sameType(e.oldMode, dst.newMode)) {
  297. if (e.changeType == ChangeType.DELETE) {
  298. e.changeType = ChangeType.RENAME;
  299. entries.add(exactRename(e, dst));
  300. } else {
  301. entries.add(exactCopy(e, dst));
  302. }
  303. } else {
  304. left.add(dst);
  305. }
  306. } else if (del != null) {
  307. List<DiffEntry> list = (List<DiffEntry>) del;
  308. DiffEntry best = null;
  309. for (DiffEntry e : list) {
  310. if (best == null && sameType(e.oldMode, dst.newMode))
  311. best = e;
  312. }
  313. if (best != null) {
  314. if (best.changeType == ChangeType.DELETE) {
  315. best.changeType = ChangeType.RENAME;
  316. entries.add(exactRename(best, dst));
  317. } else {
  318. entries.add(exactCopy(best, dst));
  319. }
  320. } else {
  321. left.add(dst);
  322. }
  323. } else {
  324. left.add(dst);
  325. }
  326. pm.update(1);
  327. }
  328. added = left;
  329. deleted = new ArrayList<DiffEntry>(map.size());
  330. for (Object o : map.values()) {
  331. if (o instanceof DiffEntry) {
  332. DiffEntry e = (DiffEntry) o;
  333. if (e.changeType == ChangeType.DELETE)
  334. deleted.add(e);
  335. } else {
  336. List<DiffEntry> list = (List<DiffEntry>) o;
  337. for (DiffEntry e : list) {
  338. if (e.changeType == ChangeType.DELETE)
  339. deleted.add(e);
  340. }
  341. }
  342. }
  343. pm.endTask();
  344. }
  345. static boolean sameType(FileMode a, FileMode b) {
  346. // Files have to be of the same type in order to rename them.
  347. // We would never want to rename a file to a gitlink, or a
  348. // symlink to a file.
  349. //
  350. int aType = a.getBits() & FileMode.TYPE_MASK;
  351. int bType = b.getBits() & FileMode.TYPE_MASK;
  352. return aType == bType;
  353. }
  354. private static DiffEntry exactRename(DiffEntry src, DiffEntry dst) {
  355. return DiffEntry.pair(ChangeType.RENAME, src, dst, EXACT_RENAME_SCORE);
  356. }
  357. private static DiffEntry exactCopy(DiffEntry src, DiffEntry dst) {
  358. return DiffEntry.pair(ChangeType.COPY, src, dst, EXACT_RENAME_SCORE);
  359. }
  360. }