You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

DiffAlgorithms.java 11KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400
  1. /*
  2. * Copyright (C) 2010, Google Inc.
  3. * and other copyright owners as documented in the project's IP log.
  4. *
  5. * This program and the accompanying materials are made available
  6. * under the terms of the Eclipse Distribution License v1.0 which
  7. * accompanies this distribution, is reproduced below, and is
  8. * available at http://www.eclipse.org/org/documents/edl-v10.php
  9. *
  10. * All rights reserved.
  11. *
  12. * Redistribution and use in source and binary forms, with or
  13. * without modification, are permitted provided that the following
  14. * conditions are met:
  15. *
  16. * - Redistributions of source code must retain the above copyright
  17. * notice, this list of conditions and the following disclaimer.
  18. *
  19. * - Redistributions in binary form must reproduce the above
  20. * copyright notice, this list of conditions and the following
  21. * disclaimer in the documentation and/or other materials provided
  22. * with the distribution.
  23. *
  24. * - Neither the name of the Eclipse Foundation, Inc. nor the
  25. * names of its contributors may be used to endorse or promote
  26. * products derived from this software without specific prior
  27. * written permission.
  28. *
  29. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
  30. * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
  31. * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  32. * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  33. * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
  34. * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  35. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  36. * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  37. * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  38. * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
  39. * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  40. * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
  41. * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  42. */
  43. package org.eclipse.jgit.pgm.debug;
  44. import java.io.File;
  45. import java.lang.management.ManagementFactory;
  46. import java.lang.management.ThreadMXBean;
  47. import java.lang.reflect.Field;
  48. import java.util.ArrayList;
  49. import java.util.Collections;
  50. import java.util.Comparator;
  51. import java.util.List;
  52. import org.eclipse.jgit.diff.DiffAlgorithm;
  53. import org.eclipse.jgit.diff.HistogramDiff;
  54. import org.eclipse.jgit.diff.MyersDiff;
  55. import org.eclipse.jgit.diff.PatienceDiff;
  56. import org.eclipse.jgit.diff.RawText;
  57. import org.eclipse.jgit.diff.RawTextComparator;
  58. import org.eclipse.jgit.errors.LargeObjectException;
  59. import org.eclipse.jgit.lib.AbbreviatedObjectId;
  60. import org.eclipse.jgit.lib.AnyObjectId;
  61. import org.eclipse.jgit.lib.Constants;
  62. import org.eclipse.jgit.lib.FileMode;
  63. import org.eclipse.jgit.lib.MutableObjectId;
  64. import org.eclipse.jgit.lib.ObjectId;
  65. import org.eclipse.jgit.lib.ObjectReader;
  66. import org.eclipse.jgit.lib.Repository;
  67. import org.eclipse.jgit.lib.RepositoryBuilder;
  68. import org.eclipse.jgit.lib.RepositoryCache;
  69. import org.eclipse.jgit.pgm.CLIText;
  70. import org.eclipse.jgit.pgm.TextBuiltin;
  71. import org.eclipse.jgit.revwalk.RevCommit;
  72. import org.eclipse.jgit.revwalk.RevWalk;
  73. import org.eclipse.jgit.treewalk.TreeWalk;
  74. import org.eclipse.jgit.treewalk.filter.TreeFilter;
  75. import org.eclipse.jgit.util.FS;
  76. import org.kohsuke.args4j.Option;
  77. class DiffAlgorithms extends TextBuiltin {
  78. final Algorithm myers = new Algorithm() {
  79. DiffAlgorithm create() {
  80. return MyersDiff.INSTANCE;
  81. }
  82. };
  83. final Algorithm histogram = new Algorithm() {
  84. DiffAlgorithm create() {
  85. HistogramDiff d = new HistogramDiff();
  86. d.setFallbackAlgorithm(null);
  87. return d;
  88. }
  89. };
  90. final Algorithm histogram_myers = new Algorithm() {
  91. DiffAlgorithm create() {
  92. HistogramDiff d = new HistogramDiff();
  93. d.setFallbackAlgorithm(MyersDiff.INSTANCE);
  94. return d;
  95. }
  96. };
  97. final Algorithm patience = new Algorithm() {
  98. DiffAlgorithm create() {
  99. PatienceDiff d = new PatienceDiff();
  100. d.setFallbackAlgorithm(null);
  101. return d;
  102. }
  103. };
  104. final Algorithm patience_myers = new Algorithm() {
  105. DiffAlgorithm create() {
  106. PatienceDiff d = new PatienceDiff();
  107. d.setFallbackAlgorithm(MyersDiff.INSTANCE);
  108. return d;
  109. }
  110. };
  111. final Algorithm patience_histogram_myers = new Algorithm() {
  112. DiffAlgorithm create() {
  113. HistogramDiff d2 = new HistogramDiff();
  114. d2.setFallbackAlgorithm(MyersDiff.INSTANCE);
  115. PatienceDiff d1 = new PatienceDiff();
  116. d1.setFallbackAlgorithm(d2);
  117. return d1;
  118. }
  119. };
  120. // -----------------------------------------------------------------------
  121. //
  122. // Implementation of the suite lives below this line.
  123. //
  124. //
  125. @Option(name = "--algorithm", multiValued = true, metaVar = "NAME", usage = "Enable algorithm(s)")
  126. List<String> algorithms = new ArrayList<String>();
  127. @Option(name = "--text-limit", metaVar = "LIMIT", usage = "Maximum size in KiB to scan per file revision")
  128. int textLimit = 15 * 1024; // 15 MiB as later we do * 1024.
  129. @Option(name = "--repository", aliases = { "-r" }, multiValued = true, metaVar = "GIT_DIR", usage = "Repository to scan")
  130. List<File> gitDirs = new ArrayList<File>();
  131. @Option(name = "--count", metaVar = "LIMIT", usage = "Number of file revisions to be compared")
  132. int count = 0; // unlimited
  133. private final RawTextComparator cmp = RawTextComparator.DEFAULT;
  134. private ThreadMXBean mxBean;
  135. @Override
  136. protected boolean requiresRepository() {
  137. return false;
  138. }
  139. @Override
  140. protected void run() throws Exception {
  141. mxBean = ManagementFactory.getThreadMXBean();
  142. if (!mxBean.isCurrentThreadCpuTimeSupported())
  143. throw die("Current thread CPU time not supported on this JRE");
  144. if (gitDirs.isEmpty()) {
  145. RepositoryBuilder rb = new RepositoryBuilder() //
  146. .setGitDir(gitdir) //
  147. .readEnvironment() //
  148. .findGitDir();
  149. if (rb.getGitDir() == null)
  150. throw die(CLIText.get().cantFindGitDirectory);
  151. gitDirs.add(rb.getGitDir());
  152. }
  153. for (File dir : gitDirs) {
  154. RepositoryBuilder rb = new RepositoryBuilder();
  155. if (RepositoryCache.FileKey.isGitRepository(dir, FS.DETECTED))
  156. rb.setGitDir(dir);
  157. else
  158. rb.findGitDir(dir);
  159. Repository db = rb.build();
  160. try {
  161. run(db);
  162. } finally {
  163. db.close();
  164. }
  165. }
  166. }
  167. private void run(Repository db) throws Exception {
  168. List<Test> all = init();
  169. long files = 0;
  170. int commits = 0;
  171. int minN = Integer.MAX_VALUE;
  172. int maxN = 0;
  173. AbbreviatedObjectId startId;
  174. ObjectReader or = db.newObjectReader();
  175. try {
  176. final MutableObjectId id = new MutableObjectId();
  177. RevWalk rw = new RevWalk(or);
  178. TreeWalk tw = new TreeWalk(or);
  179. tw.setFilter(TreeFilter.ANY_DIFF);
  180. tw.setRecursive(true);
  181. ObjectId start = db.resolve(Constants.HEAD);
  182. startId = or.abbreviate(start);
  183. rw.markStart(rw.parseCommit(start));
  184. for (;;) {
  185. final RevCommit c = rw.next();
  186. if (c == null)
  187. break;
  188. commits++;
  189. if (c.getParentCount() != 1)
  190. continue;
  191. RevCommit p = c.getParent(0);
  192. rw.parseHeaders(p);
  193. tw.reset(new AnyObjectId[] { p.getTree(), c.getTree() });
  194. while (tw.next()) {
  195. if (!isFile(tw, 0) || !isFile(tw, 1))
  196. continue;
  197. byte[] raw0;
  198. try {
  199. tw.getObjectId(id, 0);
  200. raw0 = or.open(id).getCachedBytes(textLimit * 1024);
  201. } catch (LargeObjectException tooBig) {
  202. continue;
  203. }
  204. if (RawText.isBinary(raw0))
  205. continue;
  206. byte[] raw1;
  207. try {
  208. tw.getObjectId(id, 1);
  209. raw1 = or.open(id).getCachedBytes(textLimit * 1024);
  210. } catch (LargeObjectException tooBig) {
  211. continue;
  212. }
  213. if (RawText.isBinary(raw1))
  214. continue;
  215. RawText txt0 = new RawText(raw0);
  216. RawText txt1 = new RawText(raw1);
  217. minN = Math.min(minN, txt0.size() + txt1.size());
  218. maxN = Math.max(maxN, txt0.size() + txt1.size());
  219. for (Test test : all)
  220. testOne(test, txt0, txt1);
  221. files++;
  222. }
  223. if (count > 0 && files > count)
  224. break;
  225. }
  226. } finally {
  227. or.release();
  228. }
  229. Collections.sort(all, new Comparator<Test>() {
  230. public int compare(Test a, Test b) {
  231. int cmp = Long.signum(a.runningTimeNanos - b.runningTimeNanos);
  232. if (cmp == 0)
  233. cmp = a.algorithm.name.compareTo(b.algorithm.name);
  234. return cmp;
  235. }
  236. });
  237. if (db.getDirectory() != null) {
  238. String name = db.getDirectory().getName();
  239. File parent = db.getDirectory().getParentFile();
  240. if (name.equals(Constants.DOT_GIT_EXT) && parent != null)
  241. name = parent.getName();
  242. out.println(name + ": start at " + startId.name());
  243. }
  244. out.format(" %12d files, %8d commits\n", files, commits);
  245. out.format(" N=%10d min lines, %8d max lines\n", minN, maxN);
  246. out.format("%-25s %12s ( %12s %12s )\n", //
  247. "Algorithm", "Time(ns)", "Time(ns) on", "Time(ns) on");
  248. out.format("%-25s %12s ( %12s %12s )\n", //
  249. "", "", "N=" + minN, "N=" + maxN);
  250. out.println("-----------------------------------------------------"
  251. + "----------------");
  252. for (Test test : all) {
  253. out.format("%-25s %12d ( %12d %12d )", //
  254. test.algorithm.name, //
  255. test.runningTimeNanos, //
  256. test.minN.runningTimeNanos, //
  257. test.maxN.runningTimeNanos);
  258. out.println();
  259. }
  260. out.println();
  261. out.flush();
  262. }
  263. private static boolean isFile(TreeWalk tw, int ithTree) {
  264. FileMode fm = tw.getFileMode(ithTree);
  265. return FileMode.REGULAR_FILE.equals(fm)
  266. || FileMode.EXECUTABLE_FILE.equals(fm);
  267. }
  268. private static final int minCPUTimerTicks = 10;
  269. private void testOne(Test test, RawText a, RawText b) {
  270. final DiffAlgorithm da = test.algorithm.create();
  271. int cpuTimeChanges = 0;
  272. int cnt = 0;
  273. final long startTime = mxBean.getCurrentThreadCpuTime();
  274. long lastTime = startTime;
  275. while (cpuTimeChanges < minCPUTimerTicks) {
  276. da.diff(cmp, a, b);
  277. cnt++;
  278. long interimTime = mxBean.getCurrentThreadCpuTime();
  279. if (interimTime != lastTime) {
  280. cpuTimeChanges++;
  281. lastTime = interimTime;
  282. }
  283. }
  284. final long stopTime = mxBean.getCurrentThreadCpuTime();
  285. final long runTime = (stopTime - startTime) / cnt;
  286. test.runningTimeNanos += runTime;
  287. if (test.minN == null || a.size() + b.size() < test.minN.n) {
  288. test.minN = new Run();
  289. test.minN.n = a.size() + b.size();
  290. test.minN.runningTimeNanos = runTime;
  291. }
  292. if (test.maxN == null || a.size() + b.size() > test.maxN.n) {
  293. test.maxN = new Run();
  294. test.maxN.n = a.size() + b.size();
  295. test.maxN.runningTimeNanos = runTime;
  296. }
  297. }
  298. private List<Test> init() {
  299. List<Test> all = new ArrayList<Test>();
  300. try {
  301. for (Field f : DiffAlgorithms.class.getDeclaredFields()) {
  302. if (f.getType() == Algorithm.class) {
  303. f.setAccessible(true);
  304. Algorithm alg = (Algorithm) f.get(this);
  305. alg.name = f.getName();
  306. if (included(alg.name, algorithms)) {
  307. Test test = new Test();
  308. test.algorithm = alg;
  309. all.add(test);
  310. }
  311. }
  312. }
  313. } catch (IllegalArgumentException e) {
  314. throw die("Cannot determine names", e);
  315. } catch (IllegalAccessException e) {
  316. throw die("Cannot determine names", e);
  317. }
  318. return all;
  319. }
  320. private static boolean included(String name, List<String> want) {
  321. if (want.isEmpty())
  322. return true;
  323. for (String s : want) {
  324. if (s.equalsIgnoreCase(name))
  325. return true;
  326. }
  327. return false;
  328. }
  329. private static abstract class Algorithm {
  330. String name;
  331. abstract DiffAlgorithm create();
  332. }
  333. private static class Test {
  334. Algorithm algorithm;
  335. long runningTimeNanos;
  336. Run minN;
  337. Run maxN;
  338. }
  339. private static class Run {
  340. int n;
  341. long runningTimeNanos;
  342. }
  343. }