選択できるのは25トピックまでです。 トピックは、先頭が英数字で、英数字とダッシュ('-')を使用した35文字以内のものにしてください。

MyersDiff.java 13KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472
  1. /*
  2. * Copyright (C) 2008-2009, Johannes E. Schindelin <johannes.schindelin@gmx.de>
  3. * Copyright (C) 2009, Johannes Schindelin <johannes.schindelin@gmx.de>
  4. * and other copyright owners as documented in the project's IP log.
  5. *
  6. * This program and the accompanying materials are made available
  7. * under the terms of the Eclipse Distribution License v1.0 which
  8. * accompanies this distribution, is reproduced below, and is
  9. * available at http://www.eclipse.org/org/documents/edl-v10.php
  10. *
  11. * All rights reserved.
  12. *
  13. * Redistribution and use in source and binary forms, with or
  14. * without modification, are permitted provided that the following
  15. * conditions are met:
  16. *
  17. * - Redistributions of source code must retain the above copyright
  18. * notice, this list of conditions and the following disclaimer.
  19. *
  20. * - Redistributions in binary form must reproduce the above
  21. * copyright notice, this list of conditions and the following
  22. * disclaimer in the documentation and/or other materials provided
  23. * with the distribution.
  24. *
  25. * - Neither the name of the Eclipse Foundation, Inc. nor the
  26. * names of its contributors may be used to endorse or promote
  27. * products derived from this software without specific prior
  28. * written permission.
  29. *
  30. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
  31. * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
  32. * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  33. * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  34. * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
  35. * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  36. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  37. * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  38. * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  39. * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
  40. * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  41. * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
  42. * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  43. */
  44. package org.eclipse.jgit.diff;
  45. import java.util.ArrayList;
  46. import java.util.Iterator;
  47. import java.util.List;
  48. import org.eclipse.jgit.util.IntList;
  49. import org.eclipse.jgit.util.LongList;
  50. public class MyersDiff {
  51. protected EditList edits;
  52. protected Sequence a, b;
  53. public MyersDiff(Sequence a, Sequence b) {
  54. this.a = a;
  55. this.b = b;
  56. calculateEdits();
  57. }
  58. public EditList getEdits() {
  59. return edits;
  60. }
  61. // TODO: use ThreadLocal for future multi-threaded operations
  62. MiddleEdit middle = new MiddleEdit();
  63. protected void calculateEdits() {
  64. edits = new EditList();
  65. middle.initialize(0, a.size(), 0, b.size());
  66. if (middle.beginA >= middle.endA &&
  67. middle.beginB >= middle.endB)
  68. return;
  69. calculateEdits(middle.beginA, middle.endA,
  70. middle.beginB, middle.endB);
  71. }
  72. protected void calculateEdits(int beginA, int endA,
  73. int beginB, int endB) {
  74. Edit edit = middle.calculate(beginA, endA, beginB, endB);
  75. if (beginA < edit.beginA || beginB < edit.beginB) {
  76. int k = edit.beginB - edit.beginA;
  77. int x = middle.backward.snake(k, edit.beginA);
  78. calculateEdits(beginA, x, beginB, k + x);
  79. }
  80. if (edit.getType() != Edit.Type.EMPTY)
  81. edits.add(edits.size(), edit);
  82. // after middle
  83. if (endA > edit.endA || endB > edit.endB) {
  84. int k = edit.endB - edit.endA;
  85. int x = middle.forward.snake(k, edit.endA);
  86. calculateEdits(x, endA, k + x, endB);
  87. }
  88. }
  /**
   * A class to help bisect the sequences a and b to find minimal
   * edit paths.
   *
   * As the arrays are reused for space efficiency, you will need one
   * instance per thread.
   *
   * The entry function is the calculate() method.
   */
  98. class MiddleEdit {
  99. void initialize(int beginA, int endA, int beginB, int endB) {
  100. this.beginA = beginA; this.endA = endA;
  101. this.beginB = beginB; this.endB = endB;
  102. // strip common parts on either end
  103. int k = beginB - beginA;
  104. this.beginA = forward.snake(k, beginA);
  105. this.beginB = k + this.beginA;
  106. k = endB - endA;
  107. this.endA = backward.snake(k, endA);
  108. this.endB = k + this.endA;
  109. }
  110. /*
  111. * This function calculates the "middle" Edit of the shortest
  112. * edit path between the given subsequences of a and b.
  113. *
  114. * Once a forward path and a backward path meet, we found the
  115. * middle part. From the last snake end point on both of them,
  116. * we construct the Edit.
  117. *
  118. * It is assumed that there is at least one edit in the range.
  119. */
  120. // TODO: measure speed impact when this is synchronized
  121. Edit calculate(int beginA, int endA, int beginB, int endB) {
  122. if (beginA == endA || beginB == endB)
  123. return new Edit(beginA, endA, beginB, endB);
  124. this.beginA = beginA; this.endA = endA;
  125. this.beginB = beginB; this.endB = endB;
  126. /*
  127. * Following the conventions in Myers' paper, "k" is
  128. * the difference between the index into "b" and the
  129. * index into "a".
  130. */
  131. int minK = beginB - endA;
  132. int maxK = endB - beginA;
  133. forward.initialize(beginB - beginA, beginA, minK, maxK);
  134. backward.initialize(endB - endA, endA, minK, maxK);
  135. for (int d = 1; ; d++)
  136. if (forward.calculate(d) ||
  137. backward.calculate(d))
  138. return edit;
  139. }
  140. /*
  141. * For each d, we need to hold the d-paths for the diagonals
  142. * k = -d, -d + 2, ..., d - 2, d. These are stored in the
  143. * forward (and backward) array.
  144. *
  145. * As we allow subsequences, too, this needs some refinement:
  146. * the forward paths start on the diagonal forwardK =
  147. * beginB - beginA, and backward paths start on the diagonal
  148. * backwardK = endB - endA.
  149. *
  150. * So, we need to hold the forward d-paths for the diagonals
  151. * k = forwardK - d, forwardK - d + 2, ..., forwardK + d and
  152. * the analogue for the backward d-paths. This means that
  153. * we can turn (k, d) into the forward array index using this
  154. * formula:
  155. *
  156. * i = (d + k - forwardK) / 2
  157. *
  158. * There is a further complication: the edit paths should not
  159. * leave the specified subsequences, so k is bounded by
  160. * minK = beginB - endA and maxK = endB - beginA. However,
  161. * (k - forwardK) _must_ be odd whenever d is odd, and it
  162. * _must_ be even when d is even.
  163. *
  164. * The values in the "forward" and "backward" arrays are
  165. * positions ("x") in the sequence a, to get the corresponding
  166. * positions ("y") in the sequence b, you have to calculate
  167. * the appropriate k and then y:
  168. *
  169. * k = forwardK - d + i * 2
  170. * y = k + x
  171. *
  172. * (substitute backwardK for forwardK if you want to get the
  173. * y position for an entry in the "backward" array.
  174. */
  175. EditPaths forward = new ForwardEditPaths();
  176. EditPaths backward = new BackwardEditPaths();
  177. /* Some variables which are shared between methods */
  178. protected int beginA, endA, beginB, endB;
  179. protected Edit edit;
  180. abstract class EditPaths {
  181. private IntList x = new IntList();
  182. private LongList snake = new LongList();
  183. int beginK, endK, middleK;
  184. int prevBeginK, prevEndK;
  185. /* if we hit one end early, no need to look further */
  186. int minK, maxK; // TODO: better explanation
  187. final int getIndex(int d, int k) {
  188. // TODO: remove
  189. if (((d + k - middleK) % 2) == 1)
  190. throw new RuntimeException("odd: " + d + " + " + k + " - " + middleK);
  191. return (d + k - middleK) / 2;
  192. }
  193. final int getX(int d, int k) {
  194. // TODO: remove
  195. if (k < beginK || k > endK)
  196. throw new RuntimeException("k " + k + " not in " + beginK + " - " + endK);
  197. return x.get(getIndex(d, k));
  198. }
  199. final long getSnake(int d, int k) {
  200. // TODO: remove
  201. if (k < beginK || k > endK)
  202. throw new RuntimeException("k " + k + " not in " + beginK + " - " + endK);
  203. return snake.get(getIndex(d, k));
  204. }
  205. private int forceKIntoRange(int k) {
  206. /* if k is odd, so must be the result */
  207. if (k < minK)
  208. return minK + ((k ^ minK) & 1);
  209. else if (k > maxK)
  210. return maxK - ((k ^ maxK) & 1);
  211. return k;
  212. }
  213. void initialize(int k, int x, int minK, int maxK) {
  214. this.minK = minK;
  215. this.maxK = maxK;
  216. beginK = endK = middleK = k;
  217. this.x.clear();
  218. this.x.add(x);
  219. snake.clear();
  220. snake.add(newSnake(k, x));
  221. }
  222. abstract int snake(int k, int x);
  223. abstract int getLeft(int x);
  224. abstract int getRight(int x);
  225. abstract boolean isBetter(int left, int right);
  226. abstract void adjustMinMaxK(final int k, final int x);
  227. abstract boolean meets(int d, int k, int x, long snake);
  228. final long newSnake(int k, int x) {
  229. long y = k + x;
  230. long ret = ((long) x) << 32;
  231. return ret | y;
  232. }
  233. final int snake2x(long snake) {
  234. return (int) (snake >>> 32);
  235. }
  236. final int snake2y(long snake) {
  237. return (int) snake;
  238. }
  239. final boolean makeEdit(long snake1, long snake2) {
  240. int x1 = snake2x(snake1), x2 = snake2x(snake2);
  241. int y1 = snake2y(snake1), y2 = snake2y(snake2);
  242. /*
  243. * Check for incompatible partial edit paths:
  244. * when there are ambiguities, we might have
  245. * hit incompatible (i.e. non-overlapping)
  246. * forward/backward paths.
  247. *
  248. * In that case, just pretend that we have
  249. * an empty edit at the end of one snake; this
  250. * will force a decision which path to take
  251. * in the next recursion step.
  252. */
  253. if (x1 > x2 || y1 > y2) {
  254. x1 = x2;
  255. y1 = y2;
  256. }
  257. edit = new Edit(x1, x2, y1, y2);
  258. return true;
  259. }
  260. boolean calculate(int d) {
  261. prevBeginK = beginK;
  262. prevEndK = endK;
  263. beginK = forceKIntoRange(middleK - d);
  264. endK = forceKIntoRange(middleK + d);
  265. // TODO: handle i more efficiently
  266. // TODO: walk snake(k, getX(d, k)) only once per (d, k)
  267. // TODO: move end points out of the loop to avoid conditionals inside the loop
  268. // go backwards so that we can avoid temp vars
  269. for (int k = endK; k >= beginK; k -= 2) {
  270. int left = -1, right = -1;
  271. long leftSnake = -1L, rightSnake = -1L;
  272. // TODO: refactor into its own function
  273. if (k > prevBeginK) {
  274. int i = getIndex(d - 1, k - 1);
  275. left = x.get(i);
  276. int end = snake(k - 1, left);
  277. leftSnake = left != end ?
  278. newSnake(k - 1, end) :
  279. snake.get(i);
  280. if (meets(d, k - 1, end, leftSnake))
  281. return true;
  282. left = getLeft(end);
  283. }
  284. if (k < prevEndK) {
  285. int i = getIndex(d - 1, k + 1);
  286. right = x.get(i);
  287. int end = snake(k + 1, right);
  288. rightSnake = right != end ?
  289. newSnake(k + 1, end) :
  290. snake.get(i);
  291. if (meets(d, k + 1, end, rightSnake))
  292. return true;
  293. right = getRight(end);
  294. }
  295. int newX;
  296. long newSnake;
  297. if (k >= prevEndK ||
  298. (k > prevBeginK &&
  299. isBetter(left, right))) {
  300. newX = left;
  301. newSnake = leftSnake;
  302. }
  303. else {
  304. newX = right;
  305. newSnake = rightSnake;
  306. }
  307. if (meets(d, k, newX, newSnake))
  308. return true;
  309. adjustMinMaxK(k, newX);
  310. int i = getIndex(d, k);
  311. x.set(i, newX);
  312. snake.set(i, newSnake);
  313. }
  314. return false;
  315. }
  316. }
  317. class ForwardEditPaths extends EditPaths {
  318. final int snake(int k, int x) {
  319. for (; x < endA && k + x < endB; x++)
  320. if (!a.equals(x, b, k + x))
  321. break;
  322. return x;
  323. }
  324. final int getLeft(final int x) {
  325. return x;
  326. }
  327. final int getRight(final int x) {
  328. return x + 1;
  329. }
  330. final boolean isBetter(final int left, final int right) {
  331. return left > right;
  332. }
  333. final void adjustMinMaxK(final int k, final int x) {
  334. if (x >= endA || k + x >= endB) {
  335. if (k > backward.middleK)
  336. maxK = k;
  337. else
  338. minK = k;
  339. }
  340. }
  341. final boolean meets(int d, int k, int x, long snake) {
  342. if (k < backward.beginK || k > backward.endK)
  343. return false;
  344. // TODO: move out of loop
  345. if (((d - 1 + k - backward.middleK) % 2) == 1)
  346. return false;
  347. if (x < backward.getX(d - 1, k))
  348. return false;
  349. makeEdit(snake, backward.getSnake(d - 1, k));
  350. return true;
  351. }
  352. }
  353. class BackwardEditPaths extends EditPaths {
  354. final int snake(int k, int x) {
  355. for (; x > beginA && k + x > beginB; x--)
  356. if (!a.equals(x - 1, b, k + x - 1))
  357. break;
  358. return x;
  359. }
  360. final int getLeft(final int x) {
  361. return x - 1;
  362. }
  363. final int getRight(final int x) {
  364. return x;
  365. }
  366. final boolean isBetter(final int left, final int right) {
  367. return left < right;
  368. }
  369. final void adjustMinMaxK(final int k, final int x) {
  370. if (x <= beginA || k + x <= beginB) {
  371. if (k > forward.middleK)
  372. maxK = k;
  373. else
  374. minK = k;
  375. }
  376. }
  377. final boolean meets(int d, int k, int x, long snake) {
  378. if (k < forward.beginK || k > forward.endK)
  379. return false;
  380. // TODO: move out of loop
  381. if (((d + k - forward.middleK) % 2) == 1)
  382. return false;
  383. if (x > forward.getX(d, k))
  384. return false;
  385. makeEdit(forward.getSnake(d, k), snake);
  386. return true;
  387. }
  388. }
  389. }
  390. // debugging (TODO: remove)
  391. public void print(Sequence s, int begin, int end) {
  392. RawText raw = (RawText)s;
  393. try {
  394. while (begin < end) {
  395. System.err.print("" + begin + ": ");
  396. raw.writeLine(System.err, begin++);
  397. System.err.println("");
  398. }
  399. } catch (Exception e) { e.printStackTrace(); }
  400. }
  401. public void print(int beginA, int endA, int beginB, int endB) {
  402. System.err.println("<<<<<<");
  403. print(a, beginA, endA);
  404. System.err.println("======");
  405. print(b, beginB, endB);
  406. System.err.println(">>>>>>");
  407. }
  408. public static void main(String[] args) {
  409. if (args.length != 2) {
  410. System.err.println("Need 2 arguments");
  411. System.exit(1);
  412. }
  413. try {
  414. RawText a = new RawText(new java.io.File(args[0]));
  415. RawText b = new RawText(new java.io.File(args[1]));
  416. MyersDiff diff = new MyersDiff(a, b);
  417. System.out.println(diff.getEdits().toString());
  418. } catch (Exception e) {
  419. e.printStackTrace();
  420. }
  421. }
  422. }