You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

DiffAlgorithm.java 7.1KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236
  1. /*
  2. * Copyright (C) 2010, Google Inc. and others
  3. *
  4. * This program and the accompanying materials are made available under the
  5. * terms of the Eclipse Distribution License v. 1.0 which is available at
  6. * https://www.eclipse.org/org/documents/edl-v10.php.
  7. *
  8. * SPDX-License-Identifier: BSD-3-Clause
  9. */
  10. package org.eclipse.jgit.diff;
  11. /**
  12. * Compares two {@link org.eclipse.jgit.diff.Sequence}s to create an
  13. * {@link org.eclipse.jgit.diff.EditList} of changes.
  14. * <p>
  15. * An algorithm's {@code diff} method must be callable from concurrent threads
  16. * without data collisions. This permits some algorithms to use a singleton
  17. * pattern, with concurrent invocations using the same singleton. Other
  18. * algorithms may support parameterization, in which case the caller can create
  19. * a unique instance per thread.
  20. */
  21. public abstract class DiffAlgorithm {
  22. /**
  23. * Supported diff algorithm
  24. */
  25. public enum SupportedAlgorithm {
  26. /**
  27. * Myers diff algorithm
  28. */
  29. MYERS,
  30. /**
  31. * Histogram diff algorithm
  32. */
  33. HISTOGRAM
  34. }
  35. /**
  36. * Get diff algorithm
  37. *
  38. * @param alg
  39. * the diff algorithm for which an implementation should be
  40. * returned
  41. * @return an implementation of the specified diff algorithm
  42. */
  43. public static DiffAlgorithm getAlgorithm(SupportedAlgorithm alg) {
  44. switch (alg) {
  45. case MYERS:
  46. return MyersDiff.INSTANCE;
  47. case HISTOGRAM:
  48. return new HistogramDiff();
  49. default:
  50. throw new IllegalArgumentException();
  51. }
  52. }
  53. /**
  54. * Compare two sequences and identify a list of edits between them.
  55. *
  56. * @param cmp
  57. * the comparator supplying the element equivalence function.
  58. * @param a
  59. * the first (also known as old or pre-image) sequence. Edits
  60. * returned by this algorithm will reference indexes using the
  61. * 'A' side: {@link org.eclipse.jgit.diff.Edit#getBeginA()},
  62. * {@link org.eclipse.jgit.diff.Edit#getEndA()}.
  63. * @param b
  64. * the second (also known as new or post-image) sequence. Edits
  65. * returned by this algorithm will reference indexes using the
  66. * 'B' side: {@link org.eclipse.jgit.diff.Edit#getBeginB()},
  67. * {@link org.eclipse.jgit.diff.Edit#getEndB()}.
  68. * @return a modifiable edit list comparing the two sequences. If empty, the
  69. * sequences are identical according to {@code cmp}'s rules. The
  70. * result list is never null.
  71. */
  72. public <S extends Sequence> EditList diff(
  73. SequenceComparator<? super S> cmp, S a, S b) {
  74. Edit region = cmp.reduceCommonStartEnd(a, b, coverEdit(a, b));
  75. switch (region.getType()) {
  76. case INSERT:
  77. case DELETE:
  78. return EditList.singleton(region);
  79. case REPLACE: {
  80. if (region.getLengthA() == 1 && region.getLengthB() == 1)
  81. return EditList.singleton(region);
  82. SubsequenceComparator<S> cs = new SubsequenceComparator<>(cmp);
  83. Subsequence<S> as = Subsequence.a(a, region);
  84. Subsequence<S> bs = Subsequence.b(b, region);
  85. EditList e = Subsequence.toBase(diffNonCommon(cs, as, bs), as, bs);
  86. return normalize(cmp, e, a, b);
  87. }
  88. case EMPTY:
  89. return new EditList(0);
  90. default:
  91. throw new IllegalStateException();
  92. }
  93. }
  94. private static <S extends Sequence> Edit coverEdit(S a, S b) {
  95. return new Edit(0, a.size(), 0, b.size());
  96. }
  97. /**
  98. * Reorganize an {@link EditList} for better diff consistency.
  99. * <p>
  100. * {@code DiffAlgorithms} may return {@link Edit.Type#INSERT} or
  101. * {@link Edit.Type#DELETE} edits that can be "shifted". For
  102. * example, the deleted section
  103. * <pre>
  104. * -a
  105. * -b
  106. * -c
  107. * a
  108. * b
  109. * c
  110. * </pre>
  111. * can be shifted down by 1, 2 or 3 locations.
  112. * <p>
  113. * To avoid later merge issues, we shift such edits to a
  114. * consistent location. {@code normalize} uses a simple strategy of
  115. * shifting such edits to their latest possible location.
  116. * <p>
  117. * This strategy may not always produce an aesthetically pleasing
  118. * diff. For instance, it works well with
  119. * <pre>
  120. * function1 {
  121. * ...
  122. * }
  123. *
  124. * +function2 {
  125. * + ...
  126. * +}
  127. * +
  128. * function3 {
  129. * ...
  130. * }
  131. * </pre>
  132. * but less so for
  133. * <pre>
  134. * #
  135. * # comment1
  136. * #
  137. * function1() {
  138. * }
  139. *
  140. * #
  141. * +# comment3
  142. * +#
  143. * +function3() {
  144. * +}
  145. * +
  146. * +#
  147. * # comment2
  148. * #
  149. * function2() {
  150. * }
  151. * </pre>
  152. * <a href="https://github.com/mhagger/diff-slider-tools">More
  153. * sophisticated strategies</a> are possible, say by calculating a
  154. * suitable "aesthetic cost" for each possible position and using
  155. * the lowest cost, but {@code normalize} just shifts edits
  156. * to the end as much as possible.
  157. *
  158. * @param <S>
  159. * type of sequence being compared.
  160. * @param cmp
  161. * the comparator supplying the element equivalence function.
  162. * @param e
  163. * a modifiable edit list comparing the provided sequences.
  164. * @param a
  165. * the first (also known as old or pre-image) sequence.
  166. * @param b
  167. * the second (also known as new or post-image) sequence.
  168. * @return a modifiable edit list with edit regions shifted to their
  169. * latest possible location. The result list is never null.
  170. * @since 4.7
  171. */
  172. private static <S extends Sequence> EditList normalize(
  173. SequenceComparator<? super S> cmp, EditList e, S a, S b) {
  174. Edit prev = null;
  175. for (int i = e.size() - 1; i >= 0; i--) {
  176. Edit cur = e.get(i);
  177. Edit.Type curType = cur.getType();
  178. int maxA = (prev == null) ? a.size() : prev.beginA;
  179. int maxB = (prev == null) ? b.size() : prev.beginB;
  180. if (curType == Edit.Type.INSERT) {
  181. while (cur.endA < maxA && cur.endB < maxB
  182. && cmp.equals(b, cur.beginB, b, cur.endB)) {
  183. cur.shift(1);
  184. }
  185. } else if (curType == Edit.Type.DELETE) {
  186. while (cur.endA < maxA && cur.endB < maxB
  187. && cmp.equals(a, cur.beginA, a, cur.endA)) {
  188. cur.shift(1);
  189. }
  190. }
  191. prev = cur;
  192. }
  193. return e;
  194. }
  195. /**
  196. * Compare two sequences and identify a list of edits between them.
  197. *
  198. * This method should be invoked only after the two sequences have been
  199. * proven to have no common starting or ending elements. The expected
  200. * elimination of common starting and ending elements is automatically
  201. * performed by the {@link #diff(SequenceComparator, Sequence, Sequence)}
  202. * method, which invokes this method using
  203. * {@link org.eclipse.jgit.diff.Subsequence}s.
  204. *
  205. * @param cmp
  206. * the comparator supplying the element equivalence function.
  207. * @param a
  208. * the first (also known as old or pre-image) sequence. Edits
  209. * returned by this algorithm will reference indexes using the
  210. * 'A' side: {@link org.eclipse.jgit.diff.Edit#getBeginA()},
  211. * {@link org.eclipse.jgit.diff.Edit#getEndA()}.
  212. * @param b
  213. * the second (also known as new or post-image) sequence. Edits
  214. * returned by this algorithm will reference indexes using the
  215. * 'B' side: {@link org.eclipse.jgit.diff.Edit#getBeginB()},
  216. * {@link org.eclipse.jgit.diff.Edit#getEndB()}.
  217. * @return a modifiable edit list comparing the two sequences.
  218. */
  219. public abstract <S extends Sequence> EditList diffNonCommon(
  220. SequenceComparator<? super S> cmp, S a, S b);
  221. }