1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
|
/*
* Copyright (C) 2010, Google Inc. and others
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Distribution License v. 1.0 which is available at
* https://www.eclipse.org/org/documents/edl-v10.php.
*
* SPDX-License-Identifier: BSD-3-Clause
*/
package org.eclipse.jgit.diff;
/**
* Compares two {@link org.eclipse.jgit.diff.Sequence}s to create an
* {@link org.eclipse.jgit.diff.EditList} of changes.
* <p>
* An algorithm's {@code diff} method must be callable from concurrent threads
* without data collisions. This permits some algorithms to use a singleton
* pattern, with concurrent invocations using the same singleton. Other
* algorithms may support parameterization, in which case the caller can create
* a unique instance per thread.
*/
public abstract class DiffAlgorithm {
/**
* Supported diff algorithm
*/
public enum SupportedAlgorithm {
/**
* Myers diff algorithm
*/
MYERS,
/**
* Histogram diff algorithm
*/
HISTOGRAM
}
/**
* Get diff algorithm
*
* @param alg
* the diff algorithm for which an implementation should be
* returned
* @return an implementation of the specified diff algorithm
*/
public static DiffAlgorithm getAlgorithm(SupportedAlgorithm alg) {
switch (alg) {
case MYERS:
return MyersDiff.INSTANCE;
case HISTOGRAM:
return new HistogramDiff();
default:
throw new IllegalArgumentException();
}
}
/**
* Compare two sequences and identify a list of edits between them.
*
* @param cmp
* the comparator supplying the element equivalence function.
* @param a
* the first (also known as old or pre-image) sequence. Edits
* returned by this algorithm will reference indexes using the
* 'A' side: {@link org.eclipse.jgit.diff.Edit#getBeginA()},
* {@link org.eclipse.jgit.diff.Edit#getEndA()}.
* @param b
* the second (also known as new or post-image) sequence. Edits
* returned by this algorithm will reference indexes using the
* 'B' side: {@link org.eclipse.jgit.diff.Edit#getBeginB()},
* {@link org.eclipse.jgit.diff.Edit#getEndB()}.
* @return a modifiable edit list comparing the two sequences. If empty, the
* sequences are identical according to {@code cmp}'s rules. The
* result list is never null.
*/
public <S extends Sequence> EditList diff(
SequenceComparator<? super S> cmp, S a, S b) {
Edit region = cmp.reduceCommonStartEnd(a, b, coverEdit(a, b));
switch (region.getType()) {
case INSERT:
case DELETE:
return EditList.singleton(region);
case REPLACE: {
if (region.getLengthA() == 1 && region.getLengthB() == 1)
return EditList.singleton(region);
SubsequenceComparator<S> cs = new SubsequenceComparator<>(cmp);
Subsequence<S> as = Subsequence.a(a, region);
Subsequence<S> bs = Subsequence.b(b, region);
EditList e = Subsequence.toBase(diffNonCommon(cs, as, bs), as, bs);
return normalize(cmp, e, a, b);
}
case EMPTY:
return new EditList(0);
default:
throw new IllegalStateException();
}
}
private static <S extends Sequence> Edit coverEdit(S a, S b) {
return new Edit(0, a.size(), 0, b.size());
}
/**
* Reorganize an {@link EditList} for better diff consistency.
* <p>
* {@code DiffAlgorithms} may return {@link Edit.Type#INSERT} or
* {@link Edit.Type#DELETE} edits that can be "shifted". For
* example, the deleted section
* <pre>
* -a
* -b
* -c
* a
* b
* c
* </pre>
* can be shifted down by 1, 2 or 3 locations.
* <p>
* To avoid later merge issues, we shift such edits to a
* consistent location. {@code normalize} uses a simple strategy of
* shifting such edits to their latest possible location.
* <p>
* This strategy may not always produce an aesthetically pleasing
* diff. For instance, it works well with
* <pre>
* function1 {
* ...
* }
*
* +function2 {
* + ...
* +}
* +
* function3 {
* ...
* }
* </pre>
* but less so for
* <pre>
* #
* # comment1
* #
* function1() {
* }
*
* #
* +# comment3
* +#
* +function3() {
* +}
* +
* +#
* # comment2
* #
* function2() {
* }
* </pre>
* <a href="https://github.com/mhagger/diff-slider-tools">More
* sophisticated strategies</a> are possible, say by calculating a
* suitable "aesthetic cost" for each possible position and using
* the lowest cost, but {@code normalize} just shifts edits
* to the end as much as possible.
*
* @param <S>
* type of sequence being compared.
* @param cmp
* the comparator supplying the element equivalence function.
* @param e
* a modifiable edit list comparing the provided sequences.
* @param a
* the first (also known as old or pre-image) sequence.
* @param b
* the second (also known as new or post-image) sequence.
* @return a modifiable edit list with edit regions shifted to their
* latest possible location. The result list is never null.
* @since 4.7
*/
private static <S extends Sequence> EditList normalize(
SequenceComparator<? super S> cmp, EditList e, S a, S b) {
Edit prev = null;
for (int i = e.size() - 1; i >= 0; i--) {
Edit cur = e.get(i);
Edit.Type curType = cur.getType();
int maxA = (prev == null) ? a.size() : prev.beginA;
int maxB = (prev == null) ? b.size() : prev.beginB;
if (curType == Edit.Type.INSERT) {
while (cur.endA < maxA && cur.endB < maxB
&& cmp.equals(b, cur.beginB, b, cur.endB)) {
cur.shift(1);
}
} else if (curType == Edit.Type.DELETE) {
while (cur.endA < maxA && cur.endB < maxB
&& cmp.equals(a, cur.beginA, a, cur.endA)) {
cur.shift(1);
}
}
prev = cur;
}
return e;
}
/**
* Compare two sequences and identify a list of edits between them.
*
* This method should be invoked only after the two sequences have been
* proven to have no common starting or ending elements. The expected
* elimination of common starting and ending elements is automatically
* performed by the {@link #diff(SequenceComparator, Sequence, Sequence)}
* method, which invokes this method using
* {@link org.eclipse.jgit.diff.Subsequence}s.
*
* @param cmp
* the comparator supplying the element equivalence function.
* @param a
* the first (also known as old or pre-image) sequence. Edits
* returned by this algorithm will reference indexes using the
* 'A' side: {@link org.eclipse.jgit.diff.Edit#getBeginA()},
* {@link org.eclipse.jgit.diff.Edit#getEndA()}.
* @param b
* the second (also known as new or post-image) sequence. Edits
* returned by this algorithm will reference indexes using the
* 'B' side: {@link org.eclipse.jgit.diff.Edit#getBeginB()},
* {@link org.eclipse.jgit.diff.Edit#getEndB()}.
* @return a modifiable edit list comparing the two sequences.
*/
public abstract <S extends Sequence> EditList diffNonCommon(
SequenceComparator<? super S> cmp, S a, S b);
}
|