* algorithms may support parameterization, in which case the caller can create
* a unique instance per thread.
*/
-public interface DiffAlgorithm {
+public abstract class DiffAlgorithm {
/**
* Compare two sequences and identify a list of edits between them.
- *
+ *
* @param <S>
* type of sequence being compared.
- * @param <C>
- * type of comparator to evaluate the sequence elements.
* @param cmp
* the comparator supplying the element equivalence function.
* @param a
* sequences are identical according to {@code cmp}'s rules. The
* result list is never null.
*/
- public <S extends Sequence, C extends SequenceComparator<? super S>> EditList diff(
- C cmp, S a, S b);
+ public <S extends Sequence> EditList diff(
+         SequenceComparator<? super S> cmp, S a, S b) {
+     // Begin with an edit spanning both inputs in full, then let the
+     // comparator eliminate the common starting and ending elements.
+     Edit whole = coverEdit(a, b);
+     Edit trimmed = cmp.reduceCommonStartEnd(a, b, whole);
+
+     switch (trimmed.getType()) {
+     case EMPTY:
+         // No edit remains once the common ends are removed.
+         return new EditList(0);
+
+     case INSERT:
+     case DELETE:
+         // The trimmed region is itself the only edit to report.
+         return EditList.singleton(trimmed);
+
+     case REPLACE: {
+         // Hand only the trimmed region to the concrete algorithm.
+         SubsequenceComparator<S> subCmp = new SubsequenceComparator<S>(cmp);
+         Subsequence<S> subA = Subsequence.a(a, trimmed);
+         Subsequence<S> subB = Subsequence.b(b, trimmed);
+         EditList found = diffNonCommon(subCmp, subA, subB);
+         // NOTE(review): toBase presumably re-expresses the edits in terms
+         // of the original a and b; confirm against Subsequence.
+         return Subsequence.toBase(found, subA, subB);
+     }
+
+     default:
+         throw new IllegalStateException();
+     }
+ }
+
+ /**
+  * Build the edit {@code (0, a.size(), 0, b.size())}.
+  *
+  * {@code diff} uses the result as the initial region that it passes to
+  * the comparator's {@code reduceCommonStartEnd}.
+  *
+  * @param a
+  *            the first sequence; only its size is read.
+  * @param b
+  *            the second sequence; only its size is read.
+  * @return a new edit built from the two sizes.
+  */
+ private static <S extends Sequence> Edit coverEdit(S a, S b) {
+     final int sizeA = a.size();
+     final int sizeB = b.size();
+     return new Edit(0, sizeA, 0, sizeB);
+ }
+
+ /**
+ * Compare two sequences and identify a list of edits between them.
+ * <p>
+ * This method should be invoked only after the two sequences have been
+ * proven to have no common starting or ending elements. The expected
+ * elimination of common starting and ending elements is automatically
+ * performed by the {@link #diff(SequenceComparator, Sequence, Sequence)}
+ * method, which invokes this method using {@link Subsequence}s.
+ * <p>
+ * When reached through {@code diff}, {@code cmp} is a
+ * {@code SubsequenceComparator} wrapping the caller's comparator, and
+ * {@code diff} passes the returned list, together with both
+ * subsequences, through {@code Subsequence.toBase} before handing it back
+ * to its own caller.
+ *
+ * @param <S>
+ * type of sequence being compared.
+ * @param cmp
+ * the comparator supplying the element equivalence function.
+ * @param a
+ * the first (also known as old or pre-image) sequence. Edits
+ * returned by this algorithm will reference indexes using the
+ * 'A' side: {@link Edit#getBeginA()}, {@link Edit#getEndA()}.
+ * @param b
+ * the second (also known as new or post-image) sequence. Edits
+ * returned by this algorithm will reference indexes using the
+ * 'B' side: {@link Edit#getBeginB()}, {@link Edit#getEndB()}.
+ * @return a modifiable edit list comparing the two sequences.
+ */
+ public abstract <S extends Sequence> EditList diffNonCommon(
+ SequenceComparator<? super S> cmp, S a, S b);
}
/** Specialized list of {@link Edit}s in a document. */
public class EditList extends AbstractList<Edit> {
+ /**
+  * Create a list holding exactly one edit.
+  *
+  * @param edit
+  *            the sole element to place in the new list.
+  * @return a newly allocated list whose only element is {@code edit}.
+  */
+ public static EditList singleton(Edit edit) {
+     final EditList single = new EditList(1);
+     single.add(edit);
+     return single;
+ }
+
+
private final ArrayList<Edit> container;
/** Create a new, empty edit list. */
container = new ArrayList<Edit>();
}
+ /**
+  * Create an empty edit list whose backing {@link ArrayList} is
+  * pre-sized.
+  *
+  * @param capacity
+  *            initial capacity of the backing list. This is a sizing
+  *            hint only; adding more edits grows the list as needed.
+  * @throws IllegalArgumentException
+  *             if {@code capacity} is negative, as raised by
+  *             {@link ArrayList#ArrayList(int)}.
+  */
+ public EditList(int capacity) {
+     this.container = new ArrayList<Edit>(capacity);
+ }
+
+
@Override
public int size() {
return container.size();
public class MyersDiff<S extends Sequence> {
/** Singleton instance of MyersDiff. */
public static final DiffAlgorithm INSTANCE = new DiffAlgorithm() {
- public <S extends Sequence, C extends SequenceComparator<? super S>> EditList diff(
- C cmp, S a, S b) {
- Edit region = new Edit(0, a.size(), 0, b.size());
- region = cmp.reduceCommonStartEnd(a, b, region);
-
- switch (region.getType()) {
- case INSERT:
- case DELETE: {
- EditList r = new EditList();
- r.add(region);
- return r;
- }
-
- case REPLACE:
- return new MyersDiff<S>(cmp, a, b, region).getEdits();
-
- case EMPTY:
- return new EditList();
-
- default:
- throw new IllegalStateException();
- }
+ public <S extends Sequence> EditList diffNonCommon(
+         SequenceComparator<? super S> cmp, S a, S b) {
+     // The private constructor invokes calculateEdits() before it
+     // returns, so the edits field is read only after that call.
+     MyersDiff<S> run = new MyersDiff<S>(cmp, a, b);
+     return run.edits;
}
};
protected EditList edits;
/** Comparison function for sequences. */
- protected HashedSequenceComparator<Subsequence<S>> cmp;
+ protected HashedSequenceComparator<S> cmp;
/**
* The first text to be compared. Referred to as "Text A" in the comments
*/
- protected HashedSequence<Subsequence<S>> a;
+ protected HashedSequence<S> a;
/**
* The second text to be compared. Referred to as "Text B" in the comments
*/
- protected HashedSequence<Subsequence<S>> b;
+ protected HashedSequence<S> b;
- private MyersDiff(SequenceComparator<? super S> cmp, S a, S b, Edit region) {
- Subsequence<S> as = Subsequence.a(a, region);
- Subsequence<S> bs = Subsequence.b(b, region);
-
- HashedSequencePair<Subsequence<S>> pair = new HashedSequencePair<Subsequence<S>>(
- new SubsequenceComparator<S>(cmp), as, bs);
+ /**
+ * Hash both sequences and run the edit calculation immediately.
+ * <p>
+ * The inputs are wrapped in a {@code HashedSequencePair}; its comparator
+ * and hashed sequences are stored in {@code cmp}, {@code a} and
+ * {@code b}, and {@code calculateEdits()} is invoked before the
+ * constructor returns. The singleton {@code INSTANCE} reads the
+ * {@code edits} field directly afterwards.
+ *
+ * @param cmp
+ * the comparator supplying the element equivalence function.
+ * @param a
+ * the first sequence to compare.
+ * @param b
+ * the second sequence to compare.
+ */
+ private MyersDiff(SequenceComparator<? super S> cmp, S a, S b) {
+ HashedSequencePair<S> pair;
+ pair = new HashedSequencePair<S>(cmp, a, b);
this.cmp = pair.getComparator();
this.a = pair.getA();
this.b = pair.getB();
calculateEdits();
- Subsequence.toBase(edits, as, bs);
- }
-
- /**
- * @return the list of edits found during the last call to {@link #calculateEdits()}
- */
- public EditList getEdits() {
- return edits;
}
// TODO: use ThreadLocal for future multi-threaded operations
try {
RawText a = new RawText(new java.io.File(args[0]));
RawText b = new RawText(new java.io.File(args[1]));
- EditList res = INSTANCE.diff(RawTextComparator.DEFAULT, a, b);
- System.out.println(res.toString());
+ EditList r = INSTANCE.diff(RawTextComparator.DEFAULT, a, b);
+ System.out.println(r.toString());
} catch (Exception e) {
e.printStackTrace();
}
* by the prior step 2 or 5.</li>
* </ol>
*/
-public class PatienceDiff implements DiffAlgorithm {
+public class PatienceDiff extends DiffAlgorithm {
/** Algorithm we use when there are no common unique lines in a region. */
private DiffAlgorithm fallback;
fallback = alg;
}
- public <S extends Sequence, C extends SequenceComparator<? super S>> EditList diff(
- C cmp, S a, S b) {
- Edit region = new Edit(0, a.size(), 0, b.size());
- region = cmp.reduceCommonStartEnd(a, b, region);
-
- switch (region.getType()) {
- case INSERT:
- case DELETE: {
- EditList r = new EditList();
- r.add(region);
- return r;
- }
-
- case REPLACE: {
- SubsequenceComparator<S> cs = new SubsequenceComparator<S>(cmp);
- Subsequence<S> as = Subsequence.a(a, region);
- Subsequence<S> bs = Subsequence.b(b, region);
- return Subsequence.toBase(diffImpl(cs, as, bs), as, bs);
- }
-
- case EMPTY:
- return new EditList();
-
- default:
- throw new IllegalStateException();
- }
- }
-
- private <S extends Sequence, C extends SequenceComparator<? super S>> EditList diffImpl(
- C cmp, S a, S b) {
+ /**
+ * Run this algorithm over the whole of {@code a} and {@code b}.
+ * <p>
+ * Both sequences are hashed through a {@code HashedSequencePair}, and an
+ * edit covering them in full is passed straight to
+ * {@code State.diffReplace}, bypassing the type switch in
+ * {@code State.diff}. NOTE(review): this appears to rely on the caller
+ * having already established that the region is a REPLACE; confirm.
+ *
+ * @param cmp
+ * the comparator supplying the element equivalence function.
+ * @param a
+ * the first sequence to compare.
+ * @param b
+ * the second sequence to compare.
+ * @return the edit list accumulated in the {@code State}.
+ */
+ public <S extends Sequence> EditList diffNonCommon(
+ SequenceComparator<? super S> cmp, S a, S b) {
State<S> s = new State<S>(new HashedSequencePair<S>(cmp, a, b));
- s.diff(new Edit(0, s.a.size(), 0, s.b.size()), null, 0, 0);
+ s.diffReplace(new Edit(0, s.a.size(), 0, s.b.size()), null, 0, 0);
return s.edits;
}
this.edits = new EditList();
}
- private void diff(Edit r, long[] pCommon, int pIdx, int pEnd) {
- switch (r.getType()) {
- case INSERT:
- case DELETE:
- edits.add(r);
- return;
-
- case REPLACE:
- break;
-
- case EMPTY:
- default:
- throw new IllegalStateException();
- }
-
+ void diffReplace(Edit r, long[] pCommon, int pIdx, int pEnd) {
PatienceDiffIndex<S> p;
+ Edit lcs;
p = new PatienceDiffIndex<S>(cmp, a, b, r, pCommon, pIdx, pEnd);
- Edit lcs = p.findLongestCommonSequence();
+ lcs = p.findLongestCommonSequence();
if (lcs != null) {
pCommon = p.nCommon;
diff(r.after(lcs), pCommon, pIdx + 1, pEnd);
} else if (fallback != null) {
- p = null;
pCommon = null;
+ p = null;
- SubsequenceComparator<HashedSequence<S>> cs;
- cs = new SubsequenceComparator<HashedSequence<S>>(cmp);
-
+ SubsequenceComparator<HashedSequence<S>> cs = subcmp();
Subsequence<HashedSequence<S>> as = Subsequence.a(a, r);
Subsequence<HashedSequence<S>> bs = Subsequence.b(b, r);
- EditList res = fallback.diff(cs, as, bs);
+
+ EditList res = fallback.diffNonCommon(cs, as, bs);
edits.addAll(Subsequence.toBase(res, as, bs));
} else {
edits.add(r);
}
}
+
+ /**
+  * Dispatch a region according to its edit type.
+  *
+  * INSERT and DELETE regions are appended to {@code edits} unchanged,
+  * REPLACE regions are handed to {@code diffReplace}, and any other type
+  * (including EMPTY) raises {@link IllegalStateException}.
+  */
+ private void diff(Edit r, long[] pCommon, int pIdx, int pEnd) {
+     switch (r.getType()) {
+     case REPLACE:
+         diffReplace(r, pCommon, pIdx, pEnd);
+         return;
+
+     case INSERT:
+     case DELETE:
+         edits.add(r);
+         return;
+
+     case EMPTY:
+     default:
+         throw new IllegalStateException();
+     }
+ }
+
+
+ /** Wrap this state's comparator so that it can compare subsequences. */
+ private SubsequenceComparator<HashedSequence<S>> subcmp() {
+     SubsequenceComparator<HashedSequence<S>> wrapped;
+     wrapped = new SubsequenceComparator<HashedSequence<S>>(cmp);
+     return wrapped;
+ }
}
}