]> source.dussan.org Git - jgit.git/commitdiff
Perform common start/end elimination by default for DiffAlgorithm 39/1639/2
author Shawn O. Pearce <spearce@spearce.org>
Tue, 21 Sep 2010 15:12:51 +0000 (08:12 -0700)
committer Shawn O. Pearce <spearce@spearce.org>
Wed, 22 Sep 2010 01:00:59 +0000 (18:00 -0700)
As it turns out, every single diff algorithm we might try to
implement can benefit from using the SequenceComparator's native
concept of the simple reduceCommonStartEnd() step.  For most inputs,
there can be a significant number of elements that can be removed
from the space the DiffAlgorithm needs to consider, which will
reduce the overall running time for the final solution.

Pool this logic inside of DiffAlgorithm itself as a default, but
permit a specific algorithm to override it when necessary.

Convert MyersDiff to use this reduction to reduce the space it
needs to search, making it perform slightly better on common inputs.

Change-Id: I14004d771117e4a4ab2a02cace8deaeda9814bc1
Signed-off-by: Shawn O. Pearce <spearce@spearce.org>
org.eclipse.jgit/src/org/eclipse/jgit/diff/DiffAlgorithm.java
org.eclipse.jgit/src/org/eclipse/jgit/diff/EditList.java
org.eclipse.jgit/src/org/eclipse/jgit/diff/MyersDiff.java
org.eclipse.jgit/src/org/eclipse/jgit/diff/PatienceDiff.java

index 94cb0b60f8bc57451bc7a24a7dfe49d97d1ca636..2fa89cc58dbede32b98ee0904ff2994964884522 100644 (file)
@@ -52,14 +52,12 @@ package org.eclipse.jgit.diff;
  * algorithms may support parameterization, in which case the caller can create
  * a unique instance per thread.
  */
-public interface DiffAlgorithm {
+public abstract class DiffAlgorithm {
        /**
         * Compare two sequences and identify a list of edits between them.
-        * 
+        *
         * @param <S>
         *            type of sequence being compared.
-        * @param <C>
-        *            type of comparator to evaluate the sequence elements.
         * @param cmp
         *            the comparator supplying the element equivalence function.
         * @param a
@@ -74,6 +72,57 @@ public interface DiffAlgorithm {
         *         sequences are identical according to {@code cmp}'s rules. The
         *         result list is never null.
         */
-       public <S extends Sequence, C extends SequenceComparator<? super S>> EditList diff(
-                       C cmp, S a, S b);
+       public <S extends Sequence> EditList diff(
+                       SequenceComparator<? super S> cmp, S a, S b) {
+               Edit region = cmp.reduceCommonStartEnd(a, b, coverEdit(a, b));
+
+               switch (region.getType()) {
+               case INSERT:
+               case DELETE:
+                       return EditList.singleton(region);
+
+               case REPLACE: {
+                       SubsequenceComparator<S> cs = new SubsequenceComparator<S>(cmp);
+                       Subsequence<S> as = Subsequence.a(a, region);
+                       Subsequence<S> bs = Subsequence.b(b, region);
+                       return Subsequence.toBase(diffNonCommon(cs, as, bs), as, bs);
+               }
+
+               case EMPTY:
+                       return new EditList(0);
+
+               default:
+                       throw new IllegalStateException();
+               }
+       }
+
+       private static <S extends Sequence> Edit coverEdit(S a, S b) {
+               return new Edit(0, a.size(), 0, b.size());
+       }
+
+       /**
+        * Compare two sequences and identify a list of edits between them.
+        *
+        * This method should be invoked only after the two sequences have been
+        * proven to have no common starting or ending elements. The expected
+        * elimination of common starting and ending elements is automatically
+        * performed by the {@link #diff(SequenceComparator, Sequence, Sequence)}
+        * method, which invokes this method using {@link Subsequence}s.
+        *
+        * @param <S>
+        *            type of sequence being compared.
+        * @param cmp
+        *            the comparator supplying the element equivalence function.
+        * @param a
+        *            the first (also known as old or pre-image) sequence. Edits
+        *            returned by this algorithm will reference indexes using the
+        *            'A' side: {@link Edit#getBeginA()}, {@link Edit#getEndA()}.
+        * @param b
+        *            the second (also known as new or post-image) sequence. Edits
+        *            returned by this algorithm will reference indexes using the
+        *            'B' side: {@link Edit#getBeginB()}, {@link Edit#getEndB()}.
+        * @return a modifiable edit list comparing the two sequences.
+        */
+       public abstract <S extends Sequence> EditList diffNonCommon(
+                       SequenceComparator<? super S> cmp, S a, S b);
 }
index 85a53964406fefe46c083e6feb92544534c9e329..a8088207f9b72a381afcf914f748cbaa77cdd39b 100644 (file)
@@ -48,6 +48,19 @@ import java.util.ArrayList;
 
 /** Specialized list of {@link Edit}s in a document. */
 public class EditList extends AbstractList<Edit> {
+       /**
+        * Construct an edit list containing a single edit.
+        *
+        * @param edit
+        *            the edit to return in the list.
+        * @return list containing only {@code edit}.
+        */
+       public static EditList singleton(Edit edit) {
+               EditList res = new EditList(1);
+               res.add(edit);
+               return res;
+       }
+
        private final ArrayList<Edit> container;
 
        /** Create a new, empty edit list. */
@@ -55,6 +68,17 @@ public class EditList extends AbstractList<Edit> {
                container = new ArrayList<Edit>();
        }
 
+       /**
+        * Create an empty edit list with the specified capacity.
+        *
+        * @param capacity
+        *            the initial capacity of the edit list. If additional edits are
+        *            added to the list, it will be grown to support them.
+        */
+       public EditList(int capacity) {
+               container = new ArrayList<Edit>(capacity);
+       }
+
        @Override
        public int size() {
                return container.size();
index 3fad2c349e4dd7b6faf7b6047c803dd9103547a9..821d06be296dab965e9351646616457ccf7ad902 100644 (file)
@@ -108,28 +108,9 @@ import org.eclipse.jgit.util.LongList;
 public class MyersDiff<S extends Sequence> {
        /** Singleton instance of MyersDiff. */
        public static final DiffAlgorithm INSTANCE = new DiffAlgorithm() {
-               public <S extends Sequence, C extends SequenceComparator<? super S>> EditList diff(
-                               C cmp, S a, S b) {
-                       Edit region = new Edit(0, a.size(), 0, b.size());
-                       region = cmp.reduceCommonStartEnd(a, b, region);
-
-                       switch (region.getType()) {
-                       case INSERT:
-                       case DELETE: {
-                               EditList r = new EditList();
-                               r.add(region);
-                               return r;
-                       }
-
-                       case REPLACE:
-                               return new MyersDiff<S>(cmp, a, b, region).getEdits();
-
-                       case EMPTY:
-                               return new EditList();
-
-                       default:
-                               throw new IllegalStateException();
-                       }
+               public <S extends Sequence> EditList diffNonCommon(
+                               SequenceComparator<? super S> cmp, S a, S b) {
+                       return new MyersDiff<S>(cmp, a, b).edits;
                }
        };
 
@@ -139,38 +120,27 @@ public class MyersDiff<S extends Sequence> {
        protected EditList edits;
 
        /** Comparison function for sequences. */
-       protected HashedSequenceComparator<Subsequence<S>> cmp;
+       protected HashedSequenceComparator<S> cmp;
 
        /**
         * The first text to be compared. Referred to as "Text A" in the comments
         */
-       protected HashedSequence<Subsequence<S>> a;
+       protected HashedSequence<S> a;
 
        /**
         * The second text to be compared. Referred to as "Text B" in the comments
         */
-       protected HashedSequence<Subsequence<S>> b;
+       protected HashedSequence<S> b;
 
-       private MyersDiff(SequenceComparator<? super S> cmp, S a, S b, Edit region) {
-               Subsequence<S> as = Subsequence.a(a, region);
-               Subsequence<S> bs = Subsequence.b(b, region);
-
-               HashedSequencePair<Subsequence<S>> pair = new HashedSequencePair<Subsequence<S>>(
-                               new SubsequenceComparator<S>(cmp), as, bs);
+       private MyersDiff(SequenceComparator<? super S> cmp, S a, S b) {
+               HashedSequencePair<S> pair;
 
+               pair = new HashedSequencePair<S>(cmp, a, b);
                this.cmp = pair.getComparator();
                this.a = pair.getA();
                this.b = pair.getB();
 
                calculateEdits();
-               Subsequence.toBase(edits, as, bs);
-       }
-
-       /**
-        * @return the list of edits found during the last call to {@link #calculateEdits()}
-        */
-       public EditList getEdits() {
-               return edits;
        }
 
        // TODO: use ThreadLocal for future multi-threaded operations
@@ -565,8 +535,8 @@ if (k < beginK || k > endK)
                try {
                        RawText a = new RawText(new java.io.File(args[0]));
                        RawText b = new RawText(new java.io.File(args[1]));
-                       EditList res = INSTANCE.diff(RawTextComparator.DEFAULT, a, b);
-                       System.out.println(res.toString());
+                       EditList r = INSTANCE.diff(RawTextComparator.DEFAULT, a, b);
+                       System.out.println(r.toString());
                } catch (Exception e) {
                        e.printStackTrace();
                }
index 44e1f79bb4d9e62a42f5023b4a41b16933ad3d74..571a498ae90a943651bee437a6d31a30cfbc3ea7 100644 (file)
@@ -99,7 +99,7 @@ package org.eclipse.jgit.diff;
  * by the prior step 2 or 5.</li>
  * </ol>
  */
-public class PatienceDiff implements DiffAlgorithm {
+public class PatienceDiff extends DiffAlgorithm {
        /** Algorithm we use when there are no common unique lines in a region. */
        private DiffAlgorithm fallback;
 
@@ -114,38 +114,10 @@ public class PatienceDiff implements DiffAlgorithm {
                fallback = alg;
        }
 
-       public <S extends Sequence, C extends SequenceComparator<? super S>> EditList diff(
-                       C cmp, S a, S b) {
-               Edit region = new Edit(0, a.size(), 0, b.size());
-               region = cmp.reduceCommonStartEnd(a, b, region);
-
-               switch (region.getType()) {
-               case INSERT:
-               case DELETE: {
-                       EditList r = new EditList();
-                       r.add(region);
-                       return r;
-               }
-
-               case REPLACE: {
-                       SubsequenceComparator<S> cs = new SubsequenceComparator<S>(cmp);
-                       Subsequence<S> as = Subsequence.a(a, region);
-                       Subsequence<S> bs = Subsequence.b(b, region);
-                       return Subsequence.toBase(diffImpl(cs, as, bs), as, bs);
-               }
-
-               case EMPTY:
-                       return new EditList();
-
-               default:
-                       throw new IllegalStateException();
-               }
-       }
-
-       private <S extends Sequence, C extends SequenceComparator<? super S>> EditList diffImpl(
-                       C cmp, S a, S b) {
+       public <S extends Sequence> EditList diffNonCommon(
+                       SequenceComparator<? super S> cmp, S a, S b) {
                State<S> s = new State<S>(new HashedSequencePair<S>(cmp, a, b));
-               s.diff(new Edit(0, s.a.size(), 0, s.b.size()), null, 0, 0);
+               s.diffReplace(new Edit(0, s.a.size(), 0, s.b.size()), null, 0, 0);
                return s.edits;
        }
 
@@ -166,25 +138,12 @@ public class PatienceDiff implements DiffAlgorithm {
                        this.edits = new EditList();
                }
 
-               private void diff(Edit r, long[] pCommon, int pIdx, int pEnd) {
-                       switch (r.getType()) {
-                       case INSERT:
-                       case DELETE:
-                               edits.add(r);
-                               return;
-
-                       case REPLACE:
-                               break;
-
-                       case EMPTY:
-                       default:
-                               throw new IllegalStateException();
-                       }
-
+               void diffReplace(Edit r, long[] pCommon, int pIdx, int pEnd) {
                        PatienceDiffIndex<S> p;
+                       Edit lcs;
 
                        p = new PatienceDiffIndex<S>(cmp, a, b, r, pCommon, pIdx, pEnd);
-                       Edit lcs = p.findLongestCommonSequence();
+                       lcs = p.findLongestCommonSequence();
 
                        if (lcs != null) {
                                pCommon = p.nCommon;
@@ -196,20 +155,40 @@ public class PatienceDiff implements DiffAlgorithm {
                                diff(r.after(lcs), pCommon, pIdx + 1, pEnd);
 
                        } else if (fallback != null) {
-                               p = null;
                                pCommon = null;
+                               p = null;
 
-                               SubsequenceComparator<HashedSequence<S>> cs;
-                               cs = new SubsequenceComparator<HashedSequence<S>>(cmp);
-
+                               SubsequenceComparator<HashedSequence<S>> cs = subcmp();
                                Subsequence<HashedSequence<S>> as = Subsequence.a(a, r);
                                Subsequence<HashedSequence<S>> bs = Subsequence.b(b, r);
-                               EditList res = fallback.diff(cs, as, bs);
+
+                               EditList res = fallback.diffNonCommon(cs, as, bs);
                                edits.addAll(Subsequence.toBase(res, as, bs));
 
                        } else {
                                edits.add(r);
                        }
                }
+
+               private void diff(Edit r, long[] pCommon, int pIdx, int pEnd) {
+                       switch (r.getType()) {
+                       case INSERT:
+                       case DELETE:
+                               edits.add(r);
+                               break;
+
+                       case REPLACE:
+                               diffReplace(r, pCommon, pIdx, pEnd);
+                               break;
+
+                       case EMPTY:
+                       default:
+                               throw new IllegalStateException();
+                       }
+               }
+
+               private SubsequenceComparator<HashedSequence<S>> subcmp() {
+                       return new SubsequenceComparator<HashedSequence<S>>(cmp);
+               }
        }
 }