summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--org.eclipse.jgit/src/org/eclipse/jgit/diff/DiffFormatter.java8
-rw-r--r--org.eclipse.jgit/src/org/eclipse/jgit/diff/MyersDiff.java113
-rw-r--r--org.eclipse.jgit/src/org/eclipse/jgit/diff/RawText.java1
3 files changed, 97 insertions, 25 deletions
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/diff/DiffFormatter.java b/org.eclipse.jgit/src/org/eclipse/jgit/diff/DiffFormatter.java
index a4edb3f2b5..115d9baff3 100644
--- a/org.eclipse.jgit/src/org/eclipse/jgit/diff/DiffFormatter.java
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/diff/DiffFormatter.java
@@ -114,6 +114,14 @@ public class DiffFormatter {
formatEdits(out, a, b, head.toEditList());
}
+ /**
+ * Formats a list of edits in unified diff format
+ * @param out where the unified diff is written to
+ * @param a the text A which was compared
+ * @param b the text B which was compared
+ * @param edits some differences which have been calculated between A and B
+ * @throws IOException
+ */
public void formatEdits(final OutputStream out, final RawText a,
final RawText b, final EditList edits) throws IOException {
for (int curIdx = 0; curIdx < edits.size();) {
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/diff/MyersDiff.java b/org.eclipse.jgit/src/org/eclipse/jgit/diff/MyersDiff.java
index a1c485aa04..055729961b 100644
--- a/org.eclipse.jgit/src/org/eclipse/jgit/diff/MyersDiff.java
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/diff/MyersDiff.java
@@ -44,23 +44,92 @@
package org.eclipse.jgit.diff;
-import java.util.ArrayList;
-import java.util.Iterator;
-import java.util.List;
-
import org.eclipse.jgit.util.IntList;
import org.eclipse.jgit.util.LongList;
+/**
+ * Diff algorithm, based on "An O(ND) Difference Algorithm and its
+ * Variations", by Eugene Myers.
+ *
+ * The basic idea is to put the line numbers of text A as columns ("x") and the
+ * lines of text B as rows ("y"). Now you try to find the shortest "edit path"
+ * from the upper left corner to the lower right corner, where you can
+ * always go horizontally or vertically, but diagonally from (x,y) to
+ * (x+1,y+1) only if line x in text A is identical to line y in text B.
+ *
+ * Myers' fundamental concept is the "furthest reaching D-path on diagonal k":
+ * a D-path is an edit path starting at the upper left corner and containing
+ * exactly D non-diagonal elements ("differences"). The furthest reaching
+ * D-path on diagonal k is the one that contains the most (diagonal) elements
+ * which ends on diagonal k (where k = y - x).
+ *
+ * Example:
+ *
+ * H E L L O W O R L D
+ * ____
+ * L \___
+ * O \___
+ * W \________
+ *
+ * Since every D-path has exactly D horizontal or vertical elements, it can
+ * only end on the diagonals -D, -D+2, ..., D-2, D.
+ *
+ * Since every furthest reaching D-path contains at least one furthest
+ * reaching (D-1)-path (except for D=0), we can construct them recursively.
+ *
+ * Since we are really interested in the shortest edit path, we can start
+ * looking for a 0-path, then a 1-path, and so on, until we find a path that
+ * ends in the lower right corner.
+ *
+ * To save space, we do not need to store all paths (which has quadratic space
+ * requirements), but generate the D-paths simultaneously from both sides.
+ * When the ends meet, we will have found "the middle" of the path. From the
+ * end points of that diagonal part, we can generate the rest recursively.
+ *
+ * This only requires linear space.
+ *
+ * The overall (runtime) complexity is
+ *
+ * O(N * D^2 + 2 * N/2 * (D/2)^2 + 4 * N/4 * (D/4)^2 + ...)
+ * = O(N * D^2 * 5 / 4) = O(N * D^2),
+ *
+ * (With each step, we have to find the middle parts of twice as many regions
+ * as before, but the regions (as well as the D) are halved.)
+ *
+ * So the overall runtime complexity stays the same with linear space,
+ * albeit with a larger constant factor.
+ */
public class MyersDiff {
+ /**
+ * The list of edits found during the last call to {@link #calculateEdits()}
+ */
protected EditList edits;
- protected Sequence a, b;
+ /**
+ * The first text to be compared. Referred to as "Text A" in the comments
+ */
+ protected Sequence a;
+
+ /**
+ * The second text to be compared. Referred to as "Text B" in the comments
+ */
+ protected Sequence b;
+
+ /**
+ * The only constructor
+ *
+ * @param a the text A which should be compared
+ * @param b the text B which should be compared
+ */
public MyersDiff(Sequence a, Sequence b) {
this.a = a;
this.b = b;
calculateEdits();
}
+ /**
+ * @return the list of edits found during the last call to {@link #calculateEdits()}
+ */
public EditList getEdits() {
return edits;
}
@@ -68,6 +137,10 @@ public class MyersDiff {
// TODO: use ThreadLocal for future multi-threaded operations
MiddleEdit middle = new MiddleEdit();
+ /**
+ * Entrypoint into the algorithm this class is all about. This method triggers that the
+ * differences between A and B are calculated in form of a list of edits.
+ */
protected void calculateEdits() {
edits = new EditList();
@@ -80,6 +153,13 @@ public class MyersDiff {
middle.beginB, middle.endB);
}
+ /**
+ * Calculates the differences between a given part of A against another given part of B
+ * @param beginA start of the part of A which should be compared (0<=beginA<sizeof(A))
+ * @param endA end of the part of A which should be compared (beginA<=endA<sizeof(A))
+ * @param beginB start of the part of B which should be compared (0<=beginB<sizeof(B))
+ * @param endB end of the part of B which should be compared (beginB<=endB<sizeof(B))
+ */
protected void calculateEdits(int beginA, int endA,
int beginB, int endB) {
Edit edit = middle.calculate(beginA, endA, beginB, endB);
@@ -435,26 +515,9 @@ if (k < beginK || k > endK)
}
}
- // debugging (TODO: remove)
- public void print(Sequence s, int begin, int end) {
- RawText raw = (RawText)s;
- try {
- while (begin < end) {
- System.err.print("" + begin + ": ");
- raw.writeLine(System.err, begin++);
- System.err.println("");
- }
- } catch (Exception e) { e.printStackTrace(); }
- }
-
- public void print(int beginA, int endA, int beginB, int endB) {
- System.err.println("<<<<<<");
- print(a, beginA, endA);
- System.err.println("======");
- print(b, beginB, endB);
- System.err.println(">>>>>>");
- }
-
+ /**
+ * @param args two filenames specifying the contents to be diffed
+ */
public static void main(String[] args) {
if (args.length != 2) {
System.err.println("Need 2 arguments");
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/diff/RawText.java b/org.eclipse.jgit/src/org/eclipse/jgit/diff/RawText.java
index 0bbbb1991a..9a206af190 100644
--- a/org.eclipse.jgit/src/org/eclipse/jgit/diff/RawText.java
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/diff/RawText.java
@@ -96,6 +96,7 @@ public class RawText implements Sequence {
*
* @param file
* the text file.
+ * @throws IOException if Exceptions occur while reading the file
*/
public RawText(File file) throws IOException {
this(readFile(file));