@@ -64,7 +64,7 @@ import org.eclipse.jgit.treewalk.filter.PathFilter; | |||
/** | |||
* Supplies the content of a file for {@link DiffFormatter}. | |||
* | |||
* <p> | |||
* A content source is not thread-safe. Sources may contain state, including | |||
* information about the last ObjectLoader they returned. Callers must be | |||
* careful to ensure there is no more than one ObjectLoader pending on any |
@@ -45,7 +45,7 @@ package org.eclipse.jgit.diff; | |||
/** | |||
* Compares two {@link Sequence}s to create an {@link EditList} of changes. | |||
* | |||
* <p> | |||
* An algorithm's {@code diff} method must be callable from concurrent threads | |||
* without data collisions. This permits some algorithms to use a singleton | |||
* pattern, with concurrent invocations using the same singleton. Other |
@@ -45,11 +45,11 @@ package org.eclipse.jgit.diff; | |||
/** | |||
* Wraps a {@link Sequence} to assign hash codes to elements. | |||
* | |||
* <p> | |||
* This sequence acts as a proxy for the real sequence, caching element hash | |||
* codes so they don't need to be recomputed each time. Sequences of this type | |||
* must be used with a {@link HashedSequenceComparator}. | |||
* | |||
* <p> | |||
* To construct an instance of this type use {@link HashedSequencePair}. | |||
* | |||
* @param <S> |
@@ -45,11 +45,11 @@ package org.eclipse.jgit.diff; | |||
/** | |||
* Wrap another comparator for use with {@link HashedSequence}. | |||
* | |||
* <p> | |||
* This comparator acts as a proxy for the real comparator, evaluating the | |||
* cached hash code before testing the underlying comparator's equality. | |||
* Comparators of this type must be used with a {@link HashedSequence}. | |||
* | |||
* <p> | |||
* To construct an instance of this type use {@link HashedSequencePair}. | |||
* | |||
* @param <S> |
@@ -45,7 +45,7 @@ package org.eclipse.jgit.diff; | |||
/** | |||
* Wraps two {@link Sequence} instances to cache their element hash codes. | |||
* | |||
* <p> | |||
* This pair wraps two sequences that contain cached hash codes for the input | |||
* sequences. | |||
* |
@@ -45,11 +45,11 @@ package org.eclipse.jgit.diff; | |||
/** | |||
* An extended form of Bram Cohen's patience diff algorithm. | |||
* | |||
* <p> | |||
* This implementation was derived by using the 4 rules that are outlined in | |||
* Bram Cohen's <a href="http://bramcohen.livejournal.com/73318.html">blog</a>, | |||
* and then was further extended to support low-occurrence common elements. | |||
* | |||
* <p> | |||
* The basic idea of the algorithm is to create a histogram of occurrences for | |||
* each element of sequence A. Each element of sequence B is then considered in | |||
* turn. If the element also exists in sequence A, and has a lower occurrence | |||
@@ -58,34 +58,34 @@ package org.eclipse.jgit.diff; | |||
* lowest number of occurrences is chosen as a split point. The region is split | |||
* around the LCS, and the algorithm is recursively applied to the sections | |||
* before and after the LCS. | |||
* | |||
* <p> | |||
* By always selecting an LCS position with the lowest occurrence count, this | |||
* algorithm behaves exactly like Bram Cohen's patience diff whenever there is a | |||
* unique common element available between the two sequences. When no unique | |||
* elements exist, the lowest occurrence element is chosen instead. This offers | |||
* more readable diffs than simply falling back on the standard Myers' O(ND) | |||
* algorithm would produce. | |||
* | |||
* <p> | |||
* To prevent the algorithm from having an O(N^2) running time, an upper limit | |||
* on the number of unique elements in a histogram bucket is configured by | |||
* {@link #setMaxChainLength(int)}. If sequence A has more than this many | |||
* elements that hash into the same hash bucket, the algorithm passes the region | |||
* to {@link #setFallbackAlgorithm(DiffAlgorithm)}. If no fallback algorithm is | |||
* configured, the region is emitted as a replace edit. | |||
* | |||
* <p> | |||
* During scanning of sequence B, any element of A that occurs more than | |||
* {@link #setMaxChainLength(int)} times is never considered for an LCS match | |||
* position, even if it is common between the two sequences. This limits the | |||
* number of locations in sequence A that must be considered to find the LCS, | |||
* and helps maintain a lower running time bound. | |||
* | |||
* <p> | |||
* So long as {@link #setMaxChainLength(int)} is a small constant (such as 64), | |||
* the algorithm runs in O(N * D) time, where N is the sum of the input lengths | |||
* and D is the number of edits in the resulting EditList. If the supplied | |||
* {@link SequenceComparator} has a good hash function, this implementation | |||
* typically outperforms {@link MyersDiff}, even though its theoretical running | |||
* time is the same. | |||
* | |||
* <p> | |||
* This implementation has an internal limitation that prevents it from handling | |||
* sequences with more than 268,435,456 (2^28) elements. | |||
*/ |
@@ -47,7 +47,7 @@ import org.eclipse.jgit.internal.JGitText; | |||
/** | |||
* Support {@link HistogramDiff} by computing occurrence counts of elements. | |||
* | |||
* <p> | |||
* Each element in the range being considered is put into a hash table, tracking | |||
* the number of times that distinct element appears in the sequence. Once all | |||
* elements have been inserted from sequence A, each element of sequence B is |
@@ -51,56 +51,60 @@ import org.eclipse.jgit.util.IntList; | |||
import org.eclipse.jgit.util.LongList; | |||
/** | |||
* Diff algorithm, based on "An O(ND) Difference Algorithm and its | |||
* Variations", by Eugene Myers. | |||
* | |||
* Diff algorithm, based on "An O(ND) Difference Algorithm and its Variations", | |||
* by Eugene Myers. | |||
* <p> | |||
* The basic idea is to put the line numbers of text A as columns ("x") and the | |||
* lines of text B as rows ("y"). Now you try to find the shortest "edit path" | |||
* from the upper left corner to the lower right corner, where you can | |||
* always go horizontally or vertically, but diagonally from (x,y) to | |||
* (x+1,y+1) only if line x in text A is identical to line y in text B. | |||
* | |||
* Myers' fundamental concept is the "furthest reaching D-path on diagonal k": | |||
* a D-path is an edit path starting at the upper left corner and containing | |||
* exactly D non-diagonal elements ("differences"). The furthest reaching | |||
* D-path on diagonal k is the one that contains the most (diagonal) elements | |||
* which ends on diagonal k (where k = y - x). | |||
* | |||
* lines of text B as rows ("y"). Now you try to find the shortest "edit path" | |||
* from the upper left corner to the lower right corner, where you can always go | |||
* horizontally or vertically, but diagonally from (x,y) to (x+1,y+1) only if | |||
* line x in text A is identical to line y in text B. | |||
* <p> | |||
* Myers' fundamental concept is the "furthest reaching D-path on diagonal k": a | |||
* D-path is an edit path starting at the upper left corner and containing | |||
* exactly D non-diagonal elements ("differences"). The furthest reaching D-path | |||
* on diagonal k is the one that contains the most (diagonal) elements which | |||
* ends on diagonal k (where k = y - x). | |||
* <p> | |||
* Example: | |||
* | |||
* <pre> | |||
* H E L L O W O R L D | |||
* ____ | |||
* L \___ | |||
* O \___ | |||
* W \________ | |||
* | |||
* Since every D-path has exactly D horizontal or vertical elements, it can | |||
* only end on the diagonals -D, -D+2, ..., D-2, D. | |||
* | |||
* Since every furthest reaching D-path contains at least one furthest | |||
* reaching (D-1)-path (except for D=0), we can construct them recursively. | |||
* | |||
* </pre> | |||
* <p> | |||
* Since every D-path has exactly D horizontal or vertical elements, it can only | |||
* end on the diagonals -D, -D+2, ..., D-2, D. | |||
* <p> | |||
* Since every furthest reaching D-path contains at least one furthest reaching | |||
* (D-1)-path (except for D=0), we can construct them recursively. | |||
* <p> | |||
* Since we are really interested in the shortest edit path, we can start | |||
* looking for a 0-path, then a 1-path, and so on, until we find a path that | |||
* ends in the lower right corner. | |||
* | |||
* <p> | |||
* To save space, we do not need to store all paths (which has quadratic space | |||
* requirements), but generate the D-paths simultaneously from both sides. | |||
* When the ends meet, we will have found "the middle" of the path. From the | |||
* end points of that diagonal part, we can generate the rest recursively. | |||
* | |||
* requirements), but generate the D-paths simultaneously from both sides. When | |||
* the ends meet, we will have found "the middle" of the path. From the end | |||
* points of that diagonal part, we can generate the rest recursively. | |||
* <p> | |||
* This only requires linear space. | |||
* <p> | |||
* The overall (runtime) complexity is: | |||
* | |||
* The overall (runtime) complexity is | |||
* | |||
* O(N * D^2 + 2 * N/2 * (D/2)^2 + 4 * N/4 * (D/4)^2 + ...) | |||
* = O(N * D^2 * 4 / 3) = O(N * D^2), | |||
* | |||
* (With each step, we have to find the middle parts of twice as many regions | |||
* as before, but the regions (as well as the D) are halved.) | |||
* | |||
* So the overall runtime complexity stays the same with linear space, | |||
* albeit with a larger constant factor. | |||
* <pre> | |||
* O(N * D^2 + 2 * N/2 * (D/2)^2 + 4 * N/4 * (D/4)^2 + ...) | |||
* = O(N * D^2 * 4 / 3) = O(N * D^2), | |||
* </pre> | |||
* <p> | |||
* (With each step, we have to find the middle parts of twice as many regions as | |||
* before, but the regions (as well as the D) are halved.) | |||
* <p> | |||
* So the overall runtime complexity stays the same with linear space, albeit | |||
* with a larger constant factor. | |||
* | |||
* @param <S> | |||
* type of sequence. |
@@ -46,15 +46,15 @@ package org.eclipse.jgit.diff; | |||
/** | |||
* Arbitrary sequence of elements. | |||
* | |||
* <p> | |||
* A sequence of elements is defined to contain elements in the index range | |||
* <code>[0, {@link #size()})</code>, like a standard Java List implementation. | |||
* Unlike a List, the members of the sequence are not directly obtainable. | |||
* | |||
* <p> | |||
* Implementations of Sequence are primarily intended for use in content | |||
* difference detection algorithms, to produce an {@link EditList} of | |||
* {@link Edit} instances describing how two Sequence instances differ. | |||
* | |||
* <p> | |||
* To be compared against another Sequence of the same type, a supporting | |||
* {@link SequenceComparator} must also be supplied. | |||
*/ |
@@ -45,11 +45,11 @@ package org.eclipse.jgit.diff; | |||
/** | |||
* Equivalence function for a {@link Sequence} compared by a difference algorithm. | |||
* | |||
* <p> | |||
* Difference algorithms can use a comparator to compare portions of two | |||
* sequences and discover the minimal edits required to transform from one | |||
* sequence to the other sequence. | |||
* | |||
* <p> | |||
* Indexes within a sequence are zero-based. | |||
* | |||
* @param <S> |
@@ -45,7 +45,7 @@ package org.eclipse.jgit.diff; | |||
/** | |||
* Wraps a {@link Sequence} to have a narrower range of elements. | |||
* | |||
* <p> | |||
* This sequence acts as a proxy for the real sequence, translating element | |||
* indexes on the fly by adding {@code begin} to them. Sequences of this type | |||
* must be used with a {@link SubsequenceComparator}. | |||
@@ -56,7 +56,7 @@ package org.eclipse.jgit.diff; | |||
public final class Subsequence<S extends Sequence> extends Sequence { | |||
/** | |||
* Construct a subsequence around the A region/base sequence. | |||
* | |||
* | |||
* @param <S> | |||
* the base sequence type. | |||
* @param a | |||
@@ -71,7 +71,7 @@ public final class Subsequence<S extends Sequence> extends Sequence { | |||
/** | |||
* Construct a subsequence around the B region/base sequence. | |||
* | |||
* | |||
* @param <S> | |||
* the base sequence type. | |||
* @param b | |||
@@ -86,7 +86,7 @@ public final class Subsequence<S extends Sequence> extends Sequence { | |||
/** | |||
* Adjust the Edit to reflect positions in the base sequence. | |||
* | |||
* | |||
* @param <S> | |||
* the base sequence type. | |||
* @param e |
@@ -45,7 +45,7 @@ package org.eclipse.jgit.diff; | |||
/** | |||
* Wrap another comparator for use with {@link Subsequence}. | |||
* | |||
* <p> | |||
* This comparator acts as a proxy for the real comparator, translating element | |||
* indexes on the fly by adding the subsequence's begin offset to them. | |||
* Comparators of this type must be used with a {@link Subsequence}. |