aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--org.eclipse.jgit/src/org/eclipse/jgit/diff/DiffFormatter.java4
-rw-r--r--org.eclipse.jgit/src/org/eclipse/jgit/diff/HashedSequence.java72
-rw-r--r--org.eclipse.jgit/src/org/eclipse/jgit/diff/HashedSequenceComparator.java77
-rw-r--r--org.eclipse.jgit/src/org/eclipse/jgit/diff/HashedSequencePair.java108
-rw-r--r--org.eclipse.jgit/src/org/eclipse/jgit/diff/MyersDiff.java47
-rw-r--r--org.eclipse.jgit/src/org/eclipse/jgit/diff/RawText.java14
-rw-r--r--org.eclipse.jgit/src/org/eclipse/jgit/diff/RawTextComparator.java33
7 files changed, 306 insertions, 49 deletions
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/diff/DiffFormatter.java b/org.eclipse.jgit/src/org/eclipse/jgit/diff/DiffFormatter.java
index ec1ced62a3..1aa76bc434 100644
--- a/org.eclipse.jgit/src/org/eclipse/jgit/diff/DiffFormatter.java
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/diff/DiffFormatter.java
@@ -882,8 +882,8 @@ public class DiffFormatter {
type = PatchType.BINARY;
} else {
- res.a = new RawText(comparator, aRaw);
- res.b = new RawText(comparator, bRaw);
+ res.a = new RawText(aRaw);
+ res.b = new RawText(bRaw);
editList = diff(res.a, res.b);
type = PatchType.UNIFIED;
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/diff/HashedSequence.java b/org.eclipse.jgit/src/org/eclipse/jgit/diff/HashedSequence.java
new file mode 100644
index 0000000000..2a26700c13
--- /dev/null
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/diff/HashedSequence.java
@@ -0,0 +1,72 @@
+/*
+ * Copyright (C) 2010, Google Inc.
+ * and other copyright owners as documented in the project's IP log.
+ *
+ * This program and the accompanying materials are made available
+ * under the terms of the Eclipse Distribution License v1.0 which
+ * accompanies this distribution, is reproduced below, and is
+ * available at http://www.eclipse.org/org/documents/edl-v10.php
+ *
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials provided
+ * with the distribution.
+ *
+ * - Neither the name of the Eclipse Foundation, Inc. nor the
+ * names of its contributors may be used to endorse or promote
+ * products derived from this software without specific prior
+ * written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
+ * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+package org.eclipse.jgit.diff;
+
+/**
+ * Wraps a {@link Sequence} to assign hash codes to elements.
+ *
+ * This sequence acts as a proxy for the real sequence, caching element hash
+ * codes so they don't need to be recomputed each time. Sequences of this type
+ * must be used with a {@link HashedSequenceComparator}.
+ *
+ * To construct an instance of this type use {@link HashedSequencePair}.
+ *
+ * @param <S>
+ * the base sequence type.
+ */
+public final class HashedSequence<S extends Sequence> extends Sequence {
+ final S base;
+
+ final int[] hashes;
+
+ HashedSequence(S base, int[] hashes) {
+ this.base = base;
+ this.hashes = hashes;
+ }
+
+ @Override
+ public int size() {
+ return base.size();
+ }
+}
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/diff/HashedSequenceComparator.java b/org.eclipse.jgit/src/org/eclipse/jgit/diff/HashedSequenceComparator.java
new file mode 100644
index 0000000000..2aa84e49ee
--- /dev/null
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/diff/HashedSequenceComparator.java
@@ -0,0 +1,77 @@
+/*
+ * Copyright (C) 2010, Google Inc.
+ * and other copyright owners as documented in the project's IP log.
+ *
+ * This program and the accompanying materials are made available
+ * under the terms of the Eclipse Distribution License v1.0 which
+ * accompanies this distribution, is reproduced below, and is
+ * available at http://www.eclipse.org/org/documents/edl-v10.php
+ *
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials provided
+ * with the distribution.
+ *
+ * - Neither the name of the Eclipse Foundation, Inc. nor the
+ * names of its contributors may be used to endorse or promote
+ * products derived from this software without specific prior
+ * written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
+ * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+package org.eclipse.jgit.diff;
+
+/**
+ * Wrap another comparator for use with {@link HashedSequence}.
+ *
+ * This comparator acts as a proxy for the real comparator, evaluating the
+ * cached hash code before testing the underlying comparator's equality.
+ * Comparators of this type must be used with a {@link HashedSequence}.
+ *
+ * To construct an instance of this type use {@link HashedSequencePair}.
+ *
+ * @param <S>
+ * the base sequence type.
+ */
+public final class HashedSequenceComparator<S extends Sequence> extends
+ SequenceComparator<HashedSequence<S>> {
+ private final SequenceComparator<? super S> cmp;
+
+ HashedSequenceComparator(SequenceComparator<? super S> cmp) {
+ this.cmp = cmp;
+ }
+
+ @Override
+ public boolean equals(HashedSequence<S> a, int ai, //
+ HashedSequence<S> b, int bi) {
+ return a.hashes[ai] == b.hashes[bi]
+ && cmp.equals(a.base, ai, b.base, bi);
+ }
+
+ @Override
+ public int hash(HashedSequence<S> seq, int ptr) {
+ return seq.hashes[ptr];
+ }
+}
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/diff/HashedSequencePair.java b/org.eclipse.jgit/src/org/eclipse/jgit/diff/HashedSequencePair.java
new file mode 100644
index 0000000000..b72202ffc0
--- /dev/null
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/diff/HashedSequencePair.java
@@ -0,0 +1,108 @@
+/*
+ * Copyright (C) 2010, Google Inc.
+ * and other copyright owners as documented in the project's IP log.
+ *
+ * This program and the accompanying materials are made available
+ * under the terms of the Eclipse Distribution License v1.0 which
+ * accompanies this distribution, is reproduced below, and is
+ * available at http://www.eclipse.org/org/documents/edl-v10.php
+ *
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials provided
+ * with the distribution.
+ *
+ * - Neither the name of the Eclipse Foundation, Inc. nor the
+ * names of its contributors may be used to endorse or promote
+ * products derived from this software without specific prior
+ * written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
+ * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+package org.eclipse.jgit.diff;
+
+/**
+ * Wraps two {@link Sequence} instances to cache their element hash codes.
+ *
+ * This pair wraps two sequences that contain cached hash codes for the input
+ * sequences.
+ *
+ * @param <S>
+ * the base sequence type.
+ */
+public class HashedSequencePair<S extends Sequence> {
+ private final SequenceComparator<? super S> cmp;
+
+ private final S baseA;
+
+ private final S baseB;
+
+ private HashedSequence<S> cachedA;
+
+ private HashedSequence<S> cachedB;
+
+ /**
+ * Construct a pair to provide fast hash codes.
+ *
+ * @param cmp
+ * the base comparator for the sequence elements.
+ * @param a
+ * the A sequence.
+ * @param b
+ * the B sequence.
+ */
+ public HashedSequencePair(SequenceComparator<? super S> cmp, S a, S b) {
+ this.cmp = cmp;
+ this.baseA = a;
+ this.baseB = b;
+ }
+
+ /** @return obtain a comparator that uses the cached hash codes. */
+ public HashedSequenceComparator<S> getComparator() {
+ return new HashedSequenceComparator<S>(cmp);
+ }
+
+ /** @return wrapper around A that includes cached hash codes. */
+ public HashedSequence<S> getA() {
+ if (cachedA == null)
+ cachedA = wrap(baseA);
+ return cachedA;
+ }
+
+ /** @return wrapper around B that includes cached hash codes. */
+ public HashedSequence<S> getB() {
+ if (cachedB == null)
+ cachedB = wrap(baseB);
+ return cachedB;
+ }
+
+ private HashedSequence<S> wrap(S base) {
+ final int end = base.size();
+ final int[] hashes = new int[end];
+ for (int ptr = 0; ptr < end; ptr++)
+ hashes[ptr] = cmp.hash(base, ptr);
+ return new HashedSequence<S>(base, hashes);
+ }
+}
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/diff/MyersDiff.java b/org.eclipse.jgit/src/org/eclipse/jgit/diff/MyersDiff.java
index 45abb58cdd..3fad2c349e 100644
--- a/org.eclipse.jgit/src/org/eclipse/jgit/diff/MyersDiff.java
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/diff/MyersDiff.java
@@ -110,7 +110,26 @@ public class MyersDiff<S extends Sequence> {
public static final DiffAlgorithm INSTANCE = new DiffAlgorithm() {
public <S extends Sequence, C extends SequenceComparator<? super S>> EditList diff(
C cmp, S a, S b) {
- return new MyersDiff<S>(cmp, a, b).getEdits();
+ Edit region = new Edit(0, a.size(), 0, b.size());
+ region = cmp.reduceCommonStartEnd(a, b, region);
+
+ switch (region.getType()) {
+ case INSERT:
+ case DELETE: {
+ EditList r = new EditList();
+ r.add(region);
+ return r;
+ }
+
+ case REPLACE:
+ return new MyersDiff<S>(cmp, a, b, region).getEdits();
+
+ case EMPTY:
+ return new EditList();
+
+ default:
+ throw new IllegalStateException();
+ }
}
};
@@ -120,23 +139,31 @@ public class MyersDiff<S extends Sequence> {
protected EditList edits;
/** Comparison function for sequences. */
- protected SequenceComparator<? super S> cmp;
+ protected HashedSequenceComparator<Subsequence<S>> cmp;
/**
* The first text to be compared. Referred to as "Text A" in the comments
*/
- protected S a;
+ protected HashedSequence<Subsequence<S>> a;
/**
* The second text to be compared. Referred to as "Text B" in the comments
*/
- protected S b;
+ protected HashedSequence<Subsequence<S>> b;
+
+ private MyersDiff(SequenceComparator<? super S> cmp, S a, S b, Edit region) {
+ Subsequence<S> as = Subsequence.a(a, region);
+ Subsequence<S> bs = Subsequence.b(b, region);
+
+ HashedSequencePair<Subsequence<S>> pair = new HashedSequencePair<Subsequence<S>>(
+ new SubsequenceComparator<S>(cmp), as, bs);
+
+ this.cmp = pair.getComparator();
+ this.a = pair.getA();
+ this.b = pair.getB();
- private MyersDiff(SequenceComparator<? super S> cmp, S a, S b) {
- this.cmp = cmp;
- this.a = a;
- this.b = b;
calculateEdits();
+ Subsequence.toBase(edits, as, bs);
}
/**
@@ -538,8 +565,8 @@ if (k < beginK || k > endK)
try {
RawText a = new RawText(new java.io.File(args[0]));
RawText b = new RawText(new java.io.File(args[1]));
- MyersDiff diff = new MyersDiff(RawTextComparator.DEFAULT, a, b);
- System.out.println(diff.getEdits().toString());
+ EditList res = INSTANCE.diff(RawTextComparator.DEFAULT, a, b);
+ System.out.println(res.toString());
} catch (Exception e) {
e.printStackTrace();
}
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/diff/RawText.java b/org.eclipse.jgit/src/org/eclipse/jgit/diff/RawText.java
index 07d28572ad..7d09874501 100644
--- a/org.eclipse.jgit/src/org/eclipse/jgit/diff/RawText.java
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/diff/RawText.java
@@ -75,9 +75,6 @@ public class RawText extends Sequence {
/** Map of line number to starting position within {@link #content}. */
protected final IntList lines;
- /** Hash code for each line, for fast equality elimination. */
- protected final int[] hashes;
-
/**
* Create a new sequence from an existing content byte array.
* <p>
@@ -105,7 +102,6 @@ public class RawText extends Sequence {
public RawText(RawTextComparator cmp, byte[] input) {
content = input;
lines = RawParseUtils.lineMap(content, 0, content.length);
- hashes = computeHashes(cmp);
}
/**
@@ -170,16 +166,6 @@ public class RawText extends Sequence {
return content[end - 1] != '\n';
}
- private int[] computeHashes(RawTextComparator cmp) {
- final int[] r = new int[lines.size()];
- for (int lno = 1; lno < lines.size() - 1; lno++) {
- final int ptr = lines.get(lno);
- final int end = lines.get(lno + 1);
- r[lno] = cmp.hashRegion(content, ptr, end);
- }
- return r;
- }
-
/**
* Determine heuristically whether a byte array represents binary (as
* opposed to text) content.
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/diff/RawTextComparator.java b/org.eclipse.jgit/src/org/eclipse/jgit/diff/RawTextComparator.java
index 767bf61f7a..a1e2e6cece 100644
--- a/org.eclipse.jgit/src/org/eclipse/jgit/diff/RawTextComparator.java
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/diff/RawTextComparator.java
@@ -59,9 +59,6 @@ public abstract class RawTextComparator extends SequenceComparator<RawText> {
ai++;
bi++;
- if (a.hashes[ai] != b.hashes[bi])
- return false;
-
int as = a.lines.get(ai);
int bs = b.lines.get(bi);
final int ae = a.lines.get(ai + 1);
@@ -78,7 +75,7 @@ public abstract class RawTextComparator extends SequenceComparator<RawText> {
}
@Override
- public int hashRegion(final byte[] raw, int ptr, final int end) {
+ protected int hashRegion(final byte[] raw, int ptr, final int end) {
int hash = 5381;
for (; ptr < end; ptr++)
hash = (hash << 5) ^ (raw[ptr] & 0xff);
@@ -93,9 +90,6 @@ public abstract class RawTextComparator extends SequenceComparator<RawText> {
ai++;
bi++;
- if (a.hashes[ai] != b.hashes[bi])
- return false;
-
int as = a.lines.get(ai);
int bs = b.lines.get(bi);
int ae = a.lines.get(ai + 1);
@@ -129,7 +123,7 @@ public abstract class RawTextComparator extends SequenceComparator<RawText> {
}
@Override
- public int hashRegion(byte[] raw, int ptr, int end) {
+ protected int hashRegion(byte[] raw, int ptr, int end) {
int hash = 5381;
for (; ptr < end; ptr++) {
byte c = raw[ptr];
@@ -147,9 +141,6 @@ public abstract class RawTextComparator extends SequenceComparator<RawText> {
ai++;
bi++;
- if (a.hashes[ai] != b.hashes[bi])
- return false;
-
int as = a.lines.get(ai);
int bs = b.lines.get(bi);
int ae = a.lines.get(ai + 1);
@@ -169,7 +160,7 @@ public abstract class RawTextComparator extends SequenceComparator<RawText> {
}
@Override
- public int hashRegion(final byte[] raw, int ptr, int end) {
+ protected int hashRegion(final byte[] raw, int ptr, int end) {
int hash = 5381;
ptr = trimLeadingWhitespace(raw, ptr, end);
for (; ptr < end; ptr++) {
@@ -186,9 +177,6 @@ public abstract class RawTextComparator extends SequenceComparator<RawText> {
ai++;
bi++;
- if (a.hashes[ai] != b.hashes[bi])
- return false;
-
int as = a.lines.get(ai);
int bs = b.lines.get(bi);
int ae = a.lines.get(ai + 1);
@@ -208,7 +196,7 @@ public abstract class RawTextComparator extends SequenceComparator<RawText> {
}
@Override
- public int hashRegion(final byte[] raw, int ptr, int end) {
+ protected int hashRegion(final byte[] raw, int ptr, int end) {
int hash = 5381;
end = trimTrailingWhitespace(raw, ptr, end);
for (; ptr < end; ptr++) {
@@ -225,9 +213,6 @@ public abstract class RawTextComparator extends SequenceComparator<RawText> {
ai++;
bi++;
- if (a.hashes[ai] != b.hashes[bi])
- return false;
-
int as = a.lines.get(ai);
int bs = b.lines.get(bi);
int ae = a.lines.get(ai + 1);
@@ -257,7 +242,7 @@ public abstract class RawTextComparator extends SequenceComparator<RawText> {
}
@Override
- public int hashRegion(final byte[] raw, int ptr, int end) {
+ protected int hashRegion(final byte[] raw, int ptr, int end) {
int hash = 5381;
end = trimTrailingWhitespace(raw, ptr, end);
while (ptr < end) {
@@ -273,8 +258,10 @@ public abstract class RawTextComparator extends SequenceComparator<RawText> {
};
@Override
- public int hash(RawText seq, int ptr) {
- return seq.hashes[ptr + 1];
+ public int hash(RawText seq, int lno) {
+ final int begin = seq.lines.get(lno + 1);
+ final int end = seq.lines.get(lno + 2);
+ return hashRegion(seq.content, begin, end);
}
@Override
@@ -347,5 +334,5 @@ public abstract class RawTextComparator extends SequenceComparator<RawText> {
* 1 past the last byte of the region.
* @return hash code for the region <code>[ptr, end)</code> of raw.
*/
- public abstract int hashRegion(byte[] raw, int ptr, int end);
+ protected abstract int hashRegion(byte[] raw, int ptr, int end);
}