diff options
author | Shawn O. Pearce <spearce@spearce.org> | 2010-09-10 22:14:57 -0700 |
---|---|---|
committer | Shawn O. Pearce <spearce@spearce.org> | 2010-09-20 18:05:41 -0700 |
commit | e0970cd1b4d10888fc73ea903b2f6a62be6b0225 (patch) | |
tree | dab7c4d981bb007fda1e6da0f9b5eab230babc4a /org.eclipse.jgit | |
parent | 590a9f94a1256c1e7dba2b848771a14c23064f38 (diff) | |
download | jgit-e0970cd1b4d10888fc73ea903b2f6a62be6b0225.tar.gz jgit-e0970cd1b4d10888fc73ea903b2f6a62be6b0225.zip |
Micro-optimize reduceCommonStartEnd for RawText
This is a faster exact match based form that tries to improve
performance for the common case of the header and trailer of
a text file not changing at all. After this fast path we use
the slower path based on the super class' using equals() to
allow for whitespace ignore modes to still work.
Some simple performance testing showed a major improvement over the
older implementation for a common edit we see in JGit. The test
compared blob 29a89bc and 372a978, which is the ObjectDirectory.java
file difference in commit 41dd9ed1c054f9f9e1ab52fc7bbf1a55a56cf543.
The two text files are approximately 22 KiB in size.
DEFAULT old 203900 ns
DEFAULT new 100400 ns
This new version is 2x faster for the DEFAULT comparator, which does
not treat space specially. This is because we can now examine a
larger swath of text with fewer instructions per byte compared. The
older algorithm had to stop at each line break and recompute how to
examine the next line, while the new algorithm only stops when the
first difference is found.
WS_IGNORE_ALL old 298500 ns
WS_IGNORE_ALL new 63300 ns
It's 4.7x faster for the whitespace ignore comparator, as the common
header and footer do not have a whitespace difference. Avoiding the
special case handling for whitespace on each byte considered saves a
lot of time.
Since most edits to source code (and other text-like files) appear in
the interior of the file, fast elimination of common header/footer
means faster diff throughput. In the less common case of an actual
header or footer edit, the common header/footer elimination is stopped
rather quickly either way, so there is very little downside to the
optimization applied here.
Change-Id: I1d501b4c3ff80ed086b20bf12faf51ae62167db7
Signed-off-by: Shawn O. Pearce <spearce@spearce.org>
Diffstat (limited to 'org.eclipse.jgit')
-rw-r--r-- | org.eclipse.jgit/src/org/eclipse/jgit/diff/RawTextComparator.java | 61 |
1 files changed, 61 insertions, 0 deletions
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/diff/RawTextComparator.java b/org.eclipse.jgit/src/org/eclipse/jgit/diff/RawTextComparator.java
index 1f88aff7e3..767bf61f7a 100644
--- a/org.eclipse.jgit/src/org/eclipse/jgit/diff/RawTextComparator.java
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/diff/RawTextComparator.java
@@ -48,6 +48,8 @@ import static org.eclipse.jgit.util.RawCharUtil.isWhitespace;
 import static org.eclipse.jgit.util.RawCharUtil.trimLeadingWhitespace;
 import static org.eclipse.jgit.util.RawCharUtil.trimTrailingWhitespace;
 
+import org.eclipse.jgit.util.IntList;
+
 /** Equivalence function for {@link RawText}. */
 public abstract class RawTextComparator extends SequenceComparator<RawText> {
 	/** No special treatment. */
@@ -275,6 +277,121 @@ public abstract class RawTextComparator extends SequenceComparator<RawText> {
 		return seq.hashes[ptr + 1];
 	}
 
+	/**
+	 * Trim the unchanged header and trailer of two texts out of an edit.
+	 * <p>
+	 * Leading and trailing bytes are first compared exactly, and the edit is
+	 * shrunk by the whole lines that comparison covered in both texts. The
+	 * result is then passed to the super class implementation.
+	 *
+	 * @param a
+	 *            the first text.
+	 * @param b
+	 *            the second text.
+	 * @param e
+	 *            the region to reduce; its bounds are updated in place.
+	 * @return the edit returned by the super class implementation, or
+	 *         {@code e} unchanged if either side of it is empty.
+	 */
+	@Override
+	public Edit reduceCommonStartEnd(RawText a, RawText b, Edit e) {
+		// This is a faster exact match based form that tries to improve
+		// performance for the common case of the header and trailer of
+		// a text file not changing at all. After this fast path we use
+		// the slower path based on the super class' using equals() to
+		// allow for whitespace ignore modes to still work.
+
+		if (e.beginA == e.endA || e.beginB == e.endB)
+			return e;
+
+		final int oldBeginA = e.beginA;
+		final int oldBeginB = e.beginB;
+		final int oldEndA = e.endA;
+		final int oldEndB = e.endB;
+
+		byte[] aRaw = a.content;
+		byte[] bRaw = b.content;
+
+		// Each offset must be read from the line map of its own text.
+		int aPtr = a.lines.get(oldBeginA + 1);
+		int bPtr = b.lines.get(oldBeginB + 1);
+
+		int aEnd = a.lines.get(oldEndA + 1);
+		int bEnd = b.lines.get(oldEndB + 1);
+
+		// This can never happen, but the JIT doesn't know that. If we
+		// define this assertion before the tight while loops below it
+		// should be able to skip the array bound checks on access.
+		//
+		if (aPtr < 0 || bPtr < 0 || aEnd > aRaw.length || bEnd > bRaw.length)
+			throw new ArrayIndexOutOfBoundsException();
+
+		while (aPtr < aEnd && bPtr < bEnd && aRaw[aPtr] == bRaw[bPtr]) {
+			aPtr++;
+			bPtr++;
+		}
+
+		while (aPtr < aEnd && bPtr < bEnd && aRaw[aEnd - 1] == bRaw[bEnd - 1]) {
+			aEnd--;
+			bEnd--;
+		}
+
+		// The matched bytes may stop on a line boundary in one text but
+		// inside a line in the other. Drop only as many whole lines as
+		// both texts matched, so the two sides of the edit stay aligned.
+		//
+		final int head = Math.min(
+				findForwardLine(a.lines, oldBeginA, aPtr) - oldBeginA,
+				findForwardLine(b.lines, oldBeginB, bPtr) - oldBeginB);
+		final int tail = Math.min(
+				oldEndA - findReverseLine(a.lines, oldEndA, aEnd),
+				oldEndB - findReverseLine(b.lines, oldEndB, bEnd));
+
+		e.beginA = oldBeginA + head;
+		e.beginB = oldBeginB + head;
+		e.endA = oldEndA - tail;
+		e.endB = oldEndB - tail;
+
+		return super.reduceCommonStartEnd(a, b, e);
+	}
+
+	/**
+	 * Move a line index forward past lines that end at or before an offset.
+	 *
+	 * @param lines
+	 *            line map of the text being scanned.
+	 * @param idx
+	 *            line index to start from.
+	 * @param ptr
+	 *            offset at which the forward byte comparison stopped.
+	 * @return {@code idx} raised until map entry {@code idx + 2} is after
+	 *         {@code ptr}, capped at {@code lines.size() - 2}.
+	 */
+	private static int findForwardLine(IntList lines, int idx, int ptr) {
+		final int end = lines.size() - 2;
+		while (idx < end && lines.get(idx + 2) <= ptr)
+			idx++;
+		return idx;
+	}
+
+	/**
+	 * Move a line index backward past lines that start at or after an offset.
+	 *
+	 * @param lines
+	 *            line map of the text being scanned.
+	 * @param idx
+	 *            line index to start from.
+	 * @param ptr
+	 *            offset at which the backward byte comparison stopped.
+	 * @return {@code idx} lowered until map entry {@code idx} is before
+	 *         {@code ptr}, or 0 if no such entry is found.
+	 */
+	private static int findReverseLine(IntList lines, int idx, int ptr) {
+		while (0 < idx && ptr <= lines.get(idx))
+			idx--;
+		return idx;
+	}
+
 	/**
 	 * Compute a hash code for a region.
 	 *