diff options
Diffstat (limited to 'vendor/github.com/sergi/go-diff/diffmatchpatch/dmp.go')
-rw-r--r-- | vendor/github.com/sergi/go-diff/diffmatchpatch/dmp.go | 2244 |
1 files changed, 2244 insertions, 0 deletions
diff --git a/vendor/github.com/sergi/go-diff/diffmatchpatch/dmp.go b/vendor/github.com/sergi/go-diff/diffmatchpatch/dmp.go new file mode 100644 index 0000000000..4e61821dd6 --- /dev/null +++ b/vendor/github.com/sergi/go-diff/diffmatchpatch/dmp.go @@ -0,0 +1,2244 @@ +/** + * dmp.go + * + * Go language implementation of Google Diff, Match, and Patch library + * + * Original library is Copyright (c) 2006 Google Inc. + * http://code.google.com/p/google-diff-match-patch/ + * + * Copyright (c) 2012 Sergi Mansilla <sergi.mansilla@gmail.com> + * https://github.com/sergi/go-diff + * + * See included LICENSE file for license details. + */ + +// Package diffmatchpatch offers robust algorithms to perform the +// operations required for synchronizing plain text. +package diffmatchpatch + +import ( + "bytes" + "errors" + "fmt" + "html" + "math" + "net/url" + "regexp" + "strconv" + "strings" + "time" + "unicode/utf8" +) + +// The data structure representing a diff is an array of tuples: +// [[DiffDelete, 'Hello'], [DiffInsert, 'Goodbye'], [DiffEqual, ' world.']] +// which means: delete 'Hello', add 'Goodbye' and keep ' world.' + +// Operation defines the operation of a diff item. +type Operation int8 + +const ( + // DiffDelete item represents a delete diff. + DiffDelete Operation = -1 + // DiffInsert item represents an insert diff. + DiffInsert Operation = 1 + // DiffEqual item represents an equal diff. + DiffEqual Operation = 0 +) + +// unescaper unescapes selected chars for compatibility with JavaScript's encodeURI. +// In speed critical applications this could be dropped since the +// receiving application will certainly decode these fine. +// Note that this function is case-sensitive. Thus "%3F" would not be +// unescaped. But this is ok because it is only called with the output of +// HttpUtility.UrlEncode which returns lowercase hex. +// +// Example: "%3f" -> "?", "%24" -> "$", etc. +var unescaper = strings.NewReplacer( + "%21", "!", "%7E", "~", "%27", "'", + "%28", "(", "%29", ")", "%3B", ";", + "%2F", "/", "%3F", "?", "%3A", ":", + "%40", "@", "%26", "&", "%3D", "=", + "%2B", "+", "%24", "$", "%2C", ",", "%23", "#", "%2A", "*") + +// Define some regex patterns for matching boundaries. +var ( + nonAlphaNumericRegex = regexp.MustCompile(`[^a-zA-Z0-9]`) + whitespaceRegex = regexp.MustCompile(`\s`) + linebreakRegex = regexp.MustCompile(`[\r\n]`) + blanklineEndRegex = regexp.MustCompile(`\n\r?\n$`) + blanklineStartRegex = regexp.MustCompile(`^\r?\n\r?\n`) +) + +func splice(slice []Diff, index int, amount int, elements ...Diff) []Diff { + return append(slice[:index], append(elements, slice[index+amount:]...)...) +} + +// indexOf returns the first index of pattern in str, starting at str[i]. +func indexOf(str string, pattern string, i int) int { + if i > len(str)-1 { + return -1 + } + if i <= 0 { + return strings.Index(str, pattern) + } + ind := strings.Index(str[i:], pattern) + if ind == -1 { + return -1 + } + return ind + i +} + +// lastIndexOf returns the last index of pattern in str, starting at str[i]. +func lastIndexOf(str string, pattern string, i int) int { + if i < 0 { + return -1 + } + if i >= len(str) { + return strings.LastIndex(str, pattern) + } + _, size := utf8.DecodeRuneInString(str[i:]) + return strings.LastIndex(str[:i+size], pattern) +} + +// Return the index of pattern in target, starting at target[i]. +func runesIndexOf(target, pattern []rune, i int) int { + if i > len(target)-1 { + return -1 + } + if i <= 0 { + return runesIndex(target, pattern) + } + ind := runesIndex(target[i:], pattern) + if ind == -1 { + return -1 + } + return ind + i +} + +func min(x, y int) int { + if x < y { + return x + } + return y +} + +func max(x, y int) int { + if x > y { + return x + } + return y +} + +func runesEqual(r1, r2 []rune) bool { + if len(r1) != len(r2) { + return false + } + for i, c := range r1 { + if c != r2[i] { + return false + } + } + return true +} + +// The equivalent of strings.Index for rune slices. +func runesIndex(r1, r2 []rune) int { + last := len(r1) - len(r2) + for i := 0; i <= last; i++ { + if runesEqual(r1[i:i+len(r2)], r2) { + return i + } + } + return -1 +} + +// Diff represents one diff operation +type Diff struct { + Type Operation + Text string +} + +// Patch represents one patch operation. +type Patch struct { + diffs []Diff + start1 int + start2 int + length1 int + length2 int +} + +// String emulates GNU diff's format. +// Header: @@ -382,8 +481,9 @@ +// Indicies are printed as 1-based, not 0-based. +func (p *Patch) String() string { + var coords1, coords2 string + + if p.length1 == 0 { + coords1 = strconv.Itoa(p.start1) + ",0" + } else if p.length1 == 1 { + coords1 = strconv.Itoa(p.start1 + 1) + } else { + coords1 = strconv.Itoa(p.start1+1) + "," + strconv.Itoa(p.length1) + } + + if p.length2 == 0 { + coords2 = strconv.Itoa(p.start2) + ",0" + } else if p.length2 == 1 { + coords2 = strconv.Itoa(p.start2 + 1) + } else { + coords2 = strconv.Itoa(p.start2+1) + "," + strconv.Itoa(p.length2) + } + + var text bytes.Buffer + _, _ = text.WriteString("@@ -" + coords1 + " +" + coords2 + " @@\n") + + // Escape the body of the patch with %xx notation. + for _, aDiff := range p.diffs { + switch aDiff.Type { + case DiffInsert: + _, _ = text.WriteString("+") + case DiffDelete: + _, _ = text.WriteString("-") + case DiffEqual: + _, _ = text.WriteString(" ") + } + + _, _ = text.WriteString(strings.Replace(url.QueryEscape(aDiff.Text), "+", " ", -1)) + _, _ = text.WriteString("\n") + } + + return unescaper.Replace(text.String()) +} + +// DiffMatchPatch holds the configuration for diff-match-patch operations. +type DiffMatchPatch struct { + // Number of seconds to map a diff before giving up (0 for infinity). + DiffTimeout time.Duration + // Cost of an empty edit operation in terms of edit characters. + DiffEditCost int + // How far to search for a match (0 = exact location, 1000+ = broad match). + // A match this many characters away from the expected location will add + // 1.0 to the score (0.0 is a perfect match). + MatchDistance int + // When deleting a large block of text (over ~64 characters), how close do + // the contents have to be to match the expected contents. (0.0 = perfection, + // 1.0 = very loose). Note that MatchThreshold controls how closely the + // end points of a delete need to match. + PatchDeleteThreshold float64 + // Chunk size for context length. + PatchMargin int + // The number of bits in an int. + MatchMaxBits int + // At what point is no match declared (0.0 = perfection, 1.0 = very loose). + MatchThreshold float64 +} + +// New creates a new DiffMatchPatch object with default parameters. +func New() *DiffMatchPatch { + // Defaults. + return &DiffMatchPatch{ + DiffTimeout: time.Second, + DiffEditCost: 4, + MatchThreshold: 0.5, + MatchDistance: 1000, + PatchDeleteThreshold: 0.5, + PatchMargin: 4, + MatchMaxBits: 32, + } +} + +// DiffMain finds the differences between two texts. +func (dmp *DiffMatchPatch) DiffMain(text1, text2 string, checklines bool) []Diff { + return dmp.DiffMainRunes([]rune(text1), []rune(text2), checklines) +} + +// DiffMainRunes finds the differences between two rune sequences. +func (dmp *DiffMatchPatch) DiffMainRunes(text1, text2 []rune, checklines bool) []Diff { + var deadline time.Time + if dmp.DiffTimeout > 0 { + deadline = time.Now().Add(dmp.DiffTimeout) + } + return dmp.diffMainRunes(text1, text2, checklines, deadline) +} + +func (dmp *DiffMatchPatch) diffMainRunes(text1, text2 []rune, checklines bool, deadline time.Time) []Diff { + if runesEqual(text1, text2) { + var diffs []Diff + if len(text1) > 0 { + diffs = append(diffs, Diff{DiffEqual, string(text1)}) + } + return diffs + } + // Trim off common prefix (speedup). + commonlength := commonPrefixLength(text1, text2) + commonprefix := text1[:commonlength] + text1 = text1[commonlength:] + text2 = text2[commonlength:] + + // Trim off common suffix (speedup). + commonlength = commonSuffixLength(text1, text2) + commonsuffix := text1[len(text1)-commonlength:] + text1 = text1[:len(text1)-commonlength] + text2 = text2[:len(text2)-commonlength] + + // Compute the diff on the middle block. + diffs := dmp.diffCompute(text1, text2, checklines, deadline) + + // Restore the prefix and suffix. + if len(commonprefix) != 0 { + diffs = append([]Diff{Diff{DiffEqual, string(commonprefix)}}, diffs...) + } + if len(commonsuffix) != 0 { + diffs = append(diffs, Diff{DiffEqual, string(commonsuffix)}) + } + + return dmp.DiffCleanupMerge(diffs) +} + +// diffCompute finds the differences between two rune slices. Assumes that the texts do not +// have any common prefix or suffix. +func (dmp *DiffMatchPatch) diffCompute(text1, text2 []rune, checklines bool, deadline time.Time) []Diff { + diffs := []Diff{} + if len(text1) == 0 { + // Just add some text (speedup). + return append(diffs, Diff{DiffInsert, string(text2)}) + } else if len(text2) == 0 { + // Just delete some text (speedup). + return append(diffs, Diff{DiffDelete, string(text1)}) + } + + var longtext, shorttext []rune + if len(text1) > len(text2) { + longtext = text1 + shorttext = text2 + } else { + longtext = text2 + shorttext = text1 + } + + if i := runesIndex(longtext, shorttext); i != -1 { + op := DiffInsert + // Swap insertions for deletions if diff is reversed. + if len(text1) > len(text2) { + op = DiffDelete + } + // Shorter text is inside the longer text (speedup). + return []Diff{ + Diff{op, string(longtext[:i])}, + Diff{DiffEqual, string(shorttext)}, + Diff{op, string(longtext[i+len(shorttext):])}, + } + } else if len(shorttext) == 1 { + // Single character string. + // After the previous speedup, the character can't be an equality. + return []Diff{ + Diff{DiffDelete, string(text1)}, + Diff{DiffInsert, string(text2)}, + } + // Check to see if the problem can be split in two. + } else if hm := dmp.diffHalfMatch(text1, text2); hm != nil { + // A half-match was found, sort out the return data. + text1A := hm[0] + text1B := hm[1] + text2A := hm[2] + text2B := hm[3] + midCommon := hm[4] + // Send both pairs off for separate processing. + diffsA := dmp.diffMainRunes(text1A, text2A, checklines, deadline) + diffsB := dmp.diffMainRunes(text1B, text2B, checklines, deadline) + // Merge the results. + return append(diffsA, append([]Diff{Diff{DiffEqual, string(midCommon)}}, diffsB...)...) + } else if checklines && len(text1) > 100 && len(text2) > 100 { + return dmp.diffLineMode(text1, text2, deadline) + } + return dmp.diffBisect(text1, text2, deadline) +} + +// diffLineMode does a quick line-level diff on both []runes, then rediff the parts for +// greater accuracy. This speedup can produce non-minimal diffs. +func (dmp *DiffMatchPatch) diffLineMode(text1, text2 []rune, deadline time.Time) []Diff { + // Scan the text on a line-by-line basis first. + text1, text2, linearray := dmp.diffLinesToRunes(text1, text2) + + diffs := dmp.diffMainRunes(text1, text2, false, deadline) + + // Convert the diff back to original text. + diffs = dmp.DiffCharsToLines(diffs, linearray) + // Eliminate freak matches (e.g. blank lines) + diffs = dmp.DiffCleanupSemantic(diffs) + + // Rediff any replacement blocks, this time character-by-character. + // Add a dummy entry at the end. + diffs = append(diffs, Diff{DiffEqual, ""}) + + pointer := 0 + countDelete := 0 + countInsert := 0 + + // NOTE: Rune slices are slower than using strings in this case. + textDelete := "" + textInsert := "" + + for pointer < len(diffs) { + switch diffs[pointer].Type { + case DiffInsert: + countInsert++ + textInsert += diffs[pointer].Text + case DiffDelete: + countDelete++ + textDelete += diffs[pointer].Text + case DiffEqual: + // Upon reaching an equality, check for prior redundancies. + if countDelete >= 1 && countInsert >= 1 { + // Delete the offending records and add the merged ones. + diffs = splice(diffs, pointer-countDelete-countInsert, + countDelete+countInsert) + + pointer = pointer - countDelete - countInsert + a := dmp.diffMainRunes([]rune(textDelete), []rune(textInsert), false, deadline) + for j := len(a) - 1; j >= 0; j-- { + diffs = splice(diffs, pointer, 0, a[j]) + } + pointer = pointer + len(a) + } + + countInsert = 0 + countDelete = 0 + textDelete = "" + textInsert = "" + } + pointer++ + } + + return diffs[:len(diffs)-1] // Remove the dummy entry at the end. +} + +// DiffBisect finds the 'middle snake' of a diff, split the problem in two +// and return the recursively constructed diff. +// See Myers 1986 paper: An O(ND) Difference Algorithm and Its Variations. +func (dmp *DiffMatchPatch) DiffBisect(text1, text2 string, deadline time.Time) []Diff { + // Unused in this code, but retained for interface compatibility. + return dmp.diffBisect([]rune(text1), []rune(text2), deadline) +} + +// diffBisect finds the 'middle snake' of a diff, splits the problem in two +// and returns the recursively constructed diff. +// See Myers's 1986 paper: An O(ND) Difference Algorithm and Its Variations. +func (dmp *DiffMatchPatch) diffBisect(runes1, runes2 []rune, deadline time.Time) []Diff { + // Cache the text lengths to prevent multiple calls. + runes1Len, runes2Len := len(runes1), len(runes2) + + maxD := (runes1Len + runes2Len + 1) / 2 + vOffset := maxD + vLength := 2 * maxD + + v1 := make([]int, vLength) + v2 := make([]int, vLength) + for i := range v1 { + v1[i] = -1 + v2[i] = -1 + } + v1[vOffset+1] = 0 + v2[vOffset+1] = 0 + + delta := runes1Len - runes2Len + // If the total number of characters is odd, then the front path will collide + // with the reverse path. + front := (delta%2 != 0) + // Offsets for start and end of k loop. + // Prevents mapping of space beyond the grid. + k1start := 0 + k1end := 0 + k2start := 0 + k2end := 0 + for d := 0; d < maxD; d++ { + // Bail out if deadline is reached. + if !deadline.IsZero() && time.Now().After(deadline) { + break + } + + // Walk the front path one step. + for k1 := -d + k1start; k1 <= d-k1end; k1 += 2 { + k1Offset := vOffset + k1 + var x1 int + + if k1 == -d || (k1 != d && v1[k1Offset-1] < v1[k1Offset+1]) { + x1 = v1[k1Offset+1] + } else { + x1 = v1[k1Offset-1] + 1 + } + + y1 := x1 - k1 + for x1 < runes1Len && y1 < runes2Len { + if runes1[x1] != runes2[y1] { + break + } + x1++ + y1++ + } + v1[k1Offset] = x1 + if x1 > runes1Len { + // Ran off the right of the graph. + k1end += 2 + } else if y1 > runes2Len { + // Ran off the bottom of the graph. + k1start += 2 + } else if front { + k2Offset := vOffset + delta - k1 + if k2Offset >= 0 && k2Offset < vLength && v2[k2Offset] != -1 { + // Mirror x2 onto top-left coordinate system. + x2 := runes1Len - v2[k2Offset] + if x1 >= x2 { + // Overlap detected. + return dmp.diffBisectSplit(runes1, runes2, x1, y1, deadline) + } + } + } + } + // Walk the reverse path one step. + for k2 := -d + k2start; k2 <= d-k2end; k2 += 2 { + k2Offset := vOffset + k2 + var x2 int + if k2 == -d || (k2 != d && v2[k2Offset-1] < v2[k2Offset+1]) { + x2 = v2[k2Offset+1] + } else { + x2 = v2[k2Offset-1] + 1 + } + var y2 = x2 - k2 + for x2 < runes1Len && y2 < runes2Len { + if runes1[runes1Len-x2-1] != runes2[runes2Len-y2-1] { + break + } + x2++ + y2++ + } + v2[k2Offset] = x2 + if x2 > runes1Len { + // Ran off the left of the graph. + k2end += 2 + } else if y2 > runes2Len { + // Ran off the top of the graph. + k2start += 2 + } else if !front { + k1Offset := vOffset + delta - k2 + if k1Offset >= 0 && k1Offset < vLength && v1[k1Offset] != -1 { + x1 := v1[k1Offset] + y1 := vOffset + x1 - k1Offset + // Mirror x2 onto top-left coordinate system. + x2 = runes1Len - x2 + if x1 >= x2 { + // Overlap detected. + return dmp.diffBisectSplit(runes1, runes2, x1, y1, deadline) + } + } + } + } + } + // Diff took too long and hit the deadline or + // number of diffs equals number of characters, no commonality at all. + return []Diff{ + Diff{DiffDelete, string(runes1)}, + Diff{DiffInsert, string(runes2)}, + } +} + +func (dmp *DiffMatchPatch) diffBisectSplit(runes1, runes2 []rune, x, y int, + deadline time.Time) []Diff { + runes1a := runes1[:x] + runes2a := runes2[:y] + runes1b := runes1[x:] + runes2b := runes2[y:] + + // Compute both diffs serially. + diffs := dmp.diffMainRunes(runes1a, runes2a, false, deadline) + diffsb := dmp.diffMainRunes(runes1b, runes2b, false, deadline) + + return append(diffs, diffsb...) +} + +// DiffLinesToChars splits two texts into a list of strings. Reduces the texts to a string of +// hashes where each Unicode character represents one line. +// It's slightly faster to call DiffLinesToRunes first, followed by DiffMainRunes. +func (dmp *DiffMatchPatch) DiffLinesToChars(text1, text2 string) (string, string, []string) { + chars1, chars2, lineArray := dmp.DiffLinesToRunes(text1, text2) + return string(chars1), string(chars2), lineArray +} + +// DiffLinesToRunes splits two texts into a list of runes. Each rune represents one line. +func (dmp *DiffMatchPatch) DiffLinesToRunes(text1, text2 string) ([]rune, []rune, []string) { + // '\x00' is a valid character, but various debuggers don't like it. + // So we'll insert a junk entry to avoid generating a null character. + lineArray := []string{""} // e.g. lineArray[4] == 'Hello\n' + lineHash := map[string]int{} // e.g. lineHash['Hello\n'] == 4 + + chars1 := dmp.diffLinesToRunesMunge(text1, &lineArray, lineHash) + chars2 := dmp.diffLinesToRunesMunge(text2, &lineArray, lineHash) + + return chars1, chars2, lineArray +} + +func (dmp *DiffMatchPatch) diffLinesToRunes(text1, text2 []rune) ([]rune, []rune, []string) { + return dmp.DiffLinesToRunes(string(text1), string(text2)) +} + +// diffLinesToRunesMunge splits a text into an array of strings. Reduces the +// texts to a []rune where each Unicode character represents one line. +// We use strings instead of []runes as input mainly because you can't use []rune as a map key. +func (dmp *DiffMatchPatch) diffLinesToRunesMunge(text string, lineArray *[]string, lineHash map[string]int) []rune { + // Walk the text, pulling out a substring for each line. + // text.split('\n') would would temporarily double our memory footprint. + // Modifying text would create many large strings to garbage collect. + lineStart := 0 + lineEnd := -1 + runes := []rune{} + + for lineEnd < len(text)-1 { + lineEnd = indexOf(text, "\n", lineStart) + + if lineEnd == -1 { + lineEnd = len(text) - 1 + } + + line := text[lineStart : lineEnd+1] + lineStart = lineEnd + 1 + lineValue, ok := lineHash[line] + + if ok { + runes = append(runes, rune(lineValue)) + } else { + *lineArray = append(*lineArray, line) + lineHash[line] = len(*lineArray) - 1 + runes = append(runes, rune(len(*lineArray)-1)) + } + } + + return runes +} + +// DiffCharsToLines rehydrates the text in a diff from a string of line hashes to real lines of +// text. +func (dmp *DiffMatchPatch) DiffCharsToLines(diffs []Diff, lineArray []string) []Diff { + hydrated := make([]Diff, 0, len(diffs)) + for _, aDiff := range diffs { + chars := aDiff.Text + text := make([]string, len(chars)) + + for i, r := range chars { + text[i] = lineArray[r] + } + + aDiff.Text = strings.Join(text, "") + hydrated = append(hydrated, aDiff) + } + return hydrated +} + +// DiffCommonPrefix determines the common prefix length of two strings. +func (dmp *DiffMatchPatch) DiffCommonPrefix(text1, text2 string) int { + // Unused in this code, but retained for interface compatibility. + return commonPrefixLength([]rune(text1), []rune(text2)) +} + +// DiffCommonSuffix determines the common suffix length of two strings. +func (dmp *DiffMatchPatch) DiffCommonSuffix(text1, text2 string) int { + // Unused in this code, but retained for interface compatibility. + return commonSuffixLength([]rune(text1), []rune(text2)) +} + +// commonPrefixLength returns the length of the common prefix of two rune slices. +func commonPrefixLength(text1, text2 []rune) int { + short, long := text1, text2 + if len(short) > len(long) { + short, long = long, short + } + for i, r := range short { + if r != long[i] { + return i + } + } + return len(short) +} + +// commonSuffixLength returns the length of the common suffix of two rune slices. +func commonSuffixLength(text1, text2 []rune) int { + n := min(len(text1), len(text2)) + for i := 0; i < n; i++ { + if text1[len(text1)-i-1] != text2[len(text2)-i-1] { + return i + } + } + return n + + // Binary search. + // Performance analysis: http://neil.fraser.name/news/2007/10/09/ + /* + pointermin := 0 + pointermax := math.Min(len(text1), len(text2)) + pointermid := pointermax + pointerend := 0 + for pointermin < pointermid { + if text1[len(text1)-pointermid:len(text1)-pointerend] == + text2[len(text2)-pointermid:len(text2)-pointerend] { + pointermin = pointermid + pointerend = pointermin + } else { + pointermax = pointermid + } + pointermid = math.Floor((pointermax-pointermin)/2 + pointermin) + } + return pointermid + */ +} + +// DiffCommonOverlap determines if the suffix of one string is the prefix of another. +func (dmp *DiffMatchPatch) DiffCommonOverlap(text1 string, text2 string) int { + // Cache the text lengths to prevent multiple calls. + text1Length := len(text1) + text2Length := len(text2) + // Eliminate the null case. + if text1Length == 0 || text2Length == 0 { + return 0 + } + // Truncate the longer string. + if text1Length > text2Length { + text1 = text1[text1Length-text2Length:] + } else if text1Length < text2Length { + text2 = text2[0:text1Length] + } + textLength := int(math.Min(float64(text1Length), float64(text2Length))) + // Quick check for the worst case. + if text1 == text2 { + return textLength + } + + // Start by looking for a single character match + // and increase length until no match is found. + // Performance analysis: http://neil.fraser.name/news/2010/11/04/ + best := 0 + length := 1 + for { + pattern := text1[textLength-length:] + found := strings.Index(text2, pattern) + if found == -1 { + break + } + length += found + if found == 0 || text1[textLength-length:] == text2[0:length] { + best = length + length++ + } + } + + return best +} + +// DiffHalfMatch checks whether the two texts share a substring which is at +// least half the length of the longer text. This speedup can produce non-minimal diffs. +func (dmp *DiffMatchPatch) DiffHalfMatch(text1, text2 string) []string { + // Unused in this code, but retained for interface compatibility. + runeSlices := dmp.diffHalfMatch([]rune(text1), []rune(text2)) + if runeSlices == nil { + return nil + } + + result := make([]string, len(runeSlices)) + for i, r := range runeSlices { + result[i] = string(r) + } + return result +} + +func (dmp *DiffMatchPatch) diffHalfMatch(text1, text2 []rune) [][]rune { + if dmp.DiffTimeout <= 0 { + // Don't risk returning a non-optimal diff if we have unlimited time. + return nil + } + + var longtext, shorttext []rune + if len(text1) > len(text2) { + longtext = text1 + shorttext = text2 + } else { + longtext = text2 + shorttext = text1 + } + + if len(longtext) < 4 || len(shorttext)*2 < len(longtext) { + return nil // Pointless. + } + + // First check if the second quarter is the seed for a half-match. + hm1 := dmp.diffHalfMatchI(longtext, shorttext, int(float64(len(longtext)+3)/4)) + + // Check again based on the third quarter. + hm2 := dmp.diffHalfMatchI(longtext, shorttext, int(float64(len(longtext)+1)/2)) + + hm := [][]rune{} + if hm1 == nil && hm2 == nil { + return nil + } else if hm2 == nil { + hm = hm1 + } else if hm1 == nil { + hm = hm2 + } else { + // Both matched. Select the longest. + if len(hm1[4]) > len(hm2[4]) { + hm = hm1 + } else { + hm = hm2 + } + } + + // A half-match was found, sort out the return data. + if len(text1) > len(text2) { + return hm + } + + return [][]rune{hm[2], hm[3], hm[0], hm[1], hm[4]} +} + +// diffHalfMatchI checks if a substring of shorttext exist within longtext such that the substring is at least half the length of longtext? +// @param {string} longtext Longer string. +// @param {string} shorttext Shorter string. +// @param {number} i Start index of quarter length substring within longtext. +// @return {Array.<string>} Five element Array, containing the prefix of +// longtext, the suffix of longtext, the prefix of shorttext, the suffix +// of shorttext and the common middle. Or null if there was no match. +func (dmp *DiffMatchPatch) diffHalfMatchI(l, s []rune, i int) [][]rune { + var bestCommonA []rune + var bestCommonB []rune + var bestCommonLen int + var bestLongtextA []rune + var bestLongtextB []rune + var bestShorttextA []rune + var bestShorttextB []rune + + // Start with a 1/4 length substring at position i as a seed. + seed := l[i : i+len(l)/4] + + for j := runesIndexOf(s, seed, 0); j != -1; j = runesIndexOf(s, seed, j+1) { + prefixLength := commonPrefixLength(l[i:], s[j:]) + suffixLength := commonSuffixLength(l[:i], s[:j]) + + if bestCommonLen < suffixLength+prefixLength { + bestCommonA = s[j-suffixLength : j] + bestCommonB = s[j : j+prefixLength] + bestCommonLen = len(bestCommonA) + len(bestCommonB) + bestLongtextA = l[:i-suffixLength] + bestLongtextB = l[i+prefixLength:] + bestShorttextA = s[:j-suffixLength] + bestShorttextB = s[j+prefixLength:] + } + } + + if bestCommonLen*2 < len(l) { + return nil + } + + return [][]rune{ + bestLongtextA, + bestLongtextB, + bestShorttextA, + bestShorttextB, + append(bestCommonA, bestCommonB...), + } +} + +// DiffCleanupSemantic reduces the number of edits by eliminating +// semantically trivial equalities. +func (dmp *DiffMatchPatch) DiffCleanupSemantic(diffs []Diff) []Diff { + changes := false + // Stack of indices where equalities are found. + type equality struct { + data int + next *equality + } + var equalities *equality + + var lastequality string + // Always equal to diffs[equalities[equalitiesLength - 1]][1] + var pointer int // Index of current position. + // Number of characters that changed prior to the equality. + var lengthInsertions1, lengthDeletions1 int + // Number of characters that changed after the equality. + var lengthInsertions2, lengthDeletions2 int + + for pointer < len(diffs) { + if diffs[pointer].Type == DiffEqual { // Equality found. + equalities = &equality{ + data: pointer, + next: equalities, + } + lengthInsertions1 = lengthInsertions2 + lengthDeletions1 = lengthDeletions2 + lengthInsertions2 = 0 + lengthDeletions2 = 0 + lastequality = diffs[pointer].Text + } else { // An insertion or deletion. + if diffs[pointer].Type == DiffInsert { + lengthInsertions2 += len(diffs[pointer].Text) + } else { + lengthDeletions2 += len(diffs[pointer].Text) + } + // Eliminate an equality that is smaller or equal to the edits on both + // sides of it. + difference1 := int(math.Max(float64(lengthInsertions1), float64(lengthDeletions1))) + difference2 := int(math.Max(float64(lengthInsertions2), float64(lengthDeletions2))) + if len(lastequality) > 0 && + (len(lastequality) <= difference1) && + (len(lastequality) <= difference2) { + // Duplicate record. + insPoint := equalities.data + diffs = append( + diffs[:insPoint], + append([]Diff{Diff{DiffDelete, lastequality}}, diffs[insPoint:]...)...) + + // Change second copy to insert. + diffs[insPoint+1].Type = DiffInsert + // Throw away the equality we just deleted. + equalities = equalities.next + + if equalities != nil { + equalities = equalities.next + } + if equalities != nil { + pointer = equalities.data + } else { + pointer = -1 + } + + lengthInsertions1 = 0 // Reset the counters. + lengthDeletions1 = 0 + lengthInsertions2 = 0 + lengthDeletions2 = 0 + lastequality = "" + changes = true + } + } + pointer++ + } + + // Normalize the diff. + if changes { + diffs = dmp.DiffCleanupMerge(diffs) + } + diffs = dmp.DiffCleanupSemanticLossless(diffs) + // Find any overlaps between deletions and insertions. + // e.g: <del>abcxxx</del><ins>xxxdef</ins> + // -> <del>abc</del>xxx<ins>def</ins> + // e.g: <del>xxxabc</del><ins>defxxx</ins> + // -> <ins>def</ins>xxx<del>abc</del> + // Only extract an overlap if it is as big as the edit ahead or behind it. + pointer = 1 + for pointer < len(diffs) { + if diffs[pointer-1].Type == DiffDelete && + diffs[pointer].Type == DiffInsert { + deletion := diffs[pointer-1].Text + insertion := diffs[pointer].Text + overlapLength1 := dmp.DiffCommonOverlap(deletion, insertion) + overlapLength2 := dmp.DiffCommonOverlap(insertion, deletion) + if overlapLength1 >= overlapLength2 { + if float64(overlapLength1) >= float64(len(deletion))/2 || + float64(overlapLength1) >= float64(len(insertion))/2 { + + // Overlap found. Insert an equality and trim the surrounding edits. + diffs = append( + diffs[:pointer], + append([]Diff{Diff{DiffEqual, insertion[:overlapLength1]}}, diffs[pointer:]...)...) + //diffs.splice(pointer, 0, + // [DiffEqual, insertion[0 : overlapLength1)]] + diffs[pointer-1].Text = + deletion[0 : len(deletion)-overlapLength1] + diffs[pointer+1].Text = insertion[overlapLength1:] + pointer++ + } + } else { + if float64(overlapLength2) >= float64(len(deletion))/2 || + float64(overlapLength2) >= float64(len(insertion))/2 { + // Reverse overlap found. + // Insert an equality and swap and trim the surrounding edits. + overlap := Diff{DiffEqual, deletion[:overlapLength2]} + diffs = append( + diffs[:pointer], + append([]Diff{overlap}, diffs[pointer:]...)...) + // diffs.splice(pointer, 0, + // [DiffEqual, deletion[0 : overlapLength2)]] + diffs[pointer-1].Type = DiffInsert + diffs[pointer-1].Text = insertion[0 : len(insertion)-overlapLength2] + diffs[pointer+1].Type = DiffDelete + diffs[pointer+1].Text = deletion[overlapLength2:] + pointer++ + } + } + pointer++ + } + pointer++ + } + + return diffs +} + +// DiffCleanupSemanticLossless looks for single edits surrounded on both sides by equalities +// which can be shifted sideways to align the edit to a word boundary. +// e.g: The c<ins>at c</ins>ame. -> The <ins>cat </ins>came. +func (dmp *DiffMatchPatch) DiffCleanupSemanticLossless(diffs []Diff) []Diff { + + /** + * Given two strings, compute a score representing whether the internal + * boundary falls on logical boundaries. + * Scores range from 6 (best) to 0 (worst). + * Closure, but does not reference any external variables. + * @param {string} one First string. + * @param {string} two Second string. + * @return {number} The score. + * @private + */ + diffCleanupSemanticScore := func(one, two string) int { + if len(one) == 0 || len(two) == 0 { + // Edges are the best. + return 6 + } + + // Each port of this function behaves slightly differently due to + // subtle differences in each language's definition of things like + // 'whitespace'. Since this function's purpose is largely cosmetic, + // the choice has been made to use each language's native features + // rather than force total conformity. + rune1, _ := utf8.DecodeLastRuneInString(one) + rune2, _ := utf8.DecodeRuneInString(two) + char1 := string(rune1) + char2 := string(rune2) + + nonAlphaNumeric1 := nonAlphaNumericRegex.MatchString(char1) + nonAlphaNumeric2 := nonAlphaNumericRegex.MatchString(char2) + whitespace1 := nonAlphaNumeric1 && whitespaceRegex.MatchString(char1) + whitespace2 := nonAlphaNumeric2 && whitespaceRegex.MatchString(char2) + lineBreak1 := whitespace1 && linebreakRegex.MatchString(char1) + lineBreak2 := whitespace2 && linebreakRegex.MatchString(char2) + blankLine1 := lineBreak1 && blanklineEndRegex.MatchString(one) + blankLine2 := lineBreak2 && blanklineEndRegex.MatchString(two) + + if blankLine1 || blankLine2 { + // Five points for blank lines. + return 5 + } else if lineBreak1 || lineBreak2 { + // Four points for line breaks. + return 4 + } else if nonAlphaNumeric1 && !whitespace1 && whitespace2 { + // Three points for end of sentences. + return 3 + } else if whitespace1 || whitespace2 { + // Two points for whitespace. + return 2 + } else if nonAlphaNumeric1 || nonAlphaNumeric2 { + // One point for non-alphanumeric. + return 1 + } + return 0 + } + + pointer := 1 + + // Intentionally ignore the first and last element (don't need checking). + for pointer < len(diffs)-1 { + if diffs[pointer-1].Type == DiffEqual && + diffs[pointer+1].Type == DiffEqual { + + // This is a single edit surrounded by equalities. + equality1 := diffs[pointer-1].Text + edit := diffs[pointer].Text + equality2 := diffs[pointer+1].Text + + // First, shift the edit as far left as possible. + commonOffset := dmp.DiffCommonSuffix(equality1, edit) + if commonOffset > 0 { + commonString := edit[len(edit)-commonOffset:] + equality1 = equality1[0 : len(equality1)-commonOffset] + edit = commonString + edit[:len(edit)-commonOffset] + equality2 = commonString + equality2 + } + + // Second, step character by character right, looking for the best fit. + bestEquality1 := equality1 + bestEdit := edit + bestEquality2 := equality2 + bestScore := diffCleanupSemanticScore(equality1, edit) + + diffCleanupSemanticScore(edit, equality2) + + for len(edit) != 0 && len(equality2) != 0 { + _, sz := utf8.DecodeRuneInString(edit) + if len(equality2) < sz || edit[:sz] != equality2[:sz] { + break + } + equality1 += edit[:sz] + edit = edit[sz:] + equality2[:sz] + equality2 = equality2[sz:] + score := diffCleanupSemanticScore(equality1, edit) + + diffCleanupSemanticScore(edit, equality2) + // The >= encourages trailing rather than leading whitespace on + // edits. + if score >= bestScore { + bestScore = score + bestEquality1 = equality1 + bestEdit = edit + bestEquality2 = equality2 + } + } + + if diffs[pointer-1].Text != bestEquality1 { + // We have an improvement, save it back to the diff. + if len(bestEquality1) != 0 { + diffs[pointer-1].Text = bestEquality1 + } else { + diffs = splice(diffs, pointer-1, 1) + pointer-- + } + + diffs[pointer].Text = bestEdit + if len(bestEquality2) != 0 { + diffs[pointer+1].Text = bestEquality2 + } else { + //splice(diffs, pointer+1, 1) + diffs = append(diffs[:pointer+1], diffs[pointer+2:]...) + pointer-- + } + } + } + pointer++ + } + + return diffs +} + +// DiffCleanupEfficiency reduces the number of edits by eliminating +// operationally trivial equalities. +func (dmp *DiffMatchPatch) DiffCleanupEfficiency(diffs []Diff) []Diff { + changes := false + // Stack of indices where equalities are found. + type equality struct { + data int + next *equality + } + var equalities *equality + // Always equal to equalities[equalitiesLength-1][1] + lastequality := "" + pointer := 0 // Index of current position. + // Is there an insertion operation before the last equality. + preIns := false + // Is there a deletion operation before the last equality. + preDel := false + // Is there an insertion operation after the last equality. + postIns := false + // Is there a deletion operation after the last equality. + postDel := false + for pointer < len(diffs) { + if diffs[pointer].Type == DiffEqual { // Equality found. + if len(diffs[pointer].Text) < dmp.DiffEditCost && + (postIns || postDel) { + // Candidate found. + equalities = &equality{ + data: pointer, + next: equalities, + } + preIns = postIns + preDel = postDel + lastequality = diffs[pointer].Text + } else { + // Not a candidate, and can never become one. + equalities = nil + lastequality = "" + } + postIns = false + postDel = false + } else { // An insertion or deletion. + if diffs[pointer].Type == DiffDelete { + postDel = true + } else { + postIns = true + } + /* + * Five types to be split: + * <ins>A</ins><del>B</del>XY<ins>C</ins><del>D</del> + * <ins>A</ins>X<ins>C</ins><del>D</del> + * <ins>A</ins><del>B</del>X<ins>C</ins> + * <ins>A</del>X<ins>C</ins><del>D</del> + * <ins>A</ins><del>B</del>X<del>C</del> + */ + var sumPres int + if preIns { + sumPres++ + } + if preDel { + sumPres++ + } + if postIns { + sumPres++ + } + if postDel { + sumPres++ + } + if len(lastequality) > 0 && + ((preIns && preDel && postIns && postDel) || + ((len(lastequality) < dmp.DiffEditCost/2) && sumPres == 3)) { + + insPoint := equalities.data + + // Duplicate record. + diffs = append(diffs[:insPoint], + append([]Diff{Diff{DiffDelete, lastequality}}, diffs[insPoint:]...)...) + + // Change second copy to insert. + diffs[insPoint+1].Type = DiffInsert + // Throw away the equality we just deleted. + equalities = equalities.next + lastequality = "" + + if preIns && preDel { + // No changes made which could affect previous entry, keep going. + postIns = true + postDel = true + equalities = nil + } else { + if equalities != nil { + equalities = equalities.next + } + if equalities != nil { + pointer = equalities.data + } else { + pointer = -1 + } + postIns = false + postDel = false + } + changes = true + } + } + pointer++ + } + + if changes { + diffs = dmp.DiffCleanupMerge(diffs) + } + + return diffs +} + +// DiffCleanupMerge reorders and merges like edit sections. Merge equalities. +// Any edit section can move as long as it doesn't cross an equality. +func (dmp *DiffMatchPatch) DiffCleanupMerge(diffs []Diff) []Diff { + // Add a dummy entry at the end. + diffs = append(diffs, Diff{DiffEqual, ""}) + pointer := 0 + countDelete := 0 + countInsert := 0 + commonlength := 0 + textDelete := []rune(nil) + textInsert := []rune(nil) + + for pointer < len(diffs) { + switch diffs[pointer].Type { + case DiffInsert: + countInsert++ + textInsert = append(textInsert, []rune(diffs[pointer].Text)...) + pointer++ + break + case DiffDelete: + countDelete++ + textDelete = append(textDelete, []rune(diffs[pointer].Text)...) + pointer++ + break + case DiffEqual: + // Upon reaching an equality, check for prior redundancies. + if countDelete+countInsert > 1 { + if countDelete != 0 && countInsert != 0 { + // Factor out any common prefixies. + commonlength = commonPrefixLength(textInsert, textDelete) + if commonlength != 0 { + x := pointer - countDelete - countInsert + if x > 0 && diffs[x-1].Type == DiffEqual { + diffs[x-1].Text += string(textInsert[:commonlength]) + } else { + diffs = append([]Diff{Diff{DiffEqual, string(textInsert[:commonlength])}}, diffs...) + pointer++ + } + textInsert = textInsert[commonlength:] + textDelete = textDelete[commonlength:] + } + // Factor out any common suffixies. + commonlength = commonSuffixLength(textInsert, textDelete) + if commonlength != 0 { + insertIndex := len(textInsert) - commonlength + deleteIndex := len(textDelete) - commonlength + diffs[pointer].Text = string(textInsert[insertIndex:]) + diffs[pointer].Text + textInsert = textInsert[:insertIndex] + textDelete = textDelete[:deleteIndex] + } + } + // Delete the offending records and add the merged ones. + if countDelete == 0 { + diffs = splice(diffs, pointer-countInsert, + countDelete+countInsert, + Diff{DiffInsert, string(textInsert)}) + } else if countInsert == 0 { + diffs = splice(diffs, pointer-countDelete, + countDelete+countInsert, + Diff{DiffDelete, string(textDelete)}) + } else { + diffs = splice(diffs, pointer-countDelete-countInsert, + countDelete+countInsert, + Diff{DiffDelete, string(textDelete)}, + Diff{DiffInsert, string(textInsert)}) + } + + pointer = pointer - countDelete - countInsert + 1 + if countDelete != 0 { + pointer++ + } + if countInsert != 0 { + pointer++ + } + } else if pointer != 0 && diffs[pointer-1].Type == DiffEqual { + // Merge this equality with the previous one. + diffs[pointer-1].Text += diffs[pointer].Text + diffs = append(diffs[:pointer], diffs[pointer+1:]...) + } else { + pointer++ + } + countInsert = 0 + countDelete = 0 + textDelete = nil + textInsert = nil + break + } + } + + if len(diffs[len(diffs)-1].Text) == 0 { + diffs = diffs[0 : len(diffs)-1] // Remove the dummy entry at the end. + } + + // Second pass: look for single edits surrounded on both sides by + // equalities which can be shifted sideways to eliminate an equality. + // e.g: A<ins>BA</ins>C -> <ins>AB</ins>AC + changes := false + pointer = 1 + // Intentionally ignore the first and last element (don't need checking). + for pointer < (len(diffs) - 1) { + if diffs[pointer-1].Type == DiffEqual && + diffs[pointer+1].Type == DiffEqual { + // This is a single edit surrounded by equalities. + if strings.HasSuffix(diffs[pointer].Text, diffs[pointer-1].Text) { + // Shift the edit over the previous equality. + diffs[pointer].Text = diffs[pointer-1].Text + + diffs[pointer].Text[:len(diffs[pointer].Text)-len(diffs[pointer-1].Text)] + diffs[pointer+1].Text = diffs[pointer-1].Text + diffs[pointer+1].Text + diffs = splice(diffs, pointer-1, 1) + changes = true + } else if strings.HasPrefix(diffs[pointer].Text, diffs[pointer+1].Text) { + // Shift the edit over the next equality. + diffs[pointer-1].Text += diffs[pointer+1].Text + diffs[pointer].Text = + diffs[pointer].Text[len(diffs[pointer+1].Text):] + diffs[pointer+1].Text + diffs = splice(diffs, pointer+1, 1) + changes = true + } + } + pointer++ + } + + // If shifts were made, the diff needs reordering and another shift sweep. + if changes { + diffs = dmp.DiffCleanupMerge(diffs) + } + + return diffs +} + +// DiffXIndex returns the equivalent location in s2. +// loc is a location in text1, comAdde and return the equivalent location in +// text2. +// e.g. "The cat" vs "The big cat", 1->1, 5->8 +func (dmp *DiffMatchPatch) DiffXIndex(diffs []Diff, loc int) int { + chars1 := 0 + chars2 := 0 + lastChars1 := 0 + lastChars2 := 0 + lastDiff := Diff{} + for i := 0; i < len(diffs); i++ { + aDiff := diffs[i] + if aDiff.Type != DiffInsert { + // Equality or deletion. + chars1 += len(aDiff.Text) + } + if aDiff.Type != DiffDelete { + // Equality or insertion. + chars2 += len(aDiff.Text) + } + if chars1 > loc { + // Overshot the location. + lastDiff = aDiff + break + } + lastChars1 = chars1 + lastChars2 = chars2 + } + if lastDiff.Type == DiffDelete { + // The location was deleted. + return lastChars2 + } + // Add the remaining character length. + return lastChars2 + (loc - lastChars1) +} + +// DiffPrettyHtml converts a []Diff into a pretty HTML report. +// It is intended as an example from which to write one's own +// display functions. +func (dmp *DiffMatchPatch) DiffPrettyHtml(diffs []Diff) string { + var buff bytes.Buffer + for _, diff := range diffs { + text := strings.Replace(html.EscapeString(diff.Text), "\n", "¶<br>", -1) + switch diff.Type { + case DiffInsert: + _, _ = buff.WriteString("<ins style=\"background:#e6ffe6;\">") + _, _ = buff.WriteString(text) + _, _ = buff.WriteString("</ins>") + case DiffDelete: + _, _ = buff.WriteString("<del style=\"background:#ffe6e6;\">") + _, _ = buff.WriteString(text) + _, _ = buff.WriteString("</del>") + case DiffEqual: + _, _ = buff.WriteString("<span>") + _, _ = buff.WriteString(text) + _, _ = buff.WriteString("</span>") + } + } + return buff.String() +} + +// DiffPrettyText converts a []Diff into a colored text report. +func (dmp *DiffMatchPatch) DiffPrettyText(diffs []Diff) string { + var buff bytes.Buffer + for _, diff := range diffs { + text := diff.Text + + switch diff.Type { + case DiffInsert: + _, _ = buff.WriteString("\x1b[32m") + _, _ = buff.WriteString(text) + _, _ = buff.WriteString("\x1b[0m") + case DiffDelete: + _, _ = buff.WriteString("\x1b[31m") + _, _ = buff.WriteString(text) + _, _ = buff.WriteString("\x1b[0m") + case DiffEqual: + _, _ = buff.WriteString(text) + } + } + + return buff.String() +} + +// DiffText1 computes and returns the source text (all equalities and deletions). +func (dmp *DiffMatchPatch) DiffText1(diffs []Diff) string { + //StringBuilder text = new StringBuilder() + var text bytes.Buffer + + for _, aDiff := range diffs { + if aDiff.Type != DiffInsert { + _, _ = text.WriteString(aDiff.Text) + } + } + return text.String() +} + +// DiffText2 computes and returns the destination text (all equalities and insertions). +func (dmp *DiffMatchPatch) DiffText2(diffs []Diff) string { + var text bytes.Buffer + + for _, aDiff := range diffs { + if aDiff.Type != DiffDelete { + _, _ = text.WriteString(aDiff.Text) + } + } + return text.String() +} + +// DiffLevenshtein computes the Levenshtein distance; the number of inserted, deleted or +// substituted characters. +func (dmp *DiffMatchPatch) DiffLevenshtein(diffs []Diff) int { + levenshtein := 0 + insertions := 0 + deletions := 0 + + for _, aDiff := range diffs { + switch aDiff.Type { + case DiffInsert: + insertions += len(aDiff.Text) + case DiffDelete: + deletions += len(aDiff.Text) + case DiffEqual: + // A deletion and an insertion is one substitution. + levenshtein += max(insertions, deletions) + insertions = 0 + deletions = 0 + } + } + + levenshtein += max(insertions, deletions) + return levenshtein +} + +// DiffToDelta crushes the diff into an encoded string which describes the operations +// required to transform text1 into text2. +// E.g. =3\t-2\t+ing -> Keep 3 chars, delete 2 chars, insert 'ing'. +// Operations are tab-separated. Inserted text is escaped using %xx +// notation. +func (dmp *DiffMatchPatch) DiffToDelta(diffs []Diff) string { + var text bytes.Buffer + for _, aDiff := range diffs { + switch aDiff.Type { + case DiffInsert: + _, _ = text.WriteString("+") + _, _ = text.WriteString(strings.Replace(url.QueryEscape(aDiff.Text), "+", " ", -1)) + _, _ = text.WriteString("\t") + break + case DiffDelete: + _, _ = text.WriteString("-") + _, _ = text.WriteString(strconv.Itoa(utf8.RuneCountInString(aDiff.Text))) + _, _ = text.WriteString("\t") + break + case DiffEqual: + _, _ = text.WriteString("=") + _, _ = text.WriteString(strconv.Itoa(utf8.RuneCountInString(aDiff.Text))) + _, _ = text.WriteString("\t") + break + } + } + delta := text.String() + if len(delta) != 0 { + // Strip off trailing tab character. + delta = delta[0 : utf8.RuneCountInString(delta)-1] + delta = unescaper.Replace(delta) + } + return delta +} + +// DiffFromDelta given the original text1, and an encoded string which describes the +// operations required to transform text1 into text2, comAdde the full diff. +func (dmp *DiffMatchPatch) DiffFromDelta(text1, delta string) (diffs []Diff, err error) { + diffs = []Diff{} + + defer func() { + if r := recover(); r != nil { + err = r.(error) + } + }() + + pointer := 0 // Cursor in text1 + tokens := strings.Split(delta, "\t") + + for _, token := range tokens { + if len(token) == 0 { + // Blank tokens are ok (from a trailing \t). + continue + } + + // Each token begins with a one character parameter which specifies the + // operation of this token (delete, insert, equality). + param := token[1:] + + switch op := token[0]; op { + case '+': + // decode would Diff all "+" to " " + param = strings.Replace(param, "+", "%2b", -1) + param, err = url.QueryUnescape(param) + if err != nil { + return nil, err + } + if !utf8.ValidString(param) { + return nil, fmt.Errorf("invalid UTF-8 token: %q", param) + } + diffs = append(diffs, Diff{DiffInsert, param}) + case '=', '-': + n, err := strconv.ParseInt(param, 10, 0) + if err != nil { + return diffs, err + } else if n < 0 { + return diffs, errors.New("Negative number in DiffFromDelta: " + param) + } + + // remember that string slicing is by byte - we want by rune here. + text := string([]rune(text1)[pointer : pointer+int(n)]) + pointer += int(n) + + if op == '=' { + diffs = append(diffs, Diff{DiffEqual, text}) + } else { + diffs = append(diffs, Diff{DiffDelete, text}) + } + default: + // Anything else is an error. + return diffs, errors.New("Invalid diff operation in DiffFromDelta: " + string(token[0])) + } + } + + if pointer != len([]rune(text1)) { + return diffs, fmt.Errorf("Delta length (%v) smaller than source text length (%v)", pointer, len(text1)) + } + return diffs, err +} + +// MATCH FUNCTIONS + +// MatchMain locates the best instance of 'pattern' in 'text' near 'loc'. +// Returns -1 if no match found. +func (dmp *DiffMatchPatch) MatchMain(text, pattern string, loc int) int { + // Check for null inputs not needed since null can't be passed in C#. + + loc = int(math.Max(0, math.Min(float64(loc), float64(len(text))))) + if text == pattern { + // Shortcut (potentially not guaranteed by the algorithm) + return 0 + } else if len(text) == 0 { + // Nothing to match. + return -1 + } else if loc+len(pattern) <= len(text) && text[loc:loc+len(pattern)] == pattern { + // Perfect match at the perfect spot! (Includes case of null pattern) + return loc + } + // Do a fuzzy compare. + return dmp.MatchBitap(text, pattern, loc) +} + +// MatchBitap locates the best instance of 'pattern' in 'text' near 'loc' using the +// Bitap algorithm. Returns -1 if no match found. +func (dmp *DiffMatchPatch) MatchBitap(text, pattern string, loc int) int { + // Initialise the alphabet. + s := dmp.MatchAlphabet(pattern) + + // Highest score beyond which we give up. + scoreThreshold := dmp.MatchThreshold + // Is there a nearby exact match? (speedup) + bestLoc := indexOf(text, pattern, loc) + if bestLoc != -1 { + scoreThreshold = math.Min(dmp.matchBitapScore(0, bestLoc, loc, + pattern), scoreThreshold) + // What about in the other direction? (speedup) + bestLoc = lastIndexOf(text, pattern, loc+len(pattern)) + if bestLoc != -1 { + scoreThreshold = math.Min(dmp.matchBitapScore(0, bestLoc, loc, + pattern), scoreThreshold) + } + } + + // Initialise the bit arrays. + matchmask := 1 << uint((len(pattern) - 1)) + bestLoc = -1 + + var binMin, binMid int + binMax := len(pattern) + len(text) + lastRd := []int{} + for d := 0; d < len(pattern); d++ { + // Scan for the best match; each iteration allows for one more error. + // Run a binary search to determine how far from 'loc' we can stray at + // this error level. + binMin = 0 + binMid = binMax + for binMin < binMid { + if dmp.matchBitapScore(d, loc+binMid, loc, pattern) <= scoreThreshold { + binMin = binMid + } else { + binMax = binMid + } + binMid = (binMax-binMin)/2 + binMin + } + // Use the result from this iteration as the maximum for the next. + binMax = binMid + start := int(math.Max(1, float64(loc-binMid+1))) + finish := int(math.Min(float64(loc+binMid), float64(len(text))) + float64(len(pattern))) + + rd := make([]int, finish+2) + rd[finish+1] = (1 << uint(d)) - 1 + + for j := finish; j >= start; j-- { + var charMatch int + if len(text) <= j-1 { + // Out of range. + charMatch = 0 + } else if _, ok := s[text[j-1]]; !ok { + charMatch = 0 + } else { + charMatch = s[text[j-1]] + } + + if d == 0 { + // First pass: exact match. + rd[j] = ((rd[j+1] << 1) | 1) & charMatch + } else { + // Subsequent passes: fuzzy match. + rd[j] = ((rd[j+1]<<1)|1)&charMatch | (((lastRd[j+1] | lastRd[j]) << 1) | 1) | lastRd[j+1] + } + if (rd[j] & matchmask) != 0 { + score := dmp.matchBitapScore(d, j-1, loc, pattern) + // This match will almost certainly be better than any existing + // match. But check anyway. + if score <= scoreThreshold { + // Told you so. + scoreThreshold = score + bestLoc = j - 1 + if bestLoc > loc { + // When passing loc, don't exceed our current distance from loc. + start = int(math.Max(1, float64(2*loc-bestLoc))) + } else { + // Already passed loc, downhill from here on in. + break + } + } + } + } + if dmp.matchBitapScore(d+1, loc, loc, pattern) > scoreThreshold { + // No hope for a (better) match at greater error levels. + break + } + lastRd = rd + } + return bestLoc +} + +// matchBitapScore computes and returns the score for a match with e errors and x location. +func (dmp *DiffMatchPatch) matchBitapScore(e, x, loc int, pattern string) float64 { + accuracy := float64(e) / float64(len(pattern)) + proximity := math.Abs(float64(loc - x)) + if dmp.MatchDistance == 0 { + // Dodge divide by zero error. + if proximity == 0 { + return accuracy + } + + return 1.0 + } + return accuracy + (proximity / float64(dmp.MatchDistance)) +} + +// MatchAlphabet initialises the alphabet for the Bitap algorithm. +func (dmp *DiffMatchPatch) MatchAlphabet(pattern string) map[byte]int { + s := map[byte]int{} + charPattern := []byte(pattern) + for _, c := range charPattern { + _, ok := s[c] + if !ok { + s[c] = 0 + } + } + i := 0 + + for _, c := range charPattern { + value := s[c] | int(uint(1)<<uint((len(pattern)-i-1))) + s[c] = value + i++ + } + return s +} + +// PATCH FUNCTIONS + +// PatchAddContext increases the context until it is unique, +// but doesn't let the pattern expand beyond MatchMaxBits. +func (dmp *DiffMatchPatch) PatchAddContext(patch Patch, text string) Patch { + if len(text) == 0 { + return patch + } + + pattern := text[patch.start2 : patch.start2+patch.length1] + padding := 0 + + // Look for the first and last matches of pattern in text. If two + // different matches are found, increase the pattern length. + for strings.Index(text, pattern) != strings.LastIndex(text, pattern) && + len(pattern) < dmp.MatchMaxBits-2*dmp.PatchMargin { + padding += dmp.PatchMargin + maxStart := max(0, patch.start2-padding) + minEnd := min(len(text), patch.start2+patch.length1+padding) + pattern = text[maxStart:minEnd] + } + // Add one chunk for good luck. + padding += dmp.PatchMargin + + // Add the prefix. + prefix := text[max(0, patch.start2-padding):patch.start2] + if len(prefix) != 0 { + patch.diffs = append([]Diff{Diff{DiffEqual, prefix}}, patch.diffs...) + } + // Add the suffix. + suffix := text[patch.start2+patch.length1 : min(len(text), patch.start2+patch.length1+padding)] + if len(suffix) != 0 { + patch.diffs = append(patch.diffs, Diff{DiffEqual, suffix}) + } + + // Roll back the start points. + patch.start1 -= len(prefix) + patch.start2 -= len(prefix) + // Extend the lengths. + patch.length1 += len(prefix) + len(suffix) + patch.length2 += len(prefix) + len(suffix) + + return patch +} + +// PatchMake computes a list of patches. +func (dmp *DiffMatchPatch) PatchMake(opt ...interface{}) []Patch { + if len(opt) == 1 { + diffs, _ := opt[0].([]Diff) + text1 := dmp.DiffText1(diffs) + return dmp.PatchMake(text1, diffs) + } else if len(opt) == 2 { + text1 := opt[0].(string) + switch t := opt[1].(type) { + case string: + diffs := dmp.DiffMain(text1, t, true) + if len(diffs) > 2 { + diffs = dmp.DiffCleanupSemantic(diffs) + diffs = dmp.DiffCleanupEfficiency(diffs) + } + return dmp.PatchMake(text1, diffs) + case []Diff: + return dmp.patchMake2(text1, t) + } + } else if len(opt) == 3 { + return dmp.PatchMake(opt[0], opt[2]) + } + return []Patch{} +} + +// patchMake2 computes a list of patches to turn text1 into text2. +// text2 is not provided, diffs are the delta between text1 and text2. +func (dmp *DiffMatchPatch) patchMake2(text1 string, diffs []Diff) []Patch { + // Check for null inputs not needed since null can't be passed in C#. + patches := []Patch{} + if len(diffs) == 0 { + return patches // Get rid of the null case. + } + + patch := Patch{} + charCount1 := 0 // Number of characters into the text1 string. + charCount2 := 0 // Number of characters into the text2 string. + // Start with text1 (prepatchText) and apply the diffs until we arrive at + // text2 (postpatchText). We recreate the patches one by one to determine + // context info. + prepatchText := text1 + postpatchText := text1 + + for i, aDiff := range diffs { + if len(patch.diffs) == 0 && aDiff.Type != DiffEqual { + // A new patch starts here. + patch.start1 = charCount1 + patch.start2 = charCount2 + } + + switch aDiff.Type { + case DiffInsert: + patch.diffs = append(patch.diffs, aDiff) + patch.length2 += len(aDiff.Text) + postpatchText = postpatchText[:charCount2] + + aDiff.Text + postpatchText[charCount2:] + case DiffDelete: + patch.length1 += len(aDiff.Text) + patch.diffs = append(patch.diffs, aDiff) + postpatchText = postpatchText[:charCount2] + postpatchText[charCount2+len(aDiff.Text):] + case DiffEqual: + if len(aDiff.Text) <= 2*dmp.PatchMargin && + len(patch.diffs) != 0 && i != len(diffs)-1 { + // Small equality inside a patch. + patch.diffs = append(patch.diffs, aDiff) + patch.length1 += len(aDiff.Text) + patch.length2 += len(aDiff.Text) + } + if len(aDiff.Text) >= 2*dmp.PatchMargin { + // Time for a new patch. + if len(patch.diffs) != 0 { + patch = dmp.PatchAddContext(patch, prepatchText) + patches = append(patches, patch) + patch = Patch{} + // Unlike Unidiff, our patch lists have a rolling context. + // http://code.google.com/p/google-diff-match-patch/wiki/Unidiff + // Update prepatch text & pos to reflect the application of the + // just completed patch. + prepatchText = postpatchText + charCount1 = charCount2 + } + } + } + + // Update the current character count. + if aDiff.Type != DiffInsert { + charCount1 += len(aDiff.Text) + } + if aDiff.Type != DiffDelete { + charCount2 += len(aDiff.Text) + } + } + + // Pick up the leftover patch if not empty. + if len(patch.diffs) != 0 { + patch = dmp.PatchAddContext(patch, prepatchText) + patches = append(patches, patch) + } + + return patches +} + +// PatchDeepCopy returns an array that is identical to a +// given an array of patches. +func (dmp *DiffMatchPatch) PatchDeepCopy(patches []Patch) []Patch { + patchesCopy := []Patch{} + for _, aPatch := range patches { + patchCopy := Patch{} + for _, aDiff := range aPatch.diffs { + patchCopy.diffs = append(patchCopy.diffs, Diff{ + aDiff.Type, + aDiff.Text, + }) + } + patchCopy.start1 = aPatch.start1 + patchCopy.start2 = aPatch.start2 + patchCopy.length1 = aPatch.length1 + patchCopy.length2 = aPatch.length2 + patchesCopy = append(patchesCopy, patchCopy) + } + return patchesCopy +} + +// PatchApply merges a set of patches onto the text. Returns a patched text, as well +// as an array of true/false values indicating which patches were applied. +func (dmp *DiffMatchPatch) PatchApply(patches []Patch, text string) (string, []bool) { + if len(patches) == 0 { + return text, []bool{} + } + + // Deep copy the patches so that no changes are made to originals. + patches = dmp.PatchDeepCopy(patches) + + nullPadding := dmp.PatchAddPadding(patches) + text = nullPadding + text + nullPadding + patches = dmp.PatchSplitMax(patches) + + x := 0 + // delta keeps track of the offset between the expected and actual + // location of the previous patch. If there are patches expected at + // positions 10 and 20, but the first patch was found at 12, delta is 2 + // and the second patch has an effective expected position of 22. + delta := 0 + results := make([]bool, len(patches)) + for _, aPatch := range patches { + expectedLoc := aPatch.start2 + delta + text1 := dmp.DiffText1(aPatch.diffs) + var startLoc int + endLoc := -1 + if len(text1) > dmp.MatchMaxBits { + // PatchSplitMax will only provide an oversized pattern + // in the case of a monster delete. + startLoc = dmp.MatchMain(text, text1[:dmp.MatchMaxBits], expectedLoc) + if startLoc != -1 { + endLoc = dmp.MatchMain(text, + text1[len(text1)-dmp.MatchMaxBits:], expectedLoc+len(text1)-dmp.MatchMaxBits) + if endLoc == -1 || startLoc >= endLoc { + // Can't find valid trailing context. Drop this patch. + startLoc = -1 + } + } + } else { + startLoc = dmp.MatchMain(text, text1, expectedLoc) + } + if startLoc == -1 { + // No match found. :( + results[x] = false + // Subtract the delta for this failed patch from subsequent patches. + delta -= aPatch.length2 - aPatch.length1 + } else { + // Found a match. :) + results[x] = true + delta = startLoc - expectedLoc + var text2 string + if endLoc == -1 { + text2 = text[startLoc:int(math.Min(float64(startLoc+len(text1)), float64(len(text))))] + } else { + text2 = text[startLoc:int(math.Min(float64(endLoc+dmp.MatchMaxBits), float64(len(text))))] + } + if text1 == text2 { + // Perfect match, just shove the Replacement text in. + text = text[:startLoc] + dmp.DiffText2(aPatch.diffs) + text[startLoc+len(text1):] + } else { + // Imperfect match. Run a diff to get a framework of equivalent + // indices. + diffs := dmp.DiffMain(text1, text2, false) + if len(text1) > dmp.MatchMaxBits && float64(dmp.DiffLevenshtein(diffs))/float64(len(text1)) > dmp.PatchDeleteThreshold { + // The end points match, but the content is unacceptably bad. + results[x] = false + } else { + diffs = dmp.DiffCleanupSemanticLossless(diffs) + index1 := 0 + for _, aDiff := range aPatch.diffs { + if aDiff.Type != DiffEqual { + index2 := dmp.DiffXIndex(diffs, index1) + if aDiff.Type == DiffInsert { + // Insertion + text = text[:startLoc+index2] + aDiff.Text + text[startLoc+index2:] + } else if aDiff.Type == DiffDelete { + // Deletion + startIndex := startLoc + index2 + text = text[:startIndex] + + text[startIndex+dmp.DiffXIndex(diffs, index1+len(aDiff.Text))-index2:] + } + } + if aDiff.Type != DiffDelete { + index1 += len(aDiff.Text) + } + } + } + } + } + x++ + } + // Strip the padding off. + text = text[len(nullPadding) : len(nullPadding)+(len(text)-2*len(nullPadding))] + return text, results +} + +// PatchAddPadding adds some padding on text start and end so that edges can match something. +// Intended to be called only from within patchApply. +func (dmp *DiffMatchPatch) PatchAddPadding(patches []Patch) string { + paddingLength := dmp.PatchMargin + nullPadding := "" + for x := 1; x <= paddingLength; x++ { + nullPadding += string(x) + } + + // Bump all the patches forward. + for i := range patches { + patches[i].start1 += paddingLength + patches[i].start2 += paddingLength + } + + // Add some padding on start of first diff. + if len(patches[0].diffs) == 0 || patches[0].diffs[0].Type != DiffEqual { + // Add nullPadding equality. + patches[0].diffs = append([]Diff{Diff{DiffEqual, nullPadding}}, patches[0].diffs...) + patches[0].start1 -= paddingLength // Should be 0. + patches[0].start2 -= paddingLength // Should be 0. + patches[0].length1 += paddingLength + patches[0].length2 += paddingLength + } else if paddingLength > len(patches[0].diffs[0].Text) { + // Grow first equality. + extraLength := paddingLength - len(patches[0].diffs[0].Text) + patches[0].diffs[0].Text = nullPadding[len(patches[0].diffs[0].Text):] + patches[0].diffs[0].Text + patches[0].start1 -= extraLength + patches[0].start2 -= extraLength + patches[0].length1 += extraLength + patches[0].length2 += extraLength + } + + // Add some padding on end of last diff. + last := len(patches) - 1 + if len(patches[last].diffs) == 0 || patches[last].diffs[len(patches[last].diffs)-1].Type != DiffEqual { + // Add nullPadding equality. + patches[last].diffs = append(patches[last].diffs, Diff{DiffEqual, nullPadding}) + patches[last].length1 += paddingLength + patches[last].length2 += paddingLength + } else if paddingLength > len(patches[last].diffs[len(patches[last].diffs)-1].Text) { + // Grow last equality. + lastDiff := patches[last].diffs[len(patches[last].diffs)-1] + extraLength := paddingLength - len(lastDiff.Text) + patches[last].diffs[len(patches[last].diffs)-1].Text += nullPadding[:extraLength] + patches[last].length1 += extraLength + patches[last].length2 += extraLength + } + + return nullPadding +} + +// PatchSplitMax looks through the patches and breaks up any which are longer than the +// maximum limit of the match algorithm. +// Intended to be called only from within patchApply. +func (dmp *DiffMatchPatch) PatchSplitMax(patches []Patch) []Patch { + patchSize := dmp.MatchMaxBits + for x := 0; x < len(patches); x++ { + if patches[x].length1 <= patchSize { + continue + } + bigpatch := patches[x] + // Remove the big old patch. + patches = append(patches[:x], patches[x+1:]...) + x-- + + start1 := bigpatch.start1 + start2 := bigpatch.start2 + precontext := "" + for len(bigpatch.diffs) != 0 { + // Create one of several smaller patches. + patch := Patch{} + empty := true + patch.start1 = start1 - len(precontext) + patch.start2 = start2 - len(precontext) + if len(precontext) != 0 { + patch.length1 = len(precontext) + patch.length2 = len(precontext) + patch.diffs = append(patch.diffs, Diff{DiffEqual, precontext}) + } + for len(bigpatch.diffs) != 0 && patch.length1 < patchSize-dmp.PatchMargin { + diffType := bigpatch.diffs[0].Type + diffText := bigpatch.diffs[0].Text + if diffType == DiffInsert { + // Insertions are harmless. + patch.length2 += len(diffText) + start2 += len(diffText) + patch.diffs = append(patch.diffs, bigpatch.diffs[0]) + bigpatch.diffs = bigpatch.diffs[1:] + empty = false + } else if diffType == DiffDelete && len(patch.diffs) == 1 && patch.diffs[0].Type == DiffEqual && len(diffText) > 2*patchSize { + // This is a large deletion. Let it pass in one chunk. + patch.length1 += len(diffText) + start1 += len(diffText) + empty = false + patch.diffs = append(patch.diffs, Diff{diffType, diffText}) + bigpatch.diffs = bigpatch.diffs[1:] + } else { + // Deletion or equality. Only take as much as we can stomach. + diffText = diffText[:min(len(diffText), patchSize-patch.length1-dmp.PatchMargin)] + + patch.length1 += len(diffText) + start1 += len(diffText) + if diffType == DiffEqual { + patch.length2 += len(diffText) + start2 += len(diffText) + } else { + empty = false + } + patch.diffs = append(patch.diffs, Diff{diffType, diffText}) + if diffText == bigpatch.diffs[0].Text { + bigpatch.diffs = bigpatch.diffs[1:] + } else { + bigpatch.diffs[0].Text = + bigpatch.diffs[0].Text[len(diffText):] + } + } + } + // Compute the head context for the next patch. + precontext = dmp.DiffText2(patch.diffs) + precontext = precontext[max(0, len(precontext)-dmp.PatchMargin):] + + postcontext := "" + // Append the end context for this patch. + if len(dmp.DiffText1(bigpatch.diffs)) > dmp.PatchMargin { + postcontext = dmp.DiffText1(bigpatch.diffs)[:dmp.PatchMargin] + } else { + postcontext = dmp.DiffText1(bigpatch.diffs) + } + + if len(postcontext) != 0 { + patch.length1 += len(postcontext) + patch.length2 += len(postcontext) + if len(patch.diffs) != 0 && patch.diffs[len(patch.diffs)-1].Type == DiffEqual { + patch.diffs[len(patch.diffs)-1].Text += postcontext + } else { + patch.diffs = append(patch.diffs, Diff{DiffEqual, postcontext}) + } + } + if !empty { + x++ + patches = append(patches[:x], append([]Patch{patch}, patches[x:]...)...) + } + } + } + return patches +} + +// PatchToText takes a list of patches and returns a textual representation. +func (dmp *DiffMatchPatch) PatchToText(patches []Patch) string { + var text bytes.Buffer + for _, aPatch := range patches { + _, _ = text.WriteString(aPatch.String()) + } + return text.String() +} + +// PatchFromText parses a textual representation of patches and returns a List of Patch +// objects. +func (dmp *DiffMatchPatch) PatchFromText(textline string) ([]Patch, error) { + patches := []Patch{} + if len(textline) == 0 { + return patches, nil + } + text := strings.Split(textline, "\n") + textPointer := 0 + patchHeader := regexp.MustCompile("^@@ -(\\d+),?(\\d*) \\+(\\d+),?(\\d*) @@$") + + var patch Patch + var sign uint8 + var line string + for textPointer < len(text) { + + if !patchHeader.MatchString(text[textPointer]) { + return patches, errors.New("Invalid patch string: " + text[textPointer]) + } + + patch = Patch{} + m := patchHeader.FindStringSubmatch(text[textPointer]) + + patch.start1, _ = strconv.Atoi(m[1]) + if len(m[2]) == 0 { + patch.start1-- + patch.length1 = 1 + } else if m[2] == "0" { + patch.length1 = 0 + } else { + patch.start1-- + patch.length1, _ = strconv.Atoi(m[2]) + } + + patch.start2, _ = strconv.Atoi(m[3]) + + if len(m[4]) == 0 { + patch.start2-- + patch.length2 = 1 + } else if m[4] == "0" { + patch.length2 = 0 + } else { + patch.start2-- + patch.length2, _ = strconv.Atoi(m[4]) + } + textPointer++ + + for textPointer < len(text) { + if len(text[textPointer]) > 0 { + sign = text[textPointer][0] + } else { + textPointer++ + continue + } + + line = text[textPointer][1:] + line = strings.Replace(line, "+", "%2b", -1) + line, _ = url.QueryUnescape(line) + if sign == '-' { + // Deletion. + patch.diffs = append(patch.diffs, Diff{DiffDelete, line}) + } else if sign == '+' { + // Insertion. + patch.diffs = append(patch.diffs, Diff{DiffInsert, line}) + } else if sign == ' ' { + // Minor equality. + patch.diffs = append(patch.diffs, Diff{DiffEqual, line}) + } else if sign == '@' { + // Start of next patch. + break + } else { + // WTF? + return patches, errors.New("Invalid patch mode '" + string(sign) + "' in: " + string(line)) + } + textPointer++ + } + + patches = append(patches, patch) + } + return patches, nil +} |