diff options
Diffstat (limited to 'services/gitdiff/gitdiff.go')
-rw-r--r-- | services/gitdiff/gitdiff.go | 312 |
1 files changed, 21 insertions, 291 deletions
diff --git a/services/gitdiff/gitdiff.go b/services/gitdiff/gitdiff.go index 6e8c149dab..6dd237bbc8 100644 --- a/services/gitdiff/gitdiff.go +++ b/services/gitdiff/gitdiff.go @@ -15,7 +15,6 @@ import ( "io" "net/url" "os" - "regexp" "sort" "strings" "time" @@ -40,7 +39,7 @@ import ( "golang.org/x/text/transform" ) -// DiffLineType represents the type of a DiffLine. +// DiffLineType represents the type of DiffLine. type DiffLineType uint8 // DiffLineType possible values. @@ -51,7 +50,7 @@ const ( DiffLineSection ) -// DiffFileType represents the type of a DiffFile. +// DiffFileType represents the type of DiffFile. type DiffFileType uint8 // DiffFileType possible values. @@ -100,12 +99,12 @@ type DiffLineSectionInfo struct { // BlobExcerptChunkSize represent max lines of excerpt const BlobExcerptChunkSize = 20 -// GetType returns the type of a DiffLine. +// GetType returns the type of DiffLine. func (d *DiffLine) GetType() int { return int(d.Type) } -// CanComment returns whether or not a line can get commented +// CanComment returns whether a line can get commented func (d *DiffLine) CanComment() bool { return len(d.Comments) == 0 && d.Type != DiffLineSection } @@ -191,287 +190,13 @@ var ( codeTagSuffix = []byte(`</span>`) ) -var ( - unfinishedtagRegex = regexp.MustCompile(`<[^>]*$`) - trailingSpanRegex = regexp.MustCompile(`<span\s*[[:alpha:]="]*?[>]?$`) - entityRegex = regexp.MustCompile(`&[#]*?[0-9[:alpha:]]*$`) -) - -// shouldWriteInline represents combinations where we manually write inline changes -func shouldWriteInline(diff diffmatchpatch.Diff, lineType DiffLineType) bool { - if true && - diff.Type == diffmatchpatch.DiffEqual || - diff.Type == diffmatchpatch.DiffInsert && lineType == DiffLineAdd || - diff.Type == diffmatchpatch.DiffDelete && lineType == DiffLineDel { - return true - } - return false -} - -func fixupBrokenSpans(diffs []diffmatchpatch.Diff) []diffmatchpatch.Diff { - // Create a new array to store our fixed up blocks - fixedup := make([]diffmatchpatch.Diff, 0, len(diffs)) - - // semantically label some numbers - const insert, delete, equal = 0, 1, 2 - - // record the positions of the last type of each block in the fixedup blocks - last := []int{-1, -1, -1} - operation := []diffmatchpatch.Operation{diffmatchpatch.DiffInsert, diffmatchpatch.DiffDelete, diffmatchpatch.DiffEqual} - - // create a writer for insert and deletes - toWrite := []strings.Builder{ - {}, - {}, - } - - // make some flags for insert and delete - unfinishedTag := []bool{false, false} - unfinishedEnt := []bool{false, false} - - // store stores the provided text in the writer for the typ - store := func(text string, typ int) { - (&(toWrite[typ])).WriteString(text) - } - - // hasStored returns true if there is stored content - hasStored := func(typ int) bool { - return (&toWrite[typ]).Len() > 0 - } - - // stored will return that content - stored := func(typ int) string { - return (&toWrite[typ]).String() - } - - // empty will empty the stored content - empty := func(typ int) { - (&toWrite[typ]).Reset() - } - - // pop will remove the stored content appending to a diff block for that typ - pop := func(typ int, fixedup []diffmatchpatch.Diff) []diffmatchpatch.Diff { - if hasStored(typ) { - if last[typ] > last[equal] { - fixedup[last[typ]].Text += stored(typ) - } else { - fixedup = append(fixedup, diffmatchpatch.Diff{ - Type: operation[typ], - Text: stored(typ), - }) - } - empty(typ) - } - return fixedup - } - - // Now we walk the provided diffs and check the type of each block in turn - for _, diff := range diffs { - - typ := delete // flag for handling insert or delete typs - switch diff.Type { - case diffmatchpatch.DiffEqual: - // First check if there is anything stored - if hasStored(insert) || hasStored(delete) { - // There are two reasons for storing content: - // 1. Unfinished Entity <- Could be more efficient here by not doing this if we're looking for a tag - if unfinishedEnt[insert] || unfinishedEnt[delete] { - // we look for a ';' to finish an entity - idx := strings.IndexRune(diff.Text, ';') - if idx >= 0 { - // if we find a ';' store the preceding content to both insert and delete - store(diff.Text[:idx+1], insert) - store(diff.Text[:idx+1], delete) - - // and remove it from this block - diff.Text = diff.Text[idx+1:] - - // reset the ent flags - unfinishedEnt[insert] = false - unfinishedEnt[delete] = false - } else { - // otherwise store it all on insert and delete - store(diff.Text, insert) - store(diff.Text, delete) - // and empty this block - diff.Text = "" - } - } - // 2. Unfinished Tag - if unfinishedTag[insert] || unfinishedTag[delete] { - // we look for a '>' to finish a tag - idx := strings.IndexRune(diff.Text, '>') - if idx >= 0 { - store(diff.Text[:idx+1], insert) - store(diff.Text[:idx+1], delete) - diff.Text = diff.Text[idx+1:] - unfinishedTag[insert] = false - unfinishedTag[delete] = false - } else { - store(diff.Text, insert) - store(diff.Text, delete) - diff.Text = "" - } - } - - // If we've completed the required tag/entities - if !(unfinishedTag[insert] || unfinishedTag[delete] || unfinishedEnt[insert] || unfinishedEnt[delete]) { - // pop off the stack - fixedup = pop(insert, fixedup) - fixedup = pop(delete, fixedup) - } - - // If that has left this diff block empty then shortcut - if len(diff.Text) == 0 { - continue - } - } - - // check if this block ends in an unfinished tag? - idx := unfinishedtagRegex.FindStringIndex(diff.Text) - if idx != nil { - unfinishedTag[insert] = true - unfinishedTag[delete] = true - } else { - // otherwise does it end in an unfinished entity? - idx = entityRegex.FindStringIndex(diff.Text) - if idx != nil { - unfinishedEnt[insert] = true - unfinishedEnt[delete] = true - } - } - - // If there is an unfinished component - if idx != nil { - // Store the fragment - store(diff.Text[idx[0]:], insert) - store(diff.Text[idx[0]:], delete) - // and remove it from this block - diff.Text = diff.Text[:idx[0]] - } - - // If that hasn't left the block empty - if len(diff.Text) > 0 { - // store the position of the last equal block and store it in our diffs - last[equal] = len(fixedup) - fixedup = append(fixedup, diff) - } - continue - case diffmatchpatch.DiffInsert: - typ = insert - fallthrough - case diffmatchpatch.DiffDelete: - // First check if there is anything stored for this type - if hasStored(typ) { - // if there is prepend it to this block, empty the storage and reset our flags - diff.Text = stored(typ) + diff.Text - empty(typ) - unfinishedEnt[typ] = false - unfinishedTag[typ] = false - } - - // check if this block ends in an unfinished tag - idx := unfinishedtagRegex.FindStringIndex(diff.Text) - if idx != nil { - unfinishedTag[typ] = true - } else { - // otherwise does it end in an unfinished entity - idx = entityRegex.FindStringIndex(diff.Text) - if idx != nil { - unfinishedEnt[typ] = true - } - } - - // If there is an unfinished component - if idx != nil { - // Store the fragment - store(diff.Text[idx[0]:], typ) - // and remove it from this block - diff.Text = diff.Text[:idx[0]] - } - - // If that hasn't left the block empty - if len(diff.Text) > 0 { - // if the last block of this type was after the last equal block - if last[typ] > last[equal] { - // store this blocks content on that block - fixedup[last[typ]].Text += diff.Text - } else { - // otherwise store the position of the last block of this type and store the block - last[typ] = len(fixedup) - fixedup = append(fixedup, diff) - } - } - continue - } - } - - // pop off any remaining stored content - fixedup = pop(insert, fixedup) - fixedup = pop(delete, fixedup) - - return fixedup -} - -func diffToHTML(fileName string, diffs []diffmatchpatch.Diff, lineType DiffLineType) DiffInline { +func diffToHTML(lineWrapperTags []string, diffs []diffmatchpatch.Diff, lineType DiffLineType) string { buf := bytes.NewBuffer(nil) - match := "" - - diffs = fixupBrokenSpans(diffs) - + // restore the line wrapper tags <span class="line"> and <span class="cl">, if necessary + for _, tag := range lineWrapperTags { + buf.WriteString(tag) + } for _, diff := range diffs { - if shouldWriteInline(diff, lineType) { - if len(match) > 0 { - diff.Text = match + diff.Text - match = "" - } - // Chroma HTML syntax highlighting is done before diffing individual lines in order to maintain consistency. - // Since inline changes might split in the middle of a chroma span tag or HTML entity, make we manually put it back together - // before writing so we don't try insert added/removed code spans in the middle of one of those - // and create broken HTML. This is done by moving incomplete HTML forward until it no longer matches our pattern of - // a line ending with an incomplete HTML entity or partial/opening <span>. - - // EX: - // diffs[{Type: dmp.DiffDelete, Text: "language</span><span "}, - // {Type: dmp.DiffEqual, Text: "c"}, - // {Type: dmp.DiffDelete, Text: "lass="p">}] - - // After first iteration - // diffs[{Type: dmp.DiffDelete, Text: "language</span>"}, //write out - // {Type: dmp.DiffEqual, Text: "<span c"}, - // {Type: dmp.DiffDelete, Text: "lass="p">,</span>}] - - // After second iteration - // {Type: dmp.DiffEqual, Text: ""}, // write out - // {Type: dmp.DiffDelete, Text: "<span class="p">,</span>}] - - // Final - // {Type: dmp.DiffDelete, Text: "<span class="p">,</span>}] - // end up writing <span class="removed-code"><span class="p">,</span></span> - // Instead of <span class="removed-code">lass="p",</span></span> - - m := trailingSpanRegex.FindStringSubmatchIndex(diff.Text) - if m != nil { - match = diff.Text[m[0]:m[1]] - diff.Text = strings.TrimSuffix(diff.Text, match) - } - m = entityRegex.FindStringSubmatchIndex(diff.Text) - if m != nil { - match = diff.Text[m[0]:m[1]] - diff.Text = strings.TrimSuffix(diff.Text, match) - } - // Print an existing closing span first before opening added/remove-code span so it doesn't unintentionally close it - if strings.HasPrefix(diff.Text, "</span>") { - buf.WriteString("</span>") - diff.Text = strings.TrimPrefix(diff.Text, "</span>") - } - // If we weren't able to fix it then this should avoid broken HTML by not inserting more spans below - // The previous/next diff section will contain the rest of the tag that is missing here - if strings.Count(diff.Text, "<") != strings.Count(diff.Text, ">") { - buf.WriteString(diff.Text) - continue - } - } switch { case diff.Type == diffmatchpatch.DiffEqual: buf.WriteString(diff.Text) @@ -485,7 +210,10 @@ func diffToHTML(fileName string, diffs []diffmatchpatch.Diff, lineType DiffLineT buf.Write(codeTagSuffix) } } - return DiffInlineWithUnicodeEscape(template.HTML(buf.String())) + for range lineWrapperTags { + buf.WriteString("</span>") + } + return buf.String() } // GetLine gets a specific line by type (add or del) and file line number @@ -597,10 +325,12 @@ func (diffSection *DiffSection) GetComputedInlineDiffFor(diffLine *DiffLine) Dif return DiffInlineWithHighlightCode(diffSection.FileName, language, diffLine.Content) } - diffRecord := diffMatchPatch.DiffMain(highlight.Code(diffSection.FileName, language, diff1[1:]), highlight.Code(diffSection.FileName, language, diff2[1:]), true) - diffRecord = diffMatchPatch.DiffCleanupEfficiency(diffRecord) - - return diffToHTML(diffSection.FileName, diffRecord, diffLine.Type) + hcd := newHighlightCodeDiff() + diffRecord := hcd.diffWithHighlight(diffSection.FileName, language, diff1[1:], diff2[1:]) + // it seems that Gitea doesn't need the line wrapper of Chroma, so do not add them back + // if the line wrappers are still needed in the future, it can be added back by "diffToHTML(hcd.lineWrapperTags. ...)" + diffHTML := diffToHTML(nil, diffRecord, diffLine.Type) + return DiffInlineWithUnicodeEscape(template.HTML(diffHTML)) } // DiffFile represents a file diff. @@ -1289,7 +1019,7 @@ func readFileName(rd *strings.Reader) (string, bool) { if char == '"' { fmt.Fscanf(rd, "%q ", &name) if len(name) == 0 { - log.Error("Reader has no file name: %v", rd) + log.Error("Reader has no file name: reader=%+v", rd) return "", true } @@ -1311,7 +1041,7 @@ func readFileName(rd *strings.Reader) (string, bool) { } } if len(name) < 2 { - log.Error("Unable to determine name from reader: %v", rd) + log.Error("Unable to determine name from reader: reader=%+v", rd) return "", true } return name[2:], ambiguity |