diff options
-rw-r--r-- | modules/highlight/highlight.go | 8 | ||||
-rw-r--r-- | services/gitdiff/gitdiff.go | 312 | ||||
-rw-r--r-- | services/gitdiff/gitdiff_test.go | 88 | ||||
-rw-r--r-- | services/gitdiff/highlightdiff.go | 223 | ||||
-rw-r--r-- | services/gitdiff/highlightdiff_test.go | 126 |
5 files changed, 379 insertions, 378 deletions
diff --git a/modules/highlight/highlight.go b/modules/highlight/highlight.go index acd3bebb9f..6832207c0f 100644 --- a/modules/highlight/highlight.go +++ b/modules/highlight/highlight.go @@ -40,9 +40,11 @@ var ( // NewContext loads custom highlight map from local config func NewContext() { once.Do(func() { - keys := setting.Cfg.Section("highlight.mapping").Keys() - for i := range keys { - highlightMapping[keys[i].Name()] = keys[i].Value() + if setting.Cfg != nil { + keys := setting.Cfg.Section("highlight.mapping").Keys() + for i := range keys { + highlightMapping[keys[i].Name()] = keys[i].Value() + } } // The size 512 is simply a conservative rule of thumb diff --git a/services/gitdiff/gitdiff.go b/services/gitdiff/gitdiff.go index 6e8c149dab..6dd237bbc8 100644 --- a/services/gitdiff/gitdiff.go +++ b/services/gitdiff/gitdiff.go @@ -15,7 +15,6 @@ import ( "io" "net/url" "os" - "regexp" "sort" "strings" "time" @@ -40,7 +39,7 @@ import ( "golang.org/x/text/transform" ) -// DiffLineType represents the type of a DiffLine. +// DiffLineType represents the type of DiffLine. type DiffLineType uint8 // DiffLineType possible values. @@ -51,7 +50,7 @@ const ( DiffLineSection ) -// DiffFileType represents the type of a DiffFile. +// DiffFileType represents the type of DiffFile. type DiffFileType uint8 // DiffFileType possible values. @@ -100,12 +99,12 @@ type DiffLineSectionInfo struct { // BlobExcerptChunkSize represent max lines of excerpt const BlobExcerptChunkSize = 20 -// GetType returns the type of a DiffLine. +// GetType returns the type of DiffLine. func (d *DiffLine) GetType() int { return int(d.Type) } -// CanComment returns whether or not a line can get commented +// CanComment returns whether a line can get commented func (d *DiffLine) CanComment() bool { return len(d.Comments) == 0 && d.Type != DiffLineSection } @@ -191,287 +190,13 @@ var ( codeTagSuffix = []byte(`</span>`) ) -var ( - unfinishedtagRegex = regexp.MustCompile(`<[^>]*$`) - trailingSpanRegex = regexp.MustCompile(`<span\s*[[:alpha:]="]*?[>]?$`) - entityRegex = regexp.MustCompile(`&[#]*?[0-9[:alpha:]]*$`) -) - -// shouldWriteInline represents combinations where we manually write inline changes -func shouldWriteInline(diff diffmatchpatch.Diff, lineType DiffLineType) bool { - if true && - diff.Type == diffmatchpatch.DiffEqual || - diff.Type == diffmatchpatch.DiffInsert && lineType == DiffLineAdd || - diff.Type == diffmatchpatch.DiffDelete && lineType == DiffLineDel { - return true - } - return false -} - -func fixupBrokenSpans(diffs []diffmatchpatch.Diff) []diffmatchpatch.Diff { - // Create a new array to store our fixed up blocks - fixedup := make([]diffmatchpatch.Diff, 0, len(diffs)) - - // semantically label some numbers - const insert, delete, equal = 0, 1, 2 - - // record the positions of the last type of each block in the fixedup blocks - last := []int{-1, -1, -1} - operation := []diffmatchpatch.Operation{diffmatchpatch.DiffInsert, diffmatchpatch.DiffDelete, diffmatchpatch.DiffEqual} - - // create a writer for insert and deletes - toWrite := []strings.Builder{ - {}, - {}, - } - - // make some flags for insert and delete - unfinishedTag := []bool{false, false} - unfinishedEnt := []bool{false, false} - - // store stores the provided text in the writer for the typ - store := func(text string, typ int) { - (&(toWrite[typ])).WriteString(text) - } - - // hasStored returns true if there is stored content - hasStored := func(typ int) bool { - return (&toWrite[typ]).Len() > 0 - } - - // stored will return that content - stored := func(typ int) string { - return (&toWrite[typ]).String() - } - - // empty will empty the stored content - empty := func(typ int) { - (&toWrite[typ]).Reset() - } - - // pop will remove the stored content appending to a diff block for that typ - pop := func(typ int, fixedup []diffmatchpatch.Diff) []diffmatchpatch.Diff { - if hasStored(typ) { - if last[typ] > last[equal] { - fixedup[last[typ]].Text += stored(typ) - } else { - fixedup = append(fixedup, diffmatchpatch.Diff{ - Type: operation[typ], - Text: stored(typ), - }) - } - empty(typ) - } - return fixedup - } - - // Now we walk the provided diffs and check the type of each block in turn - for _, diff := range diffs { - - typ := delete // flag for handling insert or delete typs - switch diff.Type { - case diffmatchpatch.DiffEqual: - // First check if there is anything stored - if hasStored(insert) || hasStored(delete) { - // There are two reasons for storing content: - // 1. Unfinished Entity <- Could be more efficient here by not doing this if we're looking for a tag - if unfinishedEnt[insert] || unfinishedEnt[delete] { - // we look for a ';' to finish an entity - idx := strings.IndexRune(diff.Text, ';') - if idx >= 0 { - // if we find a ';' store the preceding content to both insert and delete - store(diff.Text[:idx+1], insert) - store(diff.Text[:idx+1], delete) - - // and remove it from this block - diff.Text = diff.Text[idx+1:] - - // reset the ent flags - unfinishedEnt[insert] = false - unfinishedEnt[delete] = false - } else { - // otherwise store it all on insert and delete - store(diff.Text, insert) - store(diff.Text, delete) - // and empty this block - diff.Text = "" - } - } - // 2. Unfinished Tag - if unfinishedTag[insert] || unfinishedTag[delete] { - // we look for a '>' to finish a tag - idx := strings.IndexRune(diff.Text, '>') - if idx >= 0 { - store(diff.Text[:idx+1], insert) - store(diff.Text[:idx+1], delete) - diff.Text = diff.Text[idx+1:] - unfinishedTag[insert] = false - unfinishedTag[delete] = false - } else { - store(diff.Text, insert) - store(diff.Text, delete) - diff.Text = "" - } - } - - // If we've completed the required tag/entities - if !(unfinishedTag[insert] || unfinishedTag[delete] || unfinishedEnt[insert] || unfinishedEnt[delete]) { - // pop off the stack - fixedup = pop(insert, fixedup) - fixedup = pop(delete, fixedup) - } - - // If that has left this diff block empty then shortcut - if len(diff.Text) == 0 { - continue - } - } - - // check if this block ends in an unfinished tag? - idx := unfinishedtagRegex.FindStringIndex(diff.Text) - if idx != nil { - unfinishedTag[insert] = true - unfinishedTag[delete] = true - } else { - // otherwise does it end in an unfinished entity? - idx = entityRegex.FindStringIndex(diff.Text) - if idx != nil { - unfinishedEnt[insert] = true - unfinishedEnt[delete] = true - } - } - - // If there is an unfinished component - if idx != nil { - // Store the fragment - store(diff.Text[idx[0]:], insert) - store(diff.Text[idx[0]:], delete) - // and remove it from this block - diff.Text = diff.Text[:idx[0]] - } - - // If that hasn't left the block empty - if len(diff.Text) > 0 { - // store the position of the last equal block and store it in our diffs - last[equal] = len(fixedup) - fixedup = append(fixedup, diff) - } - continue - case diffmatchpatch.DiffInsert: - typ = insert - fallthrough - case diffmatchpatch.DiffDelete: - // First check if there is anything stored for this type - if hasStored(typ) { - // if there is prepend it to this block, empty the storage and reset our flags - diff.Text = stored(typ) + diff.Text - empty(typ) - unfinishedEnt[typ] = false - unfinishedTag[typ] = false - } - - // check if this block ends in an unfinished tag - idx := unfinishedtagRegex.FindStringIndex(diff.Text) - if idx != nil { - unfinishedTag[typ] = true - } else { - // otherwise does it end in an unfinished entity - idx = entityRegex.FindStringIndex(diff.Text) - if idx != nil { - unfinishedEnt[typ] = true - } - } - - // If there is an unfinished component - if idx != nil { - // Store the fragment - store(diff.Text[idx[0]:], typ) - // and remove it from this block - diff.Text = diff.Text[:idx[0]] - } - - // If that hasn't left the block empty - if len(diff.Text) > 0 { - // if the last block of this type was after the last equal block - if last[typ] > last[equal] { - // store this blocks content on that block - fixedup[last[typ]].Text += diff.Text - } else { - // otherwise store the position of the last block of this type and store the block - last[typ] = len(fixedup) - fixedup = append(fixedup, diff) - } - } - continue - } - } - - // pop off any remaining stored content - fixedup = pop(insert, fixedup) - fixedup = pop(delete, fixedup) - - return fixedup -} - -func diffToHTML(fileName string, diffs []diffmatchpatch.Diff, lineType DiffLineType) DiffInline { +func diffToHTML(lineWrapperTags []string, diffs []diffmatchpatch.Diff, lineType DiffLineType) string { buf := bytes.NewBuffer(nil) - match := "" - - diffs = fixupBrokenSpans(diffs) - + // restore the line wrapper tags <span class="line"> and <span class="cl">, if necessary + for _, tag := range lineWrapperTags { + buf.WriteString(tag) + } for _, diff := range diffs { - if shouldWriteInline(diff, lineType) { - if len(match) > 0 { - diff.Text = match + diff.Text - match = "" - } - // Chroma HTML syntax highlighting is done before diffing individual lines in order to maintain consistency. - // Since inline changes might split in the middle of a chroma span tag or HTML entity, make we manually put it back together - // before writing so we don't try insert added/removed code spans in the middle of one of those - // and create broken HTML. This is done by moving incomplete HTML forward until it no longer matches our pattern of - // a line ending with an incomplete HTML entity or partial/opening <span>. - - // EX: - // diffs[{Type: dmp.DiffDelete, Text: "language</span><span "}, - // {Type: dmp.DiffEqual, Text: "c"}, - // {Type: dmp.DiffDelete, Text: "lass="p">}] - - // After first iteration - // diffs[{Type: dmp.DiffDelete, Text: "language</span>"}, //write out - // {Type: dmp.DiffEqual, Text: "<span c"}, - // {Type: dmp.DiffDelete, Text: "lass="p">,</span>}] - - // After second iteration - // {Type: dmp.DiffEqual, Text: ""}, // write out - // {Type: dmp.DiffDelete, Text: "<span class="p">,</span>}] - - // Final - // {Type: dmp.DiffDelete, Text: "<span class="p">,</span>}] - // end up writing <span class="removed-code"><span class="p">,</span></span> - // Instead of <span class="removed-code">lass="p",</span></span> - - m := trailingSpanRegex.FindStringSubmatchIndex(diff.Text) - if m != nil { - match = diff.Text[m[0]:m[1]] - diff.Text = strings.TrimSuffix(diff.Text, match) - } - m = entityRegex.FindStringSubmatchIndex(diff.Text) - if m != nil { - match = diff.Text[m[0]:m[1]] - diff.Text = strings.TrimSuffix(diff.Text, match) - } - // Print an existing closing span first before opening added/remove-code span so it doesn't unintentionally close it - if strings.HasPrefix(diff.Text, "</span>") { - buf.WriteString("</span>") - diff.Text = strings.TrimPrefix(diff.Text, "</span>") - } - // If we weren't able to fix it then this should avoid broken HTML by not inserting more spans below - // The previous/next diff section will contain the rest of the tag that is missing here - if strings.Count(diff.Text, "<") != strings.Count(diff.Text, ">") { - buf.WriteString(diff.Text) - continue - } - } switch { case diff.Type == diffmatchpatch.DiffEqual: buf.WriteString(diff.Text) @@ -485,7 +210,10 @@ func diffToHTML(fileName string, diffs []diffmatchpatch.Diff, lineType DiffLineT buf.Write(codeTagSuffix) } } - return DiffInlineWithUnicodeEscape(template.HTML(buf.String())) + for range lineWrapperTags { + buf.WriteString("</span>") + } + return buf.String() } // GetLine gets a specific line by type (add or del) and file line number @@ -597,10 +325,12 @@ func (diffSection *DiffSection) GetComputedInlineDiffFor(diffLine *DiffLine) Dif return DiffInlineWithHighlightCode(diffSection.FileName, language, diffLine.Content) } - diffRecord := diffMatchPatch.DiffMain(highlight.Code(diffSection.FileName, language, diff1[1:]), highlight.Code(diffSection.FileName, language, diff2[1:]), true) - diffRecord = diffMatchPatch.DiffCleanupEfficiency(diffRecord) - - return diffToHTML(diffSection.FileName, diffRecord, diffLine.Type) + hcd := newHighlightCodeDiff() + diffRecord := hcd.diffWithHighlight(diffSection.FileName, language, diff1[1:], diff2[1:]) + // it seems that Gitea doesn't need the line wrapper of Chroma, so do not add them back + // if the line wrappers are still needed in the future, it can be added back by "diffToHTML(hcd.lineWrapperTags. ...)" + diffHTML := diffToHTML(nil, diffRecord, diffLine.Type) + return DiffInlineWithUnicodeEscape(template.HTML(diffHTML)) } // DiffFile represents a file diff. @@ -1289,7 +1019,7 @@ func readFileName(rd *strings.Reader) (string, bool) { if char == '"' { fmt.Fscanf(rd, "%q ", &name) if len(name) == 0 { - log.Error("Reader has no file name: %v", rd) + log.Error("Reader has no file name: reader=%+v", rd) return "", true } @@ -1311,7 +1041,7 @@ func readFileName(rd *strings.Reader) (string, bool) { } } if len(name) < 2 { - log.Error("Unable to determine name from reader: %v", rd) + log.Error("Unable to determine name from reader: reader=%+v", rd) return "", true } return name[2:], ambiguity diff --git a/services/gitdiff/gitdiff_test.go b/services/gitdiff/gitdiff_test.go index caca0e91d8..e88d831759 100644 --- a/services/gitdiff/gitdiff_test.go +++ b/services/gitdiff/gitdiff_test.go @@ -7,7 +7,6 @@ package gitdiff import ( "fmt" - "html/template" "strconv" "strings" "testing" @@ -17,93 +16,27 @@ import ( "code.gitea.io/gitea/models/unittest" user_model "code.gitea.io/gitea/models/user" "code.gitea.io/gitea/modules/git" - "code.gitea.io/gitea/modules/highlight" "code.gitea.io/gitea/modules/json" "code.gitea.io/gitea/modules/setting" dmp "github.com/sergi/go-diff/diffmatchpatch" "github.com/stretchr/testify/assert" - "gopkg.in/ini.v1" ) -func assertEqual(t *testing.T, s1 string, s2 template.HTML) { - if s1 != string(s2) { - t.Errorf("Did not receive expected results:\nExpected: %s\nActual: %s", s1, s2) - } -} - func TestDiffToHTML(t *testing.T) { - setting.Cfg = ini.Empty() - assertEqual(t, "foo <span class=\"added-code\">bar</span> biz", diffToHTML("", []dmp.Diff{ + assert.Equal(t, "foo <span class=\"added-code\">bar</span> biz", diffToHTML(nil, []dmp.Diff{ {Type: dmp.DiffEqual, Text: "foo "}, {Type: dmp.DiffInsert, Text: "bar"}, {Type: dmp.DiffDelete, Text: " baz"}, {Type: dmp.DiffEqual, Text: " biz"}, - }, DiffLineAdd).Content) + }, DiffLineAdd)) - assertEqual(t, "foo <span class=\"removed-code\">bar</span> biz", diffToHTML("", []dmp.Diff{ + assert.Equal(t, "foo <span class=\"removed-code\">bar</span> biz", diffToHTML(nil, []dmp.Diff{ {Type: dmp.DiffEqual, Text: "foo "}, {Type: dmp.DiffDelete, Text: "bar"}, {Type: dmp.DiffInsert, Text: " baz"}, {Type: dmp.DiffEqual, Text: " biz"}, - }, DiffLineDel).Content) - - assertEqual(t, "<span class=\"k\">if</span> <span class=\"p\">!</span><span class=\"nx\">nohl</span> <span class=\"o\">&&</span> <span class=\"added-code\"><span class=\"p\">(</span></span><span class=\"nx\">lexer</span> <span class=\"o\">!=</span> <span class=\"kc\">nil</span><span class=\"added-code\"> <span class=\"o\">||</span> <span class=\"nx\">r</span><span class=\"p\">.</span><span class=\"nx\">GuessLanguage</span><span class=\"p\">)</span></span> <span class=\"p\">{</span>", diffToHTML("", []dmp.Diff{ - {Type: dmp.DiffEqual, Text: "<span class=\"k\">if</span> <span class=\"p\">!</span><span class=\"nx\">nohl</span> <span class=\"o\">&&</span> <span class=\""}, - {Type: dmp.DiffInsert, Text: "p\">(</span><span class=\""}, - {Type: dmp.DiffEqual, Text: "nx\">lexer</span> <span class=\"o\">!=</span> <span class=\"kc\">nil"}, - {Type: dmp.DiffInsert, Text: "</span> <span class=\"o\">||</span> <span class=\"nx\">r</span><span class=\"p\">.</span><span class=\"nx\">GuessLanguage</span><span class=\"p\">)"}, - {Type: dmp.DiffEqual, Text: "</span> <span class=\"p\">{</span>"}, - }, DiffLineAdd).Content) - - assertEqual(t, "<span class=\"nx\">tagURL</span> <span class=\"o\">:=</span> <span class=\"removed-code\"><span class=\"nx\">fmt</span><span class=\"p\">.</span><span class=\"nf\">Sprintf</span><span class=\"p\">(</span><span class=\"s\">"## [%s](%s/%s/%s/%s?q=&type=all&state=closed&milestone=%d) - %s"</span><span class=\"p\">,</span> <span class=\"nx\">ge</span><span class=\"p\">.</span><span class=\"nx\">Milestone\"</span></span><span class=\"p\">,</span> <span class=\"nx\">ge</span><span class=\"p\">.</span><span class=\"nx\">BaseURL</span><span class=\"p\">,</span> <span class=\"nx\">ge</span><span class=\"p\">.</span><span class=\"nx\">Owner</span><span class=\"p\">,</span> <span class=\"nx\">ge</span><span class=\"p\">.</span><span class=\"nx\">Repo</span><span class=\"p\">,</span> <span class=\"removed-code\"><span class=\"nx\">from</span><span class=\"p\">,</span> <span class=\"nx\">milestoneID</span><span class=\"p\">,</span> <span class=\"nx\">time</span><span class=\"p\">.</span><span class=\"nf\">Now</span><span class=\"p\">(</span><span class=\"p\">)</span><span class=\"p\">.</span><span class=\"nf\">Format</span><span class=\"p\">(</span><span class=\"s\">"2006-01-02"</span><span class=\"p\">)</span></span><span class=\"p\">)</span>", diffToHTML("", []dmp.Diff{ - {Type: dmp.DiffEqual, Text: "<span class=\"nx\">tagURL</span> <span class=\"o\">:=</span> <span class=\"n"}, - {Type: dmp.DiffDelete, Text: "x\">fmt</span><span class=\"p\">.</span><span class=\"nf\">Sprintf</span><span class=\"p\">(</span><span class=\"s\">"## [%s](%s/%s/%s/%s?q=&type=all&state=closed&milestone=%d) - %s"</span><span class=\"p\">,</span> <span class=\"nx\">ge</span><span class=\"p\">.</span><span class=\"nx\">Milestone\""}, - {Type: dmp.DiffInsert, Text: "f\">getGiteaTagURL</span><span class=\"p\">(</span><span class=\"nx\">client"}, - {Type: dmp.DiffEqual, Text: "</span><span class=\"p\">,</span> <span class=\"nx\">ge</span><span class=\"p\">.</span><span class=\"nx\">BaseURL</span><span class=\"p\">,</span> <span class=\"nx\">ge</span><span class=\"p\">.</span><span class=\"nx\">Owner</span><span class=\"p\">,</span> <span class=\"nx\">ge</span><span class=\"p\">.</span><span class=\"nx\">Repo</span><span class=\"p\">,</span> <span class=\"nx\">"}, - {Type: dmp.DiffDelete, Text: "from</span><span class=\"p\">,</span> <span class=\"nx\">milestoneID</span><span class=\"p\">,</span> <span class=\"nx\">time</span><span class=\"p\">.</span><span class=\"nf\">Now</span><span class=\"p\">(</span><span class=\"p\">)</span><span class=\"p\">.</span><span class=\"nf\">Format</span><span class=\"p\">(</span><span class=\"s\">"2006-01-02"</span><span class=\"p\">)"}, - {Type: dmp.DiffInsert, Text: "ge</span><span class=\"p\">.</span><span class=\"nx\">Milestone</span><span class=\"p\">,</span> <span class=\"nx\">from</span><span class=\"p\">,</span> <span class=\"nx\">milestoneID"}, - {Type: dmp.DiffEqual, Text: "</span><span class=\"p\">)</span>"}, - }, DiffLineDel).Content) - - assertEqual(t, "<span class=\"nx\">r</span><span class=\"p\">.</span><span class=\"nf\">WrapperRenderer</span><span class=\"p\">(</span><span class=\"nx\">w</span><span class=\"p\">,</span> <span class=\"removed-code\"><span class=\"nx\">language</span><span class=\"p\">,</span> <span class=\"kc\">true</span><span class=\"p\">,</span> <span class=\"nx\">attrs</span></span><span class=\"p\">,</span> <span class=\"kc\">false</span><span class=\"p\">)</span>", diffToHTML("", []dmp.Diff{ - {Type: dmp.DiffEqual, Text: "<span class=\"nx\">r</span><span class=\"p\">.</span><span class=\"nf\">WrapperRenderer</span><span class=\"p\">(</span><span class=\"nx\">w</span><span class=\"p\">,</span> <span class=\"nx\">"}, - {Type: dmp.DiffDelete, Text: "language</span><span "}, - {Type: dmp.DiffEqual, Text: "c"}, - {Type: dmp.DiffDelete, Text: "lass=\"p\">,</span> <span class=\"kc\">true</span><span class=\"p\">,</span> <span class=\"nx\">attrs"}, - {Type: dmp.DiffEqual, Text: "</span><span class=\"p\">,</span> <span class=\"kc\">false</span><span class=\"p\">)</span>"}, - }, DiffLineDel).Content) - - assertEqual(t, "<span class=\"added-code\">language</span><span class=\"p\">,</span> <span class=\"kc\">true</span><span class=\"p\">,</span> <span class=\"nx\">attrs</span></span><span class=\"p\">,</span> <span class=\"kc\">false</span><span class=\"p\">)</span>", diffToHTML("", []dmp.Diff{ - {Type: dmp.DiffInsert, Text: "language</span><span "}, - {Type: dmp.DiffEqual, Text: "c"}, - {Type: dmp.DiffInsert, Text: "lass=\"p\">,</span> <span class=\"kc\">true</span><span class=\"p\">,</span> <span class=\"nx\">attrs"}, - {Type: dmp.DiffEqual, Text: "</span><span class=\"p\">,</span> <span class=\"kc\">false</span><span class=\"p\">)</span>"}, - }, DiffLineAdd).Content) - - assertEqual(t, "<span class=\"k\">print</span><span class=\"added-code\"><span class=\"p\">(</span></span><span class=\"sa\"></span><span class=\"s2\">"</span><span class=\"s2\">// </span><span class=\"s2\">"</span><span class=\"p\">,</span> <span class=\"n\">sys</span><span class=\"o\">.</span><span class=\"n\">argv</span><span class=\"added-code\"><span class=\"p\">)</span></span>", diffToHTML("", []dmp.Diff{ - {Type: dmp.DiffEqual, Text: "<span class=\"k\">print</span>"}, - {Type: dmp.DiffInsert, Text: "<span"}, - {Type: dmp.DiffEqual, Text: " "}, - {Type: dmp.DiffInsert, Text: "class=\"p\">(</span>"}, - {Type: dmp.DiffEqual, Text: "<span class=\"sa\"></span><span class=\"s2\">"</span><span class=\"s2\">// </span><span class=\"s2\">"</span><span class=\"p\">,</span> <span class=\"n\">sys</span><span class=\"o\">.</span><span class=\"n\">argv</span>"}, - {Type: dmp.DiffInsert, Text: "<span class=\"p\">)</span>"}, - }, DiffLineAdd).Content) - - assertEqual(t, "sh <span class=\"added-code\">'useradd -u $(stat -c "%u" .gitignore) jenkins'</span>", diffToHTML("", []dmp.Diff{ - {Type: dmp.DiffEqual, Text: "sh "}, - {Type: dmp.DiffDelete, Text: "4;useradd -u 111 jenkins""}, - {Type: dmp.DiffInsert, Text: "9;useradd -u $(stat -c "%u" .gitignore) jenkins'"}, - {Type: dmp.DiffEqual, Text: ";"}, - }, DiffLineAdd).Content) - - assertEqual(t, "<span class=\"x\"> <h<span class=\"added-code\">4 class="release-list-title df ac"</span>></span>", diffToHTML("", []dmp.Diff{ - {Type: dmp.DiffEqual, Text: "<span class=\"x\"> <h"}, - {Type: dmp.DiffInsert, Text: "4 class=&#"}, - {Type: dmp.DiffEqual, Text: "3"}, - {Type: dmp.DiffInsert, Text: "4;release-list-title df ac""}, - {Type: dmp.DiffEqual, Text: "></span>"}, - }, DiffLineAdd).Content) + }, DiffLineDel)) } func TestParsePatch_skipTo(t *testing.T) { @@ -592,7 +525,6 @@ index 0000000..6bb8f39 if err != nil { t.Errorf("ParsePatch failed: %s", err) } - println(result) diff2 := `diff --git "a/A \\ B" "b/A \\ B" --- "a/A \\ B" @@ -712,18 +644,6 @@ func TestGetDiffRangeWithWhitespaceBehavior(t *testing.T) { } } -func TestDiffToHTML_14231(t *testing.T) { - setting.Cfg = ini.Empty() - diffRecord := diffMatchPatch.DiffMain(highlight.Code("main.v", "", " run()\n"), highlight.Code("main.v", "", " run(db)\n"), true) - diffRecord = diffMatchPatch.DiffCleanupEfficiency(diffRecord) - - expected := `<span class="line"><span class="cl"> <span class="n">run</span><span class="added-code"><span class="o">(</span><span class="n">db</span></span><span class="o">)</span> -</span></span>` - output := diffToHTML("main.v", diffRecord, DiffLineAdd) - - assertEqual(t, expected, output.Content) -} - func TestNoCrashes(t *testing.T) { type testcase struct { gitdiff string diff --git a/services/gitdiff/highlightdiff.go b/services/gitdiff/highlightdiff.go new file mode 100644 index 0000000000..4ceada4d7e --- /dev/null +++ b/services/gitdiff/highlightdiff.go @@ -0,0 +1,223 @@ +// Copyright 2022 The Gitea Authors. All rights reserved. +// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file. + +package gitdiff + +import ( + "strings" + + "code.gitea.io/gitea/modules/highlight" + + "github.com/sergi/go-diff/diffmatchpatch" +) + +// token is a html tag or entity, eg: "<span ...>", "</span>", "<" +func extractHTMLToken(s string) (before, token, after string, valid bool) { + for pos1 := 0; pos1 < len(s); pos1++ { + if s[pos1] == '<' { + pos2 := strings.IndexByte(s[pos1:], '>') + if pos2 == -1 { + return "", "", s, false + } + return s[:pos1], s[pos1 : pos1+pos2+1], s[pos1+pos2+1:], true + } else if s[pos1] == '&' { + pos2 := strings.IndexByte(s[pos1:], ';') + if pos2 == -1 { + return "", "", s, false + } + return s[:pos1], s[pos1 : pos1+pos2+1], s[pos1+pos2+1:], true + } + } + return "", "", s, true +} + +// highlightCodeDiff is used to do diff with highlighted HTML code. +// It totally depends on Chroma's valid HTML output and its structure, do not use these functions for other purposes. +// The HTML tags and entities will be replaced by Unicode placeholders: "<span>{TEXT}</span>" => "\uE000{TEXT}\uE001" +// These Unicode placeholders are friendly to the diff. +// Then after diff, the placeholders in diff result will be recovered to the HTML tags and entities. +// It's guaranteed that the tags in final diff result are paired correctly. +type highlightCodeDiff struct { + placeholderBegin rune + placeholderMaxCount int + placeholderIndex int + placeholderTokenMap map[rune]string + tokenPlaceholderMap map[string]rune + + placeholderOverflowCount int + + lineWrapperTags []string +} + +func newHighlightCodeDiff() *highlightCodeDiff { + return &highlightCodeDiff{ + placeholderBegin: rune(0x100000), // Plane 16: Supplementary Private Use Area B (U+100000..U+10FFFD) + placeholderMaxCount: 64000, + placeholderTokenMap: map[rune]string{}, + tokenPlaceholderMap: map[string]rune{}, + } +} + +// nextPlaceholder returns 0 if no more placeholder can be used +// the diff is done line by line, usually there are only a few (no more than 10) placeholders in one line +// so the placeholderMaxCount is impossible to be exhausted in real cases. +func (hcd *highlightCodeDiff) nextPlaceholder() rune { + for hcd.placeholderIndex < hcd.placeholderMaxCount { + r := hcd.placeholderBegin + rune(hcd.placeholderIndex) + hcd.placeholderIndex++ + // only use non-existing (not used by code) rune as placeholders + if _, ok := hcd.placeholderTokenMap[r]; !ok { + return r + } + } + return 0 // no more available placeholder +} + +func (hcd *highlightCodeDiff) isInPlaceholderRange(r rune) bool { + return hcd.placeholderBegin <= r && r < hcd.placeholderBegin+rune(hcd.placeholderMaxCount) +} + +func (hcd *highlightCodeDiff) collectUsedRunes(code string) { + for _, r := range code { + if hcd.isInPlaceholderRange(r) { + // put the existing rune (used by code) in map, then this rune won't be used a placeholder anymore. + hcd.placeholderTokenMap[r] = "" + } + } +} + +func (hcd *highlightCodeDiff) diffWithHighlight(filename, language, codeA, codeB string) []diffmatchpatch.Diff { + hcd.collectUsedRunes(codeA) + hcd.collectUsedRunes(codeB) + + highlightCodeA := highlight.Code(filename, language, codeA) + highlightCodeB := highlight.Code(filename, language, codeB) + + highlightCodeA = hcd.convertToPlaceholders(highlightCodeA) + highlightCodeB = hcd.convertToPlaceholders(highlightCodeB) + + diffs := diffMatchPatch.DiffMain(highlightCodeA, highlightCodeB, true) + diffs = diffMatchPatch.DiffCleanupEfficiency(diffs) + + for i := range diffs { + hcd.recoverOneDiff(&diffs[i]) + } + return diffs +} + +// convertToPlaceholders totally depends on Chroma's valid HTML output and its structure, do not use these functions for other purposes. +func (hcd *highlightCodeDiff) convertToPlaceholders(htmlCode string) string { + var tagStack []string + res := strings.Builder{} + + firstRunForLineTags := hcd.lineWrapperTags == nil + + var beforeToken, token string + var valid bool + + // the standard chroma highlight HTML is "<span class="line [hl]"><span class="cl"> ... </span></span>" + for { + beforeToken, token, htmlCode, valid = extractHTMLToken(htmlCode) + if !valid || token == "" { + break + } + // write the content before the token into result string, and consume the token in the string + res.WriteString(beforeToken) + + // the line wrapper tags should be removed before diff + if strings.HasPrefix(token, `<span class="line`) || strings.HasPrefix(token, `<span class="cl"`) { + if firstRunForLineTags { + // if this is the first run for converting, save the line wrapper tags for later use, they should be added back + hcd.lineWrapperTags = append(hcd.lineWrapperTags, token) + } + htmlCode = strings.TrimSuffix(htmlCode, "</span>") + continue + } + + var tokenInMap string + if strings.HasSuffix(token, "</") { // for closing tag + if len(tagStack) == 0 { + break // invalid diff result, no opening tag but see closing tag + } + // make sure the closing tag in map is related to the open tag, to make the diff algorithm can match the opening/closing tags + // the closing tag will be recorded in the map by key "</span><!-- <span the-opening> -->" for "<span the-opening>" + tokenInMap = token + "<!-- " + tagStack[len(tagStack)-1] + "-->" + tagStack = tagStack[:len(tagStack)-1] + } else if token[0] == '<' { // for opening tag + tokenInMap = token + tagStack = append(tagStack, token) + } else if token[0] == '&' { // for html entity + tokenInMap = token + } // else: impossible + + // remember the placeholder and token in the map + placeholder, ok := hcd.tokenPlaceholderMap[tokenInMap] + if !ok { + placeholder = hcd.nextPlaceholder() + if placeholder != 0 { + hcd.tokenPlaceholderMap[tokenInMap] = placeholder + hcd.placeholderTokenMap[placeholder] = tokenInMap + } + } + + if placeholder != 0 { + res.WriteRune(placeholder) // use the placeholder to replace the token + } else { + // unfortunately, all private use runes has been exhausted, no more placeholder could be used, no more converting + // usually, the exhausting won't occur in real cases, the magnitude of used placeholders is not larger than that of the CSS classes outputted by chroma. + hcd.placeholderOverflowCount++ + if strings.HasPrefix(token, "&") { + // when the token is a html entity, something must be outputted even if there is no placeholder. + res.WriteRune(0xFFFD) // replacement character TODO: how to handle this case more gracefully? + res.WriteString(token[1:]) // still output the entity code part, otherwise there will be no diff result. + } + } + } + + // write the remaining string + res.WriteString(htmlCode) + return res.String() +} + +func (hcd *highlightCodeDiff) recoverOneDiff(diff *diffmatchpatch.Diff) { + sb := strings.Builder{} + var tagStack []string + + for _, r := range diff.Text { + token, ok := hcd.placeholderTokenMap[r] + if !ok || token == "" { + sb.WriteRune(r) // if the rune is not a placeholder, write it as it is + continue + } + var tokenToRecover string + if strings.HasPrefix(token, "</") { // for closing tag + // only get the tag itself, ignore the trailing comment (for how the comment is generated, see the code in `convert` function) + tokenToRecover = token[:strings.IndexByte(token, '>')+1] + if len(tagStack) == 0 { + continue // if no opening tag in stack yet, skip the closing tag + } + tagStack = tagStack[:len(tagStack)-1] + } else if token[0] == '<' { // for opening tag + tokenToRecover = token + tagStack = append(tagStack, token) + } else if token[0] == '&' { // for html entity + tokenToRecover = token + } // else: impossible + sb.WriteString(tokenToRecover) + } + + if len(tagStack) > 0 { + // close all opening tags + for i := len(tagStack) - 1; i >= 0; i-- { + tagToClose := tagStack[i] + // get the closing tag "</span>" from "<span class=...>" or "<span>" + pos := strings.IndexAny(tagToClose, " >") + if pos != -1 { + sb.WriteString("</" + tagToClose[1:pos] + ">") + } // else: impossible. every tag was pushed into the stack by the code above and is valid HTML opening tag + } + } + + diff.Text = sb.String() +} diff --git a/services/gitdiff/highlightdiff_test.go b/services/gitdiff/highlightdiff_test.go new file mode 100644 index 0000000000..1cd78bc942 --- /dev/null +++ b/services/gitdiff/highlightdiff_test.go @@ -0,0 +1,126 @@ +// Copyright 2022 The Gitea Authors. All rights reserved. +// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file. + +package gitdiff + +import ( + "fmt" + "strings" + "testing" + + "github.com/sergi/go-diff/diffmatchpatch" + "github.com/stretchr/testify/assert" +) + +func TestDiffWithHighlight(t *testing.T) { + hcd := newHighlightCodeDiff() + diffs := hcd.diffWithHighlight( + "main.v", "", + " run('<>')\n", + " run(db)\n", + ) + + expected := ` <span class="n">run</span><span class="o">(</span><span class="removed-code"><span class="k">'</span><span class="o"><</span><span class="o">></span><span class="k">'</span></span><span class="o">)</span>` + "\n" + output := diffToHTML(nil, diffs, DiffLineDel) + assert.Equal(t, expected, output) + + expected = ` <span class="n">run</span><span class="o">(</span><span class="added-code"><span class="n">db</span></span><span class="o">)</span>` + "\n" + output = diffToHTML(nil, diffs, DiffLineAdd) + assert.Equal(t, expected, output) + + hcd = newHighlightCodeDiff() + hcd.placeholderTokenMap['O'] = "<span>" + hcd.placeholderTokenMap['C'] = "</span>" + diff := diffmatchpatch.Diff{} + + diff.Text = "OC" + hcd.recoverOneDiff(&diff) + assert.Equal(t, "<span></span>", diff.Text) + + diff.Text = "O" + hcd.recoverOneDiff(&diff) + assert.Equal(t, "<span></span>", diff.Text) + + diff.Text = "C" + hcd.recoverOneDiff(&diff) + assert.Equal(t, "", diff.Text) +} + +func TestDiffWithHighlightPlaceholder(t *testing.T) { + hcd := newHighlightCodeDiff() + diffs := hcd.diffWithHighlight( + "main.js", "", + "a='\U00100000'", + "a='\U0010FFFD''", + ) + assert.Equal(t, "", hcd.placeholderTokenMap[0x00100000]) + assert.Equal(t, "", hcd.placeholderTokenMap[0x0010FFFD]) + + expected := fmt.Sprintf(`<span class="line"><span class="cl"><span class="nx">a</span><span class="o">=</span><span class="s1">'</span><span class="removed-code">%s</span>'</span></span>`, "\U00100000") + output := diffToHTML(hcd.lineWrapperTags, diffs, DiffLineDel) + assert.Equal(t, expected, output) + + hcd = newHighlightCodeDiff() + diffs = hcd.diffWithHighlight( + "main.js", "", + "a='\U00100000'", + "a='\U0010FFFD'", + ) + expected = fmt.Sprintf(`<span class="nx">a</span><span class="o">=</span><span class="s1">'</span><span class="added-code">%s</span>'`, "\U0010FFFD") + output = diffToHTML(nil, diffs, DiffLineAdd) + assert.Equal(t, expected, output) +} + +func TestDiffWithHighlightPlaceholderExhausted(t *testing.T) { + hcd := newHighlightCodeDiff() + hcd.placeholderMaxCount = 0 + diffs := hcd.diffWithHighlight( + "main.js", "", + "'", + ``, + ) + output := diffToHTML(nil, diffs, DiffLineDel) + expected := fmt.Sprintf(`<span class="removed-code">%s#39;</span>`, "\uFFFD") + assert.Equal(t, expected, output) + + hcd = newHighlightCodeDiff() + hcd.placeholderMaxCount = 0 + diffs = hcd.diffWithHighlight( + "main.js", "", + "a < b", + "a > b", + ) + output = diffToHTML(nil, diffs, DiffLineDel) + expected = fmt.Sprintf(`a %s<span class="removed-code">l</span>t; b`, "\uFFFD") + assert.Equal(t, expected, output) + + output = diffToHTML(nil, diffs, DiffLineAdd) + expected = fmt.Sprintf(`a %s<span class="added-code">g</span>t; b`, "\uFFFD") + assert.Equal(t, expected, output) +} + +func TestDiffWithHighlightTagMatch(t *testing.T) { + totalOverflow := 0 + for i := 0; i < 100; i++ { + hcd := newHighlightCodeDiff() + hcd.placeholderMaxCount = i + diffs := hcd.diffWithHighlight( + "main.js", "", + "a='1'", + "b='2'", + ) + totalOverflow += hcd.placeholderOverflowCount + + output := diffToHTML(nil, diffs, DiffLineDel) + c1 := strings.Count(output, "<span") + c2 := strings.Count(output, "</span") + assert.Equal(t, c1, c2) + + output = diffToHTML(nil, diffs, DiffLineAdd) + c1 = strings.Count(output, "<span") + c2 = strings.Count(output, "</span") + assert.Equal(t, c1, c2) + } + assert.NotZero(t, totalOverflow) +} |