]> source.dussan.org Git - gitea.git/commitdiff
Fix broken spans in diffs (#14678)
authorzeripath <art27@cantab.net>
Sun, 14 Feb 2021 14:51:00 +0000 (14:51 +0000)
committerGitHub <noreply@github.com>
Sun, 14 Feb 2021 14:51:00 +0000 (15:51 +0100)
Gitea runs diff on highlighted code fragment for each line in order to provide
code highlight diffs. Unfortunately this diff algorithm is not aware that span tags
and entities are atomic and cannot be split.

The current fixup code makes some attempt to fix these broken tags however, it cannot
handle situations where a tag is split over multiple blocks.

This PR provides a more algorithmic fixup mechanism whereby spans and entities are
completely coalesced into their respective blocks.

This may result in a incompletely reduced diff but - it will definitely prevent the
broken entities and spans that are currently possible.

As a result of this fixup several inconsistencies were discovered in our testcases
and these were also fixed.

Fix #14231

Signed-off-by: Andrew Thornton <art27@cantab.net>
services/gitdiff/gitdiff.go
services/gitdiff/gitdiff_test.go

index d706dc99c9479ab4ad658be3cc657632ff3c8407..d5c3923516ec34f511b4a10dee4ae9d5fb4da20a 100644 (file)
@@ -182,6 +182,8 @@ var (
        removedCodePrefix = []byte(`<span class="removed-code">`)
        codeTagSuffix     = []byte(`</span>`)
 )
+
+var unfinishedtagRegex = regexp.MustCompile(`<[^>]*$`)
 var trailingSpanRegex = regexp.MustCompile(`<span\s*[[:alpha:]="]*?[>]?$`)
 var entityRegex = regexp.MustCompile(`&[#]*?[0-9[:alpha:]]*$`)
 
@@ -196,10 +198,218 @@ func shouldWriteInline(diff diffmatchpatch.Diff, lineType DiffLineType) bool {
        return false
 }
 
+func fixupBrokenSpans(diffs []diffmatchpatch.Diff) []diffmatchpatch.Diff {
+
+       // Create a new array to store our fixed up blocks
+       fixedup := make([]diffmatchpatch.Diff, 0, len(diffs))
+
+       // semantically label some numbers
+       const insert, delete, equal = 0, 1, 2
+
+       // record the positions of the last type of each block in the fixedup blocks
+       last := []int{-1, -1, -1}
+       operation := []diffmatchpatch.Operation{diffmatchpatch.DiffInsert, diffmatchpatch.DiffDelete, diffmatchpatch.DiffEqual}
+
+       // create a writer for insert and deletes
+       toWrite := []strings.Builder{
+               {},
+               {},
+       }
+
+       // make some flags for insert and delete
+       unfinishedTag := []bool{false, false}
+       unfinishedEnt := []bool{false, false}
+
+       // store stores the provided text in the writer for the typ
+       store := func(text string, typ int) {
+               (&(toWrite[typ])).WriteString(text)
+       }
+
+       // hasStored returns true if there is stored content
+       hasStored := func(typ int) bool {
+               return (&toWrite[typ]).Len() > 0
+       }
+
+       // stored will return that content
+       stored := func(typ int) string {
+               return (&toWrite[typ]).String()
+       }
+
+       // empty will empty the stored content
+       empty := func(typ int) {
+               (&toWrite[typ]).Reset()
+       }
+
+       // pop will remove the stored content appending to a diff block for that typ
+       pop := func(typ int, fixedup []diffmatchpatch.Diff) []diffmatchpatch.Diff {
+               if hasStored(typ) {
+                       if last[typ] > last[equal] {
+                               fixedup[last[typ]].Text += stored(typ)
+                       } else {
+                               fixedup = append(fixedup, diffmatchpatch.Diff{
+                                       Type: operation[typ],
+                                       Text: stored(typ),
+                               })
+                       }
+                       empty(typ)
+               }
+               return fixedup
+       }
+
+       // Now we walk the provided diffs and check the type of each block in turn
+       for _, diff := range diffs {
+
+               typ := delete // flag for handling insert or delete typs
+               switch diff.Type {
+               case diffmatchpatch.DiffEqual:
+                       // First check if there is anything stored
+                       if hasStored(insert) || hasStored(delete) {
+                               // There are two reasons for storing content:
+                               // 1. Unfinished Entity <- Could be more efficient here by not doing this if we're looking for a tag
+                               if unfinishedEnt[insert] || unfinishedEnt[delete] {
+                                       // we look for a ';' to finish an entity
+                                       idx := strings.IndexRune(diff.Text, ';')
+                                       if idx >= 0 {
+                                               // if we find a ';' store the preceding content to both insert and delete
+                                               store(diff.Text[:idx+1], insert)
+                                               store(diff.Text[:idx+1], delete)
+
+                                               // and remove it from this block
+                                               diff.Text = diff.Text[idx+1:]
+
+                                               // reset the ent flags
+                                               unfinishedEnt[insert] = false
+                                               unfinishedEnt[delete] = false
+                                       } else {
+                                               // otherwise store it all on insert and delete
+                                               store(diff.Text, insert)
+                                               store(diff.Text, delete)
+                                               // and empty this block
+                                               diff.Text = ""
+                                       }
+                               }
+                               // 2. Unfinished Tag
+                               if unfinishedTag[insert] || unfinishedTag[delete] {
+                                       // we look for a '>' to finish a tag
+                                       idx := strings.IndexRune(diff.Text, '>')
+                                       if idx >= 0 {
+                                               store(diff.Text[:idx+1], insert)
+                                               store(diff.Text[:idx+1], delete)
+                                               diff.Text = diff.Text[idx+1:]
+                                               unfinishedTag[insert] = false
+                                               unfinishedTag[delete] = false
+                                       } else {
+                                               store(diff.Text, insert)
+                                               store(diff.Text, delete)
+                                               diff.Text = ""
+                                       }
+                               }
+
+                               // If we've completed the required tag/entities
+                               if !(unfinishedTag[insert] || unfinishedTag[delete] || unfinishedEnt[insert] || unfinishedEnt[delete]) {
+                                       // pop off the stack
+                                       fixedup = pop(insert, fixedup)
+                                       fixedup = pop(delete, fixedup)
+                               }
+
+                               // If that has left this diff block empty then shortcut
+                               if len(diff.Text) == 0 {
+                                       continue
+                               }
+                       }
+
+                       // check if this block ends in an unfinished tag?
+                       idx := unfinishedtagRegex.FindStringIndex(diff.Text)
+                       if idx != nil {
+                               unfinishedTag[insert] = true
+                               unfinishedTag[delete] = true
+                       } else {
+                               // otherwise does it end in an unfinished entity?
+                               idx = entityRegex.FindStringIndex(diff.Text)
+                               if idx != nil {
+                                       unfinishedEnt[insert] = true
+                                       unfinishedEnt[delete] = true
+                               }
+                       }
+
+                       // If there is an unfinished component
+                       if idx != nil {
+                               // Store the fragment
+                               store(diff.Text[idx[0]:], insert)
+                               store(diff.Text[idx[0]:], delete)
+                               // and remove it from this block
+                               diff.Text = diff.Text[:idx[0]]
+                       }
+
+                       // If that hasn't left the block empty
+                       if len(diff.Text) > 0 {
+                               // store the position of the last equal block and store it in our diffs
+                               last[equal] = len(fixedup)
+                               fixedup = append(fixedup, diff)
+                       }
+                       continue
+               case diffmatchpatch.DiffInsert:
+                       typ = insert
+                       fallthrough
+               case diffmatchpatch.DiffDelete:
+                       // First check if there is anything stored for this type
+                       if hasStored(typ) {
+                               // if there is prepend it to this block, empty the storage and reset our flags
+                               diff.Text = stored(typ) + diff.Text
+                               empty(typ)
+                               unfinishedEnt[typ] = false
+                               unfinishedTag[typ] = false
+                       }
+
+                       // check if this block ends in an unfinished tag
+                       idx := unfinishedtagRegex.FindStringIndex(diff.Text)
+                       if idx != nil {
+                               unfinishedTag[typ] = true
+                       } else {
+                               // otherwise does it end in an unfinished entity
+                               idx = entityRegex.FindStringIndex(diff.Text)
+                               if idx != nil {
+                                       unfinishedEnt[typ] = true
+                               }
+                       }
+
+                       // If there is an unfinished component
+                       if idx != nil {
+                               // Store the fragment
+                               store(diff.Text[idx[0]:], typ)
+                               // and remove it from this block
+                               diff.Text = diff.Text[:idx[0]]
+                       }
+
+                       // If that hasn't left the block empty
+                       if len(diff.Text) > 0 {
+                               // if the last block of this type was after the last equal block
+                               if last[typ] > last[equal] {
+                                       // store this blocks content on that block
+                                       fixedup[last[typ]].Text += diff.Text
+                               } else {
+                                       // otherwise store the position of the last block of this type and store the block
+                                       last[typ] = len(fixedup)
+                                       fixedup = append(fixedup, diff)
+                               }
+                       }
+                       continue
+               }
+       }
+
+       // pop off any remaining stored content
+       fixedup = pop(insert, fixedup)
+       fixedup = pop(delete, fixedup)
+
+       return fixedup
+}
+
 func diffToHTML(fileName string, diffs []diffmatchpatch.Diff, lineType DiffLineType) template.HTML {
        buf := bytes.NewBuffer(nil)
        match := ""
 
+       diffs = fixupBrokenSpans(diffs)
+
        for _, diff := range diffs {
                if shouldWriteInline(diff, lineType) {
                        if len(match) > 0 {
index cd7b2273cc389cf6c2dfa6514841fb3a847b3847..a832a5d94f6894dce152d1482a6bc93c8bda94dd 100644 (file)
@@ -15,6 +15,7 @@ import (
 
        "code.gitea.io/gitea/models"
        "code.gitea.io/gitea/modules/git"
+       "code.gitea.io/gitea/modules/highlight"
        "code.gitea.io/gitea/modules/setting"
        dmp "github.com/sergi/go-diff/diffmatchpatch"
        "github.com/stretchr/testify/assert"
@@ -23,7 +24,7 @@ import (
 
 func assertEqual(t *testing.T, s1 string, s2 template.HTML) {
        if s1 != string(s2) {
-               t.Errorf("%s should be equal %s", s2, s1)
+               t.Errorf("Did not receive expected results:\nExpected: %s\nActual:   %s", s1, s2)
        }
 }
 
@@ -61,7 +62,7 @@ func TestDiffToHTML(t *testing.T) {
                {Type: dmp.DiffEqual, Text: "</span><span class=\"p\">)</span>"},
        }, DiffLineDel))
 
-       assertEqual(t, "<span class=\"nx\">r</span><span class=\"p\">.</span><span class=\"nf\">WrapperRenderer</span><span class=\"p\">(</span><span class=\"nx\">w</span><span class=\"p\">,</span> <span class=\"removed-code\"><span class=\"nx\">language</span></span><span class=\"removed-code\"><span class=\"p\">,</span> <span class=\"kc\">true</span><span class=\"p\">,</span> <span class=\"nx\">attrs</span></span><span class=\"p\">,</span> <span class=\"kc\">false</span><span class=\"p\">)</span>", diffToHTML("", []dmp.Diff{
+       assertEqual(t, "<span class=\"nx\">r</span><span class=\"p\">.</span><span class=\"nf\">WrapperRenderer</span><span class=\"p\">(</span><span class=\"nx\">w</span><span class=\"p\">,</span> <span class=\"removed-code\"><span class=\"nx\">language</span><span class=\"p\">,</span> <span class=\"kc\">true</span><span class=\"p\">,</span> <span class=\"nx\">attrs</span></span><span class=\"p\">,</span> <span class=\"kc\">false</span><span class=\"p\">)</span>", diffToHTML("", []dmp.Diff{
                {Type: dmp.DiffEqual, Text: "<span class=\"nx\">r</span><span class=\"p\">.</span><span class=\"nf\">WrapperRenderer</span><span class=\"p\">(</span><span class=\"nx\">w</span><span class=\"p\">,</span> <span class=\"nx\">"},
                {Type: dmp.DiffDelete, Text: "language</span><span "},
                {Type: dmp.DiffEqual, Text: "c"},
@@ -69,14 +70,14 @@ func TestDiffToHTML(t *testing.T) {
                {Type: dmp.DiffEqual, Text: "</span><span class=\"p\">,</span> <span class=\"kc\">false</span><span class=\"p\">)</span>"},
        }, DiffLineDel))
 
-       assertEqual(t, "<span class=\"added-code\">language</span></span><span class=\"added-code\"><span class=\"p\">,</span> <span class=\"kc\">true</span><span class=\"p\">,</span> <span class=\"nx\">attrs</span></span><span class=\"p\">,</span> <span class=\"kc\">false</span><span class=\"p\">)</span>", diffToHTML("", []dmp.Diff{
+       assertEqual(t, "<span class=\"added-code\">language</span><span class=\"p\">,</span> <span class=\"kc\">true</span><span class=\"p\">,</span> <span class=\"nx\">attrs</span></span><span class=\"p\">,</span> <span class=\"kc\">false</span><span class=\"p\">)</span>", diffToHTML("", []dmp.Diff{
                {Type: dmp.DiffInsert, Text: "language</span><span "},
                {Type: dmp.DiffEqual, Text: "c"},
                {Type: dmp.DiffInsert, Text: "lass=\"p\">,</span> <span class=\"kc\">true</span><span class=\"p\">,</span> <span class=\"nx\">attrs"},
                {Type: dmp.DiffEqual, Text: "</span><span class=\"p\">,</span> <span class=\"kc\">false</span><span class=\"p\">)</span>"},
        }, DiffLineAdd))
 
-       assertEqual(t, "<span class=\"k\">print</span><span class=\"added-code\"></span><span class=\"added-code\"><span class=\"p\">(</span></span><span class=\"sa\"></span><span class=\"s2\">&#34;</span><span class=\"s2\">// </span><span class=\"s2\">&#34;</span><span class=\"p\">,</span> <span class=\"n\">sys</span><span class=\"o\">.</span><span class=\"n\">argv</span><span class=\"added-code\"><span class=\"p\">)</span></span>", diffToHTML("", []dmp.Diff{
+       assertEqual(t, "<span class=\"k\">print</span><span class=\"added-code\"><span class=\"p\">(</span></span><span class=\"sa\"></span><span class=\"s2\">&#34;</span><span class=\"s2\">// </span><span class=\"s2\">&#34;</span><span class=\"p\">,</span> <span class=\"n\">sys</span><span class=\"o\">.</span><span class=\"n\">argv</span><span class=\"added-code\"><span class=\"p\">)</span></span>", diffToHTML("", []dmp.Diff{
                {Type: dmp.DiffEqual, Text: "<span class=\"k\">print</span>"},
                {Type: dmp.DiffInsert, Text: "<span"},
                {Type: dmp.DiffEqual, Text: " "},
@@ -85,14 +86,14 @@ func TestDiffToHTML(t *testing.T) {
                {Type: dmp.DiffInsert, Text: "<span class=\"p\">)</span>"},
        }, DiffLineAdd))
 
-       assertEqual(t, "sh <span class=\"added-code\">&#39;useradd -u $(stat -c &#34;%u&#34; .gitignore) jenkins</span>&#39;", diffToHTML("", []dmp.Diff{
+       assertEqual(t, "sh <span class=\"added-code\">&#39;useradd -u $(stat -c &#34;%u&#34; .gitignore) jenkins&#39;</span>", diffToHTML("", []dmp.Diff{
                {Type: dmp.DiffEqual, Text: "sh &#3"},
                {Type: dmp.DiffDelete, Text: "4;useradd -u 111 jenkins&#34"},
                {Type: dmp.DiffInsert, Text: "9;useradd -u $(stat -c &#34;%u&#34; .gitignore) jenkins&#39"},
                {Type: dmp.DiffEqual, Text: ";"},
        }, DiffLineAdd))
 
-       assertEqual(t, "<span class=\"x\">                                                      &lt;h<span class=\"added-code\">4 class=</span><span class=\"added-code\">&#34;release-list-title df ac&#34;</span>&gt;</span>", diffToHTML("", []dmp.Diff{
+       assertEqual(t, "<span class=\"x\">                                                      &lt;h<span class=\"added-code\">4 class=&#34;release-list-title df ac&#34;</span>&gt;</span>", diffToHTML("", []dmp.Diff{
                {Type: dmp.DiffEqual, Text: "<span class=\"x\">                                                 &lt;h"},
                {Type: dmp.DiffInsert, Text: "4 class=&#"},
                {Type: dmp.DiffEqual, Text: "3"},
@@ -462,3 +463,14 @@ func TestGetDiffRangeWithWhitespaceBehavior(t *testing.T) {
                }
        }
 }
+
+func TestDiffToHTML_14231(t *testing.T) {
+       setting.Cfg = ini.Empty()
+       diffRecord := diffMatchPatch.DiffMain(highlight.Code("main.v", "                run()\n"), highlight.Code("main.v", "           run(db)\n"), true)
+       diffRecord = diffMatchPatch.DiffCleanupEfficiency(diffRecord)
+
+       expected := `           <span class="n">run</span><span class="added-code"><span class="o">(</span><span class="n">db</span></span><span class="o">)</span>`
+       output := diffToHTML("main.v", diffRecord, DiffLineAdd)
+
+       assertEqual(t, expected, output)
+}