diff options
author | Dustin Firebaugh <dafirebaugh@gmail.com> | 2025-03-08 23:51:58 -0500 |
---|---|---|
committer | GitHub <noreply@github.com> | 2025-03-09 12:51:58 +0800 |
commit | 3f1f808b9eeb3f7cd923c6b89fbb57583202e76d (patch) | |
tree | 1eb4364b04aa43657f6846e5ee94c8bd7a405b1d /services | |
parent | 6f1333175461a6cf5497965c20b5d81b6e73c5d5 (diff) | |
download | gitea-3f1f808b9eeb3f7cd923c6b89fbb57583202e76d.tar.gz gitea-3f1f808b9eeb3f7cd923c6b89fbb57583202e76d.zip |
Full-file syntax highlighting for diff pages (#33766)
Fix #33358, fix #21970
This adds a step in the `GitDiffForRender` that does syntax highlighting for the
entire file and then only references lines from that syntax highlighted
code. This allows things like multi-line comments to be syntax
highlighted correctly.
---------
Co-authored-by: wxiaoguang <wxiaoguang@gmail.com>
Diffstat (limited to 'services')
-rw-r--r-- | services/gitdiff/gitdiff.go | 352 | ||||
-rw-r--r-- | services/gitdiff/gitdiff_test.go | 19 | ||||
-rw-r--r-- | services/gitdiff/highlightdiff.go | 94 | ||||
-rw-r--r-- | services/gitdiff/highlightdiff_test.go | 137 | ||||
-rw-r--r-- | services/repository/files/diff_test.go | 1 |
5 files changed, 318 insertions, 285 deletions
diff --git a/services/gitdiff/gitdiff.go b/services/gitdiff/gitdiff.go index bf39e70127..3a552547b8 100644 --- a/services/gitdiff/gitdiff.go +++ b/services/gitdiff/gitdiff.go @@ -31,6 +31,7 @@ import ( "code.gitea.io/gitea/modules/optional" "code.gitea.io/gitea/modules/setting" "code.gitea.io/gitea/modules/translation" + "code.gitea.io/gitea/modules/util" "github.com/sergi/go-diff/diffmatchpatch" stdcharset "golang.org/x/net/html/charset" @@ -75,12 +76,12 @@ const ( // DiffLine represents a line difference in a DiffSection. type DiffLine struct { - LeftIdx int - RightIdx int - Match int + LeftIdx int // line number, 1-based + RightIdx int // line number, 1-based + Match int // line number, 1-based Type DiffLineType Content string - Comments issues_model.CommentList + Comments issues_model.CommentList // related PR code comments SectionInfo *DiffLineSectionInfo } @@ -95,9 +96,18 @@ type DiffLineSectionInfo struct { RightHunkSize int } +// DiffHTMLOperation is the HTML version of diffmatchpatch.Diff +type DiffHTMLOperation struct { + Type diffmatchpatch.Operation + HTML template.HTML +} + // BlobExcerptChunkSize represent max lines of excerpt const BlobExcerptChunkSize = 20 +// MaxDiffHighlightEntireFileSize is the maximum file size that will be highlighted with "entire file diff" +const MaxDiffHighlightEntireFileSize = 1 * 1024 * 1024 + // GetType returns the type of DiffLine. func (d *DiffLine) GetType() int { return int(d.Type) @@ -112,8 +122,9 @@ func (d *DiffLine) GetHTMLDiffLineType() string { return "del" case DiffLineSection: return "tag" + default: + return "same" } - return "same" } // CanComment returns whether a line can get commented @@ -196,38 +207,6 @@ type DiffSection struct { Lines []*DiffLine } -var ( - addedCodePrefix = []byte(`<span class="added-code">`) - removedCodePrefix = []byte(`<span class="removed-code">`) - codeTagSuffix = []byte(`</span>`) -) - -func diffToHTML(lineWrapperTags []string, diffs []diffmatchpatch.Diff, lineType DiffLineType) string { - buf := bytes.NewBuffer(nil) - // restore the line wrapper tags <span class="line"> and <span class="cl">, if necessary - for _, tag := range lineWrapperTags { - buf.WriteString(tag) - } - for _, diff := range diffs { - switch { - case diff.Type == diffmatchpatch.DiffEqual: - buf.WriteString(diff.Text) - case diff.Type == diffmatchpatch.DiffInsert && lineType == DiffLineAdd: - buf.Write(addedCodePrefix) - buf.WriteString(diff.Text) - buf.Write(codeTagSuffix) - case diff.Type == diffmatchpatch.DiffDelete && lineType == DiffLineDel: - buf.Write(removedCodePrefix) - buf.WriteString(diff.Text) - buf.Write(codeTagSuffix) - } - } - for range lineWrapperTags { - buf.WriteString("</span>") - } - return buf.String() -} - // GetLine gets a specific line by type (add or del) and file line number func (diffSection *DiffSection) GetLine(lineType DiffLineType, idx int) *DiffLine { var ( @@ -271,10 +250,10 @@ LOOP: return nil } -var diffMatchPatch = diffmatchpatch.New() - -func init() { - diffMatchPatch.DiffEditCost = 100 +func defaultDiffMatchPatch() *diffmatchpatch.DiffMatchPatch { + dmp := diffmatchpatch.New() + dmp.DiffEditCost = 100 + return dmp } // DiffInline is a struct that has a content and escape status @@ -283,97 +262,114 @@ type DiffInline struct { Content template.HTML } -// DiffInlineWithUnicodeEscape makes a DiffInline with hidden unicode characters escaped +// DiffInlineWithUnicodeEscape makes a DiffInline with hidden Unicode characters escaped func DiffInlineWithUnicodeEscape(s template.HTML, locale translation.Locale) DiffInline { status, content := charset.EscapeControlHTML(s, locale) return DiffInline{EscapeStatus: status, Content: content} } -// DiffInlineWithHighlightCode makes a DiffInline with code highlight and hidden unicode characters escaped -func DiffInlineWithHighlightCode(fileName, language, code string, locale translation.Locale) DiffInline { - highlighted, _ := highlight.Code(fileName, language, code) - status, content := charset.EscapeControlHTML(highlighted, locale) - return DiffInline{EscapeStatus: status, Content: content} -} - -// GetComputedInlineDiffFor computes inline diff for the given line. -func (diffSection *DiffSection) GetComputedInlineDiffFor(diffLine *DiffLine, locale translation.Locale) DiffInline { +func (diffSection *DiffSection) getLineContentForRender(lineIdx int, diffLine *DiffLine, fileLanguage string, highlightLines map[int]template.HTML) template.HTML { + h, ok := highlightLines[lineIdx-1] + if ok { + return h + } + if diffLine.Content == "" { + return "" + } if setting.Git.DisableDiffHighlight { - return getLineContent(diffLine.Content[1:], locale) + return template.HTML(html.EscapeString(diffLine.Content[1:])) } + h, _ = highlight.Code(diffSection.Name, fileLanguage, diffLine.Content[1:]) + return h +} - var ( - compareDiffLine *DiffLine - diff1 string - diff2 string - ) - - language := "" +func (diffSection *DiffSection) getDiffLineForRender(diffLineType DiffLineType, leftLine, rightLine *DiffLine, locale translation.Locale) DiffInline { + var fileLanguage string + var highlightedLeftLines, highlightedRightLines map[int]template.HTML + // when a "diff section" is manually prepared by ExcerptBlob, it doesn't have "file" information if diffSection.file != nil { - language = diffSection.file.Language + fileLanguage = diffSection.file.Language + highlightedLeftLines, highlightedRightLines = diffSection.file.highlightedLeftLines, diffSection.file.highlightedRightLines + } + + hcd := newHighlightCodeDiff() + var diff1, diff2, lineHTML template.HTML + if leftLine != nil { + diff1 = diffSection.getLineContentForRender(leftLine.LeftIdx, leftLine, fileLanguage, highlightedLeftLines) + lineHTML = util.Iif(diffLineType == DiffLinePlain, diff1, "") + } + if rightLine != nil { + diff2 = diffSection.getLineContentForRender(rightLine.RightIdx, rightLine, fileLanguage, highlightedRightLines) + lineHTML = util.Iif(diffLineType == DiffLinePlain, diff2, "") } + if diffLineType != DiffLinePlain { + // it seems that Gitea doesn't need the line wrapper of Chroma, so do not add them back + // if the line wrappers are still needed in the future, it can be added back by "diffLineWithHighlightWrapper(hcd.lineWrapperTags. ...)" + lineHTML = hcd.diffLineWithHighlight(diffLineType, diff1, diff2) + } + return DiffInlineWithUnicodeEscape(lineHTML, locale) +} +// GetComputedInlineDiffFor computes inline diff for the given line. +func (diffSection *DiffSection) GetComputedInlineDiffFor(diffLine *DiffLine, locale translation.Locale) DiffInline { // try to find equivalent diff line. ignore, otherwise switch diffLine.Type { case DiffLineSection: return getLineContent(diffLine.Content[1:], locale) case DiffLineAdd: - compareDiffLine = diffSection.GetLine(DiffLineDel, diffLine.RightIdx) - if compareDiffLine == nil { - return DiffInlineWithHighlightCode(diffSection.FileName, language, diffLine.Content[1:], locale) - } - diff1 = compareDiffLine.Content - diff2 = diffLine.Content + compareDiffLine := diffSection.GetLine(DiffLineDel, diffLine.RightIdx) + return diffSection.getDiffLineForRender(DiffLineAdd, compareDiffLine, diffLine, locale) case DiffLineDel: - compareDiffLine = diffSection.GetLine(DiffLineAdd, diffLine.LeftIdx) - if compareDiffLine == nil { - return DiffInlineWithHighlightCode(diffSection.FileName, language, diffLine.Content[1:], locale) - } - diff1 = diffLine.Content - diff2 = compareDiffLine.Content - default: - if strings.IndexByte(" +-", diffLine.Content[0]) > -1 { - return DiffInlineWithHighlightCode(diffSection.FileName, language, diffLine.Content[1:], locale) - } - return DiffInlineWithHighlightCode(diffSection.FileName, language, diffLine.Content, locale) + compareDiffLine := diffSection.GetLine(DiffLineAdd, diffLine.LeftIdx) + return diffSection.getDiffLineForRender(DiffLineDel, diffLine, compareDiffLine, locale) + default: // Plain + // TODO: there was an "if" check: `if diffLine.Content >strings.IndexByte(" +-", diffLine.Content[0]) > -1 { ... } else { ... }` + // no idea why it needs that check, it seems that the "if" should be always true, so try to simplify the code + return diffSection.getDiffLineForRender(DiffLinePlain, nil, diffLine, locale) } - - hcd := newHighlightCodeDiff() - diffRecord := hcd.diffWithHighlight(diffSection.FileName, language, diff1[1:], diff2[1:]) - // it seems that Gitea doesn't need the line wrapper of Chroma, so do not add them back - // if the line wrappers are still needed in the future, it can be added back by "diffToHTML(hcd.lineWrapperTags. ...)" - diffHTML := diffToHTML(nil, diffRecord, diffLine.Type) - return DiffInlineWithUnicodeEscape(template.HTML(diffHTML), locale) } // DiffFile represents a file diff. type DiffFile struct { - Name string - NameHash string - OldName string - Index int - Addition, Deletion int - Type DiffFileType - IsCreated bool - IsDeleted bool - IsBin bool - IsLFSFile bool - IsRenamed bool - IsAmbiguous bool - Sections []*DiffSection - IsIncomplete bool - IsIncompleteLineTooLong bool - IsProtected bool - IsGenerated bool - IsVendored bool + // only used internally to parse Ambiguous filenames + isAmbiguous bool + + // basic fields (parsed from diff result) + Name string + NameHash string + OldName string + Addition int + Deletion int + Type DiffFileType + Mode string + OldMode string + IsCreated bool + IsDeleted bool + IsBin bool + IsLFSFile bool + IsRenamed bool + IsSubmodule bool + // basic fields but for render purpose only + Sections []*DiffSection + IsIncomplete bool + IsIncompleteLineTooLong bool + + // will be filled by the extra loop in GitDiffForRender + Language string + IsGenerated bool + IsVendored bool + SubmoduleDiffInfo *SubmoduleDiffInfo // IsSubmodule==true, then there must be a SubmoduleDiffInfo + + // will be filled by route handler + IsProtected bool + + // will be filled by SyncUserSpecificDiff IsViewed bool // User specific HasChangedSinceLastReview bool // User specific - Language string - Mode string - OldMode string - IsSubmodule bool // if IsSubmodule==true, then there must be a SubmoduleDiffInfo - SubmoduleDiffInfo *SubmoduleDiffInfo + // for render purpose only, will be filled by the extra loop in GitDiffForRender + highlightedLeftLines map[int]template.HTML + highlightedRightLines map[int]template.HTML } // GetType returns type of diff file. @@ -381,18 +377,23 @@ func (diffFile *DiffFile) GetType() int { return int(diffFile.Type) } -// GetTailSection creates a fake DiffLineSection if the last section is not the end of the file -func (diffFile *DiffFile) GetTailSection(leftCommit, rightCommit *git.Commit) *DiffSection { +type DiffLimitedContent struct { + LeftContent, RightContent *limitByteWriter +} + +// GetTailSectionAndLimitedContent creates a fake DiffLineSection if the last section is not the end of the file +func (diffFile *DiffFile) GetTailSectionAndLimitedContent(leftCommit, rightCommit *git.Commit) (_ *DiffSection, diffLimitedContent DiffLimitedContent) { if len(diffFile.Sections) == 0 || leftCommit == nil || diffFile.Type != DiffFileChange || diffFile.IsBin || diffFile.IsLFSFile { - return nil + return nil, diffLimitedContent } lastSection := diffFile.Sections[len(diffFile.Sections)-1] lastLine := lastSection.Lines[len(lastSection.Lines)-1] - leftLineCount := getCommitFileLineCount(leftCommit, diffFile.Name) - rightLineCount := getCommitFileLineCount(rightCommit, diffFile.Name) + leftLineCount, leftContent := getCommitFileLineCountAndLimitedContent(leftCommit, diffFile.Name) + rightLineCount, rightContent := getCommitFileLineCountAndLimitedContent(rightCommit, diffFile.Name) + diffLimitedContent = DiffLimitedContent{LeftContent: leftContent, RightContent: rightContent} if leftLineCount <= lastLine.LeftIdx || rightLineCount <= lastLine.RightIdx { - return nil + return nil, diffLimitedContent } tailDiffLine := &DiffLine{ Type: DiffLineSection, @@ -406,7 +407,7 @@ func (diffFile *DiffFile) GetTailSection(leftCommit, rightCommit *git.Commit) *D }, } tailSection := &DiffSection{FileName: diffFile.Name, Lines: []*DiffLine{tailDiffLine}} - return tailSection + return tailSection, diffLimitedContent } // GetDiffFileName returns the name of the diff file, or its old name in case it was deleted @@ -438,16 +439,29 @@ func (diffFile *DiffFile) ModeTranslationKey(mode string) string { } } -func getCommitFileLineCount(commit *git.Commit, filePath string) int { +type limitByteWriter struct { + buf bytes.Buffer + limit int +} + +func (l *limitByteWriter) Write(p []byte) (n int, err error) { + if l.buf.Len()+len(p) > l.limit { + p = p[:l.limit-l.buf.Len()] + } + return l.buf.Write(p) +} + +func getCommitFileLineCountAndLimitedContent(commit *git.Commit, filePath string) (lineCount int, limitWriter *limitByteWriter) { blob, err := commit.GetBlobByPath(filePath) if err != nil { - return 0 + return 0, nil } - lineCount, err := blob.GetBlobLineCount() + w := &limitByteWriter{limit: MaxDiffHighlightEntireFileSize + 1} + lineCount, err = blob.GetBlobLineCount(w) if err != nil { - return 0 + return 0, nil } - return lineCount + return lineCount, w } // Diff represents a difference between two git trees. @@ -526,13 +540,13 @@ parsingLoop: } if maxFiles > -1 && len(diff.Files) >= maxFiles { - lastFile := createDiffFile(diff, line) + lastFile := createDiffFile(line) diff.End = lastFile.Name diff.IsIncomplete = true break parsingLoop } - curFile = createDiffFile(diff, line) + curFile = createDiffFile(line) if skipping { if curFile.Name != skipToFile { line, err = skipToNextDiffHead(input) @@ -615,28 +629,28 @@ parsingLoop: case strings.HasPrefix(line, "rename from "): curFile.IsRenamed = true curFile.Type = DiffFileRename - if curFile.IsAmbiguous { + if curFile.isAmbiguous { curFile.OldName = prepareValue(line, "rename from ") } case strings.HasPrefix(line, "rename to "): curFile.IsRenamed = true curFile.Type = DiffFileRename - if curFile.IsAmbiguous { + if curFile.isAmbiguous { curFile.Name = prepareValue(line, "rename to ") - curFile.IsAmbiguous = false + curFile.isAmbiguous = false } case strings.HasPrefix(line, "copy from "): curFile.IsRenamed = true curFile.Type = DiffFileCopy - if curFile.IsAmbiguous { + if curFile.isAmbiguous { curFile.OldName = prepareValue(line, "copy from ") } case strings.HasPrefix(line, "copy to "): curFile.IsRenamed = true curFile.Type = DiffFileCopy - if curFile.IsAmbiguous { + if curFile.isAmbiguous { curFile.Name = prepareValue(line, "copy to ") - curFile.IsAmbiguous = false + curFile.isAmbiguous = false } case strings.HasPrefix(line, "new file"): curFile.Type = DiffFileAdd @@ -663,7 +677,7 @@ parsingLoop: curFile.IsBin = true case strings.HasPrefix(line, "--- "): // Handle ambiguous filenames - if curFile.IsAmbiguous { + if curFile.isAmbiguous { // The shortest string that can end up here is: // "--- a\t\n" without the quotes. // This line has a len() of 7 but doesn't contain a oldName. @@ -681,7 +695,7 @@ parsingLoop: // Otherwise do nothing with this line case strings.HasPrefix(line, "+++ "): // Handle ambiguous filenames - if curFile.IsAmbiguous { + if curFile.isAmbiguous { if len(line) > 6 && line[4] == 'b' { curFile.Name = line[6 : len(line)-1] if line[len(line)-2] == '\t' { @@ -693,7 +707,7 @@ parsingLoop: } else { curFile.Name = curFile.OldName } - curFile.IsAmbiguous = false + curFile.isAmbiguous = false } // Otherwise do nothing with this line, but now switch to parsing hunks lineBytes, isFragment, err := parseHunks(ctx, curFile, maxLines, maxLineCharacters, input) @@ -1006,7 +1020,7 @@ func parseHunks(ctx context.Context, curFile *DiffFile, maxLines, maxLineCharact } } -func createDiffFile(diff *Diff, line string) *DiffFile { +func createDiffFile(line string) *DiffFile { // The a/ and b/ filenames are the same unless rename/copy is involved. // Especially, even for a creation or a deletion, /dev/null is not used // in place of the a/ or b/ filenames. @@ -1017,12 +1031,11 @@ func createDiffFile(diff *Diff, line string) *DiffFile { // // Path names are quoted if necessary. // - // This means that you should always be able to determine the file name even when there + // This means that you should always be able to determine the file name even when // there is potential ambiguity... // // but we can be simpler with our heuristics by just forcing git to prefix things nicely curFile := &DiffFile{ - Index: len(diff.Files) + 1, Type: DiffFileChange, Sections: make([]*DiffSection, 0, 10), } @@ -1034,7 +1047,7 @@ func createDiffFile(diff *Diff, line string) *DiffFile { curFile.OldName, oldNameAmbiguity = readFileName(rd) curFile.Name, newNameAmbiguity = readFileName(rd) if oldNameAmbiguity && newNameAmbiguity { - curFile.IsAmbiguous = true + curFile.isAmbiguous = true // OK we should bet that the oldName and the newName are the same if they can be made to be same // So we need to start again ... if (len(line)-len(cmdDiffHead)-1)%2 == 0 { @@ -1121,20 +1134,21 @@ func guessBeforeCommitForDiff(gitRepo *git.Repository, beforeCommitID string, af return actualBeforeCommit, actualBeforeCommitID, nil } -// GetDiff builds a Diff between two commits of a repository. +// getDiffBasic builds a Diff between two commits of a repository. // Passing the empty string as beforeCommitID returns a diff from the parent commit. // The whitespaceBehavior is either an empty string or a git flag -func GetDiff(ctx context.Context, gitRepo *git.Repository, opts *DiffOptions, files ...string) (*Diff, error) { +// Returned beforeCommit could be nil if the afterCommit doesn't have parent commit +func getDiffBasic(ctx context.Context, gitRepo *git.Repository, opts *DiffOptions, files ...string) (_ *Diff, beforeCommit, afterCommit *git.Commit, err error) { repoPath := gitRepo.Path - afterCommit, err := gitRepo.GetCommit(opts.AfterCommitID) + afterCommit, err = gitRepo.GetCommit(opts.AfterCommitID) if err != nil { - return nil, err + return nil, nil, nil, err } - actualBeforeCommit, actualBeforeCommitID, err := guessBeforeCommitForDiff(gitRepo, opts.BeforeCommitID, afterCommit) + beforeCommit, beforeCommitID, err := guessBeforeCommitForDiff(gitRepo, opts.BeforeCommitID, afterCommit) if err != nil { - return nil, err + return nil, nil, nil, err } cmdDiff := git.NewCommand(). @@ -1150,7 +1164,7 @@ func GetDiff(ctx context.Context, gitRepo *git.Repository, opts *DiffOptions, fi parsePatchSkipToFile = "" } - cmdDiff.AddDynamicArguments(actualBeforeCommitID.String(), opts.AfterCommitID) + cmdDiff.AddDynamicArguments(beforeCommitID.String(), opts.AfterCommitID) cmdDiff.AddDashesAndList(files...) cmdCtx, cmdCancel := context.WithCancel(ctx) @@ -1180,12 +1194,25 @@ func GetDiff(ctx context.Context, gitRepo *git.Repository, opts *DiffOptions, fi // Ensure the git process is killed if it didn't exit already cmdCancel() if err != nil { - return nil, fmt.Errorf("unable to ParsePatch: %w", err) + return nil, nil, nil, fmt.Errorf("unable to ParsePatch: %w", err) } diff.Start = opts.SkipTo + return diff, beforeCommit, afterCommit, nil +} - checker, deferable := gitRepo.CheckAttributeReader(opts.AfterCommitID) - defer deferable() +func GetDiffForAPI(ctx context.Context, gitRepo *git.Repository, opts *DiffOptions, files ...string) (*Diff, error) { + diff, _, _, err := getDiffBasic(ctx, gitRepo, opts, files...) + return diff, err +} + +func GetDiffForRender(ctx context.Context, gitRepo *git.Repository, opts *DiffOptions, files ...string) (*Diff, error) { + diff, beforeCommit, afterCommit, err := getDiffBasic(ctx, gitRepo, opts, files...) + if err != nil { + return nil, err + } + + checker, deferrable := gitRepo.CheckAttributeReader(opts.AfterCommitID) + defer deferrable() for _, diffFile := range diff.Files { isVendored := optional.None[bool]() @@ -1205,7 +1232,7 @@ func GetDiff(ctx context.Context, gitRepo *git.Repository, opts *DiffOptions, fi // Populate Submodule URLs if diffFile.SubmoduleDiffInfo != nil { - diffFile.SubmoduleDiffInfo.PopulateURL(diffFile, actualBeforeCommit, afterCommit) + diffFile.SubmoduleDiffInfo.PopulateURL(diffFile, beforeCommit, afterCommit) } if !isVendored.Has() { @@ -1217,15 +1244,46 @@ func GetDiff(ctx context.Context, gitRepo *git.Repository, opts *DiffOptions, fi isGenerated = optional.Some(analyze.IsGenerated(diffFile.Name)) } diffFile.IsGenerated = isGenerated.Value() - - tailSection := diffFile.GetTailSection(actualBeforeCommit, afterCommit) + tailSection, limitedContent := diffFile.GetTailSectionAndLimitedContent(beforeCommit, afterCommit) if tailSection != nil { diffFile.Sections = append(diffFile.Sections, tailSection) } + + if !setting.Git.DisableDiffHighlight { + if limitedContent.LeftContent != nil && limitedContent.LeftContent.buf.Len() < MaxDiffHighlightEntireFileSize { + diffFile.highlightedLeftLines = highlightCodeLines(diffFile, true /* left */, limitedContent.LeftContent.buf.String()) + } + if limitedContent.RightContent != nil && limitedContent.RightContent.buf.Len() < MaxDiffHighlightEntireFileSize { + diffFile.highlightedRightLines = highlightCodeLines(diffFile, false /* right */, limitedContent.RightContent.buf.String()) + } + } } + return diff, nil } +func highlightCodeLines(diffFile *DiffFile, isLeft bool, content string) map[int]template.HTML { + highlightedNewContent, _ := highlight.Code(diffFile.Name, diffFile.Language, content) + splitLines := strings.Split(string(highlightedNewContent), "\n") + lines := make(map[int]template.HTML, len(splitLines)) + // only save the highlighted lines we need, but not the whole file, to save memory + for _, sec := range diffFile.Sections { + for _, ln := range sec.Lines { + lineIdx := ln.LeftIdx + if !isLeft { + lineIdx = ln.RightIdx + } + if lineIdx >= 1 { + idx := lineIdx - 1 + if idx < len(splitLines) { + lines[idx] = template.HTML(splitLines[idx]) + } + } + } + } + return lines +} + type DiffShortStat struct { NumFiles, TotalAddition, TotalDeletion int } diff --git a/services/gitdiff/gitdiff_test.go b/services/gitdiff/gitdiff_test.go index 41b4077cf2..71394b1915 100644 --- a/services/gitdiff/gitdiff_test.go +++ b/services/gitdiff/gitdiff_test.go @@ -17,27 +17,10 @@ import ( "code.gitea.io/gitea/modules/json" "code.gitea.io/gitea/modules/setting" - dmp "github.com/sergi/go-diff/diffmatchpatch" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) -func TestDiffToHTML(t *testing.T) { - assert.Equal(t, "foo <span class=\"added-code\">bar</span> biz", diffToHTML(nil, []dmp.Diff{ - {Type: dmp.DiffEqual, Text: "foo "}, - {Type: dmp.DiffInsert, Text: "bar"}, - {Type: dmp.DiffDelete, Text: " baz"}, - {Type: dmp.DiffEqual, Text: " biz"}, - }, DiffLineAdd)) - - assert.Equal(t, "foo <span class=\"removed-code\">bar</span> biz", diffToHTML(nil, []dmp.Diff{ - {Type: dmp.DiffEqual, Text: "foo "}, - {Type: dmp.DiffDelete, Text: "bar"}, - {Type: dmp.DiffInsert, Text: " baz"}, - {Type: dmp.DiffEqual, Text: " biz"}, - }, DiffLineDel)) -} - func TestParsePatch_skipTo(t *testing.T) { type testcase struct { name string @@ -621,7 +604,7 @@ func TestGetDiffRangeWithWhitespaceBehavior(t *testing.T) { defer gitRepo.Close() for _, behavior := range []git.TrustedCmdArgs{{"-w"}, {"--ignore-space-at-eol"}, {"-b"}, nil} { - diffs, err := GetDiff(t.Context(), gitRepo, + diffs, err := GetDiffForAPI(t.Context(), gitRepo, &DiffOptions{ AfterCommitID: "d8e0bbb45f200e67d9a784ce55bd90821af45ebd", BeforeCommitID: "72866af952e98d02a73003501836074b286a78f6", diff --git a/services/gitdiff/highlightdiff.go b/services/gitdiff/highlightdiff.go index 35d4844550..5891e61249 100644 --- a/services/gitdiff/highlightdiff.go +++ b/services/gitdiff/highlightdiff.go @@ -4,10 +4,10 @@ package gitdiff import ( + "bytes" + "html/template" "strings" - "code.gitea.io/gitea/modules/highlight" - "github.com/sergi/go-diff/diffmatchpatch" ) @@ -77,7 +77,7 @@ func (hcd *highlightCodeDiff) isInPlaceholderRange(r rune) bool { return hcd.placeholderBegin <= r && r < hcd.placeholderBegin+rune(hcd.placeholderMaxCount) } -func (hcd *highlightCodeDiff) collectUsedRunes(code string) { +func (hcd *highlightCodeDiff) collectUsedRunes(code template.HTML) { for _, r := range code { if hcd.isInPlaceholderRange(r) { // put the existing rune (used by code) in map, then this rune won't be used a placeholder anymore. @@ -86,27 +86,76 @@ func (hcd *highlightCodeDiff) collectUsedRunes(code string) { } } -func (hcd *highlightCodeDiff) diffWithHighlight(filename, language, codeA, codeB string) []diffmatchpatch.Diff { +func (hcd *highlightCodeDiff) diffLineWithHighlight(lineType DiffLineType, codeA, codeB template.HTML) template.HTML { + return hcd.diffLineWithHighlightWrapper(nil, lineType, codeA, codeB) +} + +func (hcd *highlightCodeDiff) diffLineWithHighlightWrapper(lineWrapperTags []string, lineType DiffLineType, codeA, codeB template.HTML) template.HTML { hcd.collectUsedRunes(codeA) hcd.collectUsedRunes(codeB) - highlightCodeA, _ := highlight.Code(filename, language, codeA) - highlightCodeB, _ := highlight.Code(filename, language, codeB) + convertedCodeA := hcd.convertToPlaceholders(codeA) + convertedCodeB := hcd.convertToPlaceholders(codeB) + + dmp := defaultDiffMatchPatch() + diffs := dmp.DiffMain(convertedCodeA, convertedCodeB, true) + diffs = dmp.DiffCleanupEfficiency(diffs) + + buf := bytes.NewBuffer(nil) - convertedCodeA := hcd.convertToPlaceholders(string(highlightCodeA)) - convertedCodeB := hcd.convertToPlaceholders(string(highlightCodeB)) + // restore the line wrapper tags <span class="line"> and <span class="cl">, if necessary + for _, tag := range lineWrapperTags { + buf.WriteString(tag) + } - diffs := diffMatchPatch.DiffMain(convertedCodeA, convertedCodeB, true) - diffs = diffMatchPatch.DiffCleanupEfficiency(diffs) + addedCodePrefix := hcd.registerTokenAsPlaceholder(`<span class="added-code">`) + removedCodePrefix := hcd.registerTokenAsPlaceholder(`<span class="removed-code">`) + codeTagSuffix := hcd.registerTokenAsPlaceholder(`</span>`) - for i := range diffs { - hcd.recoverOneDiff(&diffs[i]) + if codeTagSuffix != 0 { + for _, diff := range diffs { + switch { + case diff.Type == diffmatchpatch.DiffEqual: + buf.WriteString(diff.Text) + case diff.Type == diffmatchpatch.DiffInsert && lineType == DiffLineAdd: + buf.WriteRune(addedCodePrefix) + buf.WriteString(diff.Text) + buf.WriteRune(codeTagSuffix) + case diff.Type == diffmatchpatch.DiffDelete && lineType == DiffLineDel: + buf.WriteRune(removedCodePrefix) + buf.WriteString(diff.Text) + buf.WriteRune(codeTagSuffix) + } + } + } else { + // placeholder map space is exhausted + for _, diff := range diffs { + take := diff.Type == diffmatchpatch.DiffEqual || (diff.Type == diffmatchpatch.DiffInsert && lineType == DiffLineAdd) || (diff.Type == diffmatchpatch.DiffDelete && lineType == DiffLineDel) + if take { + buf.WriteString(diff.Text) + } + } } - return diffs + for range lineWrapperTags { + buf.WriteString("</span>") + } + return hcd.recoverOneDiff(buf.String()) +} + +func (hcd *highlightCodeDiff) registerTokenAsPlaceholder(token string) rune { + placeholder, ok := hcd.tokenPlaceholderMap[token] + if !ok { + placeholder = hcd.nextPlaceholder() + if placeholder != 0 { + hcd.tokenPlaceholderMap[token] = placeholder + hcd.placeholderTokenMap[placeholder] = token + } + } + return placeholder } // convertToPlaceholders totally depends on Chroma's valid HTML output and its structure, do not use these functions for other purposes. -func (hcd *highlightCodeDiff) convertToPlaceholders(htmlCode string) string { +func (hcd *highlightCodeDiff) convertToPlaceholders(htmlContent template.HTML) string { var tagStack []string res := strings.Builder{} @@ -115,6 +164,7 @@ func (hcd *highlightCodeDiff) convertToPlaceholders(htmlCode string) string { var beforeToken, token string var valid bool + htmlCode := string(htmlContent) // the standard chroma highlight HTML is "<span class="line [hl]"><span class="cl"> ... </span></span>" for { beforeToken, token, htmlCode, valid = extractHTMLToken(htmlCode) @@ -151,14 +201,7 @@ func (hcd *highlightCodeDiff) convertToPlaceholders(htmlCode string) string { } // else: impossible // remember the placeholder and token in the map - placeholder, ok := hcd.tokenPlaceholderMap[tokenInMap] - if !ok { - placeholder = hcd.nextPlaceholder() - if placeholder != 0 { - hcd.tokenPlaceholderMap[tokenInMap] = placeholder - hcd.placeholderTokenMap[placeholder] = tokenInMap - } - } + placeholder := hcd.registerTokenAsPlaceholder(tokenInMap) if placeholder != 0 { res.WriteRune(placeholder) // use the placeholder to replace the token @@ -179,11 +222,11 @@ func (hcd *highlightCodeDiff) convertToPlaceholders(htmlCode string) string { return res.String() } -func (hcd *highlightCodeDiff) recoverOneDiff(diff *diffmatchpatch.Diff) { +func (hcd *highlightCodeDiff) recoverOneDiff(str string) template.HTML { sb := strings.Builder{} var tagStack []string - for _, r := range diff.Text { + for _, r := range str { token, ok := hcd.placeholderTokenMap[r] if !ok || token == "" { sb.WriteRune(r) // if the rune is not a placeholder, write it as it is @@ -217,6 +260,5 @@ func (hcd *highlightCodeDiff) recoverOneDiff(diff *diffmatchpatch.Diff) { } // else: impossible. every tag was pushed into the stack by the code above and is valid HTML opening tag } } - - diff.Text = sb.String() + return template.HTML(sb.String()) } diff --git a/services/gitdiff/highlightdiff_test.go b/services/gitdiff/highlightdiff_test.go index 545a060e20..16649682b4 100644 --- a/services/gitdiff/highlightdiff_test.go +++ b/services/gitdiff/highlightdiff_test.go @@ -5,121 +5,72 @@ package gitdiff import ( "fmt" + "html/template" "strings" "testing" - "github.com/sergi/go-diff/diffmatchpatch" "github.com/stretchr/testify/assert" ) func TestDiffWithHighlight(t *testing.T) { - hcd := newHighlightCodeDiff() - diffs := hcd.diffWithHighlight( - "main.v", "", - " run('<>')\n", - " run(db)\n", - ) - - expected := ` <span class="n">run</span><span class="o">(</span><span class="removed-code"><span class="k">'</span><span class="o"><</span><span class="o">></span><span class="k">'</span></span><span class="o">)</span>` - output := diffToHTML(nil, diffs, DiffLineDel) - assert.Equal(t, expected, output) - - expected = ` <span class="n">run</span><span class="o">(</span><span class="added-code"><span class="n">db</span></span><span class="o">)</span>` - output = diffToHTML(nil, diffs, DiffLineAdd) - assert.Equal(t, expected, output) - - hcd = newHighlightCodeDiff() - hcd.placeholderTokenMap['O'] = "<span>" - hcd.placeholderTokenMap['C'] = "</span>" - diff := diffmatchpatch.Diff{} - - diff.Text = "OC" - hcd.recoverOneDiff(&diff) - assert.Equal(t, "<span></span>", diff.Text) - - diff.Text = "O" - hcd.recoverOneDiff(&diff) - assert.Equal(t, "<span></span>", diff.Text) - - diff.Text = "C" - hcd.recoverOneDiff(&diff) - assert.Equal(t, "", diff.Text) + t.Run("DiffLineAddDel", func(t *testing.T) { + hcd := newHighlightCodeDiff() + codeA := template.HTML(`x <span class="k">foo</span> y`) + codeB := template.HTML(`x <span class="k">bar</span> y`) + outDel := hcd.diffLineWithHighlight(DiffLineDel, codeA, codeB) + assert.Equal(t, `x <span class="k"><span class="removed-code">foo</span></span> y`, string(outDel)) + outAdd := hcd.diffLineWithHighlight(DiffLineAdd, codeA, codeB) + assert.Equal(t, `x <span class="k"><span class="added-code">bar</span></span> y`, string(outAdd)) + }) + + t.Run("OpenCloseTags", func(t *testing.T) { + hcd := newHighlightCodeDiff() + hcd.placeholderTokenMap['O'], hcd.placeholderTokenMap['C'] = "<span>", "</span>" + assert.Equal(t, "<span></span>", string(hcd.recoverOneDiff("OC"))) + assert.Equal(t, "<span></span>", string(hcd.recoverOneDiff("O"))) + assert.Equal(t, "", string(hcd.recoverOneDiff("C"))) + }) } func TestDiffWithHighlightPlaceholder(t *testing.T) { hcd := newHighlightCodeDiff() - diffs := hcd.diffWithHighlight( - "main.js", "", - "a='\U00100000'", - "a='\U0010FFFD''", - ) + output := hcd.diffLineWithHighlight(DiffLineDel, "a='\U00100000'", "a='\U0010FFFD''") assert.Equal(t, "", hcd.placeholderTokenMap[0x00100000]) assert.Equal(t, "", hcd.placeholderTokenMap[0x0010FFFD]) - - expected := fmt.Sprintf(`<span class="nx">a</span><span class="o">=</span><span class="s1">'</span><span class="removed-code">%s</span>'`, "\U00100000") - output := diffToHTML(hcd.lineWrapperTags, diffs, DiffLineDel) - assert.Equal(t, expected, output) + expected := fmt.Sprintf(`a='<span class="removed-code">%s</span>'`, "\U00100000") + assert.Equal(t, expected, string(output)) hcd = newHighlightCodeDiff() - diffs = hcd.diffWithHighlight( - "main.js", "", - "a='\U00100000'", - "a='\U0010FFFD'", - ) - expected = fmt.Sprintf(`<span class="nx">a</span><span class="o">=</span><span class="s1">'</span><span class="added-code">%s</span>'`, "\U0010FFFD") - output = diffToHTML(nil, diffs, DiffLineAdd) - assert.Equal(t, expected, output) + output = hcd.diffLineWithHighlight(DiffLineAdd, "a='\U00100000'", "a='\U0010FFFD'") + expected = fmt.Sprintf(`a='<span class="added-code">%s</span>'`, "\U0010FFFD") + assert.Equal(t, expected, string(output)) } func TestDiffWithHighlightPlaceholderExhausted(t *testing.T) { hcd := newHighlightCodeDiff() hcd.placeholderMaxCount = 0 - diffs := hcd.diffWithHighlight( - "main.js", "", - "'", - ``, - ) - output := diffToHTML(nil, diffs, DiffLineDel) - expected := fmt.Sprintf(`<span class="removed-code">%s#39;</span>`, "\uFFFD") - assert.Equal(t, expected, output) - - hcd = newHighlightCodeDiff() - hcd.placeholderMaxCount = 0 - diffs = hcd.diffWithHighlight( - "main.js", "", - "a < b", - "a > b", - ) - output = diffToHTML(nil, diffs, DiffLineDel) - expected = fmt.Sprintf(`a %s<span class="removed-code">l</span>t; b`, "\uFFFD") - assert.Equal(t, expected, output) - - output = diffToHTML(nil, diffs, DiffLineAdd) - expected = fmt.Sprintf(`a %s<span class="added-code">g</span>t; b`, "\uFFFD") - assert.Equal(t, expected, output) + placeHolderAmp := string(rune(0xFFFD)) + output := hcd.diffLineWithHighlight(DiffLineDel, `<span class="k"><</span>`, `<span class="k">></span>`) + assert.Equal(t, placeHolderAmp+"lt;", string(output)) + output = hcd.diffLineWithHighlight(DiffLineAdd, `<span class="k"><</span>`, `<span class="k">></span>`) + assert.Equal(t, placeHolderAmp+"gt;", string(output)) } func TestDiffWithHighlightTagMatch(t *testing.T) { - totalOverflow := 0 - for i := 0; i < 100; i++ { - hcd := newHighlightCodeDiff() - hcd.placeholderMaxCount = i - diffs := hcd.diffWithHighlight( - "main.js", "", - "a='1'", - "b='2'", - ) - totalOverflow += hcd.placeholderOverflowCount - - output := diffToHTML(nil, diffs, DiffLineDel) - c1 := strings.Count(output, "<span") - c2 := strings.Count(output, "</span") - assert.Equal(t, c1, c2) - - output = diffToHTML(nil, diffs, DiffLineAdd) - c1 = strings.Count(output, "<span") - c2 = strings.Count(output, "</span") - assert.Equal(t, c1, c2) + f := func(t *testing.T, lineType DiffLineType) { + totalOverflow := 0 + for i := 0; ; i++ { + hcd := newHighlightCodeDiff() + hcd.placeholderMaxCount = i + output := string(hcd.diffLineWithHighlight(lineType, `<span class="k"><</span>`, `<span class="k">></span>`)) + totalOverflow += hcd.placeholderOverflowCount + assert.Equal(t, strings.Count(output, "<span"), strings.Count(output, "</span")) + if hcd.placeholderOverflowCount == 0 { + break + } + } + assert.NotZero(t, totalOverflow) } - assert.NotZero(t, totalOverflow) + t.Run("DiffLineAdd", func(t *testing.T) { f(t, DiffLineAdd) }) + t.Run("DiffLineDel", func(t *testing.T) { f(t, DiffLineDel) }) } diff --git a/services/repository/files/diff_test.go b/services/repository/files/diff_test.go index 38cb1d675b..57920a2c4f 100644 --- a/services/repository/files/diff_test.go +++ b/services/repository/files/diff_test.go @@ -35,7 +35,6 @@ func TestGetDiffPreview(t *testing.T) { Name: "README.md", OldName: "README.md", NameHash: "8ec9a00bfd09b3190ac6b22251dbb1aa95a0579d", - Index: 1, Addition: 2, Deletion: 1, Type: 2, |