diff options
author | zeripath <art27@cantab.net> | 2021-11-17 20:37:00 +0000 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-11-17 20:37:00 +0000 |
commit | 3c4724d70e4ac7bfc06b97f6fad8936f97479b6b (patch) | |
tree | 754286def789b823e020d3ccfafae148d0017b62 /modules | |
parent | 81a4fc752833101dd7d6b4f612bccc4b29c98dff (diff) | |
download | gitea-3c4724d70e4ac7bfc06b97f6fad8936f97479b6b.tar.gz gitea-3c4724d70e4ac7bfc06b97f6fad8936f97479b6b.zip |
Add .gitattributes-assisted language detection to blame, diff and render (#17590)
Use the check-attribute code to look up the language assigned to a file and pass it to
chroma as a hint for the language of the file.
Signed-off-by: Andrew Thornton <art27@cantab.net>
Diffstat (limited to 'modules')
-rw-r--r-- | modules/git/repo_attribute.go | 17 | ||||
-rw-r--r-- | modules/git/repo_index.go | 12 | ||||
-rw-r--r-- | modules/git/repo_language_stats_gogit.go | 61 | ||||
-rw-r--r-- | modules/git/repo_language_stats_nogogit.go | 62 | ||||
-rw-r--r-- | modules/highlight/highlight.go | 36 | ||||
-rw-r--r-- | modules/highlight/highlight_test.go | 2 | ||||
-rw-r--r-- | modules/indexer/code/search.go | 2 | ||||
-rw-r--r-- | modules/repofiles/diff_test.go | 13 |
8 files changed, 134 insertions, 71 deletions
diff --git a/modules/git/repo_attribute.go b/modules/git/repo_attribute.go index aace644253..88fb7810a6 100644 --- a/modules/git/repo_attribute.go +++ b/modules/git/repo_attribute.go @@ -22,6 +22,8 @@ type CheckAttributeOpts struct { AllAttributes bool Attributes []string Filenames []string + IndexFile string + WorkTree string } // CheckAttribute return the Blame object of file @@ -31,6 +33,19 @@ func (repo *Repository) CheckAttribute(opts CheckAttributeOpts) (map[string]map[ return nil, fmt.Errorf("git version missing: %v", err) } + env := []string{} + + if len(opts.IndexFile) > 0 && CheckGitVersionAtLeast("1.7.8") == nil { + env = append(env, "GIT_INDEX_FILE="+opts.IndexFile) + } + if len(opts.WorkTree) > 0 && CheckGitVersionAtLeast("1.7.8") == nil { + env = append(env, "GIT_WORK_TREE="+opts.WorkTree) + } + + if len(env) > 0 { + env = append(os.Environ(), env...) + } + stdOut := new(bytes.Buffer) stdErr := new(bytes.Buffer) @@ -61,7 +76,7 @@ func (repo *Repository) CheckAttribute(opts CheckAttributeOpts) (map[string]map[ cmd := NewCommand(cmdArgs...) 
- if err := cmd.RunInDirPipeline(repo.Path, stdOut, stdErr); err != nil { + if err := cmd.RunInDirTimeoutEnvPipeline(env, -1, repo.Path, stdOut, stdErr); err != nil { return nil, fmt.Errorf("failed to run check-attr: %v\n%s\n%s", err, stdOut.String(), stdErr.String()) } diff --git a/modules/git/repo_index.go b/modules/git/repo_index.go index 27cb7fbebe..38c01295b6 100644 --- a/modules/git/repo_index.go +++ b/modules/git/repo_index.go @@ -8,6 +8,7 @@ import ( "bytes" "context" "os" + "path/filepath" "strings" "code.gitea.io/gitea/modules/log" @@ -45,14 +46,15 @@ func (repo *Repository) readTreeToIndex(id SHA1, indexFilename ...string) error } // ReadTreeToTemporaryIndex reads a treeish to a temporary index file -func (repo *Repository) ReadTreeToTemporaryIndex(treeish string) (filename string, cancel context.CancelFunc, err error) { - tmpIndex, err := os.CreateTemp("", "index") +func (repo *Repository) ReadTreeToTemporaryIndex(treeish string) (filename, tmpDir string, cancel context.CancelFunc, err error) { + tmpDir, err = os.MkdirTemp("", "index") if err != nil { return } - filename = tmpIndex.Name() + + filename = filepath.Join(tmpDir, ".tmp-index") cancel = func() { - err := util.Remove(filename) + err := util.RemoveAll(tmpDir) if err != nil { log.Error("failed to remove tmp index file: %v", err) } @@ -60,7 +62,7 @@ func (repo *Repository) ReadTreeToTemporaryIndex(treeish string) (filename strin err = repo.ReadTreeToIndex(treeish, filename) if err != nil { defer cancel() - return "", func() {}, err + return "", "", func() {}, err } return } diff --git a/modules/git/repo_language_stats_gogit.go b/modules/git/repo_language_stats_gogit.go index d37827c3de..037ec41ec6 100644 --- a/modules/git/repo_language_stats_gogit.go +++ b/modules/git/repo_language_stats_gogit.go @@ -11,11 +11,10 @@ import ( "bytes" "context" "io" - "os" + "strings" "code.gitea.io/gitea/modules/analyze" "code.gitea.io/gitea/modules/log" - "code.gitea.io/gitea/modules/util" 
"github.com/go-enry/go-enry/v2" "github.com/go-git/go-git/v5" @@ -48,35 +47,28 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err var checker *CheckAttributeReader if CheckGitVersionAtLeast("1.7.8") == nil { - indexFilename, deleteTemporaryFile, err := repo.ReadTreeToTemporaryIndex(commitID) + indexFilename, workTree, deleteTemporaryFile, err := repo.ReadTreeToTemporaryIndex(commitID) if err == nil { defer deleteTemporaryFile() - tmpWorkTree, err := os.MkdirTemp("", "empty-work-dir") - if err == nil { - defer func() { - _ = util.RemoveAll(tmpWorkTree) + checker = &CheckAttributeReader{ + Attributes: []string{"linguist-vendored", "linguist-generated", "linguist-language", "gitlab-language"}, + Repo: repo, + IndexFile: indexFilename, + WorkTree: workTree, + } + ctx, cancel := context.WithCancel(DefaultContext) + if err := checker.Init(ctx); err != nil { + log.Error("Unable to open checker for %s. Error: %v", commitID, err) + } else { + go func() { + err = checker.Run() + if err != nil { + log.Error("Unable to open checker for %s. Error: %v", commitID, err) + cancel() + } }() - - checker = &CheckAttributeReader{ - Attributes: []string{"linguist-vendored", "linguist-generated", "linguist-language"}, - Repo: repo, - IndexFile: indexFilename, - WorkTree: tmpWorkTree, - } - ctx, cancel := context.WithCancel(DefaultContext) - if err := checker.Init(ctx); err != nil { - log.Error("Unable to open checker for %s. Error: %v", commitID, err) - } else { - go func() { - err = checker.Run() - if err != nil { - log.Error("Unable to open checker for %s. Error: %v", commitID, err) - cancel() - } - }() - } - defer cancel() } + defer cancel() } } @@ -114,6 +106,21 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err sizes[language] += f.Size return nil + } else if language, has := attrs["gitlab-language"]; has && language != "unspecified" && language != "" { + // strip off a ? 
if present + if idx := strings.IndexByte(language, '?'); idx >= 0 { + language = language[:idx] + } + if len(language) != 0 { + // group languages, such as Pug -> HTML; SCSS -> CSS + group := enry.GetLanguageGroup(language) + if len(group) != 0 { + language = group + } + + sizes[language] += f.Size + return nil + } } } } diff --git a/modules/git/repo_language_stats_nogogit.go b/modules/git/repo_language_stats_nogogit.go index 06269a466c..4fda7ab627 100644 --- a/modules/git/repo_language_stats_nogogit.go +++ b/modules/git/repo_language_stats_nogogit.go @@ -13,11 +13,10 @@ import ( "context" "io" "math" - "os" + "strings" "code.gitea.io/gitea/modules/analyze" "code.gitea.io/gitea/modules/log" - "code.gitea.io/gitea/modules/util" "github.com/go-enry/go-enry/v2" ) @@ -68,35 +67,28 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err var checker *CheckAttributeReader if CheckGitVersionAtLeast("1.7.8") == nil { - indexFilename, deleteTemporaryFile, err := repo.ReadTreeToTemporaryIndex(commitID) + indexFilename, worktree, deleteTemporaryFile, err := repo.ReadTreeToTemporaryIndex(commitID) if err == nil { defer deleteTemporaryFile() - tmpWorkTree, err := os.MkdirTemp("", "empty-work-dir") - if err == nil { - defer func() { - _ = util.RemoveAll(tmpWorkTree) + checker = &CheckAttributeReader{ + Attributes: []string{"linguist-vendored", "linguist-generated", "linguist-language", "gitlab-language"}, + Repo: repo, + IndexFile: indexFilename, + WorkTree: worktree, + } + ctx, cancel := context.WithCancel(DefaultContext) + if err := checker.Init(ctx); err != nil { + log.Error("Unable to open checker for %s. Error: %v", commitID, err) + } else { + go func() { + err = checker.Run() + if err != nil { + log.Error("Unable to open checker for %s. 
Error: %v", commitID, err) + cancel() + } }() - - checker = &CheckAttributeReader{ - Attributes: []string{"linguist-vendored", "linguist-generated", "linguist-language"}, - Repo: repo, - IndexFile: indexFilename, - WorkTree: tmpWorkTree, - } - ctx, cancel := context.WithCancel(DefaultContext) - if err := checker.Init(ctx); err != nil { - log.Error("Unable to open checker for %s. Error: %v", commitID, err) - } else { - go func() { - err = checker.Run() - if err != nil { - log.Error("Unable to open checker for %s. Error: %v", commitID, err) - cancel() - } - }() - } - defer cancel() } + defer cancel() } } @@ -138,7 +130,23 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err sizes[language] += f.Size() continue + } else if language, has := attrs["gitlab-language"]; has && language != "unspecified" && language != "" { + // strip off a ? if present + if idx := strings.IndexByte(language, '?'); idx >= 0 { + language = language[:idx] + } + if len(language) != 0 { + // group languages, such as Pug -> HTML; SCSS -> CSS + group := enry.GetLanguageGroup(language) + if len(group) != 0 { + language = group + } + + sizes[language] += f.Size() + continue + } } + } } diff --git a/modules/highlight/highlight.go b/modules/highlight/highlight.go index 9a876d2a6b..04bd30bceb 100644 --- a/modules/highlight/highlight.go +++ b/modules/highlight/highlight.go @@ -55,7 +55,7 @@ func NewContext() { } // Code returns a HTML version of code string with chroma syntax highlighting classes -func Code(fileName, code string) string { +func Code(fileName, language, code string) string { NewContext() // diff view newline will be passed as empty, change to literal \n so it can be copied @@ -69,9 +69,23 @@ func Code(fileName, code string) string { } var lexer chroma.Lexer - if val, ok := highlightMapping[filepath.Ext(fileName)]; ok { - //use mapped value to find lexer - lexer = lexers.Get(val) + + if len(language) > 0 { + lexer = lexers.Get(language) + + if lexer == nil { 
+ // Attempt stripping off the '?' + if idx := strings.IndexByte(language, '?'); idx > 0 { + lexer = lexers.Get(language[:idx]) + } + } + } + + if lexer == nil { + if val, ok := highlightMapping[filepath.Ext(fileName)]; ok { + //use mapped value to find lexer + lexer = lexers.Get(val) + } } if lexer == nil { @@ -119,7 +133,7 @@ func CodeFromLexer(lexer chroma.Lexer, code string) string { } // File returns a slice of chroma syntax highlighted lines of code -func File(numLines int, fileName string, code []byte) []string { +func File(numLines int, fileName, language string, code []byte) []string { NewContext() if len(code) > sizeLimit { @@ -139,8 +153,16 @@ func File(numLines int, fileName string, code []byte) []string { htmlw := bufio.NewWriter(&htmlbuf) var lexer chroma.Lexer - if val, ok := highlightMapping[filepath.Ext(fileName)]; ok { - lexer = lexers.Get(val) + + // provided language overrides everything + if len(language) > 0 { + lexer = lexers.Get(language) + } + + if lexer == nil { + if val, ok := highlightMapping[filepath.Ext(fileName)]; ok { + lexer = lexers.Get(val) + } } if lexer == nil { diff --git a/modules/highlight/highlight_test.go b/modules/highlight/highlight_test.go index 29a15c0b53..3f47b6a48f 100644 --- a/modules/highlight/highlight_test.go +++ b/modules/highlight/highlight_test.go @@ -96,7 +96,7 @@ steps: for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - if got := File(tt.numLines, tt.fileName, []byte(tt.code)); !reflect.DeepEqual(got, tt.want) { + if got := File(tt.numLines, tt.fileName, "", []byte(tt.code)); !reflect.DeepEqual(got, tt.want) { t.Errorf("File() = %v, want %v", got, tt.want) } }) diff --git a/modules/indexer/code/search.go b/modules/indexer/code/search.go index 51b7c9427d..bb8dcf16b3 100644 --- a/modules/indexer/code/search.go +++ b/modules/indexer/code/search.go @@ -101,7 +101,7 @@ func searchResult(result *SearchResult, startIndex, endIndex int) (*Result, erro Language: result.Language, Color: result.Color, 
LineNumbers: lineNumbers, - FormattedLines: highlight.Code(result.Filename, formattedLinesBuffer.String()), + FormattedLines: highlight.Code(result.Filename, "", formattedLinesBuffer.String()), }, nil } diff --git a/modules/repofiles/diff_test.go b/modules/repofiles/diff_test.go index 463ce4ec67..4bd1ef6f4d 100644 --- a/modules/repofiles/diff_test.go +++ b/modules/repofiles/diff_test.go @@ -9,6 +9,7 @@ import ( "code.gitea.io/gitea/models" "code.gitea.io/gitea/models/unittest" + "code.gitea.io/gitea/modules/json" "code.gitea.io/gitea/modules/test" "code.gitea.io/gitea/services/gitdiff" @@ -118,13 +119,21 @@ func TestGetDiffPreview(t *testing.T) { t.Run("with given branch", func(t *testing.T) { diff, err := GetDiffPreview(ctx.Repo.Repository, branch, treePath, content) assert.NoError(t, err) - assert.EqualValues(t, expectedDiff, diff) + expectedBs, err := json.Marshal(expectedDiff) + assert.NoError(t, err) + bs, err := json.Marshal(diff) + assert.NoError(t, err) + assert.EqualValues(t, expectedBs, bs) }) t.Run("empty branch, same results", func(t *testing.T) { diff, err := GetDiffPreview(ctx.Repo.Repository, "", treePath, content) assert.NoError(t, err) - assert.EqualValues(t, expectedDiff, diff) + expectedBs, err := json.Marshal(expectedDiff) + assert.NoError(t, err) + bs, err := json.Marshal(diff) + assert.NoError(t, err) + assert.EqualValues(t, expectedBs, bs) }) } |