]> source.dussan.org Git - gitea.git/commitdiff
improve performance of diffs (#32393)
authorRowan Bohde <rowan.bohde@gmail.com>
Sat, 2 Nov 2024 03:29:37 +0000 (22:29 -0500)
committerGitHub <noreply@github.com>
Sat, 2 Nov 2024 03:29:37 +0000 (11:29 +0800)
This has two major changes that significantly reduce the amount of work
done for large diffs:

* Kill a running git process when reaching the maximum number of files
in a diff, preventing it from processing the entire diff.
* When loading a diff with the URL param `file-only=true`, skip loading
stats. This speeds up loading both hidden files of a diff and sections
of a diff when clicking the "Show More" button.

A couple of minor things from profiling are also included:

* Reuse existing repo in `PrepareViewPullInfo` if head and base are the
same.

The performance impact is going to depend heavily on the individual diff
and the hardware it runs on, but when testing locally on a diff changing
100k+ lines over hundreds of files, I'm seeing a roughly 75% reduction
in time to load the result of "Show More"

---------

Co-authored-by: wxiaoguang <wxiaoguang@gmail.com>
routers/web/repo/commit.go
routers/web/repo/compare.go
routers/web/repo/pull.go
services/gitdiff/gitdiff.go
services/repository/files/temp_repo.go

index 0e4e10bf50944643b7d8553f3f99d28f3a0ac31a..d7865e18d63f9ff273499502dd46f52ae2bb0483 100644 (file)
@@ -328,6 +328,7 @@ func Diff(ctx *context.Context) {
                MaxLineCharacters:  setting.Git.MaxGitDiffLineCharacters,
                MaxFiles:           maxFiles,
                WhitespaceBehavior: gitdiff.GetWhitespaceFlag(ctx.Data["WhitespaceBehavior"].(string)),
+               FileOnly:           fileOnly,
        }, files...)
        if err != nil {
                ctx.NotFound("GetDiff", err)
index e6a04782e5bbf17f777308ee265902e82a11014e..39279728375f00aa53a78783dd8b7f93c1512868 100644 (file)
@@ -611,6 +611,8 @@ func PrepareCompareDiff(
                maxLines, maxFiles = -1, -1
        }
 
+       fileOnly := ctx.FormBool("file-only")
+
        diff, err := gitdiff.GetDiff(ctx, ci.HeadGitRepo,
                &gitdiff.DiffOptions{
                        BeforeCommitID:     beforeCommitID,
@@ -621,6 +623,7 @@ func PrepareCompareDiff(
                        MaxFiles:           maxFiles,
                        WhitespaceBehavior: whitespaceBehavior,
                        DirectComparison:   ci.DirectComparison,
+                       FileOnly:           fileOnly,
                }, ctx.FormStrings("files")...)
        if err != nil {
                ctx.ServerError("GetDiffRangeWithWhitespaceBehavior", err)
index 0efe1be76a310fdcc0a3ca478009d4b887a6fdcb..cc554a71ff6052981bbbfd612f927ecb08c846ad 100644 (file)
@@ -395,12 +395,12 @@ func PrepareViewPullInfo(ctx *context.Context, issue *issues_model.Issue) *git.C
        var headBranchSha string
        // HeadRepo may be missing
        if pull.HeadRepo != nil {
-               headGitRepo, err := gitrepo.OpenRepository(ctx, pull.HeadRepo)
+               headGitRepo, closer, err := gitrepo.RepositoryFromContextOrOpen(ctx, pull.HeadRepo)
                if err != nil {
-                       ctx.ServerError("OpenRepository", err)
+                       ctx.ServerError("RepositoryFromContextOrOpen", err)
                        return nil
                }
-               defer headGitRepo.Close()
+               defer closer.Close()
 
                if pull.Flow == issues_model.PullRequestFlowGithub {
                        headBranchExist = headGitRepo.IsBranchExist(pull.HeadBranch)
@@ -747,6 +747,7 @@ func viewPullFiles(ctx *context.Context, specifiedStartCommit, specifiedEndCommi
                MaxLineCharacters:  setting.Git.MaxGitDiffLineCharacters,
                MaxFiles:           maxFiles,
                WhitespaceBehavior: gitdiff.GetWhitespaceFlag(ctx.Data["WhitespaceBehavior"].(string)),
+               FileOnly:           fileOnly,
        }
 
        if !willShowSpecifiedCommit {
index 0ddd5a48e21482500e76b0d679dcfbb65c52244c..cf7da0707b8869d51c406ec94cda675f4afdfb40 100644 (file)
@@ -380,18 +380,11 @@ func (diffFile *DiffFile) GetType() int {
 }
 
 // GetTailSection creates a fake DiffLineSection if the last section is not the end of the file
-func (diffFile *DiffFile) GetTailSection(gitRepo *git.Repository, leftCommitID, rightCommitID string) *DiffSection {
+func (diffFile *DiffFile) GetTailSection(gitRepo *git.Repository, leftCommit, rightCommit *git.Commit) *DiffSection {
        if len(diffFile.Sections) == 0 || diffFile.Type != DiffFileChange || diffFile.IsBin || diffFile.IsLFSFile {
                return nil
        }
-       leftCommit, err := gitRepo.GetCommit(leftCommitID)
-       if err != nil {
-               return nil
-       }
-       rightCommit, err := gitRepo.GetCommit(rightCommitID)
-       if err != nil {
-               return nil
-       }
+
        lastSection := diffFile.Sections[len(diffFile.Sections)-1]
        lastLine := lastSection.Lines[len(lastSection.Lines)-1]
        leftLineCount := getCommitFileLineCount(leftCommit, diffFile.Name)
@@ -536,11 +529,6 @@ parsingLoop:
                        lastFile := createDiffFile(diff, line)
                        diff.End = lastFile.Name
                        diff.IsIncomplete = true
-                       _, err := io.Copy(io.Discard, reader)
-                       if err != nil {
-                               // By the definition of io.Copy this never returns io.EOF
-                               return diff, fmt.Errorf("error during io.Copy: %w", err)
-                       }
                        break parsingLoop
                }
 
@@ -1101,6 +1089,7 @@ type DiffOptions struct {
        MaxFiles           int
        WhitespaceBehavior git.TrustedCmdArgs
        DirectComparison   bool
+       FileOnly           bool
 }
 
 // GetDiff builds a Diff between two commits of a repository.
@@ -1109,12 +1098,16 @@ type DiffOptions struct {
 func GetDiff(ctx context.Context, gitRepo *git.Repository, opts *DiffOptions, files ...string) (*Diff, error) {
        repoPath := gitRepo.Path
 
+       var beforeCommit *git.Commit
        commit, err := gitRepo.GetCommit(opts.AfterCommitID)
        if err != nil {
                return nil, err
        }
 
-       cmdDiff := git.NewCommand(gitRepo.Ctx)
+       cmdCtx, cmdCancel := context.WithCancel(ctx)
+       defer cmdCancel()
+
+       cmdDiff := git.NewCommand(cmdCtx)
        objectFormat, err := gitRepo.GetObjectFormat()
        if err != nil {
                return nil, err
@@ -1136,6 +1129,12 @@ func GetDiff(ctx context.Context, gitRepo *git.Repository, opts *DiffOptions, fi
                        AddArguments(opts.WhitespaceBehavior...).
                        AddDynamicArguments(actualBeforeCommitID, opts.AfterCommitID)
                opts.BeforeCommitID = actualBeforeCommitID
+
+               var err error
+               beforeCommit, err = gitRepo.GetCommit(opts.BeforeCommitID)
+               if err != nil {
+                       return nil, err
+               }
        }
 
        // In git 2.31, git diff learned --skip-to which we can use to shortcut skip to file
@@ -1163,14 +1162,16 @@ func GetDiff(ctx context.Context, gitRepo *git.Repository, opts *DiffOptions, fi
                        Dir:     repoPath,
                        Stdout:  writer,
                        Stderr:  stderr,
-               }); err != nil {
+               }); err != nil && err.Error() != "signal: killed" {
                        log.Error("error during GetDiff(git diff dir: %s): %v, stderr: %s", repoPath, err, stderr.String())
                }
 
                _ = writer.Close()
        }()
 
-       diff, err := ParsePatch(ctx, opts.MaxLines, opts.MaxLineCharacters, opts.MaxFiles, reader, parsePatchSkipToFile)
+       diff, err := ParsePatch(cmdCtx, opts.MaxLines, opts.MaxLineCharacters, opts.MaxFiles, reader, parsePatchSkipToFile)
+       // Ensure the git process is killed if it didn't exit already
+       cmdCancel()
        if err != nil {
                return nil, fmt.Errorf("unable to ParsePatch: %w", err)
        }
@@ -1205,37 +1206,28 @@ func GetDiff(ctx context.Context, gitRepo *git.Repository, opts *DiffOptions, fi
                }
                diffFile.IsGenerated = isGenerated.Value()
 
-               tailSection := diffFile.GetTailSection(gitRepo, opts.BeforeCommitID, opts.AfterCommitID)
+               tailSection := diffFile.GetTailSection(gitRepo, beforeCommit, commit)
                if tailSection != nil {
                        diffFile.Sections = append(diffFile.Sections, tailSection)
                }
        }
 
-       separator := "..."
-       if opts.DirectComparison {
-               separator = ".."
+       if opts.FileOnly {
+               return diff, nil
        }
 
-       diffPaths := []string{opts.BeforeCommitID + separator + opts.AfterCommitID}
-       if len(opts.BeforeCommitID) == 0 || opts.BeforeCommitID == objectFormat.EmptyObjectID().String() {
-               diffPaths = []string{objectFormat.EmptyTree().String(), opts.AfterCommitID}
-       }
-       diff.NumFiles, diff.TotalAddition, diff.TotalDeletion, err = git.GetDiffShortStat(gitRepo.Ctx, repoPath, nil, diffPaths...)
-       if err != nil && strings.Contains(err.Error(), "no merge base") {
-               // git >= 2.28 now returns an error if base and head have become unrelated.
-               // previously it would return the results of git diff --shortstat base head so let's try that...
-               diffPaths = []string{opts.BeforeCommitID, opts.AfterCommitID}
-               diff.NumFiles, diff.TotalAddition, diff.TotalDeletion, err = git.GetDiffShortStat(gitRepo.Ctx, repoPath, nil, diffPaths...)
-       }
+       stats, err := GetPullDiffStats(gitRepo, opts)
        if err != nil {
                return nil, err
        }
 
+       diff.NumFiles, diff.TotalAddition, diff.TotalDeletion = stats.NumFiles, stats.TotalAddition, stats.TotalDeletion
+
        return diff, nil
 }
 
 type PullDiffStats struct {
-       TotalAddition, TotalDeletion int
+       NumFiles, TotalAddition, TotalDeletion int
 }
 
 // GetPullDiffStats
@@ -1259,12 +1251,12 @@ func GetPullDiffStats(gitRepo *git.Repository, opts *DiffOptions) (*PullDiffStat
                diffPaths = []string{objectFormat.EmptyTree().String(), opts.AfterCommitID}
        }
 
-       _, diff.TotalAddition, diff.TotalDeletion, err = git.GetDiffShortStat(gitRepo.Ctx, repoPath, nil, diffPaths...)
+       diff.NumFiles, diff.TotalAddition, diff.TotalDeletion, err = git.GetDiffShortStat(gitRepo.Ctx, repoPath, nil, diffPaths...)
        if err != nil && strings.Contains(err.Error(), "no merge base") {
                // git >= 2.28 now returns an error if base and head have become unrelated.
                // previously it would return the results of git diff --shortstat base head so let's try that...
                diffPaths = []string{opts.BeforeCommitID, opts.AfterCommitID}
-               _, diff.TotalAddition, diff.TotalDeletion, err = git.GetDiffShortStat(gitRepo.Ctx, repoPath, nil, diffPaths...)
+               diff.NumFiles, diff.TotalAddition, diff.TotalDeletion, err = git.GetDiffShortStat(gitRepo.Ctx, repoPath, nil, diffPaths...)
        }
        if err != nil {
                return nil, err
index d70b1e8d54e77486257a0cf8eead2ca9c9f07b7c..ce98967e7976a57a08de7f6e547031cc1e558ea1 100644 (file)
@@ -358,6 +358,7 @@ func (t *TemporaryUploadRepository) DiffIndex() (*gitdiff.Diff, error) {
                        Stderr:  stderr,
                        PipelineFunc: func(ctx context.Context, cancel context.CancelFunc) error {
                                _ = stdoutWriter.Close()
+                               defer cancel()
                                diff, finalErr = gitdiff.ParsePatch(t.ctx, setting.Git.MaxGitDiffLines, setting.Git.MaxGitDiffLineCharacters, setting.Git.MaxGitDiffFiles, stdoutReader, "")
                                if finalErr != nil {
                                        log.Error("ParsePatch: %v", finalErr)
@@ -371,10 +372,14 @@ func (t *TemporaryUploadRepository) DiffIndex() (*gitdiff.Diff, error) {
                        log.Error("Unable to ParsePatch in temporary repo %s (%s). Error: %v", t.repo.FullName(), t.basePath, finalErr)
                        return nil, finalErr
                }
-               log.Error("Unable to run diff-index pipeline in temporary repo %s (%s). Error: %v\nStderr: %s",
-                       t.repo.FullName(), t.basePath, err, stderr)
-               return nil, fmt.Errorf("Unable to run diff-index pipeline in temporary repo %s. Error: %w\nStderr: %s",
-                       t.repo.FullName(), err, stderr)
+
+               // If the process exited early, don't error
+               if err != context.Canceled {
+                       log.Error("Unable to run diff-index pipeline in temporary repo %s (%s). Error: %v\nStderr: %s",
+                               t.repo.FullName(), t.basePath, err, stderr)
+                       return nil, fmt.Errorf("Unable to run diff-index pipeline in temporary repo %s. Error: %w\nStderr: %s",
+                               t.repo.FullName(), err, stderr)
+               }
        }
 
        diff.NumFiles, diff.TotalAddition, diff.TotalDeletion, err = git.GetDiffShortStat(t.ctx, t.basePath, git.TrustedCmdArgs{"--cached"}, "HEAD")