aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorRowan Bohde <rowan.bohde@gmail.com>2024-11-01 22:29:37 -0500
committerGitHub <noreply@github.com>2024-11-02 11:29:37 +0800
commit7dcccc3bb19655a6f83dd495ffc332708d0c8678 (patch)
treec859447a811114dbe300311f12dd24787d3e39cf
parentec2d1593c269e06655525deb96f74b8094221b6f (diff)
downloadgitea-7dcccc3bb19655a6f83dd495ffc332708d0c8678.tar.gz
gitea-7dcccc3bb19655a6f83dd495ffc332708d0c8678.zip
improve performance of diffs (#32393)
This has two major changes that significantly reduce the amount of work done for large diffs: * Kill a running git process when reaching the maximum number of files in a diff, preventing it from processing the entire diff. * When loading a diff with the URL param `file-only=true`, skip loading stats. This speeds up loading both hidden files of a diff and sections of a diff when clicking the "Show More" button. A couple of minor things from profiling are also included: * Reuse existing repo in `PrepareViewPullInfo` if head and base are the same. The performance impact is going to depend heavily on the individual diff and the hardware it runs on, but when testing locally on a diff changing 100k+ lines over hundreds of files, I'm seeing a roughly 75% reduction in time to load the result of "Show More" --------- Co-authored-by: wxiaoguang <wxiaoguang@gmail.com>
-rw-r--r--routers/web/repo/commit.go1
-rw-r--r--routers/web/repo/compare.go3
-rw-r--r--routers/web/repo/pull.go7
-rw-r--r--services/gitdiff/gitdiff.go62
-rw-r--r--services/repository/files/temp_repo.go13
5 files changed, 44 insertions, 42 deletions
diff --git a/routers/web/repo/commit.go b/routers/web/repo/commit.go
index 0e4e10bf50..d7865e18d6 100644
--- a/routers/web/repo/commit.go
+++ b/routers/web/repo/commit.go
@@ -328,6 +328,7 @@ func Diff(ctx *context.Context) {
MaxLineCharacters: setting.Git.MaxGitDiffLineCharacters,
MaxFiles: maxFiles,
WhitespaceBehavior: gitdiff.GetWhitespaceFlag(ctx.Data["WhitespaceBehavior"].(string)),
+ FileOnly: fileOnly,
}, files...)
if err != nil {
ctx.NotFound("GetDiff", err)
diff --git a/routers/web/repo/compare.go b/routers/web/repo/compare.go
index e6a04782e5..3927972837 100644
--- a/routers/web/repo/compare.go
+++ b/routers/web/repo/compare.go
@@ -611,6 +611,8 @@ func PrepareCompareDiff(
maxLines, maxFiles = -1, -1
}
+ fileOnly := ctx.FormBool("file-only")
+
diff, err := gitdiff.GetDiff(ctx, ci.HeadGitRepo,
&gitdiff.DiffOptions{
BeforeCommitID: beforeCommitID,
@@ -621,6 +623,7 @@ func PrepareCompareDiff(
MaxFiles: maxFiles,
WhitespaceBehavior: whitespaceBehavior,
DirectComparison: ci.DirectComparison,
+ FileOnly: fileOnly,
}, ctx.FormStrings("files")...)
if err != nil {
ctx.ServerError("GetDiffRangeWithWhitespaceBehavior", err)
diff --git a/routers/web/repo/pull.go b/routers/web/repo/pull.go
index 0efe1be76a..cc554a71ff 100644
--- a/routers/web/repo/pull.go
+++ b/routers/web/repo/pull.go
@@ -395,12 +395,12 @@ func PrepareViewPullInfo(ctx *context.Context, issue *issues_model.Issue) *git.C
var headBranchSha string
// HeadRepo may be missing
if pull.HeadRepo != nil {
- headGitRepo, err := gitrepo.OpenRepository(ctx, pull.HeadRepo)
+ headGitRepo, closer, err := gitrepo.RepositoryFromContextOrOpen(ctx, pull.HeadRepo)
if err != nil {
- ctx.ServerError("OpenRepository", err)
+ ctx.ServerError("RepositoryFromContextOrOpen", err)
return nil
}
- defer headGitRepo.Close()
+ defer closer.Close()
if pull.Flow == issues_model.PullRequestFlowGithub {
headBranchExist = headGitRepo.IsBranchExist(pull.HeadBranch)
@@ -747,6 +747,7 @@ func viewPullFiles(ctx *context.Context, specifiedStartCommit, specifiedEndCommi
MaxLineCharacters: setting.Git.MaxGitDiffLineCharacters,
MaxFiles: maxFiles,
WhitespaceBehavior: gitdiff.GetWhitespaceFlag(ctx.Data["WhitespaceBehavior"].(string)),
+ FileOnly: fileOnly,
}
if !willShowSpecifiedCommit {
diff --git a/services/gitdiff/gitdiff.go b/services/gitdiff/gitdiff.go
index 0ddd5a48e2..cf7da0707b 100644
--- a/services/gitdiff/gitdiff.go
+++ b/services/gitdiff/gitdiff.go
@@ -380,18 +380,11 @@ func (diffFile *DiffFile) GetType() int {
}
// GetTailSection creates a fake DiffLineSection if the last section is not the end of the file
-func (diffFile *DiffFile) GetTailSection(gitRepo *git.Repository, leftCommitID, rightCommitID string) *DiffSection {
+func (diffFile *DiffFile) GetTailSection(gitRepo *git.Repository, leftCommit, rightCommit *git.Commit) *DiffSection {
if len(diffFile.Sections) == 0 || diffFile.Type != DiffFileChange || diffFile.IsBin || diffFile.IsLFSFile {
return nil
}
- leftCommit, err := gitRepo.GetCommit(leftCommitID)
- if err != nil {
- return nil
- }
- rightCommit, err := gitRepo.GetCommit(rightCommitID)
- if err != nil {
- return nil
- }
+
lastSection := diffFile.Sections[len(diffFile.Sections)-1]
lastLine := lastSection.Lines[len(lastSection.Lines)-1]
leftLineCount := getCommitFileLineCount(leftCommit, diffFile.Name)
@@ -536,11 +529,6 @@ parsingLoop:
lastFile := createDiffFile(diff, line)
diff.End = lastFile.Name
diff.IsIncomplete = true
- _, err := io.Copy(io.Discard, reader)
- if err != nil {
- // By the definition of io.Copy this never returns io.EOF
- return diff, fmt.Errorf("error during io.Copy: %w", err)
- }
break parsingLoop
}
@@ -1101,6 +1089,7 @@ type DiffOptions struct {
MaxFiles int
WhitespaceBehavior git.TrustedCmdArgs
DirectComparison bool
+ FileOnly bool
}
// GetDiff builds a Diff between two commits of a repository.
@@ -1109,12 +1098,16 @@ type DiffOptions struct {
func GetDiff(ctx context.Context, gitRepo *git.Repository, opts *DiffOptions, files ...string) (*Diff, error) {
repoPath := gitRepo.Path
+ var beforeCommit *git.Commit
commit, err := gitRepo.GetCommit(opts.AfterCommitID)
if err != nil {
return nil, err
}
- cmdDiff := git.NewCommand(gitRepo.Ctx)
+ cmdCtx, cmdCancel := context.WithCancel(ctx)
+ defer cmdCancel()
+
+ cmdDiff := git.NewCommand(cmdCtx)
objectFormat, err := gitRepo.GetObjectFormat()
if err != nil {
return nil, err
@@ -1136,6 +1129,12 @@ func GetDiff(ctx context.Context, gitRepo *git.Repository, opts *DiffOptions, fi
AddArguments(opts.WhitespaceBehavior...).
AddDynamicArguments(actualBeforeCommitID, opts.AfterCommitID)
opts.BeforeCommitID = actualBeforeCommitID
+
+ var err error
+ beforeCommit, err = gitRepo.GetCommit(opts.BeforeCommitID)
+ if err != nil {
+ return nil, err
+ }
}
// In git 2.31, git diff learned --skip-to which we can use to shortcut skip to file
@@ -1163,14 +1162,16 @@ func GetDiff(ctx context.Context, gitRepo *git.Repository, opts *DiffOptions, fi
Dir: repoPath,
Stdout: writer,
Stderr: stderr,
- }); err != nil {
+ }); err != nil && err.Error() != "signal: killed" {
log.Error("error during GetDiff(git diff dir: %s): %v, stderr: %s", repoPath, err, stderr.String())
}
_ = writer.Close()
}()
- diff, err := ParsePatch(ctx, opts.MaxLines, opts.MaxLineCharacters, opts.MaxFiles, reader, parsePatchSkipToFile)
+ diff, err := ParsePatch(cmdCtx, opts.MaxLines, opts.MaxLineCharacters, opts.MaxFiles, reader, parsePatchSkipToFile)
+ // Ensure the git process is killed if it didn't exit already
+ cmdCancel()
if err != nil {
return nil, fmt.Errorf("unable to ParsePatch: %w", err)
}
@@ -1205,37 +1206,28 @@ func GetDiff(ctx context.Context, gitRepo *git.Repository, opts *DiffOptions, fi
}
diffFile.IsGenerated = isGenerated.Value()
- tailSection := diffFile.GetTailSection(gitRepo, opts.BeforeCommitID, opts.AfterCommitID)
+ tailSection := diffFile.GetTailSection(gitRepo, beforeCommit, commit)
if tailSection != nil {
diffFile.Sections = append(diffFile.Sections, tailSection)
}
}
- separator := "..."
- if opts.DirectComparison {
- separator = ".."
+ if opts.FileOnly {
+ return diff, nil
}
- diffPaths := []string{opts.BeforeCommitID + separator + opts.AfterCommitID}
- if len(opts.BeforeCommitID) == 0 || opts.BeforeCommitID == objectFormat.EmptyObjectID().String() {
- diffPaths = []string{objectFormat.EmptyTree().String(), opts.AfterCommitID}
- }
- diff.NumFiles, diff.TotalAddition, diff.TotalDeletion, err = git.GetDiffShortStat(gitRepo.Ctx, repoPath, nil, diffPaths...)
- if err != nil && strings.Contains(err.Error(), "no merge base") {
- // git >= 2.28 now returns an error if base and head have become unrelated.
- // previously it would return the results of git diff --shortstat base head so let's try that...
- diffPaths = []string{opts.BeforeCommitID, opts.AfterCommitID}
- diff.NumFiles, diff.TotalAddition, diff.TotalDeletion, err = git.GetDiffShortStat(gitRepo.Ctx, repoPath, nil, diffPaths...)
- }
+ stats, err := GetPullDiffStats(gitRepo, opts)
if err != nil {
return nil, err
}
+ diff.NumFiles, diff.TotalAddition, diff.TotalDeletion = stats.NumFiles, stats.TotalAddition, stats.TotalDeletion
+
return diff, nil
}
type PullDiffStats struct {
- TotalAddition, TotalDeletion int
+ NumFiles, TotalAddition, TotalDeletion int
}
// GetPullDiffStats
@@ -1259,12 +1251,12 @@ func GetPullDiffStats(gitRepo *git.Repository, opts *DiffOptions) (*PullDiffStat
diffPaths = []string{objectFormat.EmptyTree().String(), opts.AfterCommitID}
}
- _, diff.TotalAddition, diff.TotalDeletion, err = git.GetDiffShortStat(gitRepo.Ctx, repoPath, nil, diffPaths...)
+ diff.NumFiles, diff.TotalAddition, diff.TotalDeletion, err = git.GetDiffShortStat(gitRepo.Ctx, repoPath, nil, diffPaths...)
if err != nil && strings.Contains(err.Error(), "no merge base") {
// git >= 2.28 now returns an error if base and head have become unrelated.
// previously it would return the results of git diff --shortstat base head so let's try that...
diffPaths = []string{opts.BeforeCommitID, opts.AfterCommitID}
- _, diff.TotalAddition, diff.TotalDeletion, err = git.GetDiffShortStat(gitRepo.Ctx, repoPath, nil, diffPaths...)
+ diff.NumFiles, diff.TotalAddition, diff.TotalDeletion, err = git.GetDiffShortStat(gitRepo.Ctx, repoPath, nil, diffPaths...)
}
if err != nil {
return nil, err
diff --git a/services/repository/files/temp_repo.go b/services/repository/files/temp_repo.go
index d70b1e8d54..ce98967e79 100644
--- a/services/repository/files/temp_repo.go
+++ b/services/repository/files/temp_repo.go
@@ -358,6 +358,7 @@ func (t *TemporaryUploadRepository) DiffIndex() (*gitdiff.Diff, error) {
Stderr: stderr,
PipelineFunc: func(ctx context.Context, cancel context.CancelFunc) error {
_ = stdoutWriter.Close()
+ defer cancel()
diff, finalErr = gitdiff.ParsePatch(t.ctx, setting.Git.MaxGitDiffLines, setting.Git.MaxGitDiffLineCharacters, setting.Git.MaxGitDiffFiles, stdoutReader, "")
if finalErr != nil {
log.Error("ParsePatch: %v", finalErr)
@@ -371,10 +372,14 @@ func (t *TemporaryUploadRepository) DiffIndex() (*gitdiff.Diff, error) {
log.Error("Unable to ParsePatch in temporary repo %s (%s). Error: %v", t.repo.FullName(), t.basePath, finalErr)
return nil, finalErr
}
- log.Error("Unable to run diff-index pipeline in temporary repo %s (%s). Error: %v\nStderr: %s",
- t.repo.FullName(), t.basePath, err, stderr)
- return nil, fmt.Errorf("Unable to run diff-index pipeline in temporary repo %s. Error: %w\nStderr: %s",
- t.repo.FullName(), err, stderr)
+
+ // If the process exited early, don't error
+ if err != context.Canceled {
+ log.Error("Unable to run diff-index pipeline in temporary repo %s (%s). Error: %v\nStderr: %s",
+ t.repo.FullName(), t.basePath, err, stderr)
+ return nil, fmt.Errorf("Unable to run diff-index pipeline in temporary repo %s. Error: %w\nStderr: %s",
+ t.repo.FullName(), err, stderr)
+ }
}
diff.NumFiles, diff.TotalAddition, diff.TotalDeletion, err = git.GetDiffShortStat(t.ctx, t.basePath, git.TrustedCmdArgs{"--cached"}, "HEAD")