diff options
author | zeripath <art27@cantab.net> | 2023-01-28 15:54:40 +0000 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-01-28 15:54:40 +0000 |
commit | 78e6b21c1a4c9867dd3054d6c167cc80407b020d (patch) | |
tree | c4c3d0bd55d7269ed0f625cb992d0fafbe43beda /services | |
parent | e9cd18b5578cdf84264f3d69a5f166a33d74b4e3 (diff) | |
download | gitea-78e6b21c1a4c9867dd3054d6c167cc80407b020d.tar.gz gitea-78e6b21c1a4c9867dd3054d6c167cc80407b020d.zip |
Improve checkIfPRContentChanged (#22611)
The code for checking if a commit has caused a change in a PR is
extremely inefficient and affects the head repository instead of using a
temporary repository.
This PR therefore makes several significant improvements:
* A temporary repo like that used in merging.
* The diff code is then significant improved to use a three-way diff
instead of comparing diffs (possibly binary) line-by-line - in memory...
Ref #22578
Signed-off-by: Andrew Thornton <art27@cantab.net>
Diffstat (limited to 'services')
-rw-r--r-- | services/pull/pull.go | 92 |
1 files changed, 39 insertions, 53 deletions
diff --git a/services/pull/pull.go b/services/pull/pull.go index 7f81def6d6..c983c4f3e7 100644 --- a/services/pull/pull.go +++ b/services/pull/pull.go @@ -4,14 +4,12 @@ package pull import ( - "bufio" - "bytes" "context" "fmt" "io" + "os" "regexp" "strings" - "time" "code.gitea.io/gitea/models" "code.gitea.io/gitea/models/db" @@ -29,6 +27,7 @@ import ( repo_module "code.gitea.io/gitea/modules/repository" "code.gitea.io/gitea/modules/setting" "code.gitea.io/gitea/modules/sync" + "code.gitea.io/gitea/modules/util" issue_service "code.gitea.io/gitea/services/issue" ) @@ -351,69 +350,56 @@ func AddTestPullRequestTask(doer *user_model.User, repoID int64, branch string, // checkIfPRContentChanged checks if diff to target branch has changed by push // A commit can be considered to leave the PR untouched if the patch/diff with its merge base is unchanged func checkIfPRContentChanged(ctx context.Context, pr *issues_model.PullRequest, oldCommitID, newCommitID string) (hasChanged bool, err error) { - if err = pr.LoadHeadRepo(ctx); err != nil { - return false, fmt.Errorf("LoadHeadRepo: %w", err) - } else if pr.HeadRepo == nil { - // corrupt data assumed changed - return true, nil - } - - if err = pr.LoadBaseRepo(ctx); err != nil { - return false, fmt.Errorf("LoadBaseRepo: %w", err) - } - - headGitRepo, err := git.OpenRepository(ctx, pr.HeadRepo.RepoPath()) + tmpBasePath, err := createTemporaryRepo(ctx, pr) if err != nil { - return false, fmt.Errorf("OpenRepository: %w", err) - } - defer headGitRepo.Close() - - // Add a temporary remote. - tmpRemote := "checkIfPRContentChanged-" + fmt.Sprint(time.Now().UnixNano()) - if err = headGitRepo.AddRemote(tmpRemote, pr.BaseRepo.RepoPath(), true); err != nil { - return false, fmt.Errorf("AddRemote: %s/%s-%s: %w", pr.HeadRepo.OwnerName, pr.HeadRepo.Name, tmpRemote, err) + log.Error("CreateTemporaryRepo: %v", err) + return false, err } defer func() { - if err := headGitRepo.RemoveRemote(tmpRemote); err != nil { - log.Error("checkIfPRContentChanged: RemoveRemote: %s/%s-%s: %v", pr.HeadRepo.OwnerName, pr.HeadRepo.Name, tmpRemote, err) + if err := repo_module.RemoveTemporaryPath(tmpBasePath); err != nil { + log.Error("checkIfPRContentChanged: RemoveTemporaryPath: %s", err) } }() - // To synchronize repo and get a base ref - _, base, err := headGitRepo.GetMergeBase(tmpRemote, pr.BaseBranch, pr.HeadBranch) + + tmpRepo, err := git.OpenRepository(ctx, tmpBasePath) if err != nil { - return false, fmt.Errorf("GetMergeBase: %w", err) + return false, fmt.Errorf("OpenRepository: %w", err) } + defer tmpRepo.Close() - diffBefore := &bytes.Buffer{} - diffAfter := &bytes.Buffer{} - if err := headGitRepo.GetDiffFromMergeBase(base, oldCommitID, diffBefore); err != nil { - // If old commit not found, assume changed. - log.Debug("GetDiffFromMergeBase: %v", err) - return true, nil - } - if err := headGitRepo.GetDiffFromMergeBase(base, newCommitID, diffAfter); err != nil { - // New commit should be found - return false, fmt.Errorf("GetDiffFromMergeBase: %w", err) + // Find the merge-base + _, base, err := tmpRepo.GetMergeBase("", "base", "tracking") + if err != nil { + return false, fmt.Errorf("GetMergeBase: %w", err) } - diffBeforeLines := bufio.NewScanner(diffBefore) - diffAfterLines := bufio.NewScanner(diffAfter) - - for diffBeforeLines.Scan() && diffAfterLines.Scan() { - if strings.HasPrefix(diffBeforeLines.Text(), "index") && strings.HasPrefix(diffAfterLines.Text(), "index") { - // file hashes can change without the diff changing - continue - } else if strings.HasPrefix(diffBeforeLines.Text(), "@@") && strings.HasPrefix(diffAfterLines.Text(), "@@") { - // the location of the difference may change - continue - } else if !bytes.Equal(diffBeforeLines.Bytes(), diffAfterLines.Bytes()) { + cmd := git.NewCommand(ctx, "diff", "--name-only", "-z").AddDynamicArguments(newCommitID, oldCommitID, base) + stdoutReader, stdoutWriter, err := os.Pipe() + if err != nil { + return false, fmt.Errorf("unable to open pipe for to run diff: %w", err) + } + + if err := cmd.Run(&git.RunOpts{ + Dir: tmpBasePath, + Stdout: stdoutWriter, + PipelineFunc: func(ctx context.Context, cancel context.CancelFunc) error { + _ = stdoutWriter.Close() + defer func() { + _ = stdoutReader.Close() + }() + return util.IsEmptyReader(stdoutReader) + }, + }); err != nil { + if err == util.ErrNotEmpty { return true, nil } - } - if diffBeforeLines.Scan() || diffAfterLines.Scan() { - // Diffs not of equal length - return true, nil + log.Error("Unable to run diff on %s %s %s in tempRepo for PR[%d]%s/%s...%s/%s: Error: %v", + newCommitID, oldCommitID, base, + pr.ID, pr.BaseRepo.FullName(), pr.BaseBranch, pr.HeadRepo.FullName(), pr.HeadBranch, + err) + + return false, fmt.Errorf("Unable to run git diff --name-only -z %s %s %s: %w", newCommitID, oldCommitID, base, err) } return false, nil |