diff options
author | zeripath <art27@cantab.net> | 2023-01-28 17:56:16 +0000 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-01-28 17:56:16 +0000 |
commit | a9ba7379fe97fd8a1157ed076b268a3b4f2454b2 (patch) | |
tree | 54da3c82cad309b5555b0d86ce51ae20211f3bb0 | |
parent | 6be1d71e2bf3fb92d3af759a96d9e1f47840ea29 (diff) | |
download | gitea-a9ba7379fe97fd8a1157ed076b268a3b4f2454b2.tar.gz gitea-a9ba7379fe97fd8a1157ed076b268a3b4f2454b2.zip |
Improve checkIfPRContentChanged (#22611) (#22644)
Backport #22611
The code for checking if a commit has caused a change in a PR is
extremely inefficient and affects the head repository instead of using a
temporary repository.
This PR therefore makes several significant improvements:
* A temporary repo like that used in merging.
* The diff code is then significant improved to use a three-way diff
instead of comparing diffs (possibly binary) line-by-line - in memory...
Ref #22578
Signed-off-by: Andrew Thornton <art27@cantab.net>
-rw-r--r-- | modules/util/io.go | 22 | ||||
-rw-r--r-- | services/pull/pull.go | 92 |
2 files changed, 61 insertions, 53 deletions
diff --git a/modules/util/io.go b/modules/util/io.go index d765e27733..f3a24736a8 100644 --- a/modules/util/io.go +++ b/modules/util/io.go @@ -5,6 +5,7 @@ package util import ( + "errors" "io" ) @@ -18,3 +19,24 @@ func ReadAtMost(r io.Reader, buf []byte) (n int, err error) { } return n, err } + +// ErrNotEmpty is an error reported when there is a non-empty reader +var ErrNotEmpty = errors.New("not-empty") + +// IsEmptyReader reads a reader and ensures it is empty +func IsEmptyReader(r io.Reader) (err error) { + var buf [1]byte + + for { + n, err := r.Read(buf[:]) + if err != nil { + if err == io.EOF { + return nil + } + return err + } + if n > 0 { + return ErrNotEmpty + } + } +} diff --git a/services/pull/pull.go b/services/pull/pull.go index 5f8bd6b671..fbfc31d4e7 100644 --- a/services/pull/pull.go +++ b/services/pull/pull.go @@ -5,14 +5,12 @@ package pull import ( - "bufio" - "bytes" "context" "fmt" "io" + "os" "regexp" "strings" - "time" "code.gitea.io/gitea/models" "code.gitea.io/gitea/models/db" @@ -30,6 +28,7 @@ import ( repo_module "code.gitea.io/gitea/modules/repository" "code.gitea.io/gitea/modules/setting" "code.gitea.io/gitea/modules/sync" + "code.gitea.io/gitea/modules/util" issue_service "code.gitea.io/gitea/services/issue" ) @@ -352,69 +351,56 @@ func AddTestPullRequestTask(doer *user_model.User, repoID int64, branch string, // checkIfPRContentChanged checks if diff to target branch has changed by push // A commit can be considered to leave the PR untouched if the patch/diff with its merge base is unchanged func checkIfPRContentChanged(ctx context.Context, pr *issues_model.PullRequest, oldCommitID, newCommitID string) (hasChanged bool, err error) { - if err = pr.LoadHeadRepoCtx(ctx); err != nil { - return false, fmt.Errorf("LoadHeadRepo: %w", err) - } else if pr.HeadRepo == nil { - // corrupt data assumed changed - return true, nil - } - - if err = pr.LoadBaseRepoCtx(ctx); err != nil { - return false, fmt.Errorf("LoadBaseRepo: %w", err) - } - - headGitRepo, err := git.OpenRepository(ctx, pr.HeadRepo.RepoPath()) + tmpBasePath, err := createTemporaryRepo(ctx, pr) if err != nil { - return false, fmt.Errorf("OpenRepository: %w", err) - } - defer headGitRepo.Close() - - // Add a temporary remote. - tmpRemote := "checkIfPRContentChanged-" + fmt.Sprint(time.Now().UnixNano()) - if err = headGitRepo.AddRemote(tmpRemote, pr.BaseRepo.RepoPath(), true); err != nil { - return false, fmt.Errorf("AddRemote: %s/%s-%s: %w", pr.HeadRepo.OwnerName, pr.HeadRepo.Name, tmpRemote, err) + log.Error("CreateTemporaryRepo: %v", err) + return false, err } defer func() { - if err := headGitRepo.RemoveRemote(tmpRemote); err != nil { - log.Error("checkIfPRContentChanged: RemoveRemote: %s/%s-%s: %v", pr.HeadRepo.OwnerName, pr.HeadRepo.Name, tmpRemote, err) + if err := repo_module.RemoveTemporaryPath(tmpBasePath); err != nil { + log.Error("checkIfPRContentChanged: RemoveTemporaryPath: %s", err) } }() - // To synchronize repo and get a base ref - _, base, err := headGitRepo.GetMergeBase(tmpRemote, pr.BaseBranch, pr.HeadBranch) + + tmpRepo, err := git.OpenRepository(ctx, tmpBasePath) if err != nil { - return false, fmt.Errorf("GetMergeBase: %w", err) + return false, fmt.Errorf("OpenRepository: %w", err) } + defer tmpRepo.Close() - diffBefore := &bytes.Buffer{} - diffAfter := &bytes.Buffer{} - if err := headGitRepo.GetDiffFromMergeBase(base, oldCommitID, diffBefore); err != nil { - // If old commit not found, assume changed. - log.Debug("GetDiffFromMergeBase: %v", err) - return true, nil - } - if err := headGitRepo.GetDiffFromMergeBase(base, newCommitID, diffAfter); err != nil { - // New commit should be found - return false, fmt.Errorf("GetDiffFromMergeBase: %w", err) + // Find the merge-base + _, base, err := tmpRepo.GetMergeBase("", "base", "tracking") + if err != nil { + return false, fmt.Errorf("GetMergeBase: %w", err) } - diffBeforeLines := bufio.NewScanner(diffBefore) - diffAfterLines := bufio.NewScanner(diffAfter) - - for diffBeforeLines.Scan() && diffAfterLines.Scan() { - if strings.HasPrefix(diffBeforeLines.Text(), "index") && strings.HasPrefix(diffAfterLines.Text(), "index") { - // file hashes can change without the diff changing - continue - } else if strings.HasPrefix(diffBeforeLines.Text(), "@@") && strings.HasPrefix(diffAfterLines.Text(), "@@") { - // the location of the difference may change - continue - } else if !bytes.Equal(diffBeforeLines.Bytes(), diffAfterLines.Bytes()) { + cmd := git.NewCommand(ctx, "diff", "--name-only", "-z").AddDynamicArguments(newCommitID, oldCommitID, base) + stdoutReader, stdoutWriter, err := os.Pipe() + if err != nil { + return false, fmt.Errorf("unable to open pipe for to run diff: %w", err) + } + + if err := cmd.Run(&git.RunOpts{ + Dir: tmpBasePath, + Stdout: stdoutWriter, + PipelineFunc: func(ctx context.Context, cancel context.CancelFunc) error { + _ = stdoutWriter.Close() + defer func() { + _ = stdoutReader.Close() + }() + return util.IsEmptyReader(stdoutReader) + }, + }); err != nil { + if err == util.ErrNotEmpty { return true, nil } - } - if diffBeforeLines.Scan() || diffAfterLines.Scan() { - // Diffs not of equal length - return true, nil + log.Error("Unable to run diff on %s %s %s in tempRepo for PR[%d]%s/%s...%s/%s: Error: %v", + newCommitID, oldCommitID, base, + pr.ID, pr.BaseRepo.FullName(), pr.BaseBranch, pr.HeadRepo.FullName(), pr.HeadBranch, + err) + + return false, fmt.Errorf("Unable to run git diff --name-only -z %s %s %s: %w", newCommitID, oldCommitID, base, err) } return false, nil |