summaryrefslogtreecommitdiffstats
path: root/services
diff options
context:
space:
mode:
authorzeripath <art27@cantab.net>2023-01-28 15:54:40 +0000
committerGitHub <noreply@github.com>2023-01-28 15:54:40 +0000
commit78e6b21c1a4c9867dd3054d6c167cc80407b020d (patch)
treec4c3d0bd55d7269ed0f625cb992d0fafbe43beda /services
parente9cd18b5578cdf84264f3d69a5f166a33d74b4e3 (diff)
downloadgitea-78e6b21c1a4c9867dd3054d6c167cc80407b020d.tar.gz
gitea-78e6b21c1a4c9867dd3054d6c167cc80407b020d.zip
Improve checkIfPRContentChanged (#22611)
The code for checking if a commit has caused a change in a PR is extremely inefficient and affects the head repository instead of using a temporary repository. This PR therefore makes several significant improvements: * A temporary repo like that used in merging. * The diff code is then significant improved to use a three-way diff instead of comparing diffs (possibly binary) line-by-line - in memory... Ref #22578 Signed-off-by: Andrew Thornton <art27@cantab.net>
Diffstat (limited to 'services')
-rw-r--r--services/pull/pull.go92
1 files changed, 39 insertions, 53 deletions
diff --git a/services/pull/pull.go b/services/pull/pull.go
index 7f81def6d6..c983c4f3e7 100644
--- a/services/pull/pull.go
+++ b/services/pull/pull.go
@@ -4,14 +4,12 @@
package pull
import (
- "bufio"
- "bytes"
"context"
"fmt"
"io"
+ "os"
"regexp"
"strings"
- "time"
"code.gitea.io/gitea/models"
"code.gitea.io/gitea/models/db"
@@ -29,6 +27,7 @@ import (
repo_module "code.gitea.io/gitea/modules/repository"
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/sync"
+ "code.gitea.io/gitea/modules/util"
issue_service "code.gitea.io/gitea/services/issue"
)
@@ -351,69 +350,56 @@ func AddTestPullRequestTask(doer *user_model.User, repoID int64, branch string,
// checkIfPRContentChanged checks if diff to target branch has changed by push
// A commit can be considered to leave the PR untouched if the patch/diff with its merge base is unchanged
func checkIfPRContentChanged(ctx context.Context, pr *issues_model.PullRequest, oldCommitID, newCommitID string) (hasChanged bool, err error) {
- if err = pr.LoadHeadRepo(ctx); err != nil {
- return false, fmt.Errorf("LoadHeadRepo: %w", err)
- } else if pr.HeadRepo == nil {
- // corrupt data assumed changed
- return true, nil
- }
-
- if err = pr.LoadBaseRepo(ctx); err != nil {
- return false, fmt.Errorf("LoadBaseRepo: %w", err)
- }
-
- headGitRepo, err := git.OpenRepository(ctx, pr.HeadRepo.RepoPath())
+ tmpBasePath, err := createTemporaryRepo(ctx, pr)
if err != nil {
- return false, fmt.Errorf("OpenRepository: %w", err)
- }
- defer headGitRepo.Close()
-
- // Add a temporary remote.
- tmpRemote := "checkIfPRContentChanged-" + fmt.Sprint(time.Now().UnixNano())
- if err = headGitRepo.AddRemote(tmpRemote, pr.BaseRepo.RepoPath(), true); err != nil {
- return false, fmt.Errorf("AddRemote: %s/%s-%s: %w", pr.HeadRepo.OwnerName, pr.HeadRepo.Name, tmpRemote, err)
+ log.Error("CreateTemporaryRepo: %v", err)
+ return false, err
}
defer func() {
- if err := headGitRepo.RemoveRemote(tmpRemote); err != nil {
- log.Error("checkIfPRContentChanged: RemoveRemote: %s/%s-%s: %v", pr.HeadRepo.OwnerName, pr.HeadRepo.Name, tmpRemote, err)
+ if err := repo_module.RemoveTemporaryPath(tmpBasePath); err != nil {
+ log.Error("checkIfPRContentChanged: RemoveTemporaryPath: %s", err)
}
}()
- // To synchronize repo and get a base ref
- _, base, err := headGitRepo.GetMergeBase(tmpRemote, pr.BaseBranch, pr.HeadBranch)
+
+ tmpRepo, err := git.OpenRepository(ctx, tmpBasePath)
if err != nil {
- return false, fmt.Errorf("GetMergeBase: %w", err)
+ return false, fmt.Errorf("OpenRepository: %w", err)
}
+ defer tmpRepo.Close()
- diffBefore := &bytes.Buffer{}
- diffAfter := &bytes.Buffer{}
- if err := headGitRepo.GetDiffFromMergeBase(base, oldCommitID, diffBefore); err != nil {
- // If old commit not found, assume changed.
- log.Debug("GetDiffFromMergeBase: %v", err)
- return true, nil
- }
- if err := headGitRepo.GetDiffFromMergeBase(base, newCommitID, diffAfter); err != nil {
- // New commit should be found
- return false, fmt.Errorf("GetDiffFromMergeBase: %w", err)
+ // Find the merge-base
+ _, base, err := tmpRepo.GetMergeBase("", "base", "tracking")
+ if err != nil {
+ return false, fmt.Errorf("GetMergeBase: %w", err)
}
- diffBeforeLines := bufio.NewScanner(diffBefore)
- diffAfterLines := bufio.NewScanner(diffAfter)
-
- for diffBeforeLines.Scan() && diffAfterLines.Scan() {
- if strings.HasPrefix(diffBeforeLines.Text(), "index") && strings.HasPrefix(diffAfterLines.Text(), "index") {
- // file hashes can change without the diff changing
- continue
- } else if strings.HasPrefix(diffBeforeLines.Text(), "@@") && strings.HasPrefix(diffAfterLines.Text(), "@@") {
- // the location of the difference may change
- continue
- } else if !bytes.Equal(diffBeforeLines.Bytes(), diffAfterLines.Bytes()) {
+ cmd := git.NewCommand(ctx, "diff", "--name-only", "-z").AddDynamicArguments(newCommitID, oldCommitID, base)
+ stdoutReader, stdoutWriter, err := os.Pipe()
+ if err != nil {
+ return false, fmt.Errorf("unable to open pipe for to run diff: %w", err)
+ }
+
+ if err := cmd.Run(&git.RunOpts{
+ Dir: tmpBasePath,
+ Stdout: stdoutWriter,
+ PipelineFunc: func(ctx context.Context, cancel context.CancelFunc) error {
+ _ = stdoutWriter.Close()
+ defer func() {
+ _ = stdoutReader.Close()
+ }()
+ return util.IsEmptyReader(stdoutReader)
+ },
+ }); err != nil {
+ if err == util.ErrNotEmpty {
return true, nil
}
- }
- if diffBeforeLines.Scan() || diffAfterLines.Scan() {
- // Diffs not of equal length
- return true, nil
+ log.Error("Unable to run diff on %s %s %s in tempRepo for PR[%d]%s/%s...%s/%s: Error: %v",
+ newCommitID, oldCommitID, base,
+ pr.ID, pr.BaseRepo.FullName(), pr.BaseBranch, pr.HeadRepo.FullName(), pr.HeadBranch,
+ err)
+
+ return false, fmt.Errorf("Unable to run git diff --name-only -z %s %s %s: %w", newCommitID, oldCommitID, base, err)
}
return false, nil