aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorzeripath <art27@cantab.net>2023-01-28 17:56:16 +0000
committerGitHub <noreply@github.com>2023-01-28 17:56:16 +0000
commita9ba7379fe97fd8a1157ed076b268a3b4f2454b2 (patch)
tree54da3c82cad309b5555b0d86ce51ae20211f3bb0
parent6be1d71e2bf3fb92d3af759a96d9e1f47840ea29 (diff)
downloadgitea-a9ba7379fe97fd8a1157ed076b268a3b4f2454b2.tar.gz
gitea-a9ba7379fe97fd8a1157ed076b268a3b4f2454b2.zip
Improve checkIfPRContentChanged (#22611) (#22644)
Backport #22611 The code for checking if a commit has caused a change in a PR is extremely inefficient and affects the head repository instead of using a temporary repository. This PR therefore makes several significant improvements: * A temporary repo like that used in merging. * The diff code is then significant improved to use a three-way diff instead of comparing diffs (possibly binary) line-by-line - in memory... Ref #22578 Signed-off-by: Andrew Thornton <art27@cantab.net>
-rw-r--r--modules/util/io.go22
-rw-r--r--services/pull/pull.go92
2 files changed, 61 insertions, 53 deletions
diff --git a/modules/util/io.go b/modules/util/io.go
index d765e27733..f3a24736a8 100644
--- a/modules/util/io.go
+++ b/modules/util/io.go
@@ -5,6 +5,7 @@
package util
import (
+ "errors"
"io"
)
@@ -18,3 +19,24 @@ func ReadAtMost(r io.Reader, buf []byte) (n int, err error) {
}
return n, err
}
+
+// ErrNotEmpty is an error reported when there is a non-empty reader
+var ErrNotEmpty = errors.New("not-empty")
+
+// IsEmptyReader reads a reader and ensures it is empty
+func IsEmptyReader(r io.Reader) (err error) {
+ var buf [1]byte
+
+ for {
+ n, err := r.Read(buf[:])
+ if err != nil {
+ if err == io.EOF {
+ return nil
+ }
+ return err
+ }
+ if n > 0 {
+ return ErrNotEmpty
+ }
+ }
+}
diff --git a/services/pull/pull.go b/services/pull/pull.go
index 5f8bd6b671..fbfc31d4e7 100644
--- a/services/pull/pull.go
+++ b/services/pull/pull.go
@@ -5,14 +5,12 @@
package pull
import (
- "bufio"
- "bytes"
"context"
"fmt"
"io"
+ "os"
"regexp"
"strings"
- "time"
"code.gitea.io/gitea/models"
"code.gitea.io/gitea/models/db"
@@ -30,6 +28,7 @@ import (
repo_module "code.gitea.io/gitea/modules/repository"
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/sync"
+ "code.gitea.io/gitea/modules/util"
issue_service "code.gitea.io/gitea/services/issue"
)
@@ -352,69 +351,56 @@ func AddTestPullRequestTask(doer *user_model.User, repoID int64, branch string,
// checkIfPRContentChanged checks if diff to target branch has changed by push
// A commit can be considered to leave the PR untouched if the patch/diff with its merge base is unchanged
func checkIfPRContentChanged(ctx context.Context, pr *issues_model.PullRequest, oldCommitID, newCommitID string) (hasChanged bool, err error) {
- if err = pr.LoadHeadRepoCtx(ctx); err != nil {
- return false, fmt.Errorf("LoadHeadRepo: %w", err)
- } else if pr.HeadRepo == nil {
- // corrupt data assumed changed
- return true, nil
- }
-
- if err = pr.LoadBaseRepoCtx(ctx); err != nil {
- return false, fmt.Errorf("LoadBaseRepo: %w", err)
- }
-
- headGitRepo, err := git.OpenRepository(ctx, pr.HeadRepo.RepoPath())
+ tmpBasePath, err := createTemporaryRepo(ctx, pr)
if err != nil {
- return false, fmt.Errorf("OpenRepository: %w", err)
- }
- defer headGitRepo.Close()
-
- // Add a temporary remote.
- tmpRemote := "checkIfPRContentChanged-" + fmt.Sprint(time.Now().UnixNano())
- if err = headGitRepo.AddRemote(tmpRemote, pr.BaseRepo.RepoPath(), true); err != nil {
- return false, fmt.Errorf("AddRemote: %s/%s-%s: %w", pr.HeadRepo.OwnerName, pr.HeadRepo.Name, tmpRemote, err)
+ log.Error("CreateTemporaryRepo: %v", err)
+ return false, err
}
defer func() {
- if err := headGitRepo.RemoveRemote(tmpRemote); err != nil {
- log.Error("checkIfPRContentChanged: RemoveRemote: %s/%s-%s: %v", pr.HeadRepo.OwnerName, pr.HeadRepo.Name, tmpRemote, err)
+ if err := repo_module.RemoveTemporaryPath(tmpBasePath); err != nil {
+ log.Error("checkIfPRContentChanged: RemoveTemporaryPath: %s", err)
}
}()
- // To synchronize repo and get a base ref
- _, base, err := headGitRepo.GetMergeBase(tmpRemote, pr.BaseBranch, pr.HeadBranch)
+
+ tmpRepo, err := git.OpenRepository(ctx, tmpBasePath)
if err != nil {
- return false, fmt.Errorf("GetMergeBase: %w", err)
+ return false, fmt.Errorf("OpenRepository: %w", err)
}
+ defer tmpRepo.Close()
- diffBefore := &bytes.Buffer{}
- diffAfter := &bytes.Buffer{}
- if err := headGitRepo.GetDiffFromMergeBase(base, oldCommitID, diffBefore); err != nil {
- // If old commit not found, assume changed.
- log.Debug("GetDiffFromMergeBase: %v", err)
- return true, nil
- }
- if err := headGitRepo.GetDiffFromMergeBase(base, newCommitID, diffAfter); err != nil {
- // New commit should be found
- return false, fmt.Errorf("GetDiffFromMergeBase: %w", err)
+ // Find the merge-base
+ _, base, err := tmpRepo.GetMergeBase("", "base", "tracking")
+ if err != nil {
+ return false, fmt.Errorf("GetMergeBase: %w", err)
}
- diffBeforeLines := bufio.NewScanner(diffBefore)
- diffAfterLines := bufio.NewScanner(diffAfter)
-
- for diffBeforeLines.Scan() && diffAfterLines.Scan() {
- if strings.HasPrefix(diffBeforeLines.Text(), "index") && strings.HasPrefix(diffAfterLines.Text(), "index") {
- // file hashes can change without the diff changing
- continue
- } else if strings.HasPrefix(diffBeforeLines.Text(), "@@") && strings.HasPrefix(diffAfterLines.Text(), "@@") {
- // the location of the difference may change
- continue
- } else if !bytes.Equal(diffBeforeLines.Bytes(), diffAfterLines.Bytes()) {
+ cmd := git.NewCommand(ctx, "diff", "--name-only", "-z").AddDynamicArguments(newCommitID, oldCommitID, base)
+ stdoutReader, stdoutWriter, err := os.Pipe()
+ if err != nil {
+ return false, fmt.Errorf("unable to open pipe for to run diff: %w", err)
+ }
+
+ if err := cmd.Run(&git.RunOpts{
+ Dir: tmpBasePath,
+ Stdout: stdoutWriter,
+ PipelineFunc: func(ctx context.Context, cancel context.CancelFunc) error {
+ _ = stdoutWriter.Close()
+ defer func() {
+ _ = stdoutReader.Close()
+ }()
+ return util.IsEmptyReader(stdoutReader)
+ },
+ }); err != nil {
+ if err == util.ErrNotEmpty {
return true, nil
}
- }
- if diffBeforeLines.Scan() || diffAfterLines.Scan() {
- // Diffs not of equal length
- return true, nil
+ log.Error("Unable to run diff on %s %s %s in tempRepo for PR[%d]%s/%s...%s/%s: Error: %v",
+ newCommitID, oldCommitID, base,
+ pr.ID, pr.BaseRepo.FullName(), pr.BaseBranch, pr.HeadRepo.FullName(), pr.HeadBranch,
+ err)
+
+ return false, fmt.Errorf("Unable to run git diff --name-only -z %s %s %s: %w", newCommitID, oldCommitID, base, err)
}
return false, nil