diff options
author | Lunny Xiao <xiaolunwen@gmail.com> | 2020-01-28 16:02:03 +0800 |
---|---|---|
committer | zeripath <art27@cantab.net> | 2020-01-28 08:02:03 +0000 |
commit | e8860ef4f9fe84aac856e354a897734aac7c279b (patch) | |
tree | 09b7248b5115972f0f1e8be9f5b9dcdc7d7f8d72 /modules | |
parent | 1019913eaba0e8ad3a67174a1a13c673ee832406 (diff) | |
download | gitea-e8860ef4f9fe84aac856e354a897734aac7c279b.tar.gz gitea-e8860ef4f9fe84aac856e354a897734aac7c279b.zip |
Some refactor on git diff and ignore getting commit information failed on migrating pull request review comments (#9996)
* Some refactor on git diff and ignore getting commit information failed on migrating pull request review comments
* fix test
* fix lint
* Change error log to warn
Diffstat (limited to 'modules')
-rw-r--r-- | modules/git/diff.go | 260 | ||||
-rw-r--r-- | modules/git/diff_test.go | 82 | ||||
-rw-r--r-- | modules/migrations/gitea.go | 16 |
3 files changed, 351 insertions, 7 deletions
diff --git a/modules/git/diff.go b/modules/git/diff.go new file mode 100644 index 0000000000..6f876e4964 --- /dev/null +++ b/modules/git/diff.go @@ -0,0 +1,260 @@ +// Copyright 2020 The Gitea Authors. All rights reserved. +// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file. + +package git + +import ( + "bufio" + "bytes" + "context" + "fmt" + "io" + "os/exec" + "regexp" + "strconv" + "strings" + + "code.gitea.io/gitea/modules/process" +) + +// RawDiffType type of a raw diff. +type RawDiffType string + +// RawDiffType possible values. +const ( + RawDiffNormal RawDiffType = "diff" + RawDiffPatch RawDiffType = "patch" +) + +// GetRawDiff dumps diff results of repository in given commit ID to io.Writer. +func GetRawDiff(repoPath, commitID string, diffType RawDiffType, writer io.Writer) error { + return GetRawDiffForFile(repoPath, "", commitID, diffType, "", writer) +} + +// GetRawDiffForFile dumps diff results of file in given commit ID to io.Writer. +func GetRawDiffForFile(repoPath, startCommit, endCommit string, diffType RawDiffType, file string, writer io.Writer) error { + repo, err := OpenRepository(repoPath) + if err != nil { + return fmt.Errorf("OpenRepository: %v", err) + } + defer repo.Close() + + return GetRepoRawDiffForFile(repo, startCommit, endCommit, diffType, file, writer) +} + +// GetRepoRawDiffForFile dumps diff results of file in given commit ID to io.Writer according given repository +func GetRepoRawDiffForFile(repo *Repository, startCommit, endCommit string, diffType RawDiffType, file string, writer io.Writer) error { + commit, err := repo.GetCommit(endCommit) + if err != nil { + return fmt.Errorf("GetCommit: %v", err) + } + fileArgs := make([]string, 0) + if len(file) > 0 { + fileArgs = append(fileArgs, "--", file) + } + // FIXME: graceful: These commands should have a timeout + ctx, cancel := context.WithCancel(DefaultContext) + defer cancel() + + var cmd *exec.Cmd + switch diffType { + case RawDiffNormal: + if len(startCommit) != 0 { + cmd = exec.CommandContext(ctx, GitExecutable, append([]string{"diff", "-M", startCommit, endCommit}, fileArgs...)...) + } else if commit.ParentCount() == 0 { + cmd = exec.CommandContext(ctx, GitExecutable, append([]string{"show", endCommit}, fileArgs...)...) + } else { + c, _ := commit.Parent(0) + cmd = exec.CommandContext(ctx, GitExecutable, append([]string{"diff", "-M", c.ID.String(), endCommit}, fileArgs...)...) + } + case RawDiffPatch: + if len(startCommit) != 0 { + query := fmt.Sprintf("%s...%s", endCommit, startCommit) + cmd = exec.CommandContext(ctx, GitExecutable, append([]string{"format-patch", "--no-signature", "--stdout", "--root", query}, fileArgs...)...) + } else if commit.ParentCount() == 0 { + cmd = exec.CommandContext(ctx, GitExecutable, append([]string{"format-patch", "--no-signature", "--stdout", "--root", endCommit}, fileArgs...)...) + } else { + c, _ := commit.Parent(0) + query := fmt.Sprintf("%s...%s", endCommit, c.ID.String()) + cmd = exec.CommandContext(ctx, GitExecutable, append([]string{"format-patch", "--no-signature", "--stdout", query}, fileArgs...)...) + } + default: + return fmt.Errorf("invalid diffType: %s", diffType) + } + + stderr := new(bytes.Buffer) + + cmd.Dir = repo.Path + cmd.Stdout = writer + cmd.Stderr = stderr + pid := process.GetManager().Add(fmt.Sprintf("GetRawDiffForFile: [repo_path: %s]", repo.Path), cancel) + defer process.GetManager().Remove(pid) + + if err = cmd.Run(); err != nil { + return fmt.Errorf("Run: %v - %s", err, stderr) + } + return nil +} + +// ParseDiffHunkString parse the diffhunk content and return +func ParseDiffHunkString(diffhunk string) (leftLine, leftHunk, rightLine, righHunk int) { + ss := strings.Split(diffhunk, "@@") + ranges := strings.Split(ss[1][1:], " ") + leftRange := strings.Split(ranges[0], ",") + leftLine, _ = strconv.Atoi(leftRange[0][1:]) + if len(leftRange) > 1 { + leftHunk, _ = strconv.Atoi(leftRange[1]) + } + if len(ranges) > 1 { + rightRange := strings.Split(ranges[1], ",") + rightLine, _ = strconv.Atoi(rightRange[0]) + if len(rightRange) > 1 { + righHunk, _ = strconv.Atoi(rightRange[1]) + } + } else { + log("Parse line number failed: %v", diffhunk) + rightLine = leftLine + righHunk = leftHunk + } + return +} + +// Example: @@ -1,8 +1,9 @@ => [..., 1, 8, 1, 9] +var hunkRegex = regexp.MustCompile(`^@@ -(?P<beginOld>[0-9]+)(,(?P<endOld>[0-9]+))? \+(?P<beginNew>[0-9]+)(,(?P<endNew>[0-9]+))? @@`) + +const cmdDiffHead = "diff --git " + +func isHeader(lof string) bool { + return strings.HasPrefix(lof, cmdDiffHead) || strings.HasPrefix(lof, "---") || strings.HasPrefix(lof, "+++") +} + +// CutDiffAroundLine cuts a diff of a file in way that only the given line + numberOfLine above it will be shown +// it also recalculates hunks and adds the appropriate headers to the new diff. +// Warning: Only one-file diffs are allowed. +func CutDiffAroundLine(originalDiff io.Reader, line int64, old bool, numbersOfLine int) string { + if line == 0 || numbersOfLine == 0 { + // no line or num of lines => no diff + return "" + } + scanner := bufio.NewScanner(originalDiff) + hunk := make([]string, 0) + // begin is the start of the hunk containing searched line + // end is the end of the hunk ... + // currentLine is the line number on the side of the searched line (differentiated by old) + // otherLine is the line number on the opposite side of the searched line (differentiated by old) + var begin, end, currentLine, otherLine int64 + var headerLines int + for scanner.Scan() { + lof := scanner.Text() + // Add header to enable parsing + if isHeader(lof) { + hunk = append(hunk, lof) + headerLines++ + } + if currentLine > line { + break + } + // Detect "hunk" with contains commented lof + if strings.HasPrefix(lof, "@@") { + // Already got our hunk. End of hunk detected! + if len(hunk) > headerLines { + break + } + // A map with named groups of our regex to recognize them later more easily + submatches := hunkRegex.FindStringSubmatch(lof) + groups := make(map[string]string) + for i, name := range hunkRegex.SubexpNames() { + if i != 0 && name != "" { + groups[name] = submatches[i] + } + } + if old { + begin, _ = strconv.ParseInt(groups["beginOld"], 10, 64) + end, _ = strconv.ParseInt(groups["endOld"], 10, 64) + // init otherLine with begin of opposite side + otherLine, _ = strconv.ParseInt(groups["beginNew"], 10, 64) + } else { + begin, _ = strconv.ParseInt(groups["beginNew"], 10, 64) + if groups["endNew"] != "" { + end, _ = strconv.ParseInt(groups["endNew"], 10, 64) + } else { + end = 0 + } + // init otherLine with begin of opposite side + otherLine, _ = strconv.ParseInt(groups["beginOld"], 10, 64) + } + end += begin // end is for real only the number of lines in hunk + // lof is between begin and end + if begin <= line && end >= line { + hunk = append(hunk, lof) + currentLine = begin + continue + } + } else if len(hunk) > headerLines { + hunk = append(hunk, lof) + // Count lines in context + switch lof[0] { + case '+': + if !old { + currentLine++ + } else { + otherLine++ + } + case '-': + if old { + currentLine++ + } else { + otherLine++ + } + default: + currentLine++ + otherLine++ + } + } + } + + // No hunk found + if currentLine == 0 { + return "" + } + // headerLines + hunkLine (1) = totalNonCodeLines + if len(hunk)-headerLines-1 <= numbersOfLine { + // No need to cut the hunk => return existing hunk + return strings.Join(hunk, "\n") + } + var oldBegin, oldNumOfLines, newBegin, newNumOfLines int64 + if old { + oldBegin = currentLine + newBegin = otherLine + } else { + oldBegin = otherLine + newBegin = currentLine + } + // headers + hunk header + newHunk := make([]string, headerLines) + // transfer existing headers + copy(newHunk, hunk[:headerLines]) + // transfer last n lines + newHunk = append(newHunk, hunk[len(hunk)-numbersOfLine-1:]...) + // calculate newBegin, ... by counting lines + for i := len(hunk) - 1; i >= len(hunk)-numbersOfLine; i-- { + switch hunk[i][0] { + case '+': + newBegin-- + newNumOfLines++ + case '-': + oldBegin-- + oldNumOfLines++ + default: + oldBegin-- + newBegin-- + newNumOfLines++ + oldNumOfLines++ + } + } + // construct the new hunk header + newHunk[headerLines] = fmt.Sprintf("@@ -%d,%d +%d,%d @@", + oldBegin, oldNumOfLines, newBegin, newNumOfLines) + return strings.Join(newHunk, "\n") +} diff --git a/modules/git/diff_test.go b/modules/git/diff_test.go new file mode 100644 index 0000000000..4258abfe50 --- /dev/null +++ b/modules/git/diff_test.go @@ -0,0 +1,82 @@ +// Copyright 2020 The Gitea Authors. All rights reserved. +// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file. + +package git + +import ( + "strings" + "testing" + + "github.com/stretchr/testify/assert" +) + +const exampleDiff = `diff --git a/README.md b/README.md +--- a/README.md ++++ b/README.md +@@ -1,3 +1,6 @@ + # gitea-github-migrator ++ ++ Build Status +- Latest Release + Docker Pulls ++ cut off ++ cut off` + +func TestCutDiffAroundLine(t *testing.T) { + result := CutDiffAroundLine(strings.NewReader(exampleDiff), 4, false, 3) + resultByLine := strings.Split(result, "\n") + assert.Len(t, resultByLine, 7) + // Check if headers got transferred + assert.Equal(t, "diff --git a/README.md b/README.md", resultByLine[0]) + assert.Equal(t, "--- a/README.md", resultByLine[1]) + assert.Equal(t, "+++ b/README.md", resultByLine[2]) + // Check if hunk header is calculated correctly + assert.Equal(t, "@@ -2,2 +3,2 @@", resultByLine[3]) + // Check if line got transferred + assert.Equal(t, "+ Build Status", resultByLine[4]) + + // Must be same result as before since old line 3 == new line 5 + newResult := CutDiffAroundLine(strings.NewReader(exampleDiff), 3, true, 3) + assert.Equal(t, result, newResult, "Must be same result as before since old line 3 == new line 5") + + newResult = CutDiffAroundLine(strings.NewReader(exampleDiff), 6, false, 300) + assert.Equal(t, exampleDiff, newResult) + + emptyResult := CutDiffAroundLine(strings.NewReader(exampleDiff), 6, false, 0) + assert.Empty(t, emptyResult) + + // Line is out of scope + emptyResult = CutDiffAroundLine(strings.NewReader(exampleDiff), 434, false, 0) + assert.Empty(t, emptyResult) +} + +func BenchmarkCutDiffAroundLine(b *testing.B) { + for n := 0; n < b.N; n++ { + CutDiffAroundLine(strings.NewReader(exampleDiff), 3, true, 3) + } +} + +func ExampleCutDiffAroundLine() { + const diff = `diff --git a/README.md b/README.md +--- a/README.md ++++ b/README.md +@@ -1,3 +1,6 @@ + # gitea-github-migrator ++ ++ Build Status +- Latest Release + Docker Pulls ++ cut off ++ cut off` + result := CutDiffAroundLine(strings.NewReader(diff), 4, false, 3) + println(result) +} + +func TestParseDiffHunkString(t *testing.T) { + leftLine, leftHunk, rightLine, rightHunk := ParseDiffHunkString("@@ -19,3 +19,5 @@ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER") + assert.EqualValues(t, 19, leftLine) + assert.EqualValues(t, 3, leftHunk) + assert.EqualValues(t, 19, rightLine) + assert.EqualValues(t, 5, rightHunk) +} diff --git a/modules/migrations/gitea.go b/modules/migrations/gitea.go index 96d47dc527..a087cafe9d 100644 --- a/modules/migrations/gitea.go +++ b/modules/migrations/gitea.go @@ -28,7 +28,6 @@ import ( "code.gitea.io/gitea/modules/setting" "code.gitea.io/gitea/modules/structs" "code.gitea.io/gitea/modules/timeutil" - "code.gitea.io/gitea/services/gitdiff" gouuid "github.com/satori/go.uuid" ) @@ -783,19 +782,22 @@ func (g *GiteaLocalUploader) CreateReviews(reviews ...*base.Review) error { } for _, comment := range review.Comments { + _, _, line, _ := git.ParseDiffHunkString(comment.DiffHunk) + headCommitID, err := g.gitRepo.GetRefCommitID(pr.GetGitRefName()) if err != nil { return fmt.Errorf("GetRefCommitID[%s]: %v", pr.GetGitRefName(), err) } + + var patch string patchBuf := new(bytes.Buffer) - if err := gitdiff.GetRawDiffForFile(g.gitRepo.Path, pr.MergeBase, headCommitID, gitdiff.RawDiffNormal, comment.TreePath, patchBuf); err != nil { - return fmt.Errorf("GetRawDiffForLine[%s, %s, %s, %s]: %v", g.gitRepo.Path, pr.MergeBase, headCommitID, comment.TreePath, err) + if err := git.GetRepoRawDiffForFile(g.gitRepo, pr.MergeBase, headCommitID, git.RawDiffNormal, comment.TreePath, patchBuf); err != nil { + // We should ignore the error since the commit maybe removed when force push to the pull request + log.Warn("GetRepoRawDiffForFile failed when migrating [%s, %s, %s, %s]: %v", g.gitRepo.Path, pr.MergeBase, headCommitID, comment.TreePath, err) + } else { + patch = git.CutDiffAroundLine(patchBuf, int64((&models.Comment{Line: int64(line + comment.Position - 1)}).UnsignedLine()), line < 0, setting.UI.CodeCommentLines) } - _, _, line, _ := gitdiff.ParseDiffHunkString(comment.DiffHunk) - - patch := gitdiff.CutDiffAroundLine(patchBuf, int64((&models.Comment{Line: int64(line + comment.Position - 1)}).UnsignedLine()), line < 0, setting.UI.CodeCommentLines) - var c = models.Comment{ Type: models.CommentTypeCode, PosterID: comment.PosterID, |