From 3d8b5ad5f37f495e50d83bbad214f8061f8a5ac4 Mon Sep 17 00:00:00 2001 From: zeripath Date: Sat, 27 Feb 2021 18:46:14 +0000 Subject: [PATCH] Fix a couple of CommentAsPatch issues. (#14804) * CutDiffAroundLine makes the incorrect assumption that `---` and `+++` always represent part of the header of a diff. This PR adds a flag to its parsing to prevent this problem and adds a streaming parsing technique to CutDiffAroundLine using an io.pipe instead of just sending data to an unbounded buffer. Fix #14711 Signed-off-by: Andrew Thornton * Handle unquoted comment patch files When making comment patches unfortunately the patch does not always quote the filename This makes the diff --git header ambiguous again. This PR finally adds handling for ambiguity in to parse patch Fix #14812 Signed-off-by: Andrew Thornton * Add in testing for no error There is no way currently for CutDiffAroundLine in this test to cause an error however, it should still be tested. Signed-off-by: Andrew Thornton --- modules/git/diff.go | 30 ++++++--- modules/git/diff_test.go | 64 +++++++++++++++++-- modules/migrations/gitea_uploader.go | 22 ++++--- services/gitdiff/gitdiff.go | 95 ++++++++++++++++++++++++++-- services/gitdiff/gitdiff_test.go | 76 +++++++++++++++++++--- services/pull/review.go | 24 +++++-- 6 files changed, 270 insertions(+), 41 deletions(-) diff --git a/modules/git/diff.go b/modules/git/diff.go index 6f876e4964..6faad1c3c0 100644 --- a/modules/git/diff.go +++ b/modules/git/diff.go @@ -125,30 +125,39 @@ var hunkRegex = regexp.MustCompile(`^@@ -(?P[0-9]+)(,(?P[0-9]+ const cmdDiffHead = "diff --git " -func isHeader(lof string) bool { - return strings.HasPrefix(lof, cmdDiffHead) || strings.HasPrefix(lof, "---") || strings.HasPrefix(lof, "+++") +func isHeader(lof string, inHunk bool) bool { + return strings.HasPrefix(lof, cmdDiffHead) || (!inHunk && (strings.HasPrefix(lof, "---") || strings.HasPrefix(lof, "+++"))) } // CutDiffAroundLine cuts a diff of a file in way that only the given line + numberOfLine above it will be shown // it also recalculates hunks and adds the appropriate headers to the new diff. // Warning: Only one-file diffs are allowed. -func CutDiffAroundLine(originalDiff io.Reader, line int64, old bool, numbersOfLine int) string { +func CutDiffAroundLine(originalDiff io.Reader, line int64, old bool, numbersOfLine int) (string, error) { if line == 0 || numbersOfLine == 0 { // no line or num of lines => no diff - return "" + return "", nil } + scanner := bufio.NewScanner(originalDiff) hunk := make([]string, 0) + // begin is the start of the hunk containing searched line // end is the end of the hunk ... // currentLine is the line number on the side of the searched line (differentiated by old) // otherLine is the line number on the opposite side of the searched line (differentiated by old) var begin, end, currentLine, otherLine int64 var headerLines int + + inHunk := false + for scanner.Scan() { lof := scanner.Text() // Add header to enable parsing - if isHeader(lof) { + + if isHeader(lof, inHunk) { + if strings.HasPrefix(lof, cmdDiffHead) { + inHunk = false + } hunk = append(hunk, lof) headerLines++ } @@ -157,6 +166,7 @@ func CutDiffAroundLine(originalDiff io.Reader, line int64, old bool, numbersOfLi } // Detect "hunk" with contains commented lof if strings.HasPrefix(lof, "@@") { + inHunk = true // Already got our hunk. End of hunk detected! if len(hunk) > headerLines { break @@ -213,15 +223,19 @@ func CutDiffAroundLine(originalDiff io.Reader, line int64, old bool, numbersOfLi } } } + err := scanner.Err() + if err != nil { + return "", err + } // No hunk found if currentLine == 0 { - return "" + return "", nil } // headerLines + hunkLine (1) = totalNonCodeLines if len(hunk)-headerLines-1 <= numbersOfLine { // No need to cut the hunk => return existing hunk - return strings.Join(hunk, "\n") + return strings.Join(hunk, "\n"), nil } var oldBegin, oldNumOfLines, newBegin, newNumOfLines int64 if old { @@ -256,5 +270,5 @@ func CutDiffAroundLine(originalDiff io.Reader, line int64, old bool, numbersOfLi // construct the new hunk header newHunk[headerLines] = fmt.Sprintf("@@ -%d,%d +%d,%d @@", oldBegin, oldNumOfLines, newBegin, newNumOfLines) - return strings.Join(newHunk, "\n") + return strings.Join(newHunk, "\n"), nil } diff --git a/modules/git/diff_test.go b/modules/git/diff_test.go index 4258abfe50..363ff0b970 100644 --- a/modules/git/diff_test.go +++ b/modules/git/diff_test.go @@ -23,8 +23,28 @@ const exampleDiff = `diff --git a/README.md b/README.md + cut off + cut off` +const breakingDiff = `diff --git a/aaa.sql b/aaa.sql +index d8e4c92..19dc8ad 100644 +--- a/aaa.sql ++++ b/aaa.sql +@@ -1,9 +1,10 @@ + --some comment +--- some comment 5 ++--some coment 2 ++-- some comment 3 + create or replace procedure test(p1 varchar2) + is + begin +---new comment + dbms_output.put_line(p1); ++--some other comment + end; + / +` + func TestCutDiffAroundLine(t *testing.T) { - result := CutDiffAroundLine(strings.NewReader(exampleDiff), 4, false, 3) + result, err := CutDiffAroundLine(strings.NewReader(exampleDiff), 4, false, 3) + assert.NoError(t, err) resultByLine := strings.Split(result, "\n") assert.Len(t, resultByLine, 7) // Check if headers got transferred @@ -37,18 +57,50 @@ func TestCutDiffAroundLine(t *testing.T) { assert.Equal(t, "+ Build Status", resultByLine[4]) // Must be same result as before since old line 3 == new line 5 - newResult := CutDiffAroundLine(strings.NewReader(exampleDiff), 3, true, 3) + newResult, err := CutDiffAroundLine(strings.NewReader(exampleDiff), 3, true, 3) + assert.NoError(t, err) assert.Equal(t, result, newResult, "Must be same result as before since old line 3 == new line 5") - newResult = CutDiffAroundLine(strings.NewReader(exampleDiff), 6, false, 300) + newResult, err = CutDiffAroundLine(strings.NewReader(exampleDiff), 6, false, 300) + assert.NoError(t, err) assert.Equal(t, exampleDiff, newResult) - emptyResult := CutDiffAroundLine(strings.NewReader(exampleDiff), 6, false, 0) + emptyResult, err := CutDiffAroundLine(strings.NewReader(exampleDiff), 6, false, 0) + assert.NoError(t, err) assert.Empty(t, emptyResult) // Line is out of scope - emptyResult = CutDiffAroundLine(strings.NewReader(exampleDiff), 434, false, 0) + emptyResult, err = CutDiffAroundLine(strings.NewReader(exampleDiff), 434, false, 0) + assert.NoError(t, err) assert.Empty(t, emptyResult) + + // Handle minus diffs properly + minusDiff, err := CutDiffAroundLine(strings.NewReader(breakingDiff), 2, false, 4) + assert.NoError(t, err) + + expected := `diff --git a/aaa.sql b/aaa.sql +--- a/aaa.sql ++++ b/aaa.sql +@@ -1,9 +1,10 @@ + --some comment +--- some comment 5 ++--some coment 2` + assert.Equal(t, expected, minusDiff) + + // Handle minus diffs properly + minusDiff, err = CutDiffAroundLine(strings.NewReader(breakingDiff), 3, false, 4) + assert.NoError(t, err) + + expected = `diff --git a/aaa.sql b/aaa.sql +--- a/aaa.sql ++++ b/aaa.sql +@@ -1,9 +1,10 @@ + --some comment +--- some comment 5 ++--some coment 2 ++-- some comment 3` + + assert.Equal(t, expected, minusDiff) } func BenchmarkCutDiffAroundLine(b *testing.B) { @@ -69,7 +121,7 @@ func ExampleCutDiffAroundLine() { Docker Pulls + cut off + cut off` - result := CutDiffAroundLine(strings.NewReader(diff), 4, false, 3) + result, _ := CutDiffAroundLine(strings.NewReader(diff), 4, false, 3) println(result) } diff --git a/modules/migrations/gitea_uploader.go b/modules/migrations/gitea_uploader.go index 3be49b5c6c..aa1ea4bc09 100644 --- a/modules/migrations/gitea_uploader.go +++ b/modules/migrations/gitea_uploader.go @@ -6,7 +6,6 @@ package migrations import ( - "bytes" "context" "fmt" "io" @@ -802,13 +801,20 @@ func (g *GiteaLocalUploader) CreateReviews(reviews ...*base.Review) error { } var patch string - patchBuf := new(bytes.Buffer) - if err := git.GetRepoRawDiffForFile(g.gitRepo, pr.MergeBase, headCommitID, git.RawDiffNormal, comment.TreePath, patchBuf); err != nil { - // We should ignore the error since the commit maybe removed when force push to the pull request - log.Warn("GetRepoRawDiffForFile failed when migrating [%s, %s, %s, %s]: %v", g.gitRepo.Path, pr.MergeBase, headCommitID, comment.TreePath, err) - } else { - patch = git.CutDiffAroundLine(patchBuf, int64((&models.Comment{Line: int64(line + comment.Position - 1)}).UnsignedLine()), line < 0, setting.UI.CodeCommentLines) - } + reader, writer := io.Pipe() + defer func() { + _ = reader.Close() + _ = writer.Close() + }() + go func() { + if err := git.GetRepoRawDiffForFile(g.gitRepo, pr.MergeBase, headCommitID, git.RawDiffNormal, comment.TreePath, writer); err != nil { + // We should ignore the error since the commit maybe removed when force push to the pull request + log.Warn("GetRepoRawDiffForFile failed when migrating [%s, %s, %s, %s]: %v", g.gitRepo.Path, pr.MergeBase, headCommitID, comment.TreePath, err) + } + _ = writer.Close() + }() + + patch, _ = git.CutDiffAroundLine(reader, int64((&models.Comment{Line: int64(line + comment.Position - 1)}).UnsignedLine()), line < 0, setting.UI.CodeCommentLines) var c = models.Comment{ Type: models.CommentTypeCode, diff --git a/services/gitdiff/gitdiff.go b/services/gitdiff/gitdiff.go index d5c3923516..9f038c8744 100644 --- a/services/gitdiff/gitdiff.go +++ b/services/gitdiff/gitdiff.go @@ -583,6 +583,7 @@ type DiffFile struct { IsBin bool IsLFSFile bool IsRenamed bool + IsAmbiguous bool IsSubmodule bool Sections []*DiffSection IsIncomplete bool @@ -776,12 +777,32 @@ parsingLoop: if strings.HasSuffix(line, " 160000\n") { curFile.IsSubmodule = true } + case strings.HasPrefix(line, "rename from "): + curFile.IsRenamed = true + curFile.Type = DiffFileRename + if curFile.IsAmbiguous { + curFile.OldName = line[len("rename from ") : len(line)-1] + } + case strings.HasPrefix(line, "rename to "): + curFile.IsRenamed = true + curFile.Type = DiffFileRename + if curFile.IsAmbiguous { + curFile.Name = line[len("rename to ") : len(line)-1] + curFile.IsAmbiguous = false + } case strings.HasPrefix(line, "copy from "): curFile.IsRenamed = true curFile.Type = DiffFileCopy + if curFile.IsAmbiguous { + curFile.OldName = line[len("copy from ") : len(line)-1] + } case strings.HasPrefix(line, "copy to "): curFile.IsRenamed = true curFile.Type = DiffFileCopy + if curFile.IsAmbiguous { + curFile.Name = line[len("copy to ") : len(line)-1] + curFile.IsAmbiguous = false + } case strings.HasPrefix(line, "new file"): curFile.Type = DiffFileAdd curFile.IsCreated = true @@ -803,9 +824,35 @@ parsingLoop: case strings.HasPrefix(line, "Binary"): curFile.IsBin = true case strings.HasPrefix(line, "--- "): - // Do nothing with this line + // Handle ambiguous filenames + if curFile.IsAmbiguous { + if len(line) > 6 && line[4] == 'a' { + curFile.OldName = line[6 : len(line)-1] + if line[len(line)-2] == '\t' { + curFile.OldName = curFile.OldName[:len(curFile.OldName)-1] + } + } else { + curFile.OldName = "" + } + } + // Otherwise do nothing with this line case strings.HasPrefix(line, "+++ "): - // Do nothing with this line + // Handle ambiguous filenames + if curFile.IsAmbiguous { + if len(line) > 6 && line[4] == 'b' { + curFile.Name = line[6 : len(line)-1] + if line[len(line)-2] == '\t' { + curFile.Name = curFile.Name[:len(curFile.Name)-1] + } + if curFile.OldName == "" { + curFile.OldName = curFile.Name + } + } else { + curFile.Name = curFile.OldName + } + curFile.IsAmbiguous = false + } + // Otherwise do nothing with this line, but now switch to parsing hunks lineBytes, isFragment, err := parseHunks(curFile, maxLines, maxLineCharacters, input) diff.TotalAddition += curFile.Addition diff.TotalDeletion += curFile.Deletion @@ -1056,13 +1103,33 @@ func createDiffFile(diff *Diff, line string) *DiffFile { rd := strings.NewReader(line[len(cmdDiffHead):] + " ") curFile.Type = DiffFileChange - curFile.OldName = readFileName(rd) - curFile.Name = readFileName(rd) + oldNameAmbiguity := false + newNameAmbiguity := false + + curFile.OldName, oldNameAmbiguity = readFileName(rd) + curFile.Name, newNameAmbiguity = readFileName(rd) + if oldNameAmbiguity && newNameAmbiguity { + curFile.IsAmbiguous = true + // OK we should bet that the oldName and the newName are the same if they can be made to be same + // So we need to start again ... + if (len(line)-len(cmdDiffHead)-1)%2 == 0 { + // diff --git a/b b/b b/b b/b b/b b/b + // + midpoint := (len(line) + len(cmdDiffHead) - 1) / 2 + new, old := line[len(cmdDiffHead):midpoint], line[midpoint+1:] + if len(new) > 2 && len(old) > 2 && new[2:] == old[2:] { + curFile.OldName = old[2:] + curFile.Name = old[2:] + } + } + } + curFile.IsRenamed = curFile.Name != curFile.OldName return curFile } -func readFileName(rd *strings.Reader) string { +func readFileName(rd *strings.Reader) (string, bool) { + ambiguity := false var name string char, _ := rd.ReadByte() _ = rd.UnreadByte() @@ -1072,9 +1139,24 @@ func readFileName(rd *strings.Reader) string { name = name[1:] } } else { + // This technique is potentially ambiguous it may not be possible to uniquely identify the filenames from the diff line alone + ambiguity = true fmt.Fscanf(rd, "%s ", &name) + char, _ := rd.ReadByte() + _ = rd.UnreadByte() + for !(char == 0 || char == '"' || char == 'b') { + var suffix string + fmt.Fscanf(rd, "%s ", &suffix) + name += " " + suffix + char, _ = rd.ReadByte() + _ = rd.UnreadByte() + } + } + if len(name) < 2 { + log.Error("Unable to determine name from reader: %v", rd) + return "", true } - return name[2:] + return name[2:], ambiguity } // GetDiffRange builds a Diff between two commits of a repository. @@ -1191,6 +1273,7 @@ func CommentAsDiff(c *models.Comment) (*Diff, error) { diff, err := ParsePatch(setting.Git.MaxGitDiffLines, setting.Git.MaxGitDiffLineCharacters, setting.Git.MaxGitDiffFiles, strings.NewReader(c.Patch)) if err != nil { + log.Error("Unable to parse patch: %v", err) return nil, err } if len(diff.Files) == 0 { diff --git a/services/gitdiff/gitdiff_test.go b/services/gitdiff/gitdiff_test.go index a832a5d94f..2f92a01694 100644 --- a/services/gitdiff/gitdiff_test.go +++ b/services/gitdiff/gitdiff_test.go @@ -208,6 +208,66 @@ rename to a b/a a/file b/b file oldFilename: "a b/file b/a a/file", filename: "a b/a a/file b/b file", }, + { + name: "ambiguous deleted", + gitdiff: `diff --git a/b b/b b/b b/b +deleted file mode 100644 +index 92e798b..0000000 +--- a/b b/b` + "\t" + ` ++++ /dev/null +@@ -1 +0,0 @@ +-b b/b +`, + oldFilename: "b b/b", + filename: "b b/b", + addition: 0, + deletion: 1, + }, + { + name: "ambiguous addition", + gitdiff: `diff --git a/b b/b b/b b/b +new file mode 100644 +index 0000000..92e798b +--- /dev/null ++++ b/b b/b` + "\t" + ` +@@ -0,0 +1 @@ ++b b/b +`, + oldFilename: "b b/b", + filename: "b b/b", + addition: 1, + deletion: 0, + }, + { + name: "rename", + gitdiff: `diff --git a/b b/b b/b b/b b/b b/b +similarity index 100% +rename from b b/b b/b b/b b/b +rename to b +`, + oldFilename: "b b/b b/b b/b b/b", + filename: "b", + }, + { + name: "ambiguous 1", + gitdiff: `diff --git a/b b/b b/b b/b b/b b/b +similarity index 100% +rename from b b/b b/b b/b b/b +rename to b +`, + oldFilename: "b b/b b/b b/b b/b", + filename: "b", + }, + { + name: "ambiguous 2", + gitdiff: `diff --git a/b b/b b/b b/b b/b b/b +similarity index 100% +rename from b b/b b/b b/b +rename to b b/b +`, + oldFilename: "b b/b b/b b/b", + filename: "b b/b", + }, { name: "minuses-and-pluses", gitdiff: `diff --git a/minuses-and-pluses b/minuses-and-pluses @@ -235,32 +295,32 @@ index 6961180..9ba1a00 100644 t.Run(testcase.name, func(t *testing.T) { got, err := ParsePatch(setting.Git.MaxGitDiffLines, setting.Git.MaxGitDiffLineCharacters, setting.Git.MaxGitDiffFiles, strings.NewReader(testcase.gitdiff)) if (err != nil) != testcase.wantErr { - t.Errorf("ParsePatch() error = %v, wantErr %v", err, testcase.wantErr) + t.Errorf("ParsePatch(%q) error = %v, wantErr %v", testcase.name, err, testcase.wantErr) return } gotMarshaled, _ := json.MarshalIndent(got, " ", " ") if got.NumFiles != 1 { - t.Errorf("ParsePath() did not receive 1 file:\n%s", string(gotMarshaled)) + t.Errorf("ParsePath(%q) did not receive 1 file:\n%s", testcase.name, string(gotMarshaled)) return } if got.TotalAddition != testcase.addition { - t.Errorf("ParsePath() does not have correct totalAddition %d, wanted %d", got.TotalAddition, testcase.addition) + t.Errorf("ParsePath(%q) does not have correct totalAddition %d, wanted %d", testcase.name, got.TotalAddition, testcase.addition) } if got.TotalDeletion != testcase.deletion { - t.Errorf("ParsePath() did not have correct totalDeletion %d, wanted %d", got.TotalDeletion, testcase.deletion) + t.Errorf("ParsePath(%q) did not have correct totalDeletion %d, wanted %d", testcase.name, got.TotalDeletion, testcase.deletion) } file := got.Files[0] if file.Addition != testcase.addition { - t.Errorf("ParsePath() does not have correct file addition %d, wanted %d", file.Addition, testcase.addition) + t.Errorf("ParsePath(%q) does not have correct file addition %d, wanted %d", testcase.name, file.Addition, testcase.addition) } if file.Deletion != testcase.deletion { - t.Errorf("ParsePath() did not have correct file deletion %d, wanted %d", file.Deletion, testcase.deletion) + t.Errorf("ParsePath(%q) did not have correct file deletion %d, wanted %d", testcase.name, file.Deletion, testcase.deletion) } if file.OldName != testcase.oldFilename { - t.Errorf("ParsePath() did not have correct OldName %s, wanted %s", file.OldName, testcase.oldFilename) + t.Errorf("ParsePath(%q) did not have correct OldName %q, wanted %q", testcase.name, file.OldName, testcase.oldFilename) } if file.Name != testcase.filename { - t.Errorf("ParsePath() did not have correct Name %s, wanted %s", file.Name, testcase.filename) + t.Errorf("ParsePath(%q) did not have correct Name %q, wanted %q", testcase.name, file.Name, testcase.filename) } }) } diff --git a/services/pull/review.go b/services/pull/review.go index 4e77e11daa..4b647722fc 100644 --- a/services/pull/review.go +++ b/services/pull/review.go @@ -6,13 +6,14 @@ package pull import ( - "bytes" "fmt" + "io" "regexp" "strings" "code.gitea.io/gitea/models" "code.gitea.io/gitea/modules/git" + "code.gitea.io/gitea/modules/log" "code.gitea.io/gitea/modules/notification" "code.gitea.io/gitea/modules/setting" ) @@ -179,11 +180,24 @@ func createCodeComment(doer *models.User, repo *models.Repository, issue *models if len(commitID) == 0 { commitID = headCommitID } - patchBuf := new(bytes.Buffer) - if err := git.GetRepoRawDiffForFile(gitRepo, pr.MergeBase, headCommitID, git.RawDiffNormal, treePath, patchBuf); err != nil { - return nil, fmt.Errorf("GetRawDiffForLine[%s, %s, %s, %s]: %v", gitRepo.Path, pr.MergeBase, headCommitID, treePath, err) + reader, writer := io.Pipe() + defer func() { + _ = reader.Close() + _ = writer.Close() + }() + go func() { + if err := git.GetRepoRawDiffForFile(gitRepo, pr.MergeBase, headCommitID, git.RawDiffNormal, treePath, writer); err != nil { + _ = writer.CloseWithError(fmt.Errorf("GetRawDiffForLine[%s, %s, %s, %s]: %v", gitRepo.Path, pr.MergeBase, headCommitID, treePath, err)) + return + } + _ = writer.Close() + }() + + patch, err = git.CutDiffAroundLine(reader, int64((&models.Comment{Line: line}).UnsignedLine()), line < 0, setting.UI.CodeCommentLines) + if err != nil { + log.Error("Error whilst generating patch: %v", err) + return nil, err } - patch = git.CutDiffAroundLine(patchBuf, int64((&models.Comment{Line: line}).UnsignedLine()), line < 0, setting.UI.CodeCommentLines) } return models.CreateComment(&models.CreateCommentOptions{ Type: models.CommentTypeCode, -- 2.39.5