* CutDiffAroundLine makes the incorrect assumption that `---` and `+++` always represent part of the header of a diff. This PR adds a flag to its parsing to prevent this problem and adds a streaming parsing technique to CutDiffAroundLine using an io.pipe instead of just sending data to an unbounded buffer. Fix #14711 Signed-off-by: Andrew Thornton <art27@cantab.net> * Handle unquoted comment patch files When making comment patches unfortunately the patch does not always quote the filename This makes the diff --git header ambiguous again. This PR finally adds handling for ambiguity in to parse patch Fix #14812 Signed-off-by: Andrew Thornton <art27@cantab.net> * Add in testing for no error There is no way currently for CutDiffAroundLine in this test to cause an error however, it should still be tested. Signed-off-by: Andrew Thornton <art27@cantab.net>tags/v1.15.0-dev
@@ -125,30 +125,39 @@ var hunkRegex = regexp.MustCompile(`^@@ -(?P<beginOld>[0-9]+)(,(?P<endOld>[0-9]+ | |||
const cmdDiffHead = "diff --git " | |||
func isHeader(lof string) bool { | |||
return strings.HasPrefix(lof, cmdDiffHead) || strings.HasPrefix(lof, "---") || strings.HasPrefix(lof, "+++") | |||
func isHeader(lof string, inHunk bool) bool { | |||
return strings.HasPrefix(lof, cmdDiffHead) || (!inHunk && (strings.HasPrefix(lof, "---") || strings.HasPrefix(lof, "+++"))) | |||
} | |||
// CutDiffAroundLine cuts a diff of a file in way that only the given line + numberOfLine above it will be shown | |||
// it also recalculates hunks and adds the appropriate headers to the new diff. | |||
// Warning: Only one-file diffs are allowed. | |||
func CutDiffAroundLine(originalDiff io.Reader, line int64, old bool, numbersOfLine int) string { | |||
func CutDiffAroundLine(originalDiff io.Reader, line int64, old bool, numbersOfLine int) (string, error) { | |||
if line == 0 || numbersOfLine == 0 { | |||
// no line or num of lines => no diff | |||
return "" | |||
return "", nil | |||
} | |||
scanner := bufio.NewScanner(originalDiff) | |||
hunk := make([]string, 0) | |||
// begin is the start of the hunk containing searched line | |||
// end is the end of the hunk ... | |||
// currentLine is the line number on the side of the searched line (differentiated by old) | |||
// otherLine is the line number on the opposite side of the searched line (differentiated by old) | |||
var begin, end, currentLine, otherLine int64 | |||
var headerLines int | |||
inHunk := false | |||
for scanner.Scan() { | |||
lof := scanner.Text() | |||
// Add header to enable parsing | |||
if isHeader(lof) { | |||
if isHeader(lof, inHunk) { | |||
if strings.HasPrefix(lof, cmdDiffHead) { | |||
inHunk = false | |||
} | |||
hunk = append(hunk, lof) | |||
headerLines++ | |||
} | |||
@@ -157,6 +166,7 @@ func CutDiffAroundLine(originalDiff io.Reader, line int64, old bool, numbersOfLi | |||
} | |||
// Detect "hunk" with contains commented lof | |||
if strings.HasPrefix(lof, "@@") { | |||
inHunk = true | |||
// Already got our hunk. End of hunk detected! | |||
if len(hunk) > headerLines { | |||
break | |||
@@ -213,15 +223,19 @@ func CutDiffAroundLine(originalDiff io.Reader, line int64, old bool, numbersOfLi | |||
} | |||
} | |||
} | |||
err := scanner.Err() | |||
if err != nil { | |||
return "", err | |||
} | |||
// No hunk found | |||
if currentLine == 0 { | |||
return "" | |||
return "", nil | |||
} | |||
// headerLines + hunkLine (1) = totalNonCodeLines | |||
if len(hunk)-headerLines-1 <= numbersOfLine { | |||
// No need to cut the hunk => return existing hunk | |||
return strings.Join(hunk, "\n") | |||
return strings.Join(hunk, "\n"), nil | |||
} | |||
var oldBegin, oldNumOfLines, newBegin, newNumOfLines int64 | |||
if old { | |||
@@ -256,5 +270,5 @@ func CutDiffAroundLine(originalDiff io.Reader, line int64, old bool, numbersOfLi | |||
// construct the new hunk header | |||
newHunk[headerLines] = fmt.Sprintf("@@ -%d,%d +%d,%d @@", | |||
oldBegin, oldNumOfLines, newBegin, newNumOfLines) | |||
return strings.Join(newHunk, "\n") | |||
return strings.Join(newHunk, "\n"), nil | |||
} |
@@ -23,8 +23,28 @@ const exampleDiff = `diff --git a/README.md b/README.md | |||
+ cut off | |||
+ cut off` | |||
const breakingDiff = `diff --git a/aaa.sql b/aaa.sql | |||
index d8e4c92..19dc8ad 100644 | |||
--- a/aaa.sql | |||
+++ b/aaa.sql | |||
@@ -1,9 +1,10 @@ | |||
--some comment | |||
--- some comment 5 | |||
+--some coment 2 | |||
+-- some comment 3 | |||
create or replace procedure test(p1 varchar2) | |||
is | |||
begin | |||
---new comment | |||
dbms_output.put_line(p1); | |||
+--some other comment | |||
end; | |||
/ | |||
` | |||
func TestCutDiffAroundLine(t *testing.T) { | |||
result := CutDiffAroundLine(strings.NewReader(exampleDiff), 4, false, 3) | |||
result, err := CutDiffAroundLine(strings.NewReader(exampleDiff), 4, false, 3) | |||
assert.NoError(t, err) | |||
resultByLine := strings.Split(result, "\n") | |||
assert.Len(t, resultByLine, 7) | |||
// Check if headers got transferred | |||
@@ -37,18 +57,50 @@ func TestCutDiffAroundLine(t *testing.T) { | |||
assert.Equal(t, "+ Build Status", resultByLine[4]) | |||
// Must be same result as before since old line 3 == new line 5 | |||
newResult := CutDiffAroundLine(strings.NewReader(exampleDiff), 3, true, 3) | |||
newResult, err := CutDiffAroundLine(strings.NewReader(exampleDiff), 3, true, 3) | |||
assert.NoError(t, err) | |||
assert.Equal(t, result, newResult, "Must be same result as before since old line 3 == new line 5") | |||
newResult = CutDiffAroundLine(strings.NewReader(exampleDiff), 6, false, 300) | |||
newResult, err = CutDiffAroundLine(strings.NewReader(exampleDiff), 6, false, 300) | |||
assert.NoError(t, err) | |||
assert.Equal(t, exampleDiff, newResult) | |||
emptyResult := CutDiffAroundLine(strings.NewReader(exampleDiff), 6, false, 0) | |||
emptyResult, err := CutDiffAroundLine(strings.NewReader(exampleDiff), 6, false, 0) | |||
assert.NoError(t, err) | |||
assert.Empty(t, emptyResult) | |||
// Line is out of scope | |||
emptyResult = CutDiffAroundLine(strings.NewReader(exampleDiff), 434, false, 0) | |||
emptyResult, err = CutDiffAroundLine(strings.NewReader(exampleDiff), 434, false, 0) | |||
assert.NoError(t, err) | |||
assert.Empty(t, emptyResult) | |||
// Handle minus diffs properly | |||
minusDiff, err := CutDiffAroundLine(strings.NewReader(breakingDiff), 2, false, 4) | |||
assert.NoError(t, err) | |||
expected := `diff --git a/aaa.sql b/aaa.sql | |||
--- a/aaa.sql | |||
+++ b/aaa.sql | |||
@@ -1,9 +1,10 @@ | |||
--some comment | |||
--- some comment 5 | |||
+--some coment 2` | |||
assert.Equal(t, expected, minusDiff) | |||
// Handle minus diffs properly | |||
minusDiff, err = CutDiffAroundLine(strings.NewReader(breakingDiff), 3, false, 4) | |||
assert.NoError(t, err) | |||
expected = `diff --git a/aaa.sql b/aaa.sql | |||
--- a/aaa.sql | |||
+++ b/aaa.sql | |||
@@ -1,9 +1,10 @@ | |||
--some comment | |||
--- some comment 5 | |||
+--some coment 2 | |||
+-- some comment 3` | |||
assert.Equal(t, expected, minusDiff) | |||
} | |||
func BenchmarkCutDiffAroundLine(b *testing.B) { | |||
@@ -69,7 +121,7 @@ func ExampleCutDiffAroundLine() { | |||
Docker Pulls | |||
+ cut off | |||
+ cut off` | |||
result := CutDiffAroundLine(strings.NewReader(diff), 4, false, 3) | |||
result, _ := CutDiffAroundLine(strings.NewReader(diff), 4, false, 3) | |||
println(result) | |||
} | |||
@@ -6,7 +6,6 @@ | |||
package migrations | |||
import ( | |||
"bytes" | |||
"context" | |||
"fmt" | |||
"io" | |||
@@ -802,13 +801,20 @@ func (g *GiteaLocalUploader) CreateReviews(reviews ...*base.Review) error { | |||
} | |||
var patch string | |||
patchBuf := new(bytes.Buffer) | |||
if err := git.GetRepoRawDiffForFile(g.gitRepo, pr.MergeBase, headCommitID, git.RawDiffNormal, comment.TreePath, patchBuf); err != nil { | |||
// We should ignore the error since the commit maybe removed when force push to the pull request | |||
log.Warn("GetRepoRawDiffForFile failed when migrating [%s, %s, %s, %s]: %v", g.gitRepo.Path, pr.MergeBase, headCommitID, comment.TreePath, err) | |||
} else { | |||
patch = git.CutDiffAroundLine(patchBuf, int64((&models.Comment{Line: int64(line + comment.Position - 1)}).UnsignedLine()), line < 0, setting.UI.CodeCommentLines) | |||
} | |||
reader, writer := io.Pipe() | |||
defer func() { | |||
_ = reader.Close() | |||
_ = writer.Close() | |||
}() | |||
go func() { | |||
if err := git.GetRepoRawDiffForFile(g.gitRepo, pr.MergeBase, headCommitID, git.RawDiffNormal, comment.TreePath, writer); err != nil { | |||
// We should ignore the error since the commit maybe removed when force push to the pull request | |||
log.Warn("GetRepoRawDiffForFile failed when migrating [%s, %s, %s, %s]: %v", g.gitRepo.Path, pr.MergeBase, headCommitID, comment.TreePath, err) | |||
} | |||
_ = writer.Close() | |||
}() | |||
patch, _ = git.CutDiffAroundLine(reader, int64((&models.Comment{Line: int64(line + comment.Position - 1)}).UnsignedLine()), line < 0, setting.UI.CodeCommentLines) | |||
var c = models.Comment{ | |||
Type: models.CommentTypeCode, |
@@ -583,6 +583,7 @@ type DiffFile struct { | |||
IsBin bool | |||
IsLFSFile bool | |||
IsRenamed bool | |||
IsAmbiguous bool | |||
IsSubmodule bool | |||
Sections []*DiffSection | |||
IsIncomplete bool | |||
@@ -776,12 +777,32 @@ parsingLoop: | |||
if strings.HasSuffix(line, " 160000\n") { | |||
curFile.IsSubmodule = true | |||
} | |||
case strings.HasPrefix(line, "rename from "): | |||
curFile.IsRenamed = true | |||
curFile.Type = DiffFileRename | |||
if curFile.IsAmbiguous { | |||
curFile.OldName = line[len("rename from ") : len(line)-1] | |||
} | |||
case strings.HasPrefix(line, "rename to "): | |||
curFile.IsRenamed = true | |||
curFile.Type = DiffFileRename | |||
if curFile.IsAmbiguous { | |||
curFile.Name = line[len("rename to ") : len(line)-1] | |||
curFile.IsAmbiguous = false | |||
} | |||
case strings.HasPrefix(line, "copy from "): | |||
curFile.IsRenamed = true | |||
curFile.Type = DiffFileCopy | |||
if curFile.IsAmbiguous { | |||
curFile.OldName = line[len("copy from ") : len(line)-1] | |||
} | |||
case strings.HasPrefix(line, "copy to "): | |||
curFile.IsRenamed = true | |||
curFile.Type = DiffFileCopy | |||
if curFile.IsAmbiguous { | |||
curFile.Name = line[len("copy to ") : len(line)-1] | |||
curFile.IsAmbiguous = false | |||
} | |||
case strings.HasPrefix(line, "new file"): | |||
curFile.Type = DiffFileAdd | |||
curFile.IsCreated = true | |||
@@ -803,9 +824,35 @@ parsingLoop: | |||
case strings.HasPrefix(line, "Binary"): | |||
curFile.IsBin = true | |||
case strings.HasPrefix(line, "--- "): | |||
// Do nothing with this line | |||
// Handle ambiguous filenames | |||
if curFile.IsAmbiguous { | |||
if len(line) > 6 && line[4] == 'a' { | |||
curFile.OldName = line[6 : len(line)-1] | |||
if line[len(line)-2] == '\t' { | |||
curFile.OldName = curFile.OldName[:len(curFile.OldName)-1] | |||
} | |||
} else { | |||
curFile.OldName = "" | |||
} | |||
} | |||
// Otherwise do nothing with this line | |||
case strings.HasPrefix(line, "+++ "): | |||
// Do nothing with this line | |||
// Handle ambiguous filenames | |||
if curFile.IsAmbiguous { | |||
if len(line) > 6 && line[4] == 'b' { | |||
curFile.Name = line[6 : len(line)-1] | |||
if line[len(line)-2] == '\t' { | |||
curFile.Name = curFile.Name[:len(curFile.Name)-1] | |||
} | |||
if curFile.OldName == "" { | |||
curFile.OldName = curFile.Name | |||
} | |||
} else { | |||
curFile.Name = curFile.OldName | |||
} | |||
curFile.IsAmbiguous = false | |||
} | |||
// Otherwise do nothing with this line, but now switch to parsing hunks | |||
lineBytes, isFragment, err := parseHunks(curFile, maxLines, maxLineCharacters, input) | |||
diff.TotalAddition += curFile.Addition | |||
diff.TotalDeletion += curFile.Deletion | |||
@@ -1056,13 +1103,33 @@ func createDiffFile(diff *Diff, line string) *DiffFile { | |||
rd := strings.NewReader(line[len(cmdDiffHead):] + " ") | |||
curFile.Type = DiffFileChange | |||
curFile.OldName = readFileName(rd) | |||
curFile.Name = readFileName(rd) | |||
oldNameAmbiguity := false | |||
newNameAmbiguity := false | |||
curFile.OldName, oldNameAmbiguity = readFileName(rd) | |||
curFile.Name, newNameAmbiguity = readFileName(rd) | |||
if oldNameAmbiguity && newNameAmbiguity { | |||
curFile.IsAmbiguous = true | |||
// OK we should bet that the oldName and the newName are the same if they can be made to be same | |||
// So we need to start again ... | |||
if (len(line)-len(cmdDiffHead)-1)%2 == 0 { | |||
// diff --git a/b b/b b/b b/b b/b b/b | |||
// | |||
midpoint := (len(line) + len(cmdDiffHead) - 1) / 2 | |||
new, old := line[len(cmdDiffHead):midpoint], line[midpoint+1:] | |||
if len(new) > 2 && len(old) > 2 && new[2:] == old[2:] { | |||
curFile.OldName = old[2:] | |||
curFile.Name = old[2:] | |||
} | |||
} | |||
} | |||
curFile.IsRenamed = curFile.Name != curFile.OldName | |||
return curFile | |||
} | |||
func readFileName(rd *strings.Reader) string { | |||
func readFileName(rd *strings.Reader) (string, bool) { | |||
ambiguity := false | |||
var name string | |||
char, _ := rd.ReadByte() | |||
_ = rd.UnreadByte() | |||
@@ -1072,9 +1139,24 @@ func readFileName(rd *strings.Reader) string { | |||
name = name[1:] | |||
} | |||
} else { | |||
// This technique is potentially ambiguous it may not be possible to uniquely identify the filenames from the diff line alone | |||
ambiguity = true | |||
fmt.Fscanf(rd, "%s ", &name) | |||
char, _ := rd.ReadByte() | |||
_ = rd.UnreadByte() | |||
for !(char == 0 || char == '"' || char == 'b') { | |||
var suffix string | |||
fmt.Fscanf(rd, "%s ", &suffix) | |||
name += " " + suffix | |||
char, _ = rd.ReadByte() | |||
_ = rd.UnreadByte() | |||
} | |||
} | |||
if len(name) < 2 { | |||
log.Error("Unable to determine name from reader: %v", rd) | |||
return "", true | |||
} | |||
return name[2:] | |||
return name[2:], ambiguity | |||
} | |||
// GetDiffRange builds a Diff between two commits of a repository. | |||
@@ -1191,6 +1273,7 @@ func CommentAsDiff(c *models.Comment) (*Diff, error) { | |||
diff, err := ParsePatch(setting.Git.MaxGitDiffLines, | |||
setting.Git.MaxGitDiffLineCharacters, setting.Git.MaxGitDiffFiles, strings.NewReader(c.Patch)) | |||
if err != nil { | |||
log.Error("Unable to parse patch: %v", err) | |||
return nil, err | |||
} | |||
if len(diff.Files) == 0 { |
@@ -208,6 +208,66 @@ rename to a b/a a/file b/b file | |||
oldFilename: "a b/file b/a a/file", | |||
filename: "a b/a a/file b/b file", | |||
}, | |||
{ | |||
name: "ambiguous deleted", | |||
gitdiff: `diff --git a/b b/b b/b b/b | |||
deleted file mode 100644 | |||
index 92e798b..0000000 | |||
--- a/b b/b` + "\t" + ` | |||
+++ /dev/null | |||
@@ -1 +0,0 @@ | |||
-b b/b | |||
`, | |||
oldFilename: "b b/b", | |||
filename: "b b/b", | |||
addition: 0, | |||
deletion: 1, | |||
}, | |||
{ | |||
name: "ambiguous addition", | |||
gitdiff: `diff --git a/b b/b b/b b/b | |||
new file mode 100644 | |||
index 0000000..92e798b | |||
--- /dev/null | |||
+++ b/b b/b` + "\t" + ` | |||
@@ -0,0 +1 @@ | |||
+b b/b | |||
`, | |||
oldFilename: "b b/b", | |||
filename: "b b/b", | |||
addition: 1, | |||
deletion: 0, | |||
}, | |||
{ | |||
name: "rename", | |||
gitdiff: `diff --git a/b b/b b/b b/b b/b b/b | |||
similarity index 100% | |||
rename from b b/b b/b b/b b/b | |||
rename to b | |||
`, | |||
oldFilename: "b b/b b/b b/b b/b", | |||
filename: "b", | |||
}, | |||
{ | |||
name: "ambiguous 1", | |||
gitdiff: `diff --git a/b b/b b/b b/b b/b b/b | |||
similarity index 100% | |||
rename from b b/b b/b b/b b/b | |||
rename to b | |||
`, | |||
oldFilename: "b b/b b/b b/b b/b", | |||
filename: "b", | |||
}, | |||
{ | |||
name: "ambiguous 2", | |||
gitdiff: `diff --git a/b b/b b/b b/b b/b b/b | |||
similarity index 100% | |||
rename from b b/b b/b b/b | |||
rename to b b/b | |||
`, | |||
oldFilename: "b b/b b/b b/b", | |||
filename: "b b/b", | |||
}, | |||
{ | |||
name: "minuses-and-pluses", | |||
gitdiff: `diff --git a/minuses-and-pluses b/minuses-and-pluses | |||
@@ -235,32 +295,32 @@ index 6961180..9ba1a00 100644 | |||
t.Run(testcase.name, func(t *testing.T) { | |||
got, err := ParsePatch(setting.Git.MaxGitDiffLines, setting.Git.MaxGitDiffLineCharacters, setting.Git.MaxGitDiffFiles, strings.NewReader(testcase.gitdiff)) | |||
if (err != nil) != testcase.wantErr { | |||
t.Errorf("ParsePatch() error = %v, wantErr %v", err, testcase.wantErr) | |||
t.Errorf("ParsePatch(%q) error = %v, wantErr %v", testcase.name, err, testcase.wantErr) | |||
return | |||
} | |||
gotMarshaled, _ := json.MarshalIndent(got, " ", " ") | |||
if got.NumFiles != 1 { | |||
t.Errorf("ParsePath() did not receive 1 file:\n%s", string(gotMarshaled)) | |||
t.Errorf("ParsePath(%q) did not receive 1 file:\n%s", testcase.name, string(gotMarshaled)) | |||
return | |||
} | |||
if got.TotalAddition != testcase.addition { | |||
t.Errorf("ParsePath() does not have correct totalAddition %d, wanted %d", got.TotalAddition, testcase.addition) | |||
t.Errorf("ParsePath(%q) does not have correct totalAddition %d, wanted %d", testcase.name, got.TotalAddition, testcase.addition) | |||
} | |||
if got.TotalDeletion != testcase.deletion { | |||
t.Errorf("ParsePath() did not have correct totalDeletion %d, wanted %d", got.TotalDeletion, testcase.deletion) | |||
t.Errorf("ParsePath(%q) did not have correct totalDeletion %d, wanted %d", testcase.name, got.TotalDeletion, testcase.deletion) | |||
} | |||
file := got.Files[0] | |||
if file.Addition != testcase.addition { | |||
t.Errorf("ParsePath() does not have correct file addition %d, wanted %d", file.Addition, testcase.addition) | |||
t.Errorf("ParsePath(%q) does not have correct file addition %d, wanted %d", testcase.name, file.Addition, testcase.addition) | |||
} | |||
if file.Deletion != testcase.deletion { | |||
t.Errorf("ParsePath() did not have correct file deletion %d, wanted %d", file.Deletion, testcase.deletion) | |||
t.Errorf("ParsePath(%q) did not have correct file deletion %d, wanted %d", testcase.name, file.Deletion, testcase.deletion) | |||
} | |||
if file.OldName != testcase.oldFilename { | |||
t.Errorf("ParsePath() did not have correct OldName %s, wanted %s", file.OldName, testcase.oldFilename) | |||
t.Errorf("ParsePath(%q) did not have correct OldName %q, wanted %q", testcase.name, file.OldName, testcase.oldFilename) | |||
} | |||
if file.Name != testcase.filename { | |||
t.Errorf("ParsePath() did not have correct Name %s, wanted %s", file.Name, testcase.filename) | |||
t.Errorf("ParsePath(%q) did not have correct Name %q, wanted %q", testcase.name, file.Name, testcase.filename) | |||
} | |||
}) | |||
} |
@@ -6,13 +6,14 @@ | |||
package pull | |||
import ( | |||
"bytes" | |||
"fmt" | |||
"io" | |||
"regexp" | |||
"strings" | |||
"code.gitea.io/gitea/models" | |||
"code.gitea.io/gitea/modules/git" | |||
"code.gitea.io/gitea/modules/log" | |||
"code.gitea.io/gitea/modules/notification" | |||
"code.gitea.io/gitea/modules/setting" | |||
) | |||
@@ -179,11 +180,24 @@ func createCodeComment(doer *models.User, repo *models.Repository, issue *models | |||
if len(commitID) == 0 { | |||
commitID = headCommitID | |||
} | |||
patchBuf := new(bytes.Buffer) | |||
if err := git.GetRepoRawDiffForFile(gitRepo, pr.MergeBase, headCommitID, git.RawDiffNormal, treePath, patchBuf); err != nil { | |||
return nil, fmt.Errorf("GetRawDiffForLine[%s, %s, %s, %s]: %v", gitRepo.Path, pr.MergeBase, headCommitID, treePath, err) | |||
reader, writer := io.Pipe() | |||
defer func() { | |||
_ = reader.Close() | |||
_ = writer.Close() | |||
}() | |||
go func() { | |||
if err := git.GetRepoRawDiffForFile(gitRepo, pr.MergeBase, headCommitID, git.RawDiffNormal, treePath, writer); err != nil { | |||
_ = writer.CloseWithError(fmt.Errorf("GetRawDiffForLine[%s, %s, %s, %s]: %v", gitRepo.Path, pr.MergeBase, headCommitID, treePath, err)) | |||
return | |||
} | |||
_ = writer.Close() | |||
}() | |||
patch, err = git.CutDiffAroundLine(reader, int64((&models.Comment{Line: line}).UnsignedLine()), line < 0, setting.UI.CodeCommentLines) | |||
if err != nil { | |||
log.Error("Error whilst generating patch: %v", err) | |||
return nil, err | |||
} | |||
patch = git.CutDiffAroundLine(patchBuf, int64((&models.Comment{Line: line}).UnsignedLine()), line < 0, setting.UI.CodeCommentLines) | |||
} | |||
return models.CreateComment(&models.CreateCommentOptions{ | |||
Type: models.CommentTypeCode, |