diff options
Diffstat (limited to 'services/gitdiff/git_diff_tree.go')
-rw-r--r-- | services/gitdiff/git_diff_tree.go | 249 |
1 files changed, 249 insertions, 0 deletions
diff --git a/services/gitdiff/git_diff_tree.go b/services/gitdiff/git_diff_tree.go new file mode 100644 index 0000000000..8039de145d --- /dev/null +++ b/services/gitdiff/git_diff_tree.go @@ -0,0 +1,249 @@ +// Copyright 2025 The Gitea Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +package gitdiff + +import ( + "bufio" + "context" + "fmt" + "io" + "strconv" + "strings" + + "code.gitea.io/gitea/modules/git" + "code.gitea.io/gitea/modules/log" +) + +type DiffTree struct { + Files []*DiffTreeRecord +} + +type DiffTreeRecord struct { + // Status is one of 'added', 'deleted', 'modified', 'renamed', 'copied', 'typechanged', 'unmerged', 'unknown' + Status string + + // For renames and copies, the percentage of similarity between the source and target of the move/rename. + Score uint8 + + HeadPath string + BasePath string + HeadMode git.EntryMode + BaseMode git.EntryMode + HeadBlobID string + BaseBlobID string +} + +// GetDiffTree returns the list of path of the files that have changed between the two commits. +// If useMergeBase is true, the diff will be calculated using the merge base of the two commits. +// This is the same behavior as using a three-dot diff in git diff. +func GetDiffTree(ctx context.Context, gitRepo *git.Repository, useMergeBase bool, baseSha, headSha string) (*DiffTree, error) { + gitDiffTreeRecords, err := runGitDiffTree(ctx, gitRepo, useMergeBase, baseSha, headSha) + if err != nil { + return nil, err + } + + return &DiffTree{ + Files: gitDiffTreeRecords, + }, nil +} + +func runGitDiffTree(ctx context.Context, gitRepo *git.Repository, useMergeBase bool, baseSha, headSha string) ([]*DiffTreeRecord, error) { + useMergeBase, baseCommitID, headCommitID, err := validateGitDiffTreeArguments(gitRepo, useMergeBase, baseSha, headSha) + if err != nil { + return nil, err + } + + cmd := git.NewCommand(ctx, "diff-tree", "--raw", "-r", "--find-renames", "--root") + if useMergeBase { + cmd.AddArguments("--merge-base") + } + cmd.AddDynamicArguments(baseCommitID, headCommitID) + stdout, _, runErr := cmd.RunStdString(&git.RunOpts{Dir: gitRepo.Path}) + if runErr != nil { + log.Warn("git diff-tree: %v", runErr) + return nil, runErr + } + + return parseGitDiffTree(strings.NewReader(stdout)) +} + +func validateGitDiffTreeArguments(gitRepo *git.Repository, useMergeBase bool, baseSha, headSha string) (shouldUseMergeBase bool, resolvedBaseSha, resolvedHeadSha string, err error) { + // if the head is empty its an error + if headSha == "" { + return false, "", "", fmt.Errorf("headSha is empty") + } + + // if the head commit doesn't exist its and error + headCommit, err := gitRepo.GetCommit(headSha) + if err != nil { + return false, "", "", fmt.Errorf("failed to get commit headSha: %v", err) + } + headCommitID := headCommit.ID.String() + + // if the base is empty we should use the parent of the head commit + if baseSha == "" { + // if the headCommit has no parent we should use an empty commit + // this can happen when we are generating a diff against an orphaned commit + if headCommit.ParentCount() == 0 { + objectFormat, err := gitRepo.GetObjectFormat() + if err != nil { + return false, "", "", err + } + + // We set use merge base to false because we have no base commit + return false, objectFormat.EmptyTree().String(), headCommitID, nil + } + + baseCommit, err := headCommit.Parent(0) + if err != nil { + return false, "", "", fmt.Errorf("baseSha is '', attempted to use parent of commit %s, got error: %v", headCommit.ID.String(), err) + } + return useMergeBase, baseCommit.ID.String(), headCommitID, nil + } + + // try and get the base commit + baseCommit, err := gitRepo.GetCommit(baseSha) + // propagate the error if we couldn't get the base commit + if err != nil { + return useMergeBase, "", "", fmt.Errorf("failed to get base commit %s: %v", baseSha, err) + } + + return useMergeBase, baseCommit.ID.String(), headCommit.ID.String(), nil +} + +func parseGitDiffTree(gitOutput io.Reader) ([]*DiffTreeRecord, error) { + /* + The output of `git diff-tree --raw -r --find-renames` is of the form: + + :<old_mode> <new_mode> <old_sha> <new_sha> <status>\t<path> + + or for renames: + + :<old_mode> <new_mode> <old_sha> <new_sha> <status>\t<old_path>\t<new_path> + + See: <https://git-scm.com/docs/git-diff-tree#_raw_output_format> for more details + */ + results := make([]*DiffTreeRecord, 0) + + lines := bufio.NewScanner(gitOutput) + for lines.Scan() { + line := lines.Text() + + if len(line) == 0 { + continue + } + + record, err := parseGitDiffTreeLine(line) + if err != nil { + return nil, err + } + + results = append(results, record) + } + + if err := lines.Err(); err != nil { + return nil, err + } + + return results, nil +} + +func parseGitDiffTreeLine(line string) (*DiffTreeRecord, error) { + line = strings.TrimPrefix(line, ":") + splitSections := strings.SplitN(line, "\t", 2) + if len(splitSections) < 2 { + return nil, fmt.Errorf("unparsable output for diff-tree --raw: `%s`)", line) + } + + fields := strings.Fields(splitSections[0]) + if len(fields) < 5 { + return nil, fmt.Errorf("unparsable output for diff-tree --raw: `%s`, expected 5 space delimited values got %d)", line, len(fields)) + } + + baseMode, err := git.ParseEntryMode(fields[0]) + if err != nil { + return nil, err + } + + headMode, err := git.ParseEntryMode(fields[1]) + if err != nil { + return nil, err + } + + baseBlobID := fields[2] + headBlobID := fields[3] + + status, score, err := statusFromLetter(fields[4]) + if err != nil { + return nil, fmt.Errorf("unparsable output for diff-tree --raw: %s, error: %s", line, err) + } + + filePaths := strings.Split(splitSections[1], "\t") + + var headPath, basePath string + if status == "renamed" { + if len(filePaths) != 2 { + return nil, fmt.Errorf("unparsable output for diff-tree --raw: `%s`, expected 2 paths found %d", line, len(filePaths)) + } + basePath = filePaths[0] + headPath = filePaths[1] + } else { + basePath = filePaths[0] + headPath = filePaths[0] + } + + return &DiffTreeRecord{ + Status: status, + Score: score, + BaseMode: baseMode, + HeadMode: headMode, + BaseBlobID: baseBlobID, + HeadBlobID: headBlobID, + BasePath: basePath, + HeadPath: headPath, + }, nil +} + +func statusFromLetter(rawStatus string) (status string, score uint8, err error) { + if len(rawStatus) < 1 { + return "", 0, fmt.Errorf("empty status letter") + } + switch rawStatus[0] { + case 'A': + return "added", 0, nil + case 'D': + return "deleted", 0, nil + case 'M': + return "modified", 0, nil + case 'R': + score, err = tryParseStatusScore(rawStatus) + return "renamed", score, err + case 'C': + score, err = tryParseStatusScore(rawStatus) + return "copied", score, err + case 'T': + return "typechanged", 0, nil + case 'U': + return "unmerged", 0, nil + case 'X': + return "unknown", 0, nil + default: + return "", 0, fmt.Errorf("unknown status letter: '%s'", rawStatus) + } +} + +func tryParseStatusScore(rawStatus string) (uint8, error) { + if len(rawStatus) < 2 { + return 0, fmt.Errorf("status score missing") + } + + score, err := strconv.ParseUint(rawStatus[1:], 10, 8) + if err != nil { + return 0, fmt.Errorf("failed to parse status score: %w", err) + } else if score > 100 { + return 0, fmt.Errorf("status score out of range: %d", score) + } + + return uint8(score), nil +} |