You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

repo_compare.go 12KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341
  1. // Copyright 2015 The Gogs Authors. All rights reserved.
  2. // Copyright 2019 The Gitea Authors. All rights reserved.
  3. // SPDX-License-Identifier: MIT
  4. package git
  5. import (
  6. "bufio"
  7. "bytes"
  8. "context"
  9. "errors"
  10. "fmt"
  11. "io"
  12. "os"
  13. "path/filepath"
  14. "regexp"
  15. "strconv"
  16. "strings"
  17. "time"
  18. logger "code.gitea.io/gitea/modules/log"
  19. )
  20. // CompareInfo represents needed information for comparing references.
  21. type CompareInfo struct {
  22. MergeBase string
  23. BaseCommitID string
  24. HeadCommitID string
  25. Commits []*Commit
  26. NumFiles int
  27. }
  28. // GetMergeBase checks and returns merge base of two branches and the reference used as base.
  29. func (repo *Repository) GetMergeBase(tmpRemote, base, head string) (string, string, error) {
  30. if tmpRemote == "" {
  31. tmpRemote = "origin"
  32. }
  33. if tmpRemote != "origin" {
  34. tmpBaseName := RemotePrefix + tmpRemote + "/tmp_" + base
  35. // Fetch commit into a temporary branch in order to be able to handle commits and tags
  36. _, _, err := NewCommand(repo.Ctx, "fetch", "--no-tags").AddDynamicArguments(tmpRemote).AddDashesAndList(base + ":" + tmpBaseName).RunStdString(&RunOpts{Dir: repo.Path})
  37. if err == nil {
  38. base = tmpBaseName
  39. }
  40. }
  41. stdout, _, err := NewCommand(repo.Ctx, "merge-base").AddDashesAndList(base, head).RunStdString(&RunOpts{Dir: repo.Path})
  42. return strings.TrimSpace(stdout), base, err
  43. }
  44. // GetCompareInfo generates and returns compare information between base and head branches of repositories.
  45. func (repo *Repository) GetCompareInfo(basePath, baseBranch, headBranch string, directComparison, fileOnly bool) (_ *CompareInfo, err error) {
  46. var (
  47. remoteBranch string
  48. tmpRemote string
  49. )
  50. // We don't need a temporary remote for same repository.
  51. if repo.Path != basePath {
  52. // Add a temporary remote
  53. tmpRemote = strconv.FormatInt(time.Now().UnixNano(), 10)
  54. if err = repo.AddRemote(tmpRemote, basePath, false); err != nil {
  55. return nil, fmt.Errorf("AddRemote: %w", err)
  56. }
  57. defer func() {
  58. if err := repo.RemoveRemote(tmpRemote); err != nil {
  59. logger.Error("GetPullRequestInfo: RemoveRemote: %v", err)
  60. }
  61. }()
  62. }
  63. compareInfo := new(CompareInfo)
  64. compareInfo.HeadCommitID, err = GetFullCommitID(repo.Ctx, repo.Path, headBranch)
  65. if err != nil {
  66. compareInfo.HeadCommitID = headBranch
  67. }
  68. compareInfo.MergeBase, remoteBranch, err = repo.GetMergeBase(tmpRemote, baseBranch, headBranch)
  69. if err == nil {
  70. compareInfo.BaseCommitID, err = GetFullCommitID(repo.Ctx, repo.Path, remoteBranch)
  71. if err != nil {
  72. compareInfo.BaseCommitID = remoteBranch
  73. }
  74. separator := "..."
  75. baseCommitID := compareInfo.MergeBase
  76. if directComparison {
  77. separator = ".."
  78. baseCommitID = compareInfo.BaseCommitID
  79. }
  80. // We have a common base - therefore we know that ... should work
  81. if !fileOnly {
  82. // avoid: ambiguous argument 'refs/a...refs/b': unknown revision or path not in the working tree. Use '--': 'git <command> [<revision>...] -- [<file>...]'
  83. var logs []byte
  84. logs, _, err = NewCommand(repo.Ctx, "log").AddArguments(prettyLogFormat).
  85. AddDynamicArguments(baseCommitID + separator + headBranch).AddArguments("--").
  86. RunStdBytes(&RunOpts{Dir: repo.Path})
  87. if err != nil {
  88. return nil, err
  89. }
  90. compareInfo.Commits, err = repo.parsePrettyFormatLogToList(logs)
  91. if err != nil {
  92. return nil, fmt.Errorf("parsePrettyFormatLogToList: %w", err)
  93. }
  94. } else {
  95. compareInfo.Commits = []*Commit{}
  96. }
  97. } else {
  98. compareInfo.Commits = []*Commit{}
  99. compareInfo.MergeBase, err = GetFullCommitID(repo.Ctx, repo.Path, remoteBranch)
  100. if err != nil {
  101. compareInfo.MergeBase = remoteBranch
  102. }
  103. compareInfo.BaseCommitID = compareInfo.MergeBase
  104. }
  105. // Count number of changed files.
  106. // This probably should be removed as we need to use shortstat elsewhere
  107. // Now there is git diff --shortstat but this appears to be slower than simply iterating with --nameonly
  108. compareInfo.NumFiles, err = repo.GetDiffNumChangedFiles(remoteBranch, headBranch, directComparison)
  109. if err != nil {
  110. return nil, err
  111. }
  112. return compareInfo, nil
  113. }
  114. type lineCountWriter struct {
  115. numLines int
  116. }
  117. // Write counts the number of newlines in the provided bytestream
  118. func (l *lineCountWriter) Write(p []byte) (n int, err error) {
  119. n = len(p)
  120. l.numLines += bytes.Count(p, []byte{'\000'})
  121. return n, err
  122. }
  123. // GetDiffNumChangedFiles counts the number of changed files
  124. // This is substantially quicker than shortstat but...
  125. func (repo *Repository) GetDiffNumChangedFiles(base, head string, directComparison bool) (int, error) {
  126. // Now there is git diff --shortstat but this appears to be slower than simply iterating with --nameonly
  127. w := &lineCountWriter{}
  128. stderr := new(bytes.Buffer)
  129. separator := "..."
  130. if directComparison {
  131. separator = ".."
  132. }
  133. // avoid: ambiguous argument 'refs/a...refs/b': unknown revision or path not in the working tree. Use '--': 'git <command> [<revision>...] -- [<file>...]'
  134. if err := NewCommand(repo.Ctx, "diff", "-z", "--name-only").AddDynamicArguments(base + separator + head).AddArguments("--").
  135. Run(&RunOpts{
  136. Dir: repo.Path,
  137. Stdout: w,
  138. Stderr: stderr,
  139. }); err != nil {
  140. if strings.Contains(stderr.String(), "no merge base") {
  141. // git >= 2.28 now returns an error if base and head have become unrelated.
  142. // previously it would return the results of git diff -z --name-only base head so let's try that...
  143. w = &lineCountWriter{}
  144. stderr.Reset()
  145. if err = NewCommand(repo.Ctx, "diff", "-z", "--name-only").AddDynamicArguments(base, head).AddArguments("--").Run(&RunOpts{
  146. Dir: repo.Path,
  147. Stdout: w,
  148. Stderr: stderr,
  149. }); err == nil {
  150. return w.numLines, nil
  151. }
  152. }
  153. return 0, fmt.Errorf("%w: Stderr: %s", err, stderr)
  154. }
  155. return w.numLines, nil
  156. }
  157. // GetDiffShortStat counts number of changed files, number of additions and deletions
  158. func (repo *Repository) GetDiffShortStat(base, head string) (numFiles, totalAdditions, totalDeletions int, err error) {
  159. numFiles, totalAdditions, totalDeletions, err = GetDiffShortStat(repo.Ctx, repo.Path, nil, base+"..."+head)
  160. if err != nil && strings.Contains(err.Error(), "no merge base") {
  161. return GetDiffShortStat(repo.Ctx, repo.Path, nil, base, head)
  162. }
  163. return numFiles, totalAdditions, totalDeletions, err
  164. }
  165. // GetDiffShortStat counts number of changed files, number of additions and deletions
  166. func GetDiffShortStat(ctx context.Context, repoPath string, trustedArgs TrustedCmdArgs, dynamicArgs ...string) (numFiles, totalAdditions, totalDeletions int, err error) {
  167. // Now if we call:
  168. // $ git diff --shortstat 1ebb35b98889ff77299f24d82da426b434b0cca0...788b8b1440462d477f45b0088875
  169. // we get:
  170. // " 9902 files changed, 2034198 insertions(+), 298800 deletions(-)\n"
  171. cmd := NewCommand(ctx, "diff", "--shortstat").AddArguments(trustedArgs...).AddDynamicArguments(dynamicArgs...)
  172. stdout, _, err := cmd.RunStdString(&RunOpts{Dir: repoPath})
  173. if err != nil {
  174. return 0, 0, 0, err
  175. }
  176. return parseDiffStat(stdout)
  177. }
  178. var shortStatFormat = regexp.MustCompile(
  179. `\s*(\d+) files? changed(?:, (\d+) insertions?\(\+\))?(?:, (\d+) deletions?\(-\))?`)
  180. var patchCommits = regexp.MustCompile(`^From\s(\w+)\s`)
  181. func parseDiffStat(stdout string) (numFiles, totalAdditions, totalDeletions int, err error) {
  182. if len(stdout) == 0 || stdout == "\n" {
  183. return 0, 0, 0, nil
  184. }
  185. groups := shortStatFormat.FindStringSubmatch(stdout)
  186. if len(groups) != 4 {
  187. return 0, 0, 0, fmt.Errorf("unable to parse shortstat: %s groups: %s", stdout, groups)
  188. }
  189. numFiles, err = strconv.Atoi(groups[1])
  190. if err != nil {
  191. return 0, 0, 0, fmt.Errorf("unable to parse shortstat: %s. Error parsing NumFiles %w", stdout, err)
  192. }
  193. if len(groups[2]) != 0 {
  194. totalAdditions, err = strconv.Atoi(groups[2])
  195. if err != nil {
  196. return 0, 0, 0, fmt.Errorf("unable to parse shortstat: %s. Error parsing NumAdditions %w", stdout, err)
  197. }
  198. }
  199. if len(groups[3]) != 0 {
  200. totalDeletions, err = strconv.Atoi(groups[3])
  201. if err != nil {
  202. return 0, 0, 0, fmt.Errorf("unable to parse shortstat: %s. Error parsing NumDeletions %w", stdout, err)
  203. }
  204. }
  205. return numFiles, totalAdditions, totalDeletions, err
  206. }
  207. // GetDiffOrPatch generates either diff or formatted patch data between given revisions
  208. func (repo *Repository) GetDiffOrPatch(base, head string, w io.Writer, patch, binary bool) error {
  209. if patch {
  210. return repo.GetPatch(base, head, w)
  211. }
  212. if binary {
  213. return repo.GetDiffBinary(base, head, w)
  214. }
  215. return repo.GetDiff(base, head, w)
  216. }
  217. // GetDiff generates and returns patch data between given revisions, optimized for human readability
  218. func (repo *Repository) GetDiff(base, head string, w io.Writer) error {
  219. return NewCommand(repo.Ctx, "diff", "-p").AddDynamicArguments(base, head).Run(&RunOpts{
  220. Dir: repo.Path,
  221. Stdout: w,
  222. })
  223. }
  224. // GetDiffBinary generates and returns patch data between given revisions, including binary diffs.
  225. func (repo *Repository) GetDiffBinary(base, head string, w io.Writer) error {
  226. return NewCommand(repo.Ctx, "diff", "-p", "--binary", "--histogram").AddDynamicArguments(base, head).Run(&RunOpts{
  227. Dir: repo.Path,
  228. Stdout: w,
  229. })
  230. }
  231. // GetPatch generates and returns format-patch data between given revisions, able to be used with `git apply`
  232. func (repo *Repository) GetPatch(base, head string, w io.Writer) error {
  233. stderr := new(bytes.Buffer)
  234. err := NewCommand(repo.Ctx, "format-patch", "--binary", "--stdout").AddDynamicArguments(base + "..." + head).
  235. Run(&RunOpts{
  236. Dir: repo.Path,
  237. Stdout: w,
  238. Stderr: stderr,
  239. })
  240. if err != nil && bytes.Contains(stderr.Bytes(), []byte("no merge base")) {
  241. return NewCommand(repo.Ctx, "format-patch", "--binary", "--stdout").AddDynamicArguments(base, head).
  242. Run(&RunOpts{
  243. Dir: repo.Path,
  244. Stdout: w,
  245. })
  246. }
  247. return err
  248. }
  249. // GetFilesChangedBetween returns a list of all files that have been changed between the given commits
  250. // If base is undefined empty SHA (zeros), it only returns the files changed in the head commit
  251. // If base is the SHA of an empty tree (EmptyTreeSHA), it returns the files changes from the initial commit to the head commit
  252. func (repo *Repository) GetFilesChangedBetween(base, head string) ([]string, error) {
  253. cmd := NewCommand(repo.Ctx, "diff-tree", "--name-only", "--root", "--no-commit-id", "-r", "-z")
  254. if base == repo.objectFormat.EmptyObjectID().String() {
  255. cmd.AddDynamicArguments(head)
  256. } else {
  257. cmd.AddDynamicArguments(base, head)
  258. }
  259. stdout, _, err := cmd.RunStdString(&RunOpts{Dir: repo.Path})
  260. if err != nil {
  261. return nil, err
  262. }
  263. split := strings.Split(stdout, "\000")
  264. // Because Git will always emit filenames with a terminal NUL ignore the last entry in the split - which will always be empty.
  265. if len(split) > 0 {
  266. split = split[:len(split)-1]
  267. }
  268. return split, err
  269. }
  270. // GetDiffFromMergeBase generates and return patch data from merge base to head
  271. func (repo *Repository) GetDiffFromMergeBase(base, head string, w io.Writer) error {
  272. stderr := new(bytes.Buffer)
  273. err := NewCommand(repo.Ctx, "diff", "-p", "--binary").AddDynamicArguments(base + "..." + head).
  274. Run(&RunOpts{
  275. Dir: repo.Path,
  276. Stdout: w,
  277. Stderr: stderr,
  278. })
  279. if err != nil && bytes.Contains(stderr.Bytes(), []byte("no merge base")) {
  280. return repo.GetDiffBinary(base, head, w)
  281. }
  282. return err
  283. }
  284. // ReadPatchCommit will check if a diff patch exists and return stats
  285. func (repo *Repository) ReadPatchCommit(prID int64) (commitSHA string, err error) {
  286. // Migrated repositories download patches to "pulls" location
  287. patchFile := fmt.Sprintf("pulls/%d.patch", prID)
  288. loadPatch, err := os.Open(filepath.Join(repo.Path, patchFile))
  289. if err != nil {
  290. return "", err
  291. }
  292. defer loadPatch.Close()
  293. // Read only the first line of the patch - usually it contains the first commit made in patch
  294. scanner := bufio.NewScanner(loadPatch)
  295. scanner.Scan()
  296. // Parse the Patch stats, sometimes Migration returns a 404 for the patch file
  297. commitSHAGroups := patchCommits.FindStringSubmatch(scanner.Text())
  298. if len(commitSHAGroups) != 0 {
  299. commitSHA = commitSHAGroups[1]
  300. } else {
  301. return "", errors.New("patch file doesn't contain valid commit ID")
  302. }
  303. return commitSHA, nil
  304. }