You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

log_name_status.go 10KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437
  1. // Copyright 2021 The Gitea Authors. All rights reserved.
  2. // SPDX-License-Identifier: MIT
  3. package git
  4. import (
  5. "bufio"
  6. "bytes"
  7. "context"
  8. "errors"
  9. "io"
  10. "path"
  11. "sort"
  12. "strings"
  13. "code.gitea.io/gitea/modules/container"
  14. "github.com/djherbis/buffer"
  15. "github.com/djherbis/nio/v3"
  16. )
  17. // LogNameStatusRepo opens git log --raw in the provided repo and returns a stdin pipe, a stdout reader and cancel function
  18. func LogNameStatusRepo(ctx context.Context, repository, head, treepath string, paths ...string) (*bufio.Reader, func()) {
  19. // We often want to feed the commits in order into cat-file --batch, followed by their trees and sub trees as necessary.
  20. // so let's create a batch stdin and stdout
  21. stdoutReader, stdoutWriter := nio.Pipe(buffer.New(32 * 1024))
  22. // Lets also create a context so that we can absolutely ensure that the command should die when we're done
  23. ctx, ctxCancel := context.WithCancel(ctx)
  24. cancel := func() {
  25. ctxCancel()
  26. _ = stdoutReader.Close()
  27. _ = stdoutWriter.Close()
  28. }
  29. cmd := NewCommand(ctx)
  30. cmd.AddArguments("log", "--name-status", "-c", "--format=commit%x00%H %P%x00", "--parents", "--no-renames", "-t", "-z").AddDynamicArguments(head)
  31. var files []string
  32. if len(paths) < 70 {
  33. if treepath != "" {
  34. files = append(files, treepath)
  35. for _, pth := range paths {
  36. if pth != "" {
  37. files = append(files, path.Join(treepath, pth))
  38. }
  39. }
  40. } else {
  41. for _, pth := range paths {
  42. if pth != "" {
  43. files = append(files, pth)
  44. }
  45. }
  46. }
  47. } else if treepath != "" {
  48. files = append(files, treepath)
  49. }
  50. // Use the :(literal) pathspec magic to handle edge cases with files named like ":file.txt" or "*.jpg"
  51. for i, file := range files {
  52. files[i] = ":(literal)" + file
  53. }
  54. cmd.AddDashesAndList(files...)
  55. go func() {
  56. stderr := strings.Builder{}
  57. err := cmd.Run(&RunOpts{
  58. Dir: repository,
  59. Stdout: stdoutWriter,
  60. Stderr: &stderr,
  61. })
  62. if err != nil {
  63. _ = stdoutWriter.CloseWithError(ConcatenateError(err, (&stderr).String()))
  64. return
  65. }
  66. _ = stdoutWriter.Close()
  67. }()
  68. // For simplicities sake we'll us a buffered reader to read from the cat-file --batch
  69. bufReader := bufio.NewReaderSize(stdoutReader, 32*1024)
  70. return bufReader, cancel
  71. }
// LogNameStatusRepoParser parses the `git log --name-status` output produced by LogNameStatusRepo.
// It keeps one token of look-ahead between calls to Next.
type LogNameStatusRepoParser struct {
	treepath string        // tree path the parser was created with
	paths    []string      // paths the parser was created with
	next     []byte        // most recently read NUL-delimited token, carried over between Next calls
	buffull  bool          // true when the last ReadSlice stopped with bufio.ErrBufferFull, i.e. next is incomplete
	rd       *bufio.Reader // reader over the git log output
	cancel   func()        // cancel function returned by LogNameStatusRepo; invoked by Close
}
  81. // NewLogNameStatusRepoParser returns a new parser for a git log raw output
  82. func NewLogNameStatusRepoParser(ctx context.Context, repository, head, treepath string, paths ...string) *LogNameStatusRepoParser {
  83. rd, cancel := LogNameStatusRepo(ctx, repository, head, treepath, paths...)
  84. return &LogNameStatusRepoParser{
  85. treepath: treepath,
  86. paths: paths,
  87. rd: rd,
  88. cancel: cancel,
  89. }
  90. }
// LogNameStatusCommitData represents one commit record parsed from the git log --name-status output
type LogNameStatusCommitData struct {
	// CommitID is the first space-separated field of the commit header (the "%H" of the log format).
	CommitID string
	// ParentIDs holds the remaining space-separated fields of the commit header; nil when there are none.
	ParentIDs []string
	// Paths is the `changed` slice that was passed to Next (not a copy) when at
	// least one looked-up path was found in this commit; it stays nil otherwise.
	Paths []bool
}
// Next parses the next commit record from the git log stream and returns it as a LogNameStatusCommitData.
//
// Parameters:
//   - treepath: prefix every changed file name must start with; names without it are skipped.
//     After the prefix is removed, only the tree itself (empty remainder) or a direct
//     child (no further '/') is considered.
//   - paths2ids: maps the remaining entry name to an index into changed.
//   - changed: caller-owned slice; changed[idx] is set to true for every entry found. It is
//     also what the returned Paths field points at, so the caller must reset it between calls.
//   - maxpathlen: names longer than this (after removing the treepath prefix and the
//     trailing NUL) are skipped without a map lookup.
//
// It returns (nil, nil) when the stream ends before a commit header could be read, and a
// non-nil error for any read failure other than io.EOF / bufio.ErrBufferFull.
//
// The parser keeps one token of look-ahead in g.next: on return it holds the token
// following this commit's data (normally the next "commit" marker) for the next call.
func (g *LogNameStatusRepoParser) Next(treepath string, paths2ids map[string]int, changed []bool, maxpathlen int) (*LogNameStatusCommitData, error) {
	var err error
	// No look-ahead token left over from a previous call: read the first token now.
	if g.next == nil || len(g.next) == 0 {
		g.buffull = false
		g.next, err = g.rd.ReadSlice('\x00')
		if err != nil {
			if err == bufio.ErrBufferFull {
				// token is longer than the reader's buffer; the rest is fetched below
				g.buffull = true
			} else if err == io.EOF {
				// nothing more to parse
				return nil, nil
			} else {
				return nil, err
			}
		}
	}

	ret := LogNameStatusCommitData{}
	// Skip the literal "commit" marker emitted by the log format; the header follows it.
	if bytes.Equal(g.next, []byte("commit\000")) {
		g.next, err = g.rd.ReadSlice('\x00')
		if err != nil {
			if err == bufio.ErrBufferFull {
				g.buffull = true
			} else if err == io.EOF {
				return nil, nil
			} else {
				return nil, err
			}
		}
	}

	// Our "line" must look like: <commitid> SP (<parent> SP) * NUL
	// Converting to string copies the bytes, so they survive the next read on g.rd.
	commitIDs := string(g.next)
	if g.buffull {
		// The header did not fit in the buffer: read the remainder up to the NUL.
		more, err := g.rd.ReadString('\x00')
		if err != nil {
			return nil, err
		}
		commitIDs += more
	}
	// drop the trailing NUL
	commitIDs = commitIDs[:len(commitIDs)-1]
	splitIDs := strings.Split(commitIDs, " ")
	ret.CommitID = splitIDs[0]
	if len(splitIDs) > 1 {
		ret.ParentIDs = splitIDs[1:]
	}

	// now read the next "line"
	g.buffull = false
	g.next, err = g.rd.ReadSlice('\x00')
	if err != nil {
		if err == bufio.ErrBufferFull {
			g.buffull = true
		} else if err != io.EOF {
			return nil, err
		}
	}

	// Stream ended, or the token starts with neither NL nor NUL: this commit has no
	// change list, so return it and leave g.next as look-ahead for the next call.
	if err == io.EOF || !(g.next[0] == '\n' || g.next[0] == '\000') {
		return &ret, nil
	}

	// Ok we have some changes.
	// This line will look like: NL <fname> NUL
	//
	// Subsequent lines will not have the NL - so drop it here - g.buffull must also be false at this point too.
	if g.next[0] == '\n' {
		g.next = g.next[1:]
	} else {
		// The token started with NUL instead of NL: read the following token.
		g.buffull = false
		g.next, err = g.rd.ReadSlice('\x00')
		if err != nil {
			if err == bufio.ErrBufferFull {
				g.buffull = true
			} else if err != io.EOF {
				return nil, err
			}
		}
		if len(g.next) == 0 {
			return &ret, nil
		}
		// Another token starting with NUL: skip it as well.
		if g.next[0] == '\x00' {
			g.buffull = false
			g.next, err = g.rd.ReadSlice('\x00')
			if err != nil {
				if err == bufio.ErrBufferFull {
					g.buffull = true
				} else if err != io.EOF {
					return nil, err
				}
			}
		}
	}

	// Scratch buffer for file names. ReadSlice's result is only valid until the next
	// read, so each name is copied here before the look-ahead token is fetched.
	fnameBuf := make([]byte, 4096)

diffloop:
	for {
		// End of stream or start of the next commit: this commit's change list is done.
		if err == io.EOF || bytes.Equal(g.next, []byte("commit\000")) {
			return &ret, nil
		}
		// g.next is not a commit marker here (presumably the status field of a
		// --name-status entry); the token read next is treated as the file name.
		g.next, err = g.rd.ReadSlice('\x00')
		if err != nil {
			if err == bufio.ErrBufferFull {
				g.buffull = true
			} else if err == io.EOF {
				return &ret, nil
			} else {
				return nil, err
			}
		}
		// Copy the token into fnameBuf, growing or truncating fnameBuf to the token's length.
		copy(fnameBuf, g.next)
		if len(fnameBuf) < len(g.next) {
			fnameBuf = append(fnameBuf, g.next[len(fnameBuf):]...)
		} else {
			fnameBuf = fnameBuf[:len(g.next)]
		}
		if err != nil {
			// Only ErrBufferFull can reach this point: fetch the rest of the name up to the NUL.
			if err != bufio.ErrBufferFull {
				return nil, err
			}
			more, err := g.rd.ReadBytes('\x00')
			if err != nil {
				return nil, err
			}
			fnameBuf = append(fnameBuf, more...)
		}

		// read the next line
		g.buffull = false
		g.next, err = g.rd.ReadSlice('\x00')
		if err != nil {
			if err == bufio.ErrBufferFull {
				g.buffull = true
			} else if err != io.EOF {
				return nil, err
			}
		}

		// Each skip below first re-extends fnameBuf to its capacity so the next
		// iteration can reuse the scratch space.
		if treepath != "" {
			// Ignore files outside the requested tree.
			if !bytes.HasPrefix(fnameBuf, []byte(treepath)) {
				fnameBuf = fnameBuf[:cap(fnameBuf)]
				continue diffloop
			}
		}
		// Strip the treepath prefix and the trailing NUL.
		fnameBuf = fnameBuf[len(treepath) : len(fnameBuf)-1]
		if len(fnameBuf) > maxpathlen {
			fnameBuf = fnameBuf[:cap(fnameBuf)]
			continue diffloop
		}
		if len(fnameBuf) > 0 {
			if len(treepath) > 0 {
				// Below a tree path the remainder must be "/<name>" with no further
				// '/', i.e. a direct child of the tree.
				if fnameBuf[0] != '/' || bytes.IndexByte(fnameBuf[1:], '/') >= 0 {
					fnameBuf = fnameBuf[:cap(fnameBuf)]
					continue diffloop
				}
				fnameBuf = fnameBuf[1:]
			} else if bytes.IndexByte(fnameBuf, '/') >= 0 {
				// Without a tree path only top-level entries are considered.
				fnameBuf = fnameBuf[:cap(fnameBuf)]
				continue diffloop
			}
		}

		// An empty remainder (the tree itself) is looked up under the key "".
		idx, ok := paths2ids[string(fnameBuf)]
		if !ok {
			fnameBuf = fnameBuf[:cap(fnameBuf)]
			continue diffloop
		}
		// Expose the caller's changed slice on first hit; it is shared, not copied.
		if ret.Paths == nil {
			ret.Paths = changed
		}
		changed[idx] = true
	}
}
// Close stops the parser by invoking the cancel function obtained from
// LogNameStatusRepo when the parser was created.
func (g *LogNameStatusRepoParser) Close() {
	g.cancel()
}
  265. // WalkGitLog walks the git log --name-status for the head commit in the provided treepath and files
  266. func WalkGitLog(ctx context.Context, repo *Repository, head *Commit, treepath string, paths ...string) (map[string]string, error) {
  267. headRef := head.ID.String()
  268. tree, err := head.SubTree(treepath)
  269. if err != nil {
  270. return nil, err
  271. }
  272. entries, err := tree.ListEntries()
  273. if err != nil {
  274. return nil, err
  275. }
  276. if len(paths) == 0 {
  277. paths = make([]string, 0, len(entries)+1)
  278. paths = append(paths, "")
  279. for _, entry := range entries {
  280. paths = append(paths, entry.Name())
  281. }
  282. } else {
  283. sort.Strings(paths)
  284. if paths[0] != "" {
  285. paths = append([]string{""}, paths...)
  286. }
  287. // remove duplicates
  288. for i := len(paths) - 1; i > 0; i-- {
  289. if paths[i] == paths[i-1] {
  290. paths = append(paths[:i-1], paths[i:]...)
  291. }
  292. }
  293. }
  294. path2idx := map[string]int{}
  295. maxpathlen := len(treepath)
  296. for i := range paths {
  297. path2idx[paths[i]] = i
  298. pthlen := len(paths[i]) + len(treepath) + 1
  299. if pthlen > maxpathlen {
  300. maxpathlen = pthlen
  301. }
  302. }
  303. g := NewLogNameStatusRepoParser(ctx, repo.Path, head.ID.String(), treepath, paths...)
  304. // don't use defer g.Close() here as g may change its value - instead wrap in a func
  305. defer func() {
  306. g.Close()
  307. }()
  308. results := make([]string, len(paths))
  309. remaining := len(paths)
  310. nextRestart := (len(paths) * 3) / 4
  311. if nextRestart > 70 {
  312. nextRestart = 70
  313. }
  314. lastEmptyParent := head.ID.String()
  315. commitSinceLastEmptyParent := uint64(0)
  316. commitSinceNextRestart := uint64(0)
  317. parentRemaining := make(container.Set[string])
  318. changed := make([]bool, len(paths))
  319. heaploop:
  320. for {
  321. select {
  322. case <-ctx.Done():
  323. if ctx.Err() == context.DeadlineExceeded {
  324. break heaploop
  325. }
  326. g.Close()
  327. return nil, ctx.Err()
  328. default:
  329. }
  330. current, err := g.Next(treepath, path2idx, changed, maxpathlen)
  331. if err != nil {
  332. if errors.Is(err, context.DeadlineExceeded) {
  333. break heaploop
  334. }
  335. g.Close()
  336. return nil, err
  337. }
  338. if current == nil {
  339. break heaploop
  340. }
  341. parentRemaining.Remove(current.CommitID)
  342. for i, found := range current.Paths {
  343. if !found {
  344. continue
  345. }
  346. changed[i] = false
  347. if results[i] == "" {
  348. results[i] = current.CommitID
  349. if err := repo.LastCommitCache.Put(headRef, path.Join(treepath, paths[i]), current.CommitID); err != nil {
  350. return nil, err
  351. }
  352. delete(path2idx, paths[i])
  353. remaining--
  354. if results[0] == "" {
  355. results[0] = current.CommitID
  356. if err := repo.LastCommitCache.Put(headRef, treepath, current.CommitID); err != nil {
  357. return nil, err
  358. }
  359. delete(path2idx, "")
  360. remaining--
  361. }
  362. }
  363. }
  364. if remaining <= 0 {
  365. break heaploop
  366. }
  367. commitSinceLastEmptyParent++
  368. if len(parentRemaining) == 0 {
  369. lastEmptyParent = current.CommitID
  370. commitSinceLastEmptyParent = 0
  371. }
  372. if remaining <= nextRestart {
  373. commitSinceNextRestart++
  374. if 4*commitSinceNextRestart > 3*commitSinceLastEmptyParent {
  375. g.Close()
  376. remainingPaths := make([]string, 0, len(paths))
  377. for i, pth := range paths {
  378. if results[i] == "" {
  379. remainingPaths = append(remainingPaths, pth)
  380. }
  381. }
  382. g = NewLogNameStatusRepoParser(ctx, repo.Path, lastEmptyParent, treepath, remainingPaths...)
  383. parentRemaining = make(container.Set[string])
  384. nextRestart = (remaining * 3) / 4
  385. continue heaploop
  386. }
  387. }
  388. parentRemaining.AddMultiple(current.ParentIDs...)
  389. }
  390. g.Close()
  391. resultsMap := map[string]string{}
  392. for i, pth := range paths {
  393. resultsMap[pth] = results[i]
  394. }
  395. return resultsMap, nil
  396. }