You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

log_name_status.go 10KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434
  1. // Copyright 2021 The Gitea Authors. All rights reserved.
  2. // SPDX-License-Identifier: MIT
  3. package git
  4. import (
  5. "bufio"
  6. "bytes"
  7. "context"
  8. "errors"
  9. "io"
  10. "path"
  11. "sort"
  12. "strings"
  13. "code.gitea.io/gitea/modules/container"
  14. "github.com/djherbis/buffer"
  15. "github.com/djherbis/nio/v3"
  16. )
  17. // LogNameStatusRepo opens git log --raw in the provided repo and returns a stdin pipe, a stdout reader and cancel function
  18. func LogNameStatusRepo(ctx context.Context, repository, head, treepath string, paths ...string) (*bufio.Reader, func()) {
  19. // We often want to feed the commits in order into cat-file --batch, followed by their trees and sub trees as necessary.
  20. // so let's create a batch stdin and stdout
  21. stdoutReader, stdoutWriter := nio.Pipe(buffer.New(32 * 1024))
  22. // Lets also create a context so that we can absolutely ensure that the command should die when we're done
  23. ctx, ctxCancel := context.WithCancel(ctx)
  24. cancel := func() {
  25. ctxCancel()
  26. _ = stdoutReader.Close()
  27. _ = stdoutWriter.Close()
  28. }
  29. cmd := NewCommand(ctx)
  30. cmd.AddArguments("log", "--name-status", "-c", "--format=commit%x00%H %P%x00", "--parents", "--no-renames", "-t", "-z").AddDynamicArguments(head)
  31. var files []string
  32. if len(paths) < 70 {
  33. if treepath != "" {
  34. files = append(files, treepath)
  35. for _, pth := range paths {
  36. if pth != "" {
  37. files = append(files, path.Join(treepath, pth))
  38. }
  39. }
  40. } else {
  41. for _, pth := range paths {
  42. if pth != "" {
  43. files = append(files, pth)
  44. }
  45. }
  46. }
  47. } else if treepath != "" {
  48. files = append(files, treepath)
  49. }
  50. // Use the :(literal) pathspec magic to handle edge cases with files named like ":file.txt" or "*.jpg"
  51. for i, file := range files {
  52. files[i] = ":(literal)" + file
  53. }
  54. cmd.AddDashesAndList(files...)
  55. go func() {
  56. stderr := strings.Builder{}
  57. err := cmd.Run(&RunOpts{
  58. Dir: repository,
  59. Stdout: stdoutWriter,
  60. Stderr: &stderr,
  61. })
  62. if err != nil {
  63. _ = stdoutWriter.CloseWithError(ConcatenateError(err, (&stderr).String()))
  64. return
  65. }
  66. _ = stdoutWriter.Close()
  67. }()
  68. // For simplicities sake we'll us a buffered reader to read from the cat-file --batch
  69. bufReader := bufio.NewReaderSize(stdoutReader, 32*1024)
  70. return bufReader, cancel
  71. }
// LogNameStatusRepoParser parses the git log output produced by LogNameStatusRepo
type LogNameStatusRepoParser struct {
	// treepath is the treepath the parser was created with
	treepath string
	// paths are the paths the parser was created with
	paths []string
	// next holds the most recently read NUL-terminated token; it is kept between calls to Next
	next []byte
	// buffull is set when the last ReadSlice returned bufio.ErrBufferFull, i.e. next is only a partial token
	buffull bool
	// rd is the reader the git log output is read from
	rd *bufio.Reader
	// cancel is called by Close
	cancel func()
}
  81. // NewLogNameStatusRepoParser returns a new parser for a git log raw output
  82. func NewLogNameStatusRepoParser(ctx context.Context, repository, head, treepath string, paths ...string) *LogNameStatusRepoParser {
  83. rd, cancel := LogNameStatusRepo(ctx, repository, head, treepath, paths...)
  84. return &LogNameStatusRepoParser{
  85. treepath: treepath,
  86. paths: paths,
  87. rd: rd,
  88. cancel: cancel,
  89. }
  90. }
// LogNameStatusCommitData represents one commit read from the git log --name-status output
type LogNameStatusCommitData struct {
	// CommitID is the ID of the commit
	CommitID string
	// ParentIDs are the IDs of the commit's parents
	ParentIDs []string
	// Paths is set by Next to the caller-supplied "changed" slice when the commit touched at
	// least one of the tracked paths, and is nil otherwise
	Paths []bool
}
  97. // Next returns the next LogStatusCommitData
  98. func (g *LogNameStatusRepoParser) Next(treepath string, paths2ids map[string]int, changed []bool, maxpathlen int) (*LogNameStatusCommitData, error) {
  99. var err error
  100. if g.next == nil || len(g.next) == 0 {
  101. g.buffull = false
  102. g.next, err = g.rd.ReadSlice('\x00')
  103. if err != nil {
  104. if err == bufio.ErrBufferFull {
  105. g.buffull = true
  106. } else if err == io.EOF {
  107. return nil, nil
  108. } else {
  109. return nil, err
  110. }
  111. }
  112. }
  113. ret := LogNameStatusCommitData{}
  114. if bytes.Equal(g.next, []byte("commit\000")) {
  115. g.next, err = g.rd.ReadSlice('\x00')
  116. if err != nil {
  117. if err == bufio.ErrBufferFull {
  118. g.buffull = true
  119. } else if err == io.EOF {
  120. return nil, nil
  121. } else {
  122. return nil, err
  123. }
  124. }
  125. }
  126. // Our "line" must look like: <commitid> SP (<parent> SP) * NUL
  127. ret.CommitID = string(g.next[0:40])
  128. parents := string(g.next[41:])
  129. if g.buffull {
  130. more, err := g.rd.ReadString('\x00')
  131. if err != nil {
  132. return nil, err
  133. }
  134. parents += more
  135. }
  136. parents = parents[:len(parents)-1]
  137. ret.ParentIDs = strings.Split(parents, " ")
  138. // now read the next "line"
  139. g.buffull = false
  140. g.next, err = g.rd.ReadSlice('\x00')
  141. if err != nil {
  142. if err == bufio.ErrBufferFull {
  143. g.buffull = true
  144. } else if err != io.EOF {
  145. return nil, err
  146. }
  147. }
  148. if err == io.EOF || !(g.next[0] == '\n' || g.next[0] == '\000') {
  149. return &ret, nil
  150. }
  151. // Ok we have some changes.
  152. // This line will look like: NL <fname> NUL
  153. //
  154. // Subsequent lines will not have the NL - so drop it here - g.bufffull must also be false at this point too.
  155. if g.next[0] == '\n' {
  156. g.next = g.next[1:]
  157. } else {
  158. g.buffull = false
  159. g.next, err = g.rd.ReadSlice('\x00')
  160. if err != nil {
  161. if err == bufio.ErrBufferFull {
  162. g.buffull = true
  163. } else if err != io.EOF {
  164. return nil, err
  165. }
  166. }
  167. if len(g.next) == 0 {
  168. return &ret, nil
  169. }
  170. if g.next[0] == '\x00' {
  171. g.buffull = false
  172. g.next, err = g.rd.ReadSlice('\x00')
  173. if err != nil {
  174. if err == bufio.ErrBufferFull {
  175. g.buffull = true
  176. } else if err != io.EOF {
  177. return nil, err
  178. }
  179. }
  180. }
  181. }
  182. fnameBuf := make([]byte, 4096)
  183. diffloop:
  184. for {
  185. if err == io.EOF || bytes.Equal(g.next, []byte("commit\000")) {
  186. return &ret, nil
  187. }
  188. g.next, err = g.rd.ReadSlice('\x00')
  189. if err != nil {
  190. if err == bufio.ErrBufferFull {
  191. g.buffull = true
  192. } else if err == io.EOF {
  193. return &ret, nil
  194. } else {
  195. return nil, err
  196. }
  197. }
  198. copy(fnameBuf, g.next)
  199. if len(fnameBuf) < len(g.next) {
  200. fnameBuf = append(fnameBuf, g.next[len(fnameBuf):]...)
  201. } else {
  202. fnameBuf = fnameBuf[:len(g.next)]
  203. }
  204. if err != nil {
  205. if err != bufio.ErrBufferFull {
  206. return nil, err
  207. }
  208. more, err := g.rd.ReadBytes('\x00')
  209. if err != nil {
  210. return nil, err
  211. }
  212. fnameBuf = append(fnameBuf, more...)
  213. }
  214. // read the next line
  215. g.buffull = false
  216. g.next, err = g.rd.ReadSlice('\x00')
  217. if err != nil {
  218. if err == bufio.ErrBufferFull {
  219. g.buffull = true
  220. } else if err != io.EOF {
  221. return nil, err
  222. }
  223. }
  224. if treepath != "" {
  225. if !bytes.HasPrefix(fnameBuf, []byte(treepath)) {
  226. fnameBuf = fnameBuf[:cap(fnameBuf)]
  227. continue diffloop
  228. }
  229. }
  230. fnameBuf = fnameBuf[len(treepath) : len(fnameBuf)-1]
  231. if len(fnameBuf) > maxpathlen {
  232. fnameBuf = fnameBuf[:cap(fnameBuf)]
  233. continue diffloop
  234. }
  235. if len(fnameBuf) > 0 {
  236. if len(treepath) > 0 {
  237. if fnameBuf[0] != '/' || bytes.IndexByte(fnameBuf[1:], '/') >= 0 {
  238. fnameBuf = fnameBuf[:cap(fnameBuf)]
  239. continue diffloop
  240. }
  241. fnameBuf = fnameBuf[1:]
  242. } else if bytes.IndexByte(fnameBuf, '/') >= 0 {
  243. fnameBuf = fnameBuf[:cap(fnameBuf)]
  244. continue diffloop
  245. }
  246. }
  247. idx, ok := paths2ids[string(fnameBuf)]
  248. if !ok {
  249. fnameBuf = fnameBuf[:cap(fnameBuf)]
  250. continue diffloop
  251. }
  252. if ret.Paths == nil {
  253. ret.Paths = changed
  254. }
  255. changed[idx] = true
  256. }
  257. }
// Close closes the parser by calling its cancel function
// (the one NewLogNameStatusRepoParser obtained from LogNameStatusRepo).
func (g *LogNameStatusRepoParser) Close() {
	g.cancel()
}
  262. // WalkGitLog walks the git log --name-status for the head commit in the provided treepath and files
  263. func WalkGitLog(ctx context.Context, repo *Repository, head *Commit, treepath string, paths ...string) (map[string]string, error) {
  264. headRef := head.ID.String()
  265. tree, err := head.SubTree(treepath)
  266. if err != nil {
  267. return nil, err
  268. }
  269. entries, err := tree.ListEntries()
  270. if err != nil {
  271. return nil, err
  272. }
  273. if len(paths) == 0 {
  274. paths = make([]string, 0, len(entries)+1)
  275. paths = append(paths, "")
  276. for _, entry := range entries {
  277. paths = append(paths, entry.Name())
  278. }
  279. } else {
  280. sort.Strings(paths)
  281. if paths[0] != "" {
  282. paths = append([]string{""}, paths...)
  283. }
  284. // remove duplicates
  285. for i := len(paths) - 1; i > 0; i-- {
  286. if paths[i] == paths[i-1] {
  287. paths = append(paths[:i-1], paths[i:]...)
  288. }
  289. }
  290. }
  291. path2idx := map[string]int{}
  292. maxpathlen := len(treepath)
  293. for i := range paths {
  294. path2idx[paths[i]] = i
  295. pthlen := len(paths[i]) + len(treepath) + 1
  296. if pthlen > maxpathlen {
  297. maxpathlen = pthlen
  298. }
  299. }
  300. g := NewLogNameStatusRepoParser(ctx, repo.Path, head.ID.String(), treepath, paths...)
  301. // don't use defer g.Close() here as g may change its value - instead wrap in a func
  302. defer func() {
  303. g.Close()
  304. }()
  305. results := make([]string, len(paths))
  306. remaining := len(paths)
  307. nextRestart := (len(paths) * 3) / 4
  308. if nextRestart > 70 {
  309. nextRestart = 70
  310. }
  311. lastEmptyParent := head.ID.String()
  312. commitSinceLastEmptyParent := uint64(0)
  313. commitSinceNextRestart := uint64(0)
  314. parentRemaining := make(container.Set[string])
  315. changed := make([]bool, len(paths))
  316. heaploop:
  317. for {
  318. select {
  319. case <-ctx.Done():
  320. if ctx.Err() == context.DeadlineExceeded {
  321. break heaploop
  322. }
  323. g.Close()
  324. return nil, ctx.Err()
  325. default:
  326. }
  327. current, err := g.Next(treepath, path2idx, changed, maxpathlen)
  328. if err != nil {
  329. if errors.Is(err, context.DeadlineExceeded) {
  330. break heaploop
  331. }
  332. g.Close()
  333. return nil, err
  334. }
  335. if current == nil {
  336. break heaploop
  337. }
  338. parentRemaining.Remove(current.CommitID)
  339. for i, found := range current.Paths {
  340. if !found {
  341. continue
  342. }
  343. changed[i] = false
  344. if results[i] == "" {
  345. results[i] = current.CommitID
  346. if err := repo.LastCommitCache.Put(headRef, path.Join(treepath, paths[i]), current.CommitID); err != nil {
  347. return nil, err
  348. }
  349. delete(path2idx, paths[i])
  350. remaining--
  351. if results[0] == "" {
  352. results[0] = current.CommitID
  353. if err := repo.LastCommitCache.Put(headRef, treepath, current.CommitID); err != nil {
  354. return nil, err
  355. }
  356. delete(path2idx, "")
  357. remaining--
  358. }
  359. }
  360. }
  361. if remaining <= 0 {
  362. break heaploop
  363. }
  364. commitSinceLastEmptyParent++
  365. if len(parentRemaining) == 0 {
  366. lastEmptyParent = current.CommitID
  367. commitSinceLastEmptyParent = 0
  368. }
  369. if remaining <= nextRestart {
  370. commitSinceNextRestart++
  371. if 4*commitSinceNextRestart > 3*commitSinceLastEmptyParent {
  372. g.Close()
  373. remainingPaths := make([]string, 0, len(paths))
  374. for i, pth := range paths {
  375. if results[i] == "" {
  376. remainingPaths = append(remainingPaths, pth)
  377. }
  378. }
  379. g = NewLogNameStatusRepoParser(ctx, repo.Path, lastEmptyParent, treepath, remainingPaths...)
  380. parentRemaining = make(container.Set[string])
  381. nextRestart = (remaining * 3) / 4
  382. continue heaploop
  383. }
  384. }
  385. parentRemaining.AddMultiple(current.ParentIDs...)
  386. }
  387. g.Close()
  388. resultsMap := map[string]string{}
  389. for i, pth := range paths {
  390. resultsMap[pth] = results[i]
  391. }
  392. return resultsMap, nil
  393. }