// Copyright 2020 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT

package git

import (
	"bufio"
	"bytes"
	"context"
	"fmt"
	"io"
	"math"
	"runtime"
	"strconv"
	"strings"

	"code.gitea.io/gitea/modules/log"

	"github.com/djherbis/buffer"
	"github.com/djherbis/nio/v3"
)

// WriteCloserError wraps an io.WriteCloser with an additional CloseWithError function
type WriteCloserError interface {
	io.WriteCloser
	CloseWithError(err error) error
}

// EnsureValidGitRepository runs git rev-parse in the repository path - thus ensuring that the repository is a valid repository.
// Run this before opening git cat-file.
// Otherwise git cat-file will hang for invalid repositories.
func EnsureValidGitRepository(ctx context.Context, repoPath string) error {
	stderr := strings.Builder{}
	err := NewCommand(ctx, "rev-parse").
		SetDescription(fmt.Sprintf("%s rev-parse [repo_path: %s]", GitExecutable, repoPath)).
		Run(&RunOpts{
			Dir:    repoPath,
			Stderr: &stderr,
		})
	if err != nil {
		return ConcatenateError(err, (&stderr).String())
	}
	return nil
}
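
// openBatchCheckSketch is an illustrative sketch, not part of the original file:
// it shows the intended call order - validate the repository with
// EnsureValidGitRepository first, so that the cat-file process spawned by
// CatFileBatchCheck cannot hang on an invalid repository path. ctx and repoPath
// are assumed to be supplied by the caller.
func openBatchCheckSketch(ctx context.Context, repoPath string) (WriteCloserError, *bufio.Reader, func(), error) {
	if err := EnsureValidGitRepository(ctx, repoPath); err != nil {
		// The path is missing, not a repository, or otherwise corrupt
		return nil, nil, nil, err
	}
	wr, rd, cancel := CatFileBatchCheck(ctx, repoPath)
	return wr, rd, cancel, nil
}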

// CatFileBatchCheck opens git cat-file --batch-check in the provided repo and returns a stdin pipe, a stdout reader and a cancel function
func CatFileBatchCheck(ctx context.Context, repoPath string) (WriteCloserError, *bufio.Reader, func()) {
	batchStdinReader, batchStdinWriter := io.Pipe()
	batchStdoutReader, batchStdoutWriter := io.Pipe()
	ctx, ctxCancel := context.WithCancel(ctx)
	closed := make(chan struct{})
	cancel := func() {
		ctxCancel()
		_ = batchStdoutReader.Close()
		_ = batchStdinWriter.Close()
		<-closed
	}

	// Ensure cancel is called as soon as the provided context is cancelled
	go func() {
		<-ctx.Done()
		cancel()
	}()

	_, filename, line, _ := runtime.Caller(2)
	filename = strings.TrimPrefix(filename, callerPrefix)

	go func() {
		stderr := strings.Builder{}
		err := NewCommand(ctx, "cat-file", "--batch-check").
			SetDescription(fmt.Sprintf("%s cat-file --batch-check [repo_path: %s] (%s:%d)", GitExecutable, repoPath, filename, line)).
			Run(&RunOpts{
				Dir:               repoPath,
				Stdin:             batchStdinReader,
				Stdout:            batchStdoutWriter,
				Stderr:            &stderr,
				UseContextTimeout: true,
			})
		if err != nil {
			_ = batchStdoutWriter.CloseWithError(ConcatenateError(err, (&stderr).String()))
			_ = batchStdinReader.CloseWithError(ConcatenateError(err, (&stderr).String()))
		} else {
			_ = batchStdoutWriter.Close()
			_ = batchStdinReader.Close()
		}
		close(closed)
	}()

	// For simplicity's sake we'll use a buffered reader to read from the cat-file --batch-check
	batchReader := bufio.NewReader(batchStdoutReader)

	return batchStdinWriter, batchReader, cancel
}
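
// batchCheckObjectSketch is an illustrative sketch, not part of the original
// file: the batch-check pipe is driven by writing an object name followed by LF
// to the returned writer and reading the header back with ReadBatchLine.
// "HEAD" is just an example revision.
func batchCheckObjectSketch(ctx context.Context, repoPath string) (typ string, size int64, err error) {
	wr, rd, cancel := CatFileBatchCheck(ctx, repoPath)
	defer cancel()
	if _, err = wr.Write([]byte("HEAD\n")); err != nil {
		return "", 0, err
	}
	_, typ, size, err = ReadBatchLine(rd)
	return typ, size, err
}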

// CatFileBatch opens git cat-file --batch in the provided repo and returns a stdin pipe, a stdout reader and a cancel function
func CatFileBatch(ctx context.Context, repoPath string) (WriteCloserError, *bufio.Reader, func()) {
	// We often want to feed the commits in order into cat-file --batch, followed by their trees and sub trees as necessary.
	// So let's create a batch stdin and stdout
	batchStdinReader, batchStdinWriter := io.Pipe()
	batchStdoutReader, batchStdoutWriter := nio.Pipe(buffer.New(32 * 1024))
	ctx, ctxCancel := context.WithCancel(ctx)
	closed := make(chan struct{})
	cancel := func() {
		ctxCancel()
		_ = batchStdinWriter.Close()
		_ = batchStdoutReader.Close()
		<-closed
	}

	// Ensure cancel is called as soon as the provided context is cancelled
	go func() {
		<-ctx.Done()
		cancel()
	}()

	_, filename, line, _ := runtime.Caller(2)
	filename = strings.TrimPrefix(filename, callerPrefix)

	go func() {
		stderr := strings.Builder{}
		err := NewCommand(ctx, "cat-file", "--batch").
			SetDescription(fmt.Sprintf("%s cat-file --batch [repo_path: %s] (%s:%d)", GitExecutable, repoPath, filename, line)).
			Run(&RunOpts{
				Dir:               repoPath,
				Stdin:             batchStdinReader,
				Stdout:            batchStdoutWriter,
				Stderr:            &stderr,
				UseContextTimeout: true,
			})
		if err != nil {
			_ = batchStdoutWriter.CloseWithError(ConcatenateError(err, (&stderr).String()))
			_ = batchStdinReader.CloseWithError(ConcatenateError(err, (&stderr).String()))
		} else {
			_ = batchStdoutWriter.Close()
			_ = batchStdinReader.Close()
		}
		close(closed)
	}()

	// For simplicity's sake we'll use a buffered reader to read from the cat-file --batch
	batchReader := bufio.NewReaderSize(batchStdoutReader, 32*1024)

	return batchStdinWriter, batchReader, cancel
}
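
// batchReadBlobSketch is an illustrative sketch, not part of the original file:
// with cat-file --batch the header line is followed by the object body plus a
// trailing LF, so the caller must consume (or discard) size+1 bytes before the
// next request. "HEAD:README.md" is just an example object name.
func batchReadBlobSketch(ctx context.Context, repoPath string) ([]byte, error) {
	wr, rd, cancel := CatFileBatch(ctx, repoPath)
	defer cancel()
	if _, err := wr.Write([]byte("HEAD:README.md\n")); err != nil {
		return nil, err
	}
	_, _, size, err := ReadBatchLine(rd)
	if err != nil {
		return nil, err
	}
	body := make([]byte, size)
	if _, err := io.ReadFull(rd, body); err != nil {
		return nil, err
	}
	// Discard the LF that terminates the object body
	return body, DiscardFull(rd, 1)
}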

// ReadBatchLine reads the header line from cat-file --batch
// We expect:
// <sha> SP <type> SP <size> LF
// sha is hex encoded here
func ReadBatchLine(rd *bufio.Reader) (sha []byte, typ string, size int64, err error) {
	typ, err = rd.ReadString('\n')
	if err != nil {
		return sha, typ, size, err
	}
	if len(typ) == 1 {
		// A lone LF here is a leftover terminator from a previous object - skip it and read the real header
		typ, err = rd.ReadString('\n')
		if err != nil {
			return sha, typ, size, err
		}
	}
	idx := strings.IndexByte(typ, ' ')
	if idx < 0 {
		log.Debug("missing space typ: %s", typ)
		return sha, typ, size, ErrNotExist{ID: string(sha)}
	}

	sha = []byte(typ[:idx])
	typ = typ[idx+1:]

	idx = strings.IndexByte(typ, ' ')
	if idx < 0 {
		return sha, typ, size, ErrNotExist{ID: string(sha)}
	}

	sizeStr := typ[idx+1 : len(typ)-1]
	typ = typ[:idx]

	size, err = strconv.ParseInt(sizeStr, 10, 64)
	return sha, typ, size, err
}
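
// readBatchLineSketch is an illustrative sketch, not part of the original file:
// it runs a hand-written header in the documented "<sha> SP <type> SP <size> LF"
// shape through ReadBatchLine. The SHA and size below are example data only.
func readBatchLineSketch() (string, string, int64, error) {
	header := "59fbf1fca9175a6d1293352d66b3a88bb7a081b2 commit 243\n"
	sha, typ, size, err := ReadBatchLine(bufio.NewReader(strings.NewReader(header)))
	// sha -> "59fbf1...", typ -> "commit", size -> 243
	return string(sha), typ, size, err
}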

// ReadTagObjectID reads a tag object ID hash from a cat-file --batch stream, throwing away the rest of the stream.
func ReadTagObjectID(rd *bufio.Reader, size int64) (string, error) {
	var id string
	var n int64
headerLoop:
	for {
		line, err := rd.ReadBytes('\n')
		if err != nil {
			return "", err
		}
		n += int64(len(line))
		idx := bytes.Index(line, []byte{' '})
		if idx < 0 {
			continue
		}

		if string(line[:idx]) == "object" {
			id = string(line[idx+1 : len(line)-1])
			break headerLoop
		}
	}

	// Discard the rest of the tag
	return id, DiscardFull(rd, size-n+1)
}

// ReadTreeID reads a tree ID from a cat-file --batch stream, throwing away the rest of the stream.
func ReadTreeID(rd *bufio.Reader, size int64) (string, error) {
	var id string
	var n int64
headerLoop:
	for {
		line, err := rd.ReadBytes('\n')
		if err != nil {
			return "", err
		}
		n += int64(len(line))
		idx := bytes.Index(line, []byte{' '})
		if idx < 0 {
			continue
		}

		if string(line[:idx]) == "tree" {
			id = string(line[idx+1 : len(line)-1])
			break headerLoop
		}
	}

	// Discard the rest of the commit
	return id, DiscardFull(rd, size-n+1)
}
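
// commitTreeIDSketch is an illustrative sketch, not part of the original file:
// after requesting a commit from cat-file --batch, ReadTreeID scans the commit
// headers for its "tree " line and discards the remainder of the object, leaving
// the reader positioned for the next request. commitID is assumed to name a
// commit reachable in the repository.
func commitTreeIDSketch(ctx context.Context, repoPath, commitID string) (string, error) {
	wr, rd, cancel := CatFileBatch(ctx, repoPath)
	defer cancel()
	if _, err := wr.Write([]byte(commitID + "\n")); err != nil {
		return "", err
	}
	_, typ, size, err := ReadBatchLine(rd)
	if err != nil {
		return "", err
	}
	if typ != "commit" {
		// Skip the unexpected object (plus its trailing LF) before bailing out
		_ = DiscardFull(rd, size+1)
		return "", fmt.Errorf("expected commit, got %s", typ)
	}
	return ReadTreeID(rd, size)
}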

// git tree files are a list:
// <mode-in-ascii> SP <fname> NUL <binary Hash>
//
// Unfortunately this binary notation is somewhat in conflict with all other git tools
// Therefore we need some method to convert these binary hashes to hex hashes

// constant hextable to help quickly convert between binary and hex representation
const hextable = "0123456789abcdef"

// BinToHex converts a binary Hash into a hex encoded one. Input and output can be the
// same byte slice to support in-place conversion without allocations.
// This is at least 100x quicker than hex.EncodeToString
func BinToHex(objectFormat ObjectFormat, sha, out []byte) []byte {
	for i := objectFormat.FullLength()/2 - 1; i >= 0; i-- {
		v := sha[i]
		vhi, vlo := v>>4, v&0x0f
		shi, slo := hextable[vhi], hextable[vlo]
		out[i*2], out[i*2+1] = shi, slo
	}
	return out
}
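
// binToHexSketch is an illustrative sketch, not part of the original file: it
// converts a binary hash (such as the shaBuf filled by ParseTreeLine) into its
// hex form. Allocating out at FullLength() lets BinToHex fill it in place.
func binToHexSketch(objectFormat ObjectFormat, binSha []byte) string {
	out := make([]byte, objectFormat.FullLength())
	return string(BinToHex(objectFormat, binSha, out))
}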

// ParseTreeLine reads an entry from a tree in a cat-file --batch stream
// This carefully avoids allocations - except where fnameBuf is too small.
// It is therefore recommended to pass in a fnameBuf large enough to avoid almost all allocations
//
// Each line is composed of:
// <mode-in-ascii-dropping-initial-zeros> SP <fname> NUL <binary HASH>
//
// We don't attempt to convert the raw HASH to save a lot of time
func ParseTreeLine(objectFormat ObjectFormat, rd *bufio.Reader, modeBuf, fnameBuf, shaBuf []byte) (mode, fname, sha []byte, n int, err error) {
	var readBytes []byte

	// Read the Mode & fname
	readBytes, err = rd.ReadSlice('\x00')
	if err != nil {
		return mode, fname, sha, n, err
	}
	idx := bytes.IndexByte(readBytes, ' ')
	if idx < 0 {
		log.Debug("missing space in readBytes ParseTreeLine: %s", readBytes)
		return mode, fname, sha, n, &ErrNotExist{}
	}

	n += idx + 1

	// Copy the mode into modeBuf, growing it only if it is too small
	copy(modeBuf, readBytes[:idx])
	if len(modeBuf) >= idx {
		modeBuf = modeBuf[:idx]
	} else {
		modeBuf = append(modeBuf, readBytes[len(modeBuf):idx]...)
	}
	mode = modeBuf

	readBytes = readBytes[idx+1:]

	// Deal with the fname
	copy(fnameBuf, readBytes)
	if len(fnameBuf) > len(readBytes) {
		fnameBuf = fnameBuf[:len(readBytes)]
	} else {
		fnameBuf = append(fnameBuf, readBytes[len(fnameBuf):]...)
	}
	for err == bufio.ErrBufferFull {
		// The filename was longer than the reader's buffer - keep reading until the NUL terminator
		readBytes, err = rd.ReadSlice('\x00')
		fnameBuf = append(fnameBuf, readBytes...)
	}
	n += len(fnameBuf)
	if err != nil {
		return mode, fname, sha, n, err
	}
	fnameBuf = fnameBuf[:len(fnameBuf)-1]
	fname = fnameBuf

	// Deal with the binary hash
	idx = 0
	length := objectFormat.FullLength() / 2
	for idx < length {
		var read int
		read, err = rd.Read(shaBuf[idx:length])
		n += read
		if err != nil {
			return mode, fname, sha, n, err
		}
		idx += read
	}
	sha = shaBuf
	return mode, fname, sha, n, err
}
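
// listTreeEntriesSketch is an illustrative sketch, not part of the original
// file: it requests a tree object over an already-open cat-file --batch pipe
// and walks its entries with ParseTreeLine, reusing scratch buffers between
// iterations to keep allocations low. The buffer sizes are example choices.
func listTreeEntriesSketch(objectFormat ObjectFormat, wr WriteCloserError, rd *bufio.Reader, treeID string) error {
	if _, err := wr.Write([]byte(treeID + "\n")); err != nil {
		return err
	}
	_, typ, size, err := ReadBatchLine(rd)
	if err != nil {
		return err
	}
	if typ != "tree" {
		return fmt.Errorf("expected tree, got %s", typ)
	}
	var (
		modeBuf  = make([]byte, 6)
		fnameBuf = make([]byte, 4096)
		shaBuf   = make([]byte, objectFormat.FullLength()/2)
		hexBuf   = make([]byte, objectFormat.FullLength())
	)
	for size > 0 {
		mode, fname, binSha, n, err := ParseTreeLine(objectFormat, rd, modeBuf, fnameBuf, shaBuf)
		if err != nil {
			return err
		}
		size -= int64(n)
		log.Debug("mode: %s name: %s sha: %s", mode, fname, BinToHex(objectFormat, binSha, hexBuf))
	}
	// Discard the LF that terminates the tree object
	return DiscardFull(rd, 1)
}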

var callerPrefix string

func init() {
	_, filename, _, _ := runtime.Caller(0)
	callerPrefix = strings.TrimSuffix(filename, "modules/git/batch_reader.go")
}

// DiscardFull discards exactly the given number of bytes from the reader,
// calling Discard repeatedly until the requested amount has been consumed.
func DiscardFull(rd *bufio.Reader, discard int64) error {
	if discard > math.MaxInt32 {
		n, err := rd.Discard(math.MaxInt32)
		discard -= int64(n)
		if err != nil {
			return err
		}
	}
	for discard > 0 {
		n, err := rd.Discard(int(discard))
		discard -= int64(n)
		if err != nil {
			return err
		}
	}
	return nil
}
  305. }