
pointer_scanner_nogogit.go

// Copyright 2021 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT

//go:build !gogit

package lfs

import (
	"bufio"
	"context"
	"io"
	"strconv"
	"strings"
	"sync"

	"code.gitea.io/gitea/modules/git"
	"code.gitea.io/gitea/modules/git/pipeline"
)
// SearchPointerBlobs scans the whole repository for LFS pointer files
func SearchPointerBlobs(ctx context.Context, repo *git.Repository, pointerChan chan<- PointerBlob, errChan chan<- error) {
	basePath := repo.Path
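	// Three in-memory pipes connect the stages started below: batch-check
	// output flows through catFileCheck*, the filtered blob SHAs through
	// shasToBatch*, and the cat-file --batch output through catFileBatch*
	// into the pointer parser.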
	catFileCheckReader, catFileCheckWriter := io.Pipe()
	shasToBatchReader, shasToBatchWriter := io.Pipe()
	catFileBatchReader, catFileBatchWriter := io.Pipe()
	wg := sync.WaitGroup{}
	wg.Add(4)

	// Create the go-routines in reverse order.

	// 4. Take the output of cat-file --batch and check each file in turn
	// to see if it is a pointer to a file in the LFS store
	go createPointerResultsFromCatFileBatch(ctx, catFileBatchReader, &wg, pointerChan)

	// 3. Take the shas of the blobs and batch read them
	go pipeline.CatFileBatch(ctx, shasToBatchReader, catFileBatchWriter, &wg, basePath)

	// 2. From the provided objects restrict to blobs <= 1024 bytes
	go pipeline.BlobsLessThan1024FromCatFileBatchCheck(catFileCheckReader, shasToBatchWriter, &wg)

	// 1. Run batch-check on all objects in the repository
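	// `git cat-file --batch-check --batch-all-objects` only exists from
	// git 2.6.0 onwards, so older versions fall back to feeding rev-list
	// output through a separate batch-check stage.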
	if !git.DefaultFeatures().CheckVersionAtLeast("2.6.0") {
		revListReader, revListWriter := io.Pipe()
		shasToCheckReader, shasToCheckWriter := io.Pipe()

		wg.Add(2)
		go pipeline.CatFileBatchCheck(ctx, shasToCheckReader, catFileCheckWriter, &wg, basePath)
		go pipeline.BlobsFromRevListObjects(revListReader, shasToCheckWriter, &wg)
		go pipeline.RevListAllObjects(ctx, revListWriter, &wg, basePath, errChan)
	} else {
		go pipeline.CatFileBatchCheckAllObjects(ctx, catFileCheckWriter, &wg, basePath, errChan)
	}
	wg.Wait()

	close(pointerChan)
	close(errChan)
}
// createPointerResultsFromCatFileBatch reads the output of cat-file --batch,
// parses each blob and sends every valid LFS pointer to pointerChan.
func createPointerResultsFromCatFileBatch(ctx context.Context, catFileBatchReader *io.PipeReader, wg *sync.WaitGroup, pointerChan chan<- PointerBlob) {
	defer wg.Done()
	defer catFileBatchReader.Close()

	bufferedReader := bufio.NewReader(catFileBatchReader)
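	// The 1025-byte buffer covers the 1024-byte upper bound enforced by
	// BlobsLessThan1024FromCatFileBatchCheck plus the trailing newline that
	// cat-file --batch appends after each object's content.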
	buf := make([]byte, 1025)
loop:
	for {
		select {
		case <-ctx.Done():
			break loop
		default:
		}

		// Batch header line: <sha> SP <type> SP <size> LF
		sha, err := bufferedReader.ReadString(' ')
		if err != nil {
			_ = catFileBatchReader.CloseWithError(err)
			break
		}
		sha = strings.TrimSpace(sha)
		// Throw away the object type ("blob")
		if _, err := bufferedReader.ReadString(' '); err != nil {
			_ = catFileBatchReader.CloseWithError(err)
			break
		}

		sizeStr, err := bufferedReader.ReadString('\n')
		if err != nil {
			_ = catFileBatchReader.CloseWithError(err)
			break
		}

		// Strip the trailing newline and parse the size
		size, err := strconv.Atoi(sizeStr[:len(sizeStr)-1])
		if err != nil {
			_ = catFileBatchReader.CloseWithError(err)
			break
		}
		pointerBuf := buf[:size+1]
		if _, err := io.ReadFull(bufferedReader, pointerBuf); err != nil {
			_ = catFileBatchReader.CloseWithError(err)
			break
		}
		pointerBuf = pointerBuf[:size]

		// Now we need to check if the pointerBuf is an LFS pointer.
		// The parse error is deliberately ignored: a non-pointer blob simply
		// yields an invalid pointer and is skipped.
		pointer, _ := ReadPointerFromBuffer(pointerBuf)
		if !pointer.IsValid() {
			continue
		}

		pointerChan <- PointerBlob{Hash: sha, Pointer: pointer}
	}
}
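// Usage sketch (an illustration, not part of the original file): a caller in
// another package would typically drain the two channels like this, where
// gitRepo is an already-open *git.Repository and ctx a context.Context; the
// one-slot buffer on errChan is assumed so that an error reported by the
// pipeline before the consumer reaches it cannot block the scan:
//
//	pointerChan := make(chan lfs.PointerBlob)
//	errChan := make(chan error, 1)
//	go lfs.SearchPointerBlobs(ctx, gitRepo, pointerChan, errChan)
//
//	for p := range pointerChan {
//		// p.Hash is the blob SHA, p.Pointer the parsed LFS pointer
//	}
//	if err, ok := <-errChan; ok && err != nil {
//		// the scan failed part way through; handle err
//	}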