You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

parser.go 11KB


  1. package packfile
  2. import (
  3. "bytes"
  4. "errors"
  5. "io"
  6. stdioutil "io/ioutil"
  7. "github.com/go-git/go-git/v5/plumbing"
  8. "github.com/go-git/go-git/v5/plumbing/cache"
  9. "github.com/go-git/go-git/v5/plumbing/storer"
  10. "github.com/go-git/go-git/v5/utils/ioutil"
  11. )
  // Sentinel errors returned by the Parser.
  var (
  	// ErrReferenceDeltaNotFound is returned when the base object of a
  	// reference delta lives outside the packfile (thin pack) and could not be
  	// resolved.
  	ErrReferenceDeltaNotFound = errors.New("reference delta not found")
  	// ErrNotSeekableSource is returned by NewParserWithStorage when the
  	// scanner source is not seekable and no storage was provided, so the
  	// packfile can't be parsed.
  	ErrNotSeekableSource = errors.New("parser source is not seekable and storage was not provided")
  	// ErrDeltaNotCached is returned when the source is not seekable and the
  	// raw delta for an object is missing from the in-memory delta map.
  	ErrDeltaNotCached = errors.New("delta could not be found in cache")
  )
  // Observer interface is implemented by index encoders. The Parser notifies
  // its observers in registration order and stops at the first callback that
  // returns a non-nil error, which is then returned to the Parser's caller.
  type Observer interface {
  	// OnHeader is called when a new packfile is opened. count is the number
  	// of objects announced by the packfile header.
  	OnHeader(count uint32) error
  	// OnInflatedObjectHeader is called for each object header read, with the
  	// object's type, its size and its offset in the packfile.
  	OnInflatedObjectHeader(t plumbing.ObjectType, objSize int64, pos int64) error
  	// OnInflatedObjectContent is called for each decoded object, with its
  	// hash, its offset in the packfile, its CRC32 and its content.
  	OnInflatedObjectContent(h plumbing.Hash, pos int64, crc uint32, content []byte) error
  	// OnFooter is called when decoding is done. h is the packfile checksum
  	// reported by the scanner.
  	OnFooter(h plumbing.Hash) error
  }
  // Parser decodes a packfile and calls any observer associated to it. Is used
  // to generate indexes.
  type Parser struct {
  	// storage optionally receives every decoded object; it is required when
  	// the scanner source is not seekable.
  	storage storer.EncodedObjectStorer
  	// scanner is the packfile reader the parser consumes.
  	scanner *Scanner
  	// count is the number of objects announced by the packfile header.
  	count uint32
  	// oi holds one entry per object, in the order they appear in the packfile.
  	oi []*objectInfo
  	// oiByHash indexes objects by hash, including placeholder parents created
  	// for reference deltas whose base is not in this packfile.
  	oiByHash map[plumbing.Hash]*objectInfo
  	// oiByOffset indexes objects by their offset in the packfile.
  	oiByOffset map[int64]*objectInfo
  	// NOTE(review): hashOffset is not referenced anywhere in this file —
  	// confirm whether it is still needed.
  	hashOffset map[plumbing.Hash]int64
  	// checksum is the packfile checksum reported by the scanner.
  	checksum plumbing.Hash
  	// cache keeps, by offset, the content of objects that have children.
  	cache *cache.BufferLRU
  	// delta content by offset, only used if source is not seekable
  	deltas map[int64][]byte
  	// ob is the list of observers notified during parsing.
  	ob []Observer
  }
  49. // NewParser creates a new Parser. The Scanner source must be seekable.
  50. // If it's not, NewParserWithStorage should be used instead.
  51. func NewParser(scanner *Scanner, ob ...Observer) (*Parser, error) {
  52. return NewParserWithStorage(scanner, nil, ob...)
  53. }
  54. // NewParserWithStorage creates a new Parser. The scanner source must either
  55. // be seekable or a storage must be provided.
  56. func NewParserWithStorage(
  57. scanner *Scanner,
  58. storage storer.EncodedObjectStorer,
  59. ob ...Observer,
  60. ) (*Parser, error) {
  61. if !scanner.IsSeekable && storage == nil {
  62. return nil, ErrNotSeekableSource
  63. }
  64. var deltas map[int64][]byte
  65. if !scanner.IsSeekable {
  66. deltas = make(map[int64][]byte)
  67. }
  68. return &Parser{
  69. storage: storage,
  70. scanner: scanner,
  71. ob: ob,
  72. count: 0,
  73. cache: cache.NewBufferLRUDefault(),
  74. deltas: deltas,
  75. }, nil
  76. }
  77. func (p *Parser) forEachObserver(f func(o Observer) error) error {
  78. for _, o := range p.ob {
  79. if err := f(o); err != nil {
  80. return err
  81. }
  82. }
  83. return nil
  84. }
  85. func (p *Parser) onHeader(count uint32) error {
  86. return p.forEachObserver(func(o Observer) error {
  87. return o.OnHeader(count)
  88. })
  89. }
  90. func (p *Parser) onInflatedObjectHeader(
  91. t plumbing.ObjectType,
  92. objSize int64,
  93. pos int64,
  94. ) error {
  95. return p.forEachObserver(func(o Observer) error {
  96. return o.OnInflatedObjectHeader(t, objSize, pos)
  97. })
  98. }
  99. func (p *Parser) onInflatedObjectContent(
  100. h plumbing.Hash,
  101. pos int64,
  102. crc uint32,
  103. content []byte,
  104. ) error {
  105. return p.forEachObserver(func(o Observer) error {
  106. return o.OnInflatedObjectContent(h, pos, crc, content)
  107. })
  108. }
  109. func (p *Parser) onFooter(h plumbing.Hash) error {
  110. return p.forEachObserver(func(o Observer) error {
  111. return o.OnFooter(h)
  112. })
  113. }
  114. // Parse start decoding phase of the packfile.
  115. func (p *Parser) Parse() (plumbing.Hash, error) {
  116. if err := p.init(); err != nil {
  117. return plumbing.ZeroHash, err
  118. }
  119. if err := p.indexObjects(); err != nil {
  120. return plumbing.ZeroHash, err
  121. }
  122. var err error
  123. p.checksum, err = p.scanner.Checksum()
  124. if err != nil && err != io.EOF {
  125. return plumbing.ZeroHash, err
  126. }
  127. if err := p.resolveDeltas(); err != nil {
  128. return plumbing.ZeroHash, err
  129. }
  130. if err := p.onFooter(p.checksum); err != nil {
  131. return plumbing.ZeroHash, err
  132. }
  133. return p.checksum, nil
  134. }
  135. func (p *Parser) init() error {
  136. _, c, err := p.scanner.Header()
  137. if err != nil {
  138. return err
  139. }
  140. if err := p.onHeader(c); err != nil {
  141. return err
  142. }
  143. p.count = c
  144. p.oiByHash = make(map[plumbing.Hash]*objectInfo, p.count)
  145. p.oiByOffset = make(map[int64]*objectInfo, p.count)
  146. p.oi = make([]*objectInfo, p.count)
  147. return nil
  148. }
  149. func (p *Parser) indexObjects() error {
  150. buf := new(bytes.Buffer)
  151. for i := uint32(0); i < p.count; i++ {
  152. buf.Reset()
  153. oh, err := p.scanner.NextObjectHeader()
  154. if err != nil {
  155. return err
  156. }
  157. delta := false
  158. var ota *objectInfo
  159. switch t := oh.Type; t {
  160. case plumbing.OFSDeltaObject:
  161. delta = true
  162. parent, ok := p.oiByOffset[oh.OffsetReference]
  163. if !ok {
  164. return plumbing.ErrObjectNotFound
  165. }
  166. ota = newDeltaObject(oh.Offset, oh.Length, t, parent)
  167. parent.Children = append(parent.Children, ota)
  168. case plumbing.REFDeltaObject:
  169. delta = true
  170. parent, ok := p.oiByHash[oh.Reference]
  171. if !ok {
  172. // can't find referenced object in this pack file
  173. // this must be a "thin" pack.
  174. parent = &objectInfo{ //Placeholder parent
  175. SHA1: oh.Reference,
  176. ExternalRef: true, // mark as an external reference that must be resolved
  177. Type: plumbing.AnyObject,
  178. DiskType: plumbing.AnyObject,
  179. }
  180. p.oiByHash[oh.Reference] = parent
  181. }
  182. ota = newDeltaObject(oh.Offset, oh.Length, t, parent)
  183. parent.Children = append(parent.Children, ota)
  184. default:
  185. ota = newBaseObject(oh.Offset, oh.Length, t)
  186. }
  187. _, crc, err := p.scanner.NextObject(buf)
  188. if err != nil {
  189. return err
  190. }
  191. ota.Crc32 = crc
  192. ota.Length = oh.Length
  193. data := buf.Bytes()
  194. if !delta {
  195. sha1, err := getSHA1(ota.Type, data)
  196. if err != nil {
  197. return err
  198. }
  199. ota.SHA1 = sha1
  200. p.oiByHash[ota.SHA1] = ota
  201. }
  202. if p.storage != nil && !delta {
  203. obj := new(plumbing.MemoryObject)
  204. obj.SetSize(oh.Length)
  205. obj.SetType(oh.Type)
  206. if _, err := obj.Write(data); err != nil {
  207. return err
  208. }
  209. if _, err := p.storage.SetEncodedObject(obj); err != nil {
  210. return err
  211. }
  212. }
  213. if delta && !p.scanner.IsSeekable {
  214. p.deltas[oh.Offset] = make([]byte, len(data))
  215. copy(p.deltas[oh.Offset], data)
  216. }
  217. p.oiByOffset[oh.Offset] = ota
  218. p.oi[i] = ota
  219. }
  220. return nil
  221. }
  222. func (p *Parser) resolveDeltas() error {
  223. buf := &bytes.Buffer{}
  224. for _, obj := range p.oi {
  225. buf.Reset()
  226. err := p.get(obj, buf)
  227. if err != nil {
  228. return err
  229. }
  230. content := buf.Bytes()
  231. if err := p.onInflatedObjectHeader(obj.Type, obj.Length, obj.Offset); err != nil {
  232. return err
  233. }
  234. if err := p.onInflatedObjectContent(obj.SHA1, obj.Offset, obj.Crc32, content); err != nil {
  235. return err
  236. }
  237. if !obj.IsDelta() && len(obj.Children) > 0 {
  238. for _, child := range obj.Children {
  239. if err := p.resolveObject(stdioutil.Discard, child, content); err != nil {
  240. return err
  241. }
  242. }
  243. // Remove the delta from the cache.
  244. if obj.DiskType.IsDelta() && !p.scanner.IsSeekable {
  245. delete(p.deltas, obj.Offset)
  246. }
  247. }
  248. }
  249. return nil
  250. }
  251. func (p *Parser) get(o *objectInfo, buf *bytes.Buffer) (err error) {
  252. if !o.ExternalRef { // skip cache check for placeholder parents
  253. b, ok := p.cache.Get(o.Offset)
  254. if ok {
  255. _, err := buf.Write(b)
  256. return err
  257. }
  258. }
  259. // If it's not on the cache and is not a delta we can try to find it in the
  260. // storage, if there's one. External refs must enter here.
  261. if p.storage != nil && !o.Type.IsDelta() {
  262. var e plumbing.EncodedObject
  263. e, err = p.storage.EncodedObject(plumbing.AnyObject, o.SHA1)
  264. if err != nil {
  265. return err
  266. }
  267. o.Type = e.Type()
  268. var r io.ReadCloser
  269. r, err = e.Reader()
  270. if err != nil {
  271. return err
  272. }
  273. defer ioutil.CheckClose(r, &err)
  274. _, err = buf.ReadFrom(io.LimitReader(r, e.Size()))
  275. return err
  276. }
  277. if o.ExternalRef {
  278. // we were not able to resolve a ref in a thin pack
  279. return ErrReferenceDeltaNotFound
  280. }
  281. if o.DiskType.IsDelta() {
  282. b := bufPool.Get().(*bytes.Buffer)
  283. defer bufPool.Put(b)
  284. b.Reset()
  285. err := p.get(o.Parent, b)
  286. if err != nil {
  287. return err
  288. }
  289. base := b.Bytes()
  290. err = p.resolveObject(buf, o, base)
  291. if err != nil {
  292. return err
  293. }
  294. } else {
  295. err := p.readData(buf, o)
  296. if err != nil {
  297. return err
  298. }
  299. }
  300. if len(o.Children) > 0 {
  301. data := make([]byte, buf.Len())
  302. copy(data, buf.Bytes())
  303. p.cache.Put(o.Offset, data)
  304. }
  305. return nil
  306. }
  307. func (p *Parser) resolveObject(
  308. w io.Writer,
  309. o *objectInfo,
  310. base []byte,
  311. ) error {
  312. if !o.DiskType.IsDelta() {
  313. return nil
  314. }
  315. buf := bufPool.Get().(*bytes.Buffer)
  316. defer bufPool.Put(buf)
  317. buf.Reset()
  318. err := p.readData(buf, o)
  319. if err != nil {
  320. return err
  321. }
  322. data := buf.Bytes()
  323. data, err = applyPatchBase(o, data, base)
  324. if err != nil {
  325. return err
  326. }
  327. if p.storage != nil {
  328. obj := new(plumbing.MemoryObject)
  329. obj.SetSize(o.Size())
  330. obj.SetType(o.Type)
  331. if _, err := obj.Write(data); err != nil {
  332. return err
  333. }
  334. if _, err := p.storage.SetEncodedObject(obj); err != nil {
  335. return err
  336. }
  337. }
  338. _, err = w.Write(data)
  339. return err
  340. }
  341. func (p *Parser) readData(w io.Writer, o *objectInfo) error {
  342. if !p.scanner.IsSeekable && o.DiskType.IsDelta() {
  343. data, ok := p.deltas[o.Offset]
  344. if !ok {
  345. return ErrDeltaNotCached
  346. }
  347. _, err := w.Write(data)
  348. return err
  349. }
  350. if _, err := p.scanner.SeekObjectHeader(o.Offset); err != nil {
  351. return err
  352. }
  353. if _, _, err := p.scanner.NextObject(w); err != nil {
  354. return err
  355. }
  356. return nil
  357. }
  358. func applyPatchBase(ota *objectInfo, data, base []byte) ([]byte, error) {
  359. patched, err := PatchDelta(base, data)
  360. if err != nil {
  361. return nil, err
  362. }
  363. if ota.SHA1 == plumbing.ZeroHash {
  364. ota.Type = ota.Parent.Type
  365. sha1, err := getSHA1(ota.Type, patched)
  366. if err != nil {
  367. return nil, err
  368. }
  369. ota.SHA1 = sha1
  370. ota.Length = int64(len(patched))
  371. }
  372. return patched, nil
  373. }
  374. func getSHA1(t plumbing.ObjectType, data []byte) (plumbing.Hash, error) {
  375. hasher := plumbing.NewHasher(t, int64(len(data)))
  376. if _, err := hasher.Write(data); err != nil {
  377. return plumbing.ZeroHash, err
  378. }
  379. return hasher.Sum(), nil
  380. }
  // objectInfo is the parser's bookkeeping record for one object of the
  // packfile, or for a placeholder parent that lives outside of it.
  type objectInfo struct {
  	// Offset is the position of the object in the packfile.
  	Offset int64
  	// Length is the object size from its header; for deltas it is replaced
  	// by the size of the patched content once resolved.
  	Length int64
  	// Type starts as the on-disk type; a resolved delta takes its parent's
  	// type.
  	Type plumbing.ObjectType
  	// DiskType is the type the object has in the packfile.
  	DiskType plumbing.ObjectType
  	ExternalRef bool // indicates this is an external reference in a thin pack file
  	// Crc32 is the checksum reported by the scanner for the object.
  	Crc32 uint32
  	// Parent is the delta base; nil for non-delta objects.
  	Parent *objectInfo
  	// Children are the deltas that use this object as their base.
  	Children []*objectInfo
  	// SHA1 is the object hash; it stays zero for a delta until it is resolved.
  	SHA1 plumbing.Hash
  }
  392. func newBaseObject(offset, length int64, t plumbing.ObjectType) *objectInfo {
  393. return newDeltaObject(offset, length, t, nil)
  394. }
  395. func newDeltaObject(
  396. offset, length int64,
  397. t plumbing.ObjectType,
  398. parent *objectInfo,
  399. ) *objectInfo {
  400. obj := &objectInfo{
  401. Offset: offset,
  402. Length: length,
  403. Type: t,
  404. DiskType: t,
  405. Crc32: 0,
  406. Parent: parent,
  407. }
  408. return obj
  409. }
  410. func (o *objectInfo) IsDelta() bool {
  411. return o.Type.IsDelta()
  412. }
  413. func (o *objectInfo) Size() int64 {
  414. return o.Length
  415. }