
packfile.go

package packfile

import (
    "bytes"
    "io"
    "os"

    billy "github.com/go-git/go-billy/v5"
    "github.com/go-git/go-git/v5/plumbing"
    "github.com/go-git/go-git/v5/plumbing/cache"
    "github.com/go-git/go-git/v5/plumbing/format/idxfile"
    "github.com/go-git/go-git/v5/plumbing/storer"
    "github.com/go-git/go-git/v5/utils/ioutil"
)

var (
    // ErrInvalidObject is returned by Decode when an invalid object is
    // found in the packfile.
    ErrInvalidObject = NewError("invalid git object")

    // ErrZLib is returned by Decode when there was an error unzipping
    // the packfile contents.
    ErrZLib = NewError("zlib reading error")
)
// When reading small objects from the packfile, it is beneficial to do so at
// once to exploit the buffered I/O. In many cases the objects are so small
// that they were already loaded into memory when the object header was read
// from the packfile. Wrapping them in an FSObject would cause this buffered
// data to be thrown away and then re-read later, with the additional seeking
// causing reloads from disk. Objects smaller than this threshold are
// therefore always read into memory and stored in the cache instead of being
// wrapped in an FSObject.
const smallObjectThreshold = 16 * 1024
// Packfile allows retrieving information from inside a packfile.
type Packfile struct {
    idxfile.Index
    fs             billy.Filesystem
    file           billy.File
    s              *Scanner
    deltaBaseCache cache.Object
    offsetToType   map[int64]plumbing.ObjectType
}

// NewPackfileWithCache creates a new Packfile with the given object cache.
// If the filesystem is provided, the packfile will return FSObjects, otherwise
// it will return MemoryObjects.
func NewPackfileWithCache(
    index idxfile.Index,
    fs billy.Filesystem,
    file billy.File,
    cache cache.Object,
) *Packfile {
    s := NewScanner(file)
    return &Packfile{
        index,
        fs,
        file,
        s,
        cache,
        make(map[int64]plumbing.ObjectType),
    }
}

// NewPackfile returns a packfile representation for the given packfile file
// and packfile idx.
// If the filesystem is provided, the packfile will return FSObjects, otherwise
// it will return MemoryObjects.
func NewPackfile(index idxfile.Index, fs billy.Filesystem, file billy.File) *Packfile {
    return NewPackfileWithCache(index, fs, file, cache.NewObjectLRUDefault())
}
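
// Example usage (an illustrative sketch, not a definitive recipe): build a
// Packfile from a pack and its idx on disk, then read one object by hash.
// The file names and the hash literal are placeholders, error handling is
// elided, and the go-billy osfs and go-git idxfile packages are assumed to
// be imported.
//
//    fs := osfs.New("/repo/.git/objects/pack")
//    idxFile, _ := fs.Open("pack-<hash>.idx")
//    idx := idxfile.NewMemoryIndex()
//    _ = idxfile.NewDecoder(idxFile).Decode(idx)
//    _ = idxFile.Close()
//
//    packFile, _ := fs.Open("pack-<hash>.pack")
//    p := NewPackfile(idx, fs, packFile)
//    defer p.Close()
//
//    obj, _ := p.Get(plumbing.NewHash("<object sha1>"))
//    r, _ := obj.Reader()
//    defer r.Close()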
// Get retrieves the encoded object in the packfile with the given hash.
func (p *Packfile) Get(h plumbing.Hash) (plumbing.EncodedObject, error) {
    offset, err := p.FindOffset(h)
    if err != nil {
        return nil, err
    }

    return p.objectAtOffset(offset, h)
}

// GetByOffset retrieves the encoded object from the packfile at the given
// offset.
func (p *Packfile) GetByOffset(o int64) (plumbing.EncodedObject, error) {
    hash, err := p.FindHash(o)
    if err != nil {
        return nil, err
    }

    return p.objectAtOffset(o, hash)
}

// GetSizeByOffset retrieves the size of the encoded object from the
// packfile with the given offset.
func (p *Packfile) GetSizeByOffset(o int64) (size int64, err error) {
    if _, err := p.s.SeekFromStart(o); err != nil {
        if err == io.EOF || isInvalid(err) {
            return 0, plumbing.ErrObjectNotFound
        }

        return 0, err
    }

    h, err := p.nextObjectHeader()
    if err != nil {
        return 0, err
    }
    return p.getObjectSize(h)
}
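
// Example (illustrative sketch): an object's size can be looked up without
// materializing its contents by combining FindOffset, provided by the
// embedded idxfile.Index, with GetSizeByOffset. The hash literal below is a
// placeholder.
//
//    offset, err := p.FindOffset(plumbing.NewHash("<object sha1>"))
//    if err != nil {
//        return err
//    }
//    size, err := p.GetSizeByOffset(offset)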
func (p *Packfile) objectHeaderAtOffset(offset int64) (*ObjectHeader, error) {
    h, err := p.s.SeekObjectHeader(offset)
    p.s.pendingObject = nil
    return h, err
}

func (p *Packfile) nextObjectHeader() (*ObjectHeader, error) {
    h, err := p.s.NextObjectHeader()
    p.s.pendingObject = nil
    return h, err
}
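
// getDeltaObjectSize returns the expanded (target) size recorded in a delta
// buffer. A git delta stream begins with two LEB128 varints, the size of the
// base object followed by the size of the resulting object, so the first
// varint is skipped and the second one is returned.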
func (p *Packfile) getDeltaObjectSize(buf *bytes.Buffer) int64 {
    delta := buf.Bytes()
    _, delta = decodeLEB128(delta) // skip src size
    sz, _ := decodeLEB128(delta)
    return int64(sz)
}

func (p *Packfile) getObjectSize(h *ObjectHeader) (int64, error) {
    switch h.Type {
    case plumbing.CommitObject, plumbing.TreeObject, plumbing.BlobObject, plumbing.TagObject:
        return h.Length, nil
    case plumbing.REFDeltaObject, plumbing.OFSDeltaObject:
        buf := bufPool.Get().(*bytes.Buffer)
        defer bufPool.Put(buf)
        buf.Reset()

        if _, _, err := p.s.NextObject(buf); err != nil {
            return 0, err
        }

        return p.getDeltaObjectSize(buf), nil
    default:
        return 0, ErrInvalidObject.AddDetails("type %q", h.Type)
    }
}
func (p *Packfile) getObjectType(h *ObjectHeader) (typ plumbing.ObjectType, err error) {
    switch h.Type {
    case plumbing.CommitObject, plumbing.TreeObject, plumbing.BlobObject, plumbing.TagObject:
        return h.Type, nil
    case plumbing.REFDeltaObject, plumbing.OFSDeltaObject:
        var offset int64
        if h.Type == plumbing.REFDeltaObject {
            offset, err = p.FindOffset(h.Reference)
            if err != nil {
                return
            }
        } else {
            offset = h.OffsetReference
        }

        if baseType, ok := p.offsetToType[offset]; ok {
            typ = baseType
        } else {
            h, err = p.objectHeaderAtOffset(offset)
            if err != nil {
                return
            }

            typ, err = p.getObjectType(h)
            if err != nil {
                return
            }
        }
    default:
        err = ErrInvalidObject.AddDetails("type %q", h.Type)
    }

    p.offsetToType[h.Offset] = typ

    return
}

func (p *Packfile) objectAtOffset(offset int64, hash plumbing.Hash) (plumbing.EncodedObject, error) {
    if obj, ok := p.cacheGet(hash); ok {
        return obj, nil
    }

    h, err := p.objectHeaderAtOffset(offset)
    if err != nil {
        if err == io.EOF || isInvalid(err) {
            return nil, plumbing.ErrObjectNotFound
        }
        return nil, err
    }

    return p.getNextObject(h, hash)
}
func (p *Packfile) getNextObject(h *ObjectHeader, hash plumbing.Hash) (plumbing.EncodedObject, error) {
    var err error

    // If we have no filesystem, we will return a MemoryObject instead
    // of an FSObject.
    if p.fs == nil {
        return p.getNextMemoryObject(h)
    }

    // If the object is small enough, read it completely into memory now, since
    // it has already been read from disk into a buffer anyway. For delta
    // objects we want to perform the same optimization, but we have to be
    // careful about applying small deltas on big objects.
    var size int64
    if h.Length <= smallObjectThreshold {
        if h.Type != plumbing.OFSDeltaObject && h.Type != plumbing.REFDeltaObject {
            return p.getNextMemoryObject(h)
        }

        // For delta objects we read the delta data and apply the small-object
        // optimization only if the expanded version of the object still meets
        // the small-object threshold condition.
        buf := bufPool.Get().(*bytes.Buffer)
        defer bufPool.Put(buf)
        buf.Reset()
        if _, _, err := p.s.NextObject(buf); err != nil {
            return nil, err
        }

        size = p.getDeltaObjectSize(buf)
        if size <= smallObjectThreshold {
            var obj = new(plumbing.MemoryObject)
            obj.SetSize(size)
            if h.Type == plumbing.REFDeltaObject {
                err = p.fillREFDeltaObjectContentWithBuffer(obj, h.Reference, buf)
            } else {
                err = p.fillOFSDeltaObjectContentWithBuffer(obj, h.OffsetReference, buf)
            }
            return obj, err
        }
    } else {
        size, err = p.getObjectSize(h)
        if err != nil {
            return nil, err
        }
    }

    typ, err := p.getObjectType(h)
    if err != nil {
        return nil, err
    }

    p.offsetToType[h.Offset] = typ

    return NewFSObject(
        hash,
        typ,
        h.Offset,
        size,
        p.Index,
        p.fs,
        p.file.Name(),
        p.deltaBaseCache,
    ), nil
}
func (p *Packfile) getObjectContent(offset int64) (io.ReadCloser, error) {
    h, err := p.objectHeaderAtOffset(offset)
    if err != nil {
        return nil, err
    }

    // getObjectContent is called from FSObject, so we have to explicitly
    // build a memory object here to avoid a recursive cycle.
    obj, err := p.getNextMemoryObject(h)
    if err != nil {
        return nil, err
    }

    return obj.Reader()
}
func (p *Packfile) getNextMemoryObject(h *ObjectHeader) (plumbing.EncodedObject, error) {
    var obj = new(plumbing.MemoryObject)
    obj.SetSize(h.Length)
    obj.SetType(h.Type)

    var err error
    switch h.Type {
    case plumbing.CommitObject, plumbing.TreeObject, plumbing.BlobObject, plumbing.TagObject:
        err = p.fillRegularObjectContent(obj)
    case plumbing.REFDeltaObject:
        err = p.fillREFDeltaObjectContent(obj, h.Reference)
    case plumbing.OFSDeltaObject:
        err = p.fillOFSDeltaObjectContent(obj, h.OffsetReference)
    default:
        err = ErrInvalidObject.AddDetails("type %q", h.Type)
    }

    if err != nil {
        return nil, err
    }

    p.offsetToType[h.Offset] = obj.Type()

    return obj, nil
}

func (p *Packfile) fillRegularObjectContent(obj plumbing.EncodedObject) (err error) {
    w, err := obj.Writer()
    if err != nil {
        return err
    }

    defer ioutil.CheckClose(w, &err)

    _, _, err = p.s.NextObject(w)
    p.cachePut(obj)

    return err
}
func (p *Packfile) fillREFDeltaObjectContent(obj plumbing.EncodedObject, ref plumbing.Hash) error {
    buf := bufPool.Get().(*bytes.Buffer)
    defer bufPool.Put(buf)
    buf.Reset()
    _, _, err := p.s.NextObject(buf)
    if err != nil {
        return err
    }

    return p.fillREFDeltaObjectContentWithBuffer(obj, ref, buf)
}

func (p *Packfile) fillREFDeltaObjectContentWithBuffer(obj plumbing.EncodedObject, ref plumbing.Hash, buf *bytes.Buffer) error {
    var err error

    base, ok := p.cacheGet(ref)
    if !ok {
        base, err = p.Get(ref)
        if err != nil {
            return err
        }
    }

    obj.SetType(base.Type())
    err = ApplyDelta(obj, base, buf.Bytes())
    p.cachePut(obj)

    return err
}

func (p *Packfile) fillOFSDeltaObjectContent(obj plumbing.EncodedObject, offset int64) error {
    buf := bufPool.Get().(*bytes.Buffer)
    defer bufPool.Put(buf)
    buf.Reset()
    _, _, err := p.s.NextObject(buf)
    if err != nil {
        return err
    }

    return p.fillOFSDeltaObjectContentWithBuffer(obj, offset, buf)
}

func (p *Packfile) fillOFSDeltaObjectContentWithBuffer(obj plumbing.EncodedObject, offset int64, buf *bytes.Buffer) error {
    hash, err := p.FindHash(offset)
    if err != nil {
        return err
    }

    base, err := p.objectAtOffset(offset, hash)
    if err != nil {
        return err
    }

    obj.SetType(base.Type())
    err = ApplyDelta(obj, base, buf.Bytes())
    p.cachePut(obj)

    return err
}
func (p *Packfile) cacheGet(h plumbing.Hash) (plumbing.EncodedObject, bool) {
    if p.deltaBaseCache == nil {
        return nil, false
    }

    return p.deltaBaseCache.Get(h)
}

func (p *Packfile) cachePut(obj plumbing.EncodedObject) {
    if p.deltaBaseCache == nil {
        return
    }

    p.deltaBaseCache.Put(obj)
}

// GetAll returns an iterator with all encoded objects in the packfile.
// The returned iterator is not thread-safe; it should be used in the same
// thread as the Packfile instance.
func (p *Packfile) GetAll() (storer.EncodedObjectIter, error) {
    return p.GetByType(plumbing.AnyObject)
}
// GetByType returns all the objects of the given type.
func (p *Packfile) GetByType(typ plumbing.ObjectType) (storer.EncodedObjectIter, error) {
    switch typ {
    case plumbing.AnyObject,
        plumbing.BlobObject,
        plumbing.TreeObject,
        plumbing.CommitObject,
        plumbing.TagObject:
        entries, err := p.EntriesByOffset()
        if err != nil {
            return nil, err
        }

        return &objectIter{
            // The easiest way to provide an object decoder is to pass this
            // Packfile instance itself, so the iterator shares its cache and
            // its offset-to-type map and reuses as much cached work as
            // possible.
            p:    p,
            iter: entries,
            typ:  typ,
        }, nil
    default:
        return nil, plumbing.ErrInvalidType
    }
}
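
// Example (illustrative sketch): iterate over every commit stored in the
// packfile via GetByType and the iterator's ForEach helper; "fmt" is assumed
// to be imported.
//
//    iter, err := p.GetByType(plumbing.CommitObject)
//    if err != nil {
//        return err
//    }
//    defer iter.Close()
//
//    err = iter.ForEach(func(obj plumbing.EncodedObject) error {
//        fmt.Println(obj.Hash(), obj.Size())
//        return nil
//    })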
// ID returns the ID of the packfile, which is the checksum at the end of it.
func (p *Packfile) ID() (plumbing.Hash, error) {
    prev, err := p.file.Seek(-20, io.SeekEnd)
    if err != nil {
        return plumbing.ZeroHash, err
    }

    var hash plumbing.Hash
    if _, err := io.ReadFull(p.file, hash[:]); err != nil {
        return plumbing.ZeroHash, err
    }

    if _, err := p.file.Seek(prev, io.SeekStart); err != nil {
        return plumbing.ZeroHash, err
    }

    return hash, nil
}

// Scanner returns the packfile's Scanner.
func (p *Packfile) Scanner() *Scanner {
    return p.s
}

// Close the packfile and its resources.
func (p *Packfile) Close() error {
    closer, ok := p.file.(io.Closer)
    if !ok {
        return nil
    }

    return closer.Close()
}
type objectIter struct {
    p    *Packfile
    typ  plumbing.ObjectType
    iter idxfile.EntryIter
}

func (i *objectIter) Next() (plumbing.EncodedObject, error) {
    for {
        e, err := i.iter.Next()
        if err != nil {
            return nil, err
        }

        if i.typ != plumbing.AnyObject {
            if typ, ok := i.p.offsetToType[int64(e.Offset)]; ok {
                if typ != i.typ {
                    continue
                }
            } else if obj, ok := i.p.cacheGet(e.Hash); ok {
                if obj.Type() != i.typ {
                    i.p.offsetToType[int64(e.Offset)] = obj.Type()
                    continue
                }
                return obj, nil
            } else {
                h, err := i.p.objectHeaderAtOffset(int64(e.Offset))
                if err != nil {
                    return nil, err
                }

                if h.Type == plumbing.REFDeltaObject || h.Type == plumbing.OFSDeltaObject {
                    typ, err := i.p.getObjectType(h)
                    if err != nil {
                        return nil, err
                    }
                    if typ != i.typ {
                        i.p.offsetToType[int64(e.Offset)] = typ
                        continue
                    }
                    // getObjectType will seek in the file, so we cannot use
                    // getNextObject safely.
                    return i.p.objectAtOffset(int64(e.Offset), e.Hash)
                } else {
                    if h.Type != i.typ {
                        i.p.offsetToType[int64(e.Offset)] = h.Type
                        continue
                    }
                    return i.p.getNextObject(h, e.Hash)
                }
            }
        }

        obj, err := i.p.objectAtOffset(int64(e.Offset), e.Hash)
        if err != nil {
            return nil, err
        }

        return obj, nil
    }
}
func (i *objectIter) ForEach(f func(plumbing.EncodedObject) error) error {
    for {
        o, err := i.Next()
        if err != nil {
            if err == io.EOF {
                return nil
            }
            return err
        }

        if err := f(o); err != nil {
            return err
        }
    }
}

func (i *objectIter) Close() {
    i.iter.Close()
}
// isInvalid checks whether an error is an os.PathError with an os.ErrInvalid
// error inside. It also checks for the Windows error, which is different from
// os.ErrInvalid.
func isInvalid(err error) bool {
    pe, ok := err.(*os.PathError)
    if !ok {
        return false
    }

    errstr := pe.Err.Error()
    return errstr == errInvalidUnix || errstr == errInvalidWindows
}

// errInvalidWindows is the Windows equivalent to os.ErrInvalid
const errInvalidWindows = "The parameter is incorrect."

var errInvalidUnix = os.ErrInvalid.Error()