You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

format.go 16KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721
  1. // Copyright 2014-2021 Ulrich Kunitz. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. package xz
  5. import (
  6. "bytes"
  7. "crypto/sha256"
  8. "errors"
  9. "fmt"
  10. "hash"
  11. "hash/crc32"
  12. "io"
  13. "github.com/ulikunitz/xz/lzma"
  14. )
  // allZeros reports whether every byte in p is zero. A nil or empty slice
// is considered to consist only of zeros.
func allZeros(p []byte) bool {
	for i := range p {
		if p[i] != 0 {
			return false
		}
	}
	return true
}
  // padLen returns the number of padding bytes needed to round n up to the
// next multiple of four. For non-negative n the result is in the range
// 0..3. For a negative n that is not a multiple of four the negative
// remainder n%4 is returned unchanged.
func padLen(n int64) int {
	rem := int(n % 4)
	if rem <= 0 {
		// Already aligned, or a negative remainder that is passed
		// through as is.
		return rem
	}
	return 4 - rem
}
  33. /*** Header ***/
  // headerMagic holds the six magic bytes that open an xz file header.
var headerMagic = []byte("\xfd7zXZ\x00")

// HeaderLen is the length in bytes of the xz file header.
const HeaderLen = 12

// Flag values identifying the checksum methods supported by xz.
const (
	// None selects no checksum.
	None byte = 0x00
	// CRC32 selects the CRC-32 checksum.
	CRC32 byte = 0x01
	// CRC64 selects the CRC-64 checksum.
	CRC64 byte = 0x04
	// SHA256 selects the SHA-256 checksum.
	SHA256 byte = 0x0a
)
  45. // errInvalidFlags indicates that flags are invalid.
  46. var errInvalidFlags = errors.New("xz: invalid flags")
  47. // verifyFlags returns the error errInvalidFlags if the value is
  48. // invalid.
  49. func verifyFlags(flags byte) error {
  50. switch flags {
  51. case None, CRC32, CRC64, SHA256:
  52. return nil
  53. default:
  54. return errInvalidFlags
  55. }
  56. }
  57. // flagstrings maps flag values to strings.
  58. var flagstrings = map[byte]string{
  59. None: "None",
  60. CRC32: "CRC-32",
  61. CRC64: "CRC-64",
  62. SHA256: "SHA-256",
  63. }
  64. // flagString returns the string representation for the given flags.
  65. func flagString(flags byte) string {
  66. s, ok := flagstrings[flags]
  67. if !ok {
  68. return "invalid"
  69. }
  70. return s
  71. }
  72. // newHashFunc returns a function that creates hash instances for the
  73. // hash method encoded in flags.
  74. func newHashFunc(flags byte) (newHash func() hash.Hash, err error) {
  75. switch flags {
  76. case None:
  77. newHash = newNoneHash
  78. case CRC32:
  79. newHash = newCRC32
  80. case CRC64:
  81. newHash = newCRC64
  82. case SHA256:
  83. newHash = sha256.New
  84. default:
  85. err = errInvalidFlags
  86. }
  87. return
  88. }
  89. // header provides the actual content of the xz file header: the flags.
  90. type header struct {
  91. flags byte
  92. }
  93. // Errors returned by readHeader.
  94. var errHeaderMagic = errors.New("xz: invalid header magic bytes")
  95. // ValidHeader checks whether data is a correct xz file header. The
  96. // length of data must be HeaderLen.
  97. func ValidHeader(data []byte) bool {
  98. var h header
  99. err := h.UnmarshalBinary(data)
  100. return err == nil
  101. }
  102. // String returns a string representation of the flags.
  103. func (h header) String() string {
  104. return flagString(h.flags)
  105. }
  106. // UnmarshalBinary reads header from the provided data slice.
  107. func (h *header) UnmarshalBinary(data []byte) error {
  108. // header length
  109. if len(data) != HeaderLen {
  110. return errors.New("xz: wrong file header length")
  111. }
  112. // magic header
  113. if !bytes.Equal(headerMagic, data[:6]) {
  114. return errHeaderMagic
  115. }
  116. // checksum
  117. crc := crc32.NewIEEE()
  118. crc.Write(data[6:8])
  119. if uint32LE(data[8:]) != crc.Sum32() {
  120. return errors.New("xz: invalid checksum for file header")
  121. }
  122. // stream flags
  123. if data[6] != 0 {
  124. return errInvalidFlags
  125. }
  126. flags := data[7]
  127. if err := verifyFlags(flags); err != nil {
  128. return err
  129. }
  130. h.flags = flags
  131. return nil
  132. }
  133. // MarshalBinary generates the xz file header.
  134. func (h *header) MarshalBinary() (data []byte, err error) {
  135. if err = verifyFlags(h.flags); err != nil {
  136. return nil, err
  137. }
  138. data = make([]byte, 12)
  139. copy(data, headerMagic)
  140. data[7] = h.flags
  141. crc := crc32.NewIEEE()
  142. crc.Write(data[6:8])
  143. putUint32LE(data[8:], crc.Sum32())
  144. return data, nil
  145. }
  146. /*** Footer ***/
  147. // footerLen defines the length of the footer.
  148. const footerLen = 12
  149. // footerMagic contains the footer magic bytes.
  150. var footerMagic = []byte{'Y', 'Z'}
  151. // footer represents the content of the xz file footer.
  152. type footer struct {
  153. indexSize int64
  154. flags byte
  155. }
  156. // String prints a string representation of the footer structure.
  157. func (f footer) String() string {
  158. return fmt.Sprintf("%s index size %d", flagString(f.flags), f.indexSize)
  159. }
  160. // Minimum and maximum for the size of the index (backward size).
  161. const (
  162. minIndexSize = 4
  163. maxIndexSize = (1 << 32) * 4
  164. )
  165. // MarshalBinary converts footer values into an xz file footer. Note
  166. // that the footer value is checked for correctness.
  167. func (f *footer) MarshalBinary() (data []byte, err error) {
  168. if err = verifyFlags(f.flags); err != nil {
  169. return nil, err
  170. }
  171. if !(minIndexSize <= f.indexSize && f.indexSize <= maxIndexSize) {
  172. return nil, errors.New("xz: index size out of range")
  173. }
  174. if f.indexSize%4 != 0 {
  175. return nil, errors.New(
  176. "xz: index size not aligned to four bytes")
  177. }
  178. data = make([]byte, footerLen)
  179. // backward size (index size)
  180. s := (f.indexSize / 4) - 1
  181. putUint32LE(data[4:], uint32(s))
  182. // flags
  183. data[9] = f.flags
  184. // footer magic
  185. copy(data[10:], footerMagic)
  186. // CRC-32
  187. crc := crc32.NewIEEE()
  188. crc.Write(data[4:10])
  189. putUint32LE(data, crc.Sum32())
  190. return data, nil
  191. }
  192. // UnmarshalBinary sets the footer value by unmarshalling an xz file
  193. // footer.
  194. func (f *footer) UnmarshalBinary(data []byte) error {
  195. if len(data) != footerLen {
  196. return errors.New("xz: wrong footer length")
  197. }
  198. // magic bytes
  199. if !bytes.Equal(data[10:], footerMagic) {
  200. return errors.New("xz: footer magic invalid")
  201. }
  202. // CRC-32
  203. crc := crc32.NewIEEE()
  204. crc.Write(data[4:10])
  205. if uint32LE(data) != crc.Sum32() {
  206. return errors.New("xz: footer checksum error")
  207. }
  208. var g footer
  209. // backward size (index size)
  210. g.indexSize = (int64(uint32LE(data[4:])) + 1) * 4
  211. // flags
  212. if data[8] != 0 {
  213. return errInvalidFlags
  214. }
  215. g.flags = data[9]
  216. if err := verifyFlags(g.flags); err != nil {
  217. return err
  218. }
  219. *f = g
  220. return nil
  221. }
  222. /*** Block Header ***/
  223. // blockHeader represents the content of an xz block header.
  224. type blockHeader struct {
  225. compressedSize int64
  226. uncompressedSize int64
  227. filters []filter
  228. }
  229. // String converts the block header into a string.
  230. func (h blockHeader) String() string {
  231. var buf bytes.Buffer
  232. first := true
  233. if h.compressedSize >= 0 {
  234. fmt.Fprintf(&buf, "compressed size %d", h.compressedSize)
  235. first = false
  236. }
  237. if h.uncompressedSize >= 0 {
  238. if !first {
  239. buf.WriteString(" ")
  240. }
  241. fmt.Fprintf(&buf, "uncompressed size %d", h.uncompressedSize)
  242. first = false
  243. }
  244. for _, f := range h.filters {
  245. if !first {
  246. buf.WriteString(" ")
  247. }
  248. fmt.Fprintf(&buf, "filter %s", f)
  249. first = false
  250. }
  251. return buf.String()
  252. }
  // Masks for the block flags byte of a block header.
const (
	// filterCountMask selects the bits holding the filter count minus one.
	filterCountMask = 0x03
	// reservedBlockFlags selects the bits that must be zero.
	reservedBlockFlags = 0x3C
	// compressedSizePresent is set when the compressed size field is present.
	compressedSizePresent = 1 << 6
	// uncompressedSizePresent is set when the uncompressed size field is
	// present.
	uncompressedSizePresent = 1 << 7
)

// errIndexIndicator reports that the index indicator byte (0x00) was
// found where a block header was expected.
var errIndexIndicator = errors.New("xz: found index indicator")
  263. // readBlockHeader reads the block header.
  264. func readBlockHeader(r io.Reader) (h *blockHeader, n int, err error) {
  265. var buf bytes.Buffer
  266. buf.Grow(20)
  267. // block header size
  268. z, err := io.CopyN(&buf, r, 1)
  269. n = int(z)
  270. if err != nil {
  271. return nil, n, err
  272. }
  273. s := buf.Bytes()[0]
  274. if s == 0 {
  275. return nil, n, errIndexIndicator
  276. }
  277. // read complete header
  278. headerLen := (int(s) + 1) * 4
  279. buf.Grow(headerLen - 1)
  280. z, err = io.CopyN(&buf, r, int64(headerLen-1))
  281. n += int(z)
  282. if err != nil {
  283. return nil, n, err
  284. }
  285. // unmarshal block header
  286. h = new(blockHeader)
  287. if err = h.UnmarshalBinary(buf.Bytes()); err != nil {
  288. return nil, n, err
  289. }
  290. return h, n, nil
  291. }
  292. // readSizeInBlockHeader reads the uncompressed or compressed size
  293. // fields in the block header. The present value informs the function
  294. // whether the respective field is actually present in the header.
  295. func readSizeInBlockHeader(r io.ByteReader, present bool) (n int64, err error) {
  296. if !present {
  297. return -1, nil
  298. }
  299. x, _, err := readUvarint(r)
  300. if err != nil {
  301. return 0, err
  302. }
  303. if x >= 1<<63 {
  304. return 0, errors.New("xz: size overflow in block header")
  305. }
  306. return int64(x), nil
  307. }
  308. // UnmarshalBinary unmarshals the block header.
  309. func (h *blockHeader) UnmarshalBinary(data []byte) error {
  310. // Check header length
  311. s := data[0]
  312. if data[0] == 0 {
  313. return errIndexIndicator
  314. }
  315. headerLen := (int(s) + 1) * 4
  316. if len(data) != headerLen {
  317. return fmt.Errorf("xz: data length %d; want %d", len(data),
  318. headerLen)
  319. }
  320. n := headerLen - 4
  321. // Check CRC-32
  322. crc := crc32.NewIEEE()
  323. crc.Write(data[:n])
  324. if crc.Sum32() != uint32LE(data[n:]) {
  325. return errors.New("xz: checksum error for block header")
  326. }
  327. // Block header flags
  328. flags := data[1]
  329. if flags&reservedBlockFlags != 0 {
  330. return errors.New("xz: reserved block header flags set")
  331. }
  332. r := bytes.NewReader(data[2:n])
  333. // Compressed size
  334. var err error
  335. h.compressedSize, err = readSizeInBlockHeader(
  336. r, flags&compressedSizePresent != 0)
  337. if err != nil {
  338. return err
  339. }
  340. // Uncompressed size
  341. h.uncompressedSize, err = readSizeInBlockHeader(
  342. r, flags&uncompressedSizePresent != 0)
  343. if err != nil {
  344. return err
  345. }
  346. h.filters, err = readFilters(r, int(flags&filterCountMask)+1)
  347. if err != nil {
  348. return err
  349. }
  350. // Check padding
  351. // Since headerLen is a multiple of 4 we don't need to check
  352. // alignment.
  353. k := r.Len()
  354. // The standard spec says that the padding should have not more
  355. // than 3 bytes. However we found paddings of 4 or 5 in the
  356. // wild. See https://github.com/ulikunitz/xz/pull/11 and
  357. // https://github.com/ulikunitz/xz/issues/15
  358. //
  359. // The only reasonable approach seems to be to ignore the
  360. // padding size. We still check that all padding bytes are zero.
  361. if !allZeros(data[n-k : n]) {
  362. return errPadding
  363. }
  364. return nil
  365. }
  366. // MarshalBinary marshals the binary header.
  367. func (h *blockHeader) MarshalBinary() (data []byte, err error) {
  368. if !(minFilters <= len(h.filters) && len(h.filters) <= maxFilters) {
  369. return nil, errors.New("xz: filter count wrong")
  370. }
  371. for i, f := range h.filters {
  372. if i < len(h.filters)-1 {
  373. if f.id() == lzmaFilterID {
  374. return nil, errors.New(
  375. "xz: LZMA2 filter is not the last")
  376. }
  377. } else {
  378. // last filter
  379. if f.id() != lzmaFilterID {
  380. return nil, errors.New("xz: " +
  381. "last filter must be the LZMA2 filter")
  382. }
  383. }
  384. }
  385. var buf bytes.Buffer
  386. // header size must set at the end
  387. buf.WriteByte(0)
  388. // flags
  389. flags := byte(len(h.filters) - 1)
  390. if h.compressedSize >= 0 {
  391. flags |= compressedSizePresent
  392. }
  393. if h.uncompressedSize >= 0 {
  394. flags |= uncompressedSizePresent
  395. }
  396. buf.WriteByte(flags)
  397. p := make([]byte, 10)
  398. if h.compressedSize >= 0 {
  399. k := putUvarint(p, uint64(h.compressedSize))
  400. buf.Write(p[:k])
  401. }
  402. if h.uncompressedSize >= 0 {
  403. k := putUvarint(p, uint64(h.uncompressedSize))
  404. buf.Write(p[:k])
  405. }
  406. for _, f := range h.filters {
  407. fp, err := f.MarshalBinary()
  408. if err != nil {
  409. return nil, err
  410. }
  411. buf.Write(fp)
  412. }
  413. // padding
  414. for i := padLen(int64(buf.Len())); i > 0; i-- {
  415. buf.WriteByte(0)
  416. }
  417. // crc place holder
  418. buf.Write(p[:4])
  419. data = buf.Bytes()
  420. if len(data)%4 != 0 {
  421. panic("data length not aligned")
  422. }
  423. s := len(data)/4 - 1
  424. if !(1 < s && s <= 255) {
  425. panic("wrong block header size")
  426. }
  427. data[0] = byte(s)
  428. crc := crc32.NewIEEE()
  429. crc.Write(data[:len(data)-4])
  430. putUint32LE(data[len(data)-4:], crc.Sum32())
  431. return data, nil
  432. }
  433. // Constants used for marshalling and unmarshalling filters in the xz
  434. // block header.
  435. const (
  436. minFilters = 1
  437. maxFilters = 4
  438. minReservedID = 1 << 62
  439. )
  440. // filter represents a filter in the block header.
  441. type filter interface {
  442. id() uint64
  443. UnmarshalBinary(data []byte) error
  444. MarshalBinary() (data []byte, err error)
  445. reader(r io.Reader, c *ReaderConfig) (fr io.Reader, err error)
  446. writeCloser(w io.WriteCloser, c *WriterConfig) (fw io.WriteCloser, err error)
  447. // filter must be last filter
  448. last() bool
  449. }
  450. // readFilter reads a block filter from the block header. At this point
  451. // in time only the LZMA2 filter is supported.
  452. func readFilter(r io.Reader) (f filter, err error) {
  453. br := lzma.ByteReader(r)
  454. // index
  455. id, _, err := readUvarint(br)
  456. if err != nil {
  457. return nil, err
  458. }
  459. var data []byte
  460. switch id {
  461. case lzmaFilterID:
  462. data = make([]byte, lzmaFilterLen)
  463. data[0] = lzmaFilterID
  464. if _, err = io.ReadFull(r, data[1:]); err != nil {
  465. return nil, err
  466. }
  467. f = new(lzmaFilter)
  468. default:
  469. if id >= minReservedID {
  470. return nil, errors.New(
  471. "xz: reserved filter id in block stream header")
  472. }
  473. return nil, errors.New("xz: invalid filter id")
  474. }
  475. if err = f.UnmarshalBinary(data); err != nil {
  476. return nil, err
  477. }
  478. return f, err
  479. }
  480. // readFilters reads count filters. At this point in time only the count
  481. // 1 is supported.
  482. func readFilters(r io.Reader, count int) (filters []filter, err error) {
  483. if count != 1 {
  484. return nil, errors.New("xz: unsupported filter count")
  485. }
  486. f, err := readFilter(r)
  487. if err != nil {
  488. return nil, err
  489. }
  490. return []filter{f}, err
  491. }
  492. /*** Index ***/
  493. // record describes a block in the xz file index.
  494. type record struct {
  495. unpaddedSize int64
  496. uncompressedSize int64
  497. }
  498. // readRecord reads an index record.
  499. func readRecord(r io.ByteReader) (rec record, n int, err error) {
  500. u, k, err := readUvarint(r)
  501. n += k
  502. if err != nil {
  503. return rec, n, err
  504. }
  505. rec.unpaddedSize = int64(u)
  506. if rec.unpaddedSize < 0 {
  507. return rec, n, errors.New("xz: unpadded size negative")
  508. }
  509. u, k, err = readUvarint(r)
  510. n += k
  511. if err != nil {
  512. return rec, n, err
  513. }
  514. rec.uncompressedSize = int64(u)
  515. if rec.uncompressedSize < 0 {
  516. return rec, n, errors.New("xz: uncompressed size negative")
  517. }
  518. return rec, n, nil
  519. }
  520. // MarshalBinary converts an index record in its binary encoding.
  521. func (rec *record) MarshalBinary() (data []byte, err error) {
  522. // maximum length of a uvarint is 10
  523. p := make([]byte, 20)
  524. n := putUvarint(p, uint64(rec.unpaddedSize))
  525. n += putUvarint(p[n:], uint64(rec.uncompressedSize))
  526. return p[:n], nil
  527. }
  528. // writeIndex writes the index, a sequence of records.
  529. func writeIndex(w io.Writer, index []record) (n int64, err error) {
  530. crc := crc32.NewIEEE()
  531. mw := io.MultiWriter(w, crc)
  532. // index indicator
  533. k, err := mw.Write([]byte{0})
  534. n += int64(k)
  535. if err != nil {
  536. return n, err
  537. }
  538. // number of records
  539. p := make([]byte, 10)
  540. k = putUvarint(p, uint64(len(index)))
  541. k, err = mw.Write(p[:k])
  542. n += int64(k)
  543. if err != nil {
  544. return n, err
  545. }
  546. // list of records
  547. for _, rec := range index {
  548. p, err := rec.MarshalBinary()
  549. if err != nil {
  550. return n, err
  551. }
  552. k, err = mw.Write(p)
  553. n += int64(k)
  554. if err != nil {
  555. return n, err
  556. }
  557. }
  558. // index padding
  559. k, err = mw.Write(make([]byte, padLen(int64(n))))
  560. n += int64(k)
  561. if err != nil {
  562. return n, err
  563. }
  564. // crc32 checksum
  565. putUint32LE(p, crc.Sum32())
  566. k, err = w.Write(p[:4])
  567. n += int64(k)
  568. return n, err
  569. }
  570. // readIndexBody reads the index from the reader. It assumes that the
  571. // index indicator has already been read.
  572. func readIndexBody(r io.Reader, expectedRecordLen int) (records []record, n int64, err error) {
  573. crc := crc32.NewIEEE()
  574. // index indicator
  575. crc.Write([]byte{0})
  576. br := lzma.ByteReader(io.TeeReader(r, crc))
  577. // number of records
  578. u, k, err := readUvarint(br)
  579. n += int64(k)
  580. if err != nil {
  581. return nil, n, err
  582. }
  583. recLen := int(u)
  584. if recLen < 0 || uint64(recLen) != u {
  585. return nil, n, errors.New("xz: record number overflow")
  586. }
  587. if recLen != expectedRecordLen {
  588. return nil, n, fmt.Errorf(
  589. "xz: index length is %d; want %d",
  590. recLen, expectedRecordLen)
  591. }
  592. // list of records
  593. records = make([]record, recLen)
  594. for i := range records {
  595. records[i], k, err = readRecord(br)
  596. n += int64(k)
  597. if err != nil {
  598. return nil, n, err
  599. }
  600. }
  601. p := make([]byte, padLen(int64(n+1)), 4)
  602. k, err = io.ReadFull(br.(io.Reader), p)
  603. n += int64(k)
  604. if err != nil {
  605. return nil, n, err
  606. }
  607. if !allZeros(p) {
  608. return nil, n, errors.New("xz: non-zero byte in index padding")
  609. }
  610. // crc32
  611. s := crc.Sum32()
  612. p = p[:4]
  613. k, err = io.ReadFull(br.(io.Reader), p)
  614. n += int64(k)
  615. if err != nil {
  616. return records, n, err
  617. }
  618. if uint32LE(p) != s {
  619. return nil, n, errors.New("xz: wrong checksum for index")
  620. }
  621. return records, n, nil
  622. }