You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

huf_decompress.c 50KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248
  1. /* ******************************************************************
  2. * huff0 huffman decoder,
  3. * part of Finite State Entropy library
  4. * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
  5. *
  6. * You can contact the author at :
  7. * - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
  8. *
  9. * This source code is licensed under both the BSD-style license (found in the
  10. * LICENSE file in the root directory of this source tree) and the GPLv2 (found
  11. * in the COPYING file in the root directory of this source tree).
  12. * You may select, at your option, one of the above-listed licenses.
  13. ****************************************************************** */
  14. /* **************************************************************
  15. * Dependencies
  16. ****************************************************************/
  17. #include <string.h> /* memcpy, memset */
  18. #include "compiler.h"
  19. #include "bitstream.h" /* BIT_* */
  20. #include "fse.h" /* to compress headers */
  21. #define HUF_STATIC_LINKING_ONLY
  22. #include "huf.h"
  23. #include "error_private.h"
  24. /* **************************************************************
  25. * Macros
  26. ****************************************************************/
/* These two optional macros force the use one way or another of the two
 * Huffman decompression implementations. You can't force in both directions
 * at the same time.
 */
#if defined(HUF_FORCE_DECOMPRESS_X1) && \
    defined(HUF_FORCE_DECOMPRESS_X2)
#error "Cannot force the use of the X1 and X2 decoders at the same time!"
#endif


/* **************************************************************
*  Error Management
****************************************************************/
#define HUF_isError ERR_isError


/* **************************************************************
*  Byte alignment for workSpace management
****************************************************************/
/* round x up to the next multiple of a (a must be a power of 2) */
#define HUF_ALIGN(x, a)         HUF_ALIGN_MASK((x), (a) - 1)
#define HUF_ALIGN_MASK(x, mask) (((x) + (mask)) & ~(mask))


/* **************************************************************
*  BMI2 Variant Wrappers
****************************************************************/
/* HUF_DGEN(fn) :
 * From an inlineable fn##_body, generates `fn`, a runtime dispatcher
 * taking an extra `bmi2` flag. When DYNAMIC_BMI2 is set, two specializations
 * are compiled (default and TARGET_ATTRIBUTE("bmi2")) and selected at runtime;
 * otherwise the flag is ignored and a single body is used. */
#if DYNAMIC_BMI2

#define HUF_DGEN(fn)                                                        \
                                                                            \
    static size_t fn##_default(                                             \
                  void* dst,  size_t dstSize,                               \
            const void* cSrc, size_t cSrcSize,                              \
            const HUF_DTable* DTable)                                       \
    {                                                                       \
        return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable);             \
    }                                                                       \
                                                                            \
    static TARGET_ATTRIBUTE("bmi2") size_t fn##_bmi2(                       \
                  void* dst,  size_t dstSize,                               \
            const void* cSrc, size_t cSrcSize,                              \
            const HUF_DTable* DTable)                                       \
    {                                                                       \
        return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable);             \
    }                                                                       \
                                                                            \
    static size_t fn(void* dst, size_t dstSize, void const* cSrc,           \
                     size_t cSrcSize, HUF_DTable const* DTable, int bmi2)   \
    {                                                                       \
        if (bmi2) {                                                         \
            return fn##_bmi2(dst, dstSize, cSrc, cSrcSize, DTable);         \
        }                                                                   \
        return fn##_default(dst, dstSize, cSrc, cSrcSize, DTable);          \
    }

#else

#define HUF_DGEN(fn)                                                        \
    static size_t fn(void* dst, size_t dstSize, void const* cSrc,           \
                     size_t cSrcSize, HUF_DTable const* DTable, int bmi2)   \
    {                                                                       \
        (void)bmi2;                                                         \
        return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable);             \
    }

#endif
  83. /*-***************************/
  84. /* generic DTableDesc */
  85. /*-***************************/
/*-***************************/
/*  generic DTableDesc       */
/*-***************************/
/* Descriptor stored in the first cell of every HUF_DTable :
 * maxTableLog : largest tableLog the allocated table can hold
 * tableType   : 0 = single-symbol (X1) table, 1 = double-symbol (X2) table
 * tableLog    : log2 of the number of cells actually used */
typedef struct { BYTE maxTableLog; BYTE tableType; BYTE tableLog; BYTE reserved; } DTableDesc;

/* HUF_getDTableDesc() :
 * Extracts the descriptor from the table header.
 * Uses memcpy (not a pointer cast) to stay clear of strict-aliasing issues. */
static DTableDesc HUF_getDTableDesc(const HUF_DTable* table)
{
    DTableDesc dtd;
    memcpy(&dtd, table, sizeof(dtd));
    return dtd;
}
  93. #ifndef HUF_FORCE_DECOMPRESS_X2
  94. /*-***************************/
  95. /* single-symbol decoding */
  96. /*-***************************/
typedef struct { BYTE byte; BYTE nbBits; } HUF_DEltX1;   /* single-symbol decoding */

/* HUF_readDTableX1_wksp() :
 * Builds a single-symbol (X1) decoding table from the Huffman tree
 * description found at the start of `src`.
 * `workSpace` is scratch memory carved into rankVal[] and huffWeight[];
 * returns tableLog_tooLarge if `wkspSize` is insufficient.
 * @return : number of bytes read from `src` (the tree description size),
 *           or an error code testable with HUF_isError(). */
size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize)
{
    U32 tableLog = 0;
    U32 nbSymbols = 0;
    size_t iSize;
    void* const dtPtr = DTable + 1;   /* table cells start after the header cell */
    HUF_DEltX1* const dt = (HUF_DEltX1*)dtPtr;

    U32* rankVal;
    BYTE* huffWeight;
    size_t spaceUsed32 = 0;   /* workspace consumption, counted in U32 units */

    rankVal = (U32 *)workSpace + spaceUsed32;
    spaceUsed32 += HUF_TABLELOG_ABSOLUTEMAX + 1;
    huffWeight = (BYTE *)((U32 *)workSpace + spaceUsed32);
    spaceUsed32 += HUF_ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2;

    if ((spaceUsed32 << 2) > wkspSize) return ERROR(tableLog_tooLarge);

    DEBUG_STATIC_ASSERT(sizeof(DTableDesc) == sizeof(HUF_DTable));
    /* memset(huffWeight, 0, sizeof(huffWeight)); */   /* is not necessary, even though some analyzer complain ... */

    /* read per-symbol weights; rankVal[] receives the count of symbols per weight */
    iSize = HUF_readStats(huffWeight, HUF_SYMBOLVALUE_MAX + 1, rankVal, &nbSymbols, &tableLog, src, srcSize);
    if (HUF_isError(iSize)) return iSize;

    /* Table header */
    {   DTableDesc dtd = HUF_getDTableDesc(DTable);
        if (tableLog > (U32)(dtd.maxTableLog+1)) return ERROR(tableLog_tooLarge);   /* DTable too small, Huffman tree cannot fit in */
        dtd.tableType = 0;
        dtd.tableLog = (BYTE)tableLog;
        memcpy(DTable, &dtd, sizeof(dtd));
    }

    /* Calculate starting value for each rank */
    {   U32 n, nextRankStart = 0;
        for (n=1; n<tableLog+1; n++) {
            U32 const current = nextRankStart;
            nextRankStart += (rankVal[n] << (n-1));   /* weight n occupies 2^(n-1) cells per symbol */
            rankVal[n] = current;
    }   }

    /* fill DTable */
    {   U32 n;
        size_t const nEnd = nbSymbols;
        for (n=0; n<nEnd; n++) {
            size_t const w = huffWeight[n];
            size_t const length = (1 << w) >> 1;   /* 2^(w-1) cells; 0 cells for weight 0 (absent symbol) */
            size_t const uStart = rankVal[w];
            size_t const uEnd = uStart + length;
            size_t u;
            HUF_DEltX1 D;
            D.byte = (BYTE)n;
            D.nbBits = (BYTE)(tableLog + 1 - w);
            rankVal[w] = (U32)uEnd;
            if (length < 4) {
                /* Use length in the loop bound so the compiler knows it is short. */
                for (u = 0; u < length; ++u)
                    dt[uStart + u] = D;
            } else {
                /* Unroll the loop 4 times, we know it is a power of 2. */
                for (u = uStart; u < uEnd; u += 4) {
                    dt[u + 0] = D;
                    dt[u + 1] = D;
                    dt[u + 2] = D;
                    dt[u + 3] = D;
    }   }   }   }
    return iSize;
}
  158. size_t HUF_readDTableX1(HUF_DTable* DTable, const void* src, size_t srcSize)
  159. {
  160. U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
  161. return HUF_readDTableX1_wksp(DTable, src, srcSize,
  162. workSpace, sizeof(workSpace));
  163. }
/* HUF_decodeSymbolX1() :
 * Decodes one symbol : peeks the next dtLog bits, looks the value up in `dt`,
 * consumes that entry's nbBits from the stream, and returns the decoded byte. */
FORCE_INLINE_TEMPLATE BYTE
HUF_decodeSymbolX1(BIT_DStream_t* Dstream, const HUF_DEltX1* dt, const U32 dtLog)
{
    size_t const val = BIT_lookBitsFast(Dstream, dtLog);   /* note : dtLog >= 1 */
    BYTE const c = dt[val].byte;
    BIT_skipBits(Dstream, dt[val].nbBits);
    return c;
}
/* emit one symbol unconditionally */
#define HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr) \
    *ptr++ = HUF_decodeSymbolX1(DStreamPtr, dt, dtLog)

/* emit only when a second decode is safe without reload
 * (64-bit accumulator, or small enough table) */
#define HUF_DECODE_SYMBOLX1_1(ptr, DStreamPtr)      \
    if (MEM_64bits() || (HUF_TABLELOG_MAX<=12))     \
        HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr)

/* emit only on 64-bit targets */
#define HUF_DECODE_SYMBOLX1_2(ptr, DStreamPtr) \
    if (MEM_64bits())                          \
        HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr)
/* HUF_decodeStreamX1() :
 * Decodes symbols from `bitDPtr` into [p, pEnd) using X1 table `dt`.
 * @return : number of bytes written (always pEnd - pStart). */
HINT_INLINE size_t
HUF_decodeStreamX1(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, const HUF_DEltX1* const dt, const U32 dtLog)
{
    BYTE* const pStart = p;

    /* up to 4 symbols at a time */
    while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-3)) {
        HUF_DECODE_SYMBOLX1_2(p, bitDPtr);
        HUF_DECODE_SYMBOLX1_1(p, bitDPtr);
        HUF_DECODE_SYMBOLX1_2(p, bitDPtr);
        HUF_DECODE_SYMBOLX1_0(p, bitDPtr);
    }

    /* [0-3] symbols remaining */
    if (MEM_32bits())
        while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd))
            HUF_DECODE_SYMBOLX1_0(p, bitDPtr);

    /* no more data to retrieve from bitstream, no need to reload */
    while (p < pEnd)
        HUF_DECODE_SYMBOLX1_0(p, bitDPtr);

    return pEnd-pStart;
}
/* HUF_decompress1X1_usingDTable_internal_body() :
 * Decompresses a single-stream block with a single-symbol (X1) table.
 * @return : dstSize, or corruption_detected if the bitstream
 *           is not fully and exactly consumed. */
FORCE_INLINE_TEMPLATE size_t
HUF_decompress1X1_usingDTable_internal_body(
          void* dst,  size_t dstSize,
    const void* cSrc, size_t cSrcSize,
    const HUF_DTable* DTable)
{
    BYTE* op = (BYTE*)dst;
    BYTE* const oend = op + dstSize;
    const void* dtPtr = DTable + 1;   /* skip header cell */
    const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr;
    BIT_DStream_t bitD;
    DTableDesc const dtd = HUF_getDTableDesc(DTable);
    U32 const dtLog = dtd.tableLog;

    CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) );

    HUF_decodeStreamX1(op, &bitD, oend, dt, dtLog);

    /* a valid stream must be entirely consumed at this point */
    if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected);

    return dstSize;
}
/* HUF_decompress4X1_usingDTable_internal_body() :
 * Decompresses a 4-streams block with a single-symbol (X1) table.
 * Input layout : 6-byte jump table (3 little-endian 16-bit sizes for
 * streams 1-3; stream 4 takes the remainder), then the 4 bitstreams.
 * Output is split in 4 segments of (dstSize+3)/4 bytes, decoded in lock-step. */
FORCE_INLINE_TEMPLATE size_t
HUF_decompress4X1_usingDTable_internal_body(
          void* dst,  size_t dstSize,
    const void* cSrc, size_t cSrcSize,
    const HUF_DTable* DTable)
{
    /* Check */
    if (cSrcSize < 10) return ERROR(corruption_detected);  /* strict minimum : jump table + 1 byte per stream */

    {   const BYTE* const istart = (const BYTE*) cSrc;
        BYTE* const ostart = (BYTE*) dst;
        BYTE* const oend = ostart + dstSize;
        BYTE* const olimit = oend - 3;   /* main loop needs room for 4 symbols in stream 4 */
        const void* const dtPtr = DTable + 1;
        const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr;

        /* Init */
        BIT_DStream_t bitD1;
        BIT_DStream_t bitD2;
        BIT_DStream_t bitD3;
        BIT_DStream_t bitD4;
        size_t const length1 = MEM_readLE16(istart);
        size_t const length2 = MEM_readLE16(istart+2);
        size_t const length3 = MEM_readLE16(istart+4);
        size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6);
        const BYTE* const istart1 = istart + 6;  /* jumpTable */
        const BYTE* const istart2 = istart1 + length1;
        const BYTE* const istart3 = istart2 + length2;
        const BYTE* const istart4 = istart3 + length3;
        const size_t segmentSize = (dstSize+3) / 4;
        BYTE* const opStart2 = ostart + segmentSize;
        BYTE* const opStart3 = opStart2 + segmentSize;
        BYTE* const opStart4 = opStart3 + segmentSize;
        BYTE* op1 = ostart;
        BYTE* op2 = opStart2;
        BYTE* op3 = opStart3;
        BYTE* op4 = opStart4;
        DTableDesc const dtd = HUF_getDTableDesc(DTable);
        U32 const dtLog = dtd.tableLog;
        U32 endSignal = 1;

        if (length4 > cSrcSize) return ERROR(corruption_detected);   /* overflow */
        CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
        CHECK_F( BIT_initDStream(&bitD2, istart2, length2) );
        CHECK_F( BIT_initDStream(&bitD3, istart3, length3) );
        CHECK_F( BIT_initDStream(&bitD4, istart4, length4) );

        /* up to 16 symbols per loop (4 symbols per stream) in 64-bit mode */
        for ( ; (endSignal) & (op4 < olimit) ; ) {
            HUF_DECODE_SYMBOLX1_2(op1, &bitD1);
            HUF_DECODE_SYMBOLX1_2(op2, &bitD2);
            HUF_DECODE_SYMBOLX1_2(op3, &bitD3);
            HUF_DECODE_SYMBOLX1_2(op4, &bitD4);
            HUF_DECODE_SYMBOLX1_1(op1, &bitD1);
            HUF_DECODE_SYMBOLX1_1(op2, &bitD2);
            HUF_DECODE_SYMBOLX1_1(op3, &bitD3);
            HUF_DECODE_SYMBOLX1_1(op4, &bitD4);
            HUF_DECODE_SYMBOLX1_2(op1, &bitD1);
            HUF_DECODE_SYMBOLX1_2(op2, &bitD2);
            HUF_DECODE_SYMBOLX1_2(op3, &bitD3);
            HUF_DECODE_SYMBOLX1_2(op4, &bitD4);
            HUF_DECODE_SYMBOLX1_0(op1, &bitD1);
            HUF_DECODE_SYMBOLX1_0(op2, &bitD2);
            HUF_DECODE_SYMBOLX1_0(op3, &bitD3);
            HUF_DECODE_SYMBOLX1_0(op4, &bitD4);
            endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished;
            endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished;
            endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished;
            endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished;
        }

        /* check corruption */
        /* note : should not be necessary : op# advance in lock step, and we control op4.
         *        but curiously, binary generated by gcc 7.2 & 7.3 with -mbmi2 runs faster when >=1 test is present */
        if (op1 > opStart2) return ERROR(corruption_detected);
        if (op2 > opStart3) return ERROR(corruption_detected);
        if (op3 > opStart4) return ERROR(corruption_detected);
        /* note : op4 supposed already verified within main loop */

        /* finish bitStreams one by one */
        HUF_decodeStreamX1(op1, &bitD1, opStart2, dt, dtLog);
        HUF_decodeStreamX1(op2, &bitD2, opStart3, dt, dtLog);
        HUF_decodeStreamX1(op3, &bitD3, opStart4, dt, dtLog);
        HUF_decodeStreamX1(op4, &bitD4, oend,     dt, dtLog);

        /* check */
        { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
          if (!endCheck) return ERROR(corruption_detected); }

        /* decoded size */
        return dstSize;
    }
}
/* function-pointer type shared by all usingDTable decoder variants */
typedef size_t (*HUF_decompress_usingDTable_t)(void *dst, size_t dstSize,
                                               const void *cSrc,
                                               size_t cSrcSize,
                                               const HUF_DTable *DTable);

/* instantiate the bmi2/default runtime dispatchers for the X1 bodies above */
HUF_DGEN(HUF_decompress1X1_usingDTable_internal)
HUF_DGEN(HUF_decompress4X1_usingDTable_internal)
  309. size_t HUF_decompress1X1_usingDTable(
  310. void* dst, size_t dstSize,
  311. const void* cSrc, size_t cSrcSize,
  312. const HUF_DTable* DTable)
  313. {
  314. DTableDesc dtd = HUF_getDTableDesc(DTable);
  315. if (dtd.tableType != 0) return ERROR(GENERIC);
  316. return HUF_decompress1X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
  317. }
  318. size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
  319. const void* cSrc, size_t cSrcSize,
  320. void* workSpace, size_t wkspSize)
  321. {
  322. const BYTE* ip = (const BYTE*) cSrc;
  323. size_t const hSize = HUF_readDTableX1_wksp(DCtx, cSrc, cSrcSize, workSpace, wkspSize);
  324. if (HUF_isError(hSize)) return hSize;
  325. if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
  326. ip += hSize; cSrcSize -= hSize;
  327. return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0);
  328. }
  329. size_t HUF_decompress1X1_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize,
  330. const void* cSrc, size_t cSrcSize)
  331. {
  332. U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
  333. return HUF_decompress1X1_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize,
  334. workSpace, sizeof(workSpace));
  335. }
/* HUF_decompress1X1() :
 * Convenience entry : allocates a max-size X1 table on the stack
 * and decodes a single-stream block (header + payload). */
size_t HUF_decompress1X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
{
    HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX);
    return HUF_decompress1X1_DCtx (DTable, dst, dstSize, cSrc, cSrcSize);
}
  341. size_t HUF_decompress4X1_usingDTable(
  342. void* dst, size_t dstSize,
  343. const void* cSrc, size_t cSrcSize,
  344. const HUF_DTable* DTable)
  345. {
  346. DTableDesc dtd = HUF_getDTableDesc(DTable);
  347. if (dtd.tableType != 0) return ERROR(GENERIC);
  348. return HUF_decompress4X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
  349. }
  350. static size_t HUF_decompress4X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize,
  351. const void* cSrc, size_t cSrcSize,
  352. void* workSpace, size_t wkspSize, int bmi2)
  353. {
  354. const BYTE* ip = (const BYTE*) cSrc;
  355. size_t const hSize = HUF_readDTableX1_wksp (dctx, cSrc, cSrcSize,
  356. workSpace, wkspSize);
  357. if (HUF_isError(hSize)) return hSize;
  358. if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
  359. ip += hSize; cSrcSize -= hSize;
  360. return HUF_decompress4X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
  361. }
  362. size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
  363. const void* cSrc, size_t cSrcSize,
  364. void* workSpace, size_t wkspSize)
  365. {
  366. return HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, 0);
  367. }
  368. size_t HUF_decompress4X1_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
  369. {
  370. U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
  371. return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
  372. workSpace, sizeof(workSpace));
  373. }
/* HUF_decompress4X1() :
 * Convenience entry : allocates a max-size X1 table on the stack
 * and decodes a 4-streams block (header + payload). */
size_t HUF_decompress4X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
{
    HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX);
    return HUF_decompress4X1_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
}
  379. #endif /* HUF_FORCE_DECOMPRESS_X2 */
  380. #ifndef HUF_FORCE_DECOMPRESS_X1
/* *************************/
/* double-symbols decoding */
/* *************************/

/* X2 table cell : up to 2 decoded bytes (`sequence`, little-endian),
 * total bits consumed (`nbBits`), number of bytes produced (`length`) */
typedef struct { U16 sequence; BYTE nbBits; BYTE length; } HUF_DEltX2;  /* double-symbols decoding */
typedef struct { BYTE symbol; BYTE weight; } sortedSymbol_t;
typedef U32 rankValCol_t[HUF_TABLELOG_MAX + 1];
typedef rankValCol_t rankVal_t[HUF_TABLELOG_MAX];

/* HUF_fillDTableX2Level2() :
 * Fills the sub-table covering codes that start with a given first symbol
 * (`baseSeq`, which already consumed `consumed` bits).
 * `rankValOrigin` must be a table of at least (HUF_TABLELOG_MAX + 1) U32 */
static void HUF_fillDTableX2Level2(HUF_DEltX2* DTable, U32 sizeLog, const U32 consumed,
                           const U32* rankValOrigin, const int minWeight,
                           const sortedSymbol_t* sortedSymbols, const U32 sortedListSize,
                           U32 nbBitsBaseline, U16 baseSeq)
{
    HUF_DEltX2 DElt;
    U32 rankVal[HUF_TABLELOG_MAX + 1];

    /* get pre-calculated rankVal */
    memcpy(rankVal, rankValOrigin, sizeof(rankVal));

    /* fill skipped values : second symbols too long to pair get a
     * length-1 entry carrying only the first symbol */
    if (minWeight>1) {
        U32 i, skipSize = rankVal[minWeight];
        MEM_writeLE16(&(DElt.sequence), baseSeq);
        DElt.nbBits = (BYTE)(consumed);
        DElt.length = 1;
        for (i = 0; i < skipSize; i++)
            DTable[i] = DElt;
    }

    /* fill DTable */
    {   U32 s; for (s=0; s<sortedListSize; s++) {   /* note : sortedSymbols already skipped */
            const U32 symbol = sortedSymbols[s].symbol;
            const U32 weight = sortedSymbols[s].weight;
            const U32 nbBits = nbBitsBaseline - weight;
            const U32 length = 1 << (sizeLog-nbBits);
            const U32 start = rankVal[weight];
            U32 i = start;
            const U32 end = start + length;

            /* pack (first, second) symbol pair, little-endian */
            MEM_writeLE16(&(DElt.sequence), (U16)(baseSeq + (symbol << 8)));
            DElt.nbBits = (BYTE)(nbBits + consumed);
            DElt.length = 2;
            do { DTable[i++] = DElt; } while (i<end);   /* since length >= 1 */

            rankVal[weight] += length;
    }   }
}
/* HUF_fillDTableX2() :
 * Fills the top-level X2 table. Codes short enough to leave room for a
 * second symbol get a level-2 sub-table (pairs); the rest get length-1 entries. */
static void HUF_fillDTableX2(HUF_DEltX2* DTable, const U32 targetLog,
                           const sortedSymbol_t* sortedList, const U32 sortedListSize,
                           const U32* rankStart, rankVal_t rankValOrigin, const U32 maxWeight,
                           const U32 nbBitsBaseline)
{
    U32 rankVal[HUF_TABLELOG_MAX + 1];
    const int scaleLog = nbBitsBaseline - targetLog;   /* note : targetLog >= srcLog, hence scaleLog <= 1 */
    const U32 minBits  = nbBitsBaseline - maxWeight;   /* shortest possible code length */
    U32 s;

    memcpy(rankVal, rankValOrigin, sizeof(rankVal));

    /* fill DTable */
    for (s=0; s<sortedListSize; s++) {
        const U16 symbol = sortedList[s].symbol;
        const U32 weight = sortedList[s].weight;
        const U32 nbBits = nbBitsBaseline - weight;
        const U32 start = rankVal[weight];
        const U32 length = 1 << (targetLog-nbBits);

        if (targetLog-nbBits >= minBits) {   /* enough room for a second symbol */
            U32 sortedRank;
            int minWeight = nbBits + scaleLog;
            if (minWeight < 1) minWeight = 1;
            sortedRank = rankStart[minWeight];
            HUF_fillDTableX2Level2(DTable+start, targetLog-nbBits, nbBits,
                           rankValOrigin[nbBits], minWeight,
                           sortedList+sortedRank, sortedListSize-sortedRank,
                           nbBitsBaseline, symbol);
        } else {
            /* code too long to pair : single-symbol entry replicated over its range */
            HUF_DEltX2 DElt;
            MEM_writeLE16(&(DElt.sequence), symbol);
            DElt.nbBits = (BYTE)(nbBits);
            DElt.length = 1;
            {   U32 const end = start + length;
                U32 u;
                for (u = start; u < end; u++) DTable[u] = DElt;
        }   }
        rankVal[weight] += length;
    }
}
/* HUF_readDTableX2_wksp() :
 * Builds a double-symbol (X2) decoding table from the Huffman tree
 * description at the start of `src`.
 * `workSpace` is carved into rankVal/rankStats/rankStart0/sortedSymbol/weightList;
 * returns tableLog_tooLarge if `wkspSize` is insufficient.
 * @return : number of bytes read from `src`, or an error code. */
size_t HUF_readDTableX2_wksp(HUF_DTable* DTable,
                       const void* src, size_t srcSize,
                             void* workSpace, size_t wkspSize)
{
    U32 tableLog, maxW, sizeOfSort, nbSymbols;
    DTableDesc dtd = HUF_getDTableDesc(DTable);
    U32 const maxTableLog = dtd.maxTableLog;
    size_t iSize;
    void* dtPtr = DTable+1;   /* force compiler to avoid strict-aliasing */
    HUF_DEltX2* const dt = (HUF_DEltX2*)dtPtr;
    U32 *rankStart;

    rankValCol_t* rankVal;
    U32* rankStats;
    U32* rankStart0;
    sortedSymbol_t* sortedSymbol;
    BYTE* weightList;
    size_t spaceUsed32 = 0;   /* workspace consumption, counted in U32 units */

    rankVal = (rankValCol_t *)((U32 *)workSpace + spaceUsed32);
    spaceUsed32 += (sizeof(rankValCol_t) * HUF_TABLELOG_MAX) >> 2;
    rankStats = (U32 *)workSpace + spaceUsed32;
    spaceUsed32 += HUF_TABLELOG_MAX + 1;
    rankStart0 = (U32 *)workSpace + spaceUsed32;
    spaceUsed32 += HUF_TABLELOG_MAX + 2;
    sortedSymbol = (sortedSymbol_t *)workSpace + (spaceUsed32 * sizeof(U32)) / sizeof(sortedSymbol_t);
    spaceUsed32 += HUF_ALIGN(sizeof(sortedSymbol_t) * (HUF_SYMBOLVALUE_MAX + 1), sizeof(U32)) >> 2;
    weightList = (BYTE *)((U32 *)workSpace + spaceUsed32);
    spaceUsed32 += HUF_ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2;

    if ((spaceUsed32 << 2) > wkspSize) return ERROR(tableLog_tooLarge);

    rankStart = rankStart0 + 1;
    memset(rankStats, 0, sizeof(U32) * (2 * HUF_TABLELOG_MAX + 2 + 1));

    DEBUG_STATIC_ASSERT(sizeof(HUF_DEltX2) == sizeof(HUF_DTable));   /* if compiler fails here, assertion is wrong */
    if (maxTableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
    /* memset(weightList, 0, sizeof(weightList)); */  /* is not necessary, even though some analyzer complain ... */

    /* read per-symbol weights; rankStats[] receives counts per weight */
    iSize = HUF_readStats(weightList, HUF_SYMBOLVALUE_MAX + 1, rankStats, &nbSymbols, &tableLog, src, srcSize);
    if (HUF_isError(iSize)) return iSize;

    /* check result */
    if (tableLog > maxTableLog) return ERROR(tableLog_tooLarge);   /* DTable can't fit code depth */

    /* find maxWeight */
    for (maxW = tableLog; rankStats[maxW]==0; maxW--) {}  /* necessarily finds a solution before 0 */

    /* Get start index of each weight */
    {   U32 w, nextRankStart = 0;
        for (w=1; w<maxW+1; w++) {
            U32 current = nextRankStart;
            nextRankStart += rankStats[w];
            rankStart[w] = current;
        }
        rankStart[0] = nextRankStart;   /* put all 0w symbols at the end of sorted list*/
        sizeOfSort = nextRankStart;
    }

    /* sort symbols by weight (counting sort via rankStart cursors) */
    {   U32 s;
        for (s=0; s<nbSymbols; s++) {
            U32 const w = weightList[s];
            U32 const r = rankStart[w]++;
            sortedSymbol[r].symbol = (BYTE)s;
            sortedSymbol[r].weight = (BYTE)w;
        }
        rankStart[0] = 0;   /* forget 0w symbols; this is beginning of weight(1) */
    }

    /* Build rankVal */
    {   U32* const rankVal0 = rankVal[0];
        {   int const rescale = (maxTableLog-tableLog) - 1;   /* tableLog <= maxTableLog */
            U32 nextRankVal = 0;
            U32 w;
            for (w=1; w<maxW+1; w++) {
                U32 current = nextRankVal;
                nextRankVal += rankStats[w] << (w+rescale);
                rankVal0[w] = current;
        }   }
        /* pre-shift rankVal for every possible number of already-consumed bits */
        {   U32 const minBits = tableLog+1 - maxW;
            U32 consumed;
            for (consumed = minBits; consumed < maxTableLog - minBits + 1; consumed++) {
                U32* const rankValPtr = rankVal[consumed];
                U32 w;
                for (w = 1; w < maxW+1; w++) {
                    rankValPtr[w] = rankVal0[w] >> consumed;
    }   }   }   }

    HUF_fillDTableX2(dt, maxTableLog,
                   sortedSymbol, sizeOfSort,
                   rankStart0, rankVal, maxW,
                   tableLog+1);

    /* write back updated header */
    dtd.tableLog = (BYTE)maxTableLog;
    dtd.tableType = 1;
    memcpy(DTable, &dtd, sizeof(dtd));
    return iSize;
}
  548. size_t HUF_readDTableX2(HUF_DTable* DTable, const void* src, size_t srcSize)
  549. {
  550. U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
  551. return HUF_readDTableX2_wksp(DTable, src, srcSize,
  552. workSpace, sizeof(workSpace));
  553. }
/* HUF_decodeSymbolX2() :
 * Decodes one X2 table entry : copies its 2-byte sequence to `op`
 * (only `length` bytes of it are meaningful), consumes nbBits,
 * and returns the number of bytes produced (1 or 2). */
FORCE_INLINE_TEMPLATE U32
HUF_decodeSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog)
{
    size_t const val = BIT_lookBitsFast(DStream, dtLog);   /* note : dtLog >= 1 */
    memcpy(op, dt+val, 2);
    BIT_skipBits(DStream, dt[val].nbBits);
    return dt[val].length;
}
/* HUF_decodeLastSymbolX2() :
 * Decodes the final symbol of a stream, when only 1 byte of output remains :
 * writes only the first byte of the (possibly 2-byte) sequence,
 * and clamps bitsConsumed so the end-of-stream check still passes. */
FORCE_INLINE_TEMPLATE U32
HUF_decodeLastSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog)
{
    size_t const val = BIT_lookBitsFast(DStream, dtLog);   /* note : dtLog >= 1 */
    memcpy(op, dt+val, 1);
    if (dt[val].length==1) BIT_skipBits(DStream, dt[val].nbBits);
    else {
        if (DStream->bitsConsumed < (sizeof(DStream->bitContainer)*8)) {
            BIT_skipBits(DStream, dt[val].nbBits);
            if (DStream->bitsConsumed > (sizeof(DStream->bitContainer)*8))
                /* ugly hack; works only because it's the last symbol. Note : can't easily extract nbBits from just this symbol */
                DStream->bitsConsumed = (sizeof(DStream->bitContainer)*8);
    }   }
    return 1;
}
/* emit one (1-2 byte) entry unconditionally */
#define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) \
    ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)

/* emit only when a second decode is safe without reload
 * (64-bit accumulator, or small enough table) */
#define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr)      \
    if (MEM_64bits() || (HUF_TABLELOG_MAX<=12))     \
        ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)

/* emit only on 64-bit targets */
#define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \
    if (MEM_64bits())                          \
        ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)
/* HUF_decodeStreamX2() :
 * Decodes symbols from `bitDPtr` into [p, pEnd) using X2 table `dt`.
 * Each decode may emit up to 2 bytes, so the fast loop keeps a margin of
 * sizeof(bitContainer)-1 bytes, and a special last-symbol path handles
 * the final single byte. @return : number of bytes written. */
HINT_INLINE size_t
HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd,
                const HUF_DEltX2* const dt, const U32 dtLog)
{
    BYTE* const pStart = p;

    /* up to 8 symbols at a time */
    while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-(sizeof(bitDPtr->bitContainer)-1))) {
        HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
        HUF_DECODE_SYMBOLX2_1(p, bitDPtr);
        HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
        HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
    }

    /* closer to end : up to 2 symbols at a time */
    while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p <= pEnd-2))
        HUF_DECODE_SYMBOLX2_0(p, bitDPtr);

    while (p <= pEnd-2)
        HUF_DECODE_SYMBOLX2_0(p, bitDPtr);   /* no need to reload : reached the end of DStream */

    if (p < pEnd)
        p += HUF_decodeLastSymbolX2(p, bitDPtr, dt, dtLog);

    return p-pStart;
}
/* HUF_decompress1X2_usingDTable_internal_body() :
 * Decompresses a single-stream block with a double-symbol (X2) table.
 * @return : dstSize, or corruption_detected if the bitstream
 *           is not fully and exactly consumed. */
FORCE_INLINE_TEMPLATE size_t
HUF_decompress1X2_usingDTable_internal_body(
          void* dst,  size_t dstSize,
    const void* cSrc, size_t cSrcSize,
    const HUF_DTable* DTable)
{
    BIT_DStream_t bitD;

    /* Init */
    CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) );

    /* decode */
    {   BYTE* const ostart = (BYTE*) dst;
        BYTE* const oend = ostart + dstSize;
        const void* const dtPtr = DTable+1;   /* force compiler to not use strict-aliasing */
        const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr;
        DTableDesc const dtd = HUF_getDTableDesc(DTable);
        HUF_decodeStreamX2(ostart, &bitD, oend, dt, dtd.tableLog);
    }

    /* check */
    if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected);

    /* decoded size */
    return dstSize;
}
/* HUF_decompress4X2_usingDTable_internal_body() :
 * 4-streams decompression using a prepared X2 DTable.
 * The compressed input holds a 6-byte jump table (3 little-endian U16 stream
 * sizes; the 4th size is implicit), followed by 4 concatenated bitstreams.
 * Each stream regenerates one quarter (rounded up) of dst.
 * @return : dstSize on success, or an error code. */
FORCE_INLINE_TEMPLATE size_t
HUF_decompress4X2_usingDTable_internal_body(
          void* dst,  size_t dstSize,
    const void* cSrc, size_t cSrcSize,
    const HUF_DTable* DTable)
{
    if (cSrcSize < 10) return ERROR(corruption_detected);   /* strict minimum : jump table + 1 byte per stream */

    {   const BYTE* const istart = (const BYTE*) cSrc;
        BYTE* const ostart = (BYTE*) dst;
        BYTE* const oend = ostart + dstSize;
        /* main loop stops early enough that its unguarded writes cannot
         * overrun oend; the tail is finished stream by stream below */
        BYTE* const olimit = oend - (sizeof(size_t)-1);
        const void* const dtPtr = DTable+1;
        const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr;

        /* Init */
        BIT_DStream_t bitD1;
        BIT_DStream_t bitD2;
        BIT_DStream_t bitD3;
        BIT_DStream_t bitD4;
        size_t const length1 = MEM_readLE16(istart);
        size_t const length2 = MEM_readLE16(istart+2);
        size_t const length3 = MEM_readLE16(istart+4);
        size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6);   /* 4th size is implicit */
        const BYTE* const istart1 = istart + 6;  /* jumpTable */
        const BYTE* const istart2 = istart1 + length1;
        const BYTE* const istart3 = istart2 + length2;
        const BYTE* const istart4 = istart3 + length3;
        size_t const segmentSize = (dstSize+3) / 4;
        BYTE* const opStart2 = ostart + segmentSize;
        BYTE* const opStart3 = opStart2 + segmentSize;
        BYTE* const opStart4 = opStart3 + segmentSize;
        BYTE* op1 = ostart;
        BYTE* op2 = opStart2;
        BYTE* op3 = opStart3;
        BYTE* op4 = opStart4;
        U32 endSignal = 1;
        DTableDesc const dtd = HUF_getDTableDesc(DTable);
        U32 const dtLog = dtd.tableLog;

        /* length4 is computed by subtraction : if the declared sizes exceed
         * cSrcSize it wraps around, caught here */
        if (length4 > cSrcSize) return ERROR(corruption_detected);   /* overflow */
        CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
        CHECK_F( BIT_initDStream(&bitD2, istart2, length2) );
        CHECK_F( BIT_initDStream(&bitD3, istart3, length3) );
        CHECK_F( BIT_initDStream(&bitD4, istart4, length4) );

        /* 16-32 symbols per loop (4-8 symbols per stream) */
        for ( ; (endSignal) & (op4 < olimit); ) {
#if defined(__clang__) && (defined(__x86_64__) || defined(__i386__))
            /* alternate decode/reload interleaving — presumably tuned for
             * clang codegen on x86, as the #if condition suggests */
            HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
            HUF_DECODE_SYMBOLX2_1(op1, &bitD1);
            HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
            HUF_DECODE_SYMBOLX2_0(op1, &bitD1);
            HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
            HUF_DECODE_SYMBOLX2_1(op2, &bitD2);
            HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
            HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
            endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished;
            endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished;
            HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
            HUF_DECODE_SYMBOLX2_1(op3, &bitD3);
            HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
            HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
            HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
            HUF_DECODE_SYMBOLX2_1(op4, &bitD4);
            HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
            HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
            endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished;
            endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished;
#else
            /* generic path : interleave the 4 streams step by step to expose
             * independent work to the out-of-order engine */
            HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
            HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
            HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
            HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
            HUF_DECODE_SYMBOLX2_1(op1, &bitD1);
            HUF_DECODE_SYMBOLX2_1(op2, &bitD2);
            HUF_DECODE_SYMBOLX2_1(op3, &bitD3);
            HUF_DECODE_SYMBOLX2_1(op4, &bitD4);
            HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
            HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
            HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
            HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
            HUF_DECODE_SYMBOLX2_0(op1, &bitD1);
            HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
            HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
            HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
            endSignal = (U32)LIKELY(
                        (BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished)
                      & (BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished)
                      & (BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished)
                      & (BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished));
#endif
        }

        /* check corruption : each stream must still be within its segment */
        if (op1 > opStart2) return ERROR(corruption_detected);
        if (op2 > opStart3) return ERROR(corruption_detected);
        if (op3 > opStart4) return ERROR(corruption_detected);
        /* note : op4 already verified within main loop */

        /* finish bitStreams one by one */
        HUF_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog);
        HUF_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog);
        HUF_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog);
        HUF_decodeStreamX2(op4, &bitD4, oend,     dt, dtLog);

        /* check : all 4 streams must be fully and exactly consumed */
        {   U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
            if (!endCheck) return ERROR(corruption_detected); }

        /* decoded size */
        return dstSize;
    }
}
/* Instantiate the dispatching wrappers around the _body templates above.
 * NOTE(review): HUF_DGEN is defined earlier in this file — presumably it
 * emits the bmi2/non-bmi2 variants selected by the trailing `bmi2` argument
 * used at the call sites below; confirm against its definition. */
HUF_DGEN(HUF_decompress1X2_usingDTable_internal)
HUF_DGEN(HUF_decompress4X2_usingDTable_internal)
  736. size_t HUF_decompress1X2_usingDTable(
  737. void* dst, size_t dstSize,
  738. const void* cSrc, size_t cSrcSize,
  739. const HUF_DTable* DTable)
  740. {
  741. DTableDesc dtd = HUF_getDTableDesc(DTable);
  742. if (dtd.tableType != 1) return ERROR(GENERIC);
  743. return HUF_decompress1X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
  744. }
  745. size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
  746. const void* cSrc, size_t cSrcSize,
  747. void* workSpace, size_t wkspSize)
  748. {
  749. const BYTE* ip = (const BYTE*) cSrc;
  750. size_t const hSize = HUF_readDTableX2_wksp(DCtx, cSrc, cSrcSize,
  751. workSpace, wkspSize);
  752. if (HUF_isError(hSize)) return hSize;
  753. if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
  754. ip += hSize; cSrcSize -= hSize;
  755. return HUF_decompress1X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0);
  756. }
  757. size_t HUF_decompress1X2_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize,
  758. const void* cSrc, size_t cSrcSize)
  759. {
  760. U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
  761. return HUF_decompress1X2_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize,
  762. workSpace, sizeof(workSpace));
  763. }
  764. size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
  765. {
  766. HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX);
  767. return HUF_decompress1X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
  768. }
  769. size_t HUF_decompress4X2_usingDTable(
  770. void* dst, size_t dstSize,
  771. const void* cSrc, size_t cSrcSize,
  772. const HUF_DTable* DTable)
  773. {
  774. DTableDesc dtd = HUF_getDTableDesc(DTable);
  775. if (dtd.tableType != 1) return ERROR(GENERIC);
  776. return HUF_decompress4X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
  777. }
  778. static size_t HUF_decompress4X2_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize,
  779. const void* cSrc, size_t cSrcSize,
  780. void* workSpace, size_t wkspSize, int bmi2)
  781. {
  782. const BYTE* ip = (const BYTE*) cSrc;
  783. size_t hSize = HUF_readDTableX2_wksp(dctx, cSrc, cSrcSize,
  784. workSpace, wkspSize);
  785. if (HUF_isError(hSize)) return hSize;
  786. if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
  787. ip += hSize; cSrcSize -= hSize;
  788. return HUF_decompress4X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
  789. }
  790. size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
  791. const void* cSrc, size_t cSrcSize,
  792. void* workSpace, size_t wkspSize)
  793. {
  794. return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, /* bmi2 */ 0);
  795. }
  796. size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize,
  797. const void* cSrc, size_t cSrcSize)
  798. {
  799. U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
  800. return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
  801. workSpace, sizeof(workSpace));
  802. }
  803. size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
  804. {
  805. HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX);
  806. return HUF_decompress4X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
  807. }
  808. #endif /* HUF_FORCE_DECOMPRESS_X1 */
  809. /* ***********************************/
  810. /* Universal decompression selectors */
  811. /* ***********************************/
  812. size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize,
  813. const void* cSrc, size_t cSrcSize,
  814. const HUF_DTable* DTable)
  815. {
  816. DTableDesc const dtd = HUF_getDTableDesc(DTable);
  817. #if defined(HUF_FORCE_DECOMPRESS_X1)
  818. (void)dtd;
  819. assert(dtd.tableType == 0);
  820. return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
  821. #elif defined(HUF_FORCE_DECOMPRESS_X2)
  822. (void)dtd;
  823. assert(dtd.tableType == 1);
  824. return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
  825. #else
  826. return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) :
  827. HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
  828. #endif
  829. }
  830. size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize,
  831. const void* cSrc, size_t cSrcSize,
  832. const HUF_DTable* DTable)
  833. {
  834. DTableDesc const dtd = HUF_getDTableDesc(DTable);
  835. #if defined(HUF_FORCE_DECOMPRESS_X1)
  836. (void)dtd;
  837. assert(dtd.tableType == 0);
  838. return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
  839. #elif defined(HUF_FORCE_DECOMPRESS_X2)
  840. (void)dtd;
  841. assert(dtd.tableType == 1);
  842. return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
  843. #else
  844. return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) :
  845. HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
  846. #endif
  847. }
#if !defined(HUF_FORCE_DECOMPRESS_X1) && !defined(HUF_FORCE_DECOMPRESS_X2)
/* Cost model consumed by HUF_selectDecoder().
 * For row Q (quantized compression ratio, 0-15) and column A (algorithm),
 * the estimated run time is tableTime + decode256Time * (dstSize >> 8) :
 * tableTime is the constant term, decode256Time scales with every 256 bytes
 * of regenerated output. Only columns 0 (single) and 1 (double) are read
 * by HUF_selectDecoder(); the "quad" column is kept for reference. */
typedef struct { U32 tableTime; U32 decode256Time; } algo_time_t;
static const algo_time_t algoTime[16 /* Quantization */][3 /* single, double, quad */] =
{
    /* single, double, quad */
    {{0,0}, {1,1}, {2,2}},  /* Q==0 : impossible */
    {{0,0}, {1,1}, {2,2}},  /* Q==1 : impossible */
    {{  38,130}, {1313, 74}, {2151, 38}},   /* Q == 2 : 12-18% */
    {{ 448,128}, {1353, 74}, {2238, 41}},   /* Q == 3 : 18-25% */
    {{ 556,128}, {1353, 74}, {2238, 47}},   /* Q == 4 : 25-32% */
    {{ 714,128}, {1418, 74}, {2436, 53}},   /* Q == 5 : 32-38% */
    {{ 883,128}, {1437, 74}, {2464, 61}},   /* Q == 6 : 38-44% */
    {{ 897,128}, {1515, 75}, {2622, 68}},   /* Q == 7 : 44-50% */
    {{ 926,128}, {1613, 75}, {2730, 75}},   /* Q == 8 : 50-56% */
    {{ 947,128}, {1729, 77}, {3359, 77}},   /* Q == 9 : 56-62% */
    {{1107,128}, {2083, 81}, {4006, 84}},   /* Q ==10 : 62-69% */
    {{1177,128}, {2379, 87}, {4785, 88}},   /* Q ==11 : 69-75% */
    {{1242,128}, {2415, 93}, {5155, 84}},   /* Q ==12 : 75-81% */
    {{1349,128}, {2644,106}, {5260,106}},   /* Q ==13 : 81-87% */
    {{1455,128}, {2422,124}, {4174,124}},   /* Q ==14 : 87-93% */
    {{ 722,128}, {1891,145}, {1936,146}},   /* Q ==15 : 93-99% */
};
#endif
  871. /** HUF_selectDecoder() :
  872. * Tells which decoder is likely to decode faster,
  873. * based on a set of pre-computed metrics.
  874. * @return : 0==HUF_decompress4X1, 1==HUF_decompress4X2 .
  875. * Assumption : 0 < dstSize <= 128 KB */
  876. U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize)
  877. {
  878. assert(dstSize > 0);
  879. assert(dstSize <= 128*1024);
  880. #if defined(HUF_FORCE_DECOMPRESS_X1)
  881. (void)dstSize;
  882. (void)cSrcSize;
  883. return 0;
  884. #elif defined(HUF_FORCE_DECOMPRESS_X2)
  885. (void)dstSize;
  886. (void)cSrcSize;
  887. return 1;
  888. #else
  889. /* decoder timing evaluation */
  890. { U32 const Q = (cSrcSize >= dstSize) ? 15 : (U32)(cSrcSize * 16 / dstSize); /* Q < 16 */
  891. U32 const D256 = (U32)(dstSize >> 8);
  892. U32 const DTime0 = algoTime[Q][0].tableTime + (algoTime[Q][0].decode256Time * D256);
  893. U32 DTime1 = algoTime[Q][1].tableTime + (algoTime[Q][1].decode256Time * D256);
  894. DTime1 += DTime1 >> 3; /* advantage to algorithm using less memory, to reduce cache eviction */
  895. return DTime1 < DTime0;
  896. }
  897. #endif
  898. }
  899. typedef size_t (*decompressionAlgo)(void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);
  900. size_t HUF_decompress (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
  901. {
  902. #if !defined(HUF_FORCE_DECOMPRESS_X1) && !defined(HUF_FORCE_DECOMPRESS_X2)
  903. static const decompressionAlgo decompress[2] = { HUF_decompress4X1, HUF_decompress4X2 };
  904. #endif
  905. /* validation checks */
  906. if (dstSize == 0) return ERROR(dstSize_tooSmall);
  907. if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */
  908. if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */
  909. if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */
  910. { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
  911. #if defined(HUF_FORCE_DECOMPRESS_X1)
  912. (void)algoNb;
  913. assert(algoNb == 0);
  914. return HUF_decompress4X1(dst, dstSize, cSrc, cSrcSize);
  915. #elif defined(HUF_FORCE_DECOMPRESS_X2)
  916. (void)algoNb;
  917. assert(algoNb == 1);
  918. return HUF_decompress4X2(dst, dstSize, cSrc, cSrcSize);
  919. #else
  920. return decompress[algoNb](dst, dstSize, cSrc, cSrcSize);
  921. #endif
  922. }
  923. }
  924. size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
  925. {
  926. /* validation checks */
  927. if (dstSize == 0) return ERROR(dstSize_tooSmall);
  928. if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */
  929. if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */
  930. if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */
  931. { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
  932. #if defined(HUF_FORCE_DECOMPRESS_X1)
  933. (void)algoNb;
  934. assert(algoNb == 0);
  935. return HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize);
  936. #elif defined(HUF_FORCE_DECOMPRESS_X2)
  937. (void)algoNb;
  938. assert(algoNb == 1);
  939. return HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize);
  940. #else
  941. return algoNb ? HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) :
  942. HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) ;
  943. #endif
  944. }
  945. }
  946. size_t HUF_decompress4X_hufOnly(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
  947. {
  948. U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
  949. return HUF_decompress4X_hufOnly_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
  950. workSpace, sizeof(workSpace));
  951. }
  952. size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst,
  953. size_t dstSize, const void* cSrc,
  954. size_t cSrcSize, void* workSpace,
  955. size_t wkspSize)
  956. {
  957. /* validation checks */
  958. if (dstSize == 0) return ERROR(dstSize_tooSmall);
  959. if (cSrcSize == 0) return ERROR(corruption_detected);
  960. { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
  961. #if defined(HUF_FORCE_DECOMPRESS_X1)
  962. (void)algoNb;
  963. assert(algoNb == 0);
  964. return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);
  965. #elif defined(HUF_FORCE_DECOMPRESS_X2)
  966. (void)algoNb;
  967. assert(algoNb == 1);
  968. return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);
  969. #else
  970. return algoNb ? HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc,
  971. cSrcSize, workSpace, wkspSize):
  972. HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);
  973. #endif
  974. }
  975. }
  976. size_t HUF_decompress1X_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
  977. const void* cSrc, size_t cSrcSize,
  978. void* workSpace, size_t wkspSize)
  979. {
  980. /* validation checks */
  981. if (dstSize == 0) return ERROR(dstSize_tooSmall);
  982. if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */
  983. if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */
  984. if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */
  985. { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
  986. #if defined(HUF_FORCE_DECOMPRESS_X1)
  987. (void)algoNb;
  988. assert(algoNb == 0);
  989. return HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc,
  990. cSrcSize, workSpace, wkspSize);
  991. #elif defined(HUF_FORCE_DECOMPRESS_X2)
  992. (void)algoNb;
  993. assert(algoNb == 1);
  994. return HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc,
  995. cSrcSize, workSpace, wkspSize);
  996. #else
  997. return algoNb ? HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc,
  998. cSrcSize, workSpace, wkspSize):
  999. HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc,
  1000. cSrcSize, workSpace, wkspSize);
  1001. #endif
  1002. }
  1003. }
  1004. size_t HUF_decompress1X_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize,
  1005. const void* cSrc, size_t cSrcSize)
  1006. {
  1007. U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
  1008. return HUF_decompress1X_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
  1009. workSpace, sizeof(workSpace));
  1010. }
  1011. size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2)
  1012. {
  1013. DTableDesc const dtd = HUF_getDTableDesc(DTable);
  1014. #if defined(HUF_FORCE_DECOMPRESS_X1)
  1015. (void)dtd;
  1016. assert(dtd.tableType == 0);
  1017. return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
  1018. #elif defined(HUF_FORCE_DECOMPRESS_X2)
  1019. (void)dtd;
  1020. assert(dtd.tableType == 1);
  1021. return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
  1022. #else
  1023. return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) :
  1024. HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
  1025. #endif
  1026. }
  1027. #ifndef HUF_FORCE_DECOMPRESS_X2
  1028. size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2)
  1029. {
  1030. const BYTE* ip = (const BYTE*) cSrc;
  1031. size_t const hSize = HUF_readDTableX1_wksp(dctx, cSrc, cSrcSize, workSpace, wkspSize);
  1032. if (HUF_isError(hSize)) return hSize;
  1033. if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
  1034. ip += hSize; cSrcSize -= hSize;
  1035. return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
  1036. }
  1037. #endif
  1038. size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2)
  1039. {
  1040. DTableDesc const dtd = HUF_getDTableDesc(DTable);
  1041. #if defined(HUF_FORCE_DECOMPRESS_X1)
  1042. (void)dtd;
  1043. assert(dtd.tableType == 0);
  1044. return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
  1045. #elif defined(HUF_FORCE_DECOMPRESS_X2)
  1046. (void)dtd;
  1047. assert(dtd.tableType == 1);
  1048. return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
  1049. #else
  1050. return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) :
  1051. HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
  1052. #endif
  1053. }
  1054. size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2)
  1055. {
  1056. /* validation checks */
  1057. if (dstSize == 0) return ERROR(dstSize_tooSmall);
  1058. if (cSrcSize == 0) return ERROR(corruption_detected);
  1059. { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
  1060. #if defined(HUF_FORCE_DECOMPRESS_X1)
  1061. (void)algoNb;
  1062. assert(algoNb == 0);
  1063. return HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
  1064. #elif defined(HUF_FORCE_DECOMPRESS_X2)
  1065. (void)algoNb;
  1066. assert(algoNb == 1);
  1067. return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
  1068. #else
  1069. return algoNb ? HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2) :
  1070. HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
  1071. #endif
  1072. }
  1073. }