
zstd_lazy.c

/*
 * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
 * All rights reserved.
 *
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 * You may select, at your option, one of the above-listed licenses.
 */

#include "zstd_compress_internal.h"
#include "zstd_lazy.h"


/*-*************************************
*  Binary Tree search
***************************************/
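
/* The DUBT ("dual-use binary tree") reuses ms->chainTable as a binary tree :
 * each position owns two U32 slots at bt[2*(idx & btMask)].
 * A freshly inserted position is first linked like a hash chain (slot 0 points
 * to the previous position with the same hash) and its slot 1 is tagged
 * ZSTD_DUBT_UNSORTED_MARK. Turning those chain links into proper
 * smaller/larger child links is deferred until a search actually visits the
 * candidate : see ZSTD_insertDUBT1() and ZSTD_DUBT_findBestMatch() below. */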
static void
ZSTD_updateDUBT(ZSTD_matchState_t* ms,
                const BYTE* ip, const BYTE* iend,
                U32 mls)
{
    const ZSTD_compressionParameters* const cParams = &ms->cParams;
    U32* const hashTable = ms->hashTable;
    U32 const hashLog = cParams->hashLog;

    U32* const bt = ms->chainTable;
    U32 const btLog  = cParams->chainLog - 1;
    U32 const btMask = (1 << btLog) - 1;

    const BYTE* const base = ms->window.base;
    U32 const target = (U32)(ip - base);
    U32 idx = ms->nextToUpdate;

    if (idx != target)
        DEBUGLOG(7, "ZSTD_updateDUBT, from %u to %u (dictLimit:%u)",
                    idx, target, ms->window.dictLimit);
    assert(ip + 8 <= iend);   /* condition for ZSTD_hashPtr */
    (void)iend;

    assert(idx >= ms->window.dictLimit);   /* condition for valid base+idx */
    for ( ; idx < target ; idx++) {
        size_t const h = ZSTD_hashPtr(base + idx, hashLog, mls);   /* assumption : ip + 8 <= iend */
        U32 const matchIndex = hashTable[h];

        U32* const nextCandidatePtr = bt + 2*(idx&btMask);
        U32* const sortMarkPtr = nextCandidatePtr + 1;

        DEBUGLOG(8, "ZSTD_updateDUBT: insert %u", idx);
        hashTable[h] = idx;   /* Update Hash Table */
        *nextCandidatePtr = matchIndex;   /* update BT like a chain */
        *sortMarkPtr = ZSTD_DUBT_UNSORTED_MARK;
    }
    ms->nextToUpdate = target;
}
/** ZSTD_insertDUBT1() :
 *  sort one already inserted but unsorted position
 *  assumption : current >= btLow == (current - btMask)
 *  doesn't fail */
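/* In effect, ZSTD_insertDUBT1() performs a classical binary-tree insertion of
 * position `current` : starting from the chain link stored in current's slots,
 * it walks the older candidates, extends the guaranteed common prefix on each
 * side, and splices each visited node into current's smaller or larger
 * subtree, converting unsorted chain links into proper tree links. */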
static void
ZSTD_insertDUBT1(ZSTD_matchState_t* ms,
                 U32 current, const BYTE* inputEnd,
                 U32 nbCompares, U32 btLow,
                 const ZSTD_dictMode_e dictMode)
{
    const ZSTD_compressionParameters* const cParams = &ms->cParams;
    U32* const bt = ms->chainTable;
    U32 const btLog  = cParams->chainLog - 1;
    U32 const btMask = (1 << btLog) - 1;
    size_t commonLengthSmaller=0, commonLengthLarger=0;
    const BYTE* const base = ms->window.base;
    const BYTE* const dictBase = ms->window.dictBase;
    const U32 dictLimit = ms->window.dictLimit;
    const BYTE* const ip = (current>=dictLimit) ? base + current : dictBase + current;
    const BYTE* const iend = (current>=dictLimit) ? inputEnd : dictBase + dictLimit;
    const BYTE* const dictEnd = dictBase + dictLimit;
    const BYTE* const prefixStart = base + dictLimit;
    const BYTE* match;
    U32* smallerPtr = bt + 2*(current&btMask);
    U32* largerPtr  = smallerPtr + 1;
    U32 matchIndex = *smallerPtr;   /* this candidate is unsorted : next sorted candidate is reached through *smallerPtr, while *largerPtr contains previous unsorted candidate (which is already saved and can be overwritten) */
    U32 dummy32;   /* to be nullified at the end */
    U32 const windowValid = ms->window.lowLimit;
    U32 const maxDistance = 1U << cParams->windowLog;
    U32 const windowLow = (current - windowValid > maxDistance) ? current - maxDistance : windowValid;

    DEBUGLOG(8, "ZSTD_insertDUBT1(%u) (dictLimit=%u, lowLimit=%u)",
                current, dictLimit, windowLow);
    assert(current >= btLow);
    assert(ip < iend);   /* condition for ZSTD_count */

    while (nbCompares-- && (matchIndex > windowLow)) {
        U32* const nextPtr = bt + 2*(matchIndex & btMask);
        size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
        assert(matchIndex < current);
        /* note : all candidates are now supposed sorted,
         * but it's still possible to have nextPtr[1] == ZSTD_DUBT_UNSORTED_MARK
         * when a real index has the same value as ZSTD_DUBT_UNSORTED_MARK */

        if ( (dictMode != ZSTD_extDict)
          || (matchIndex+matchLength >= dictLimit)  /* both in current segment */
          || (current < dictLimit) /* both in extDict */) {
            const BYTE* const mBase = ( (dictMode != ZSTD_extDict)
                                     || (matchIndex+matchLength >= dictLimit)) ?
                                        base : dictBase;
            assert( (matchIndex+matchLength >= dictLimit)   /* might be wrong if extDict is incorrectly set to 0 */
                 || (current < dictLimit) );
            match = mBase + matchIndex;
            matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend);
        } else {
            match = dictBase + matchIndex;
            matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
            if (matchIndex+matchLength >= dictLimit)
                match = base + matchIndex;   /* preparation for next read of match[matchLength] */
        }

        DEBUGLOG(8, "ZSTD_insertDUBT1: comparing %u with %u : found %u common bytes ",
                    current, matchIndex, (U32)matchLength);

        if (ip+matchLength == iend) {   /* equal : no way to know if inf or sup */
            break;   /* drop, to guarantee consistency; misses a bit of compression, but other solutions can corrupt the tree */
        }

        if (match[matchLength] < ip[matchLength]) {  /* necessarily within buffer */
            /* match is smaller than current */
            *smallerPtr = matchIndex;             /* update smaller idx */
            commonLengthSmaller = matchLength;    /* all smaller will now have at least this guaranteed common length */
            if (matchIndex <= btLow) { smallerPtr=&dummy32; break; }   /* beyond tree size, stop searching */
            DEBUGLOG(8, "ZSTD_insertDUBT1: %u (>btLow=%u) is smaller : next => %u",
                        matchIndex, btLow, nextPtr[1]);
            smallerPtr = nextPtr+1;               /* new "candidate" => larger than match, which was smaller than target */
            matchIndex = nextPtr[1];              /* new matchIndex, larger than previous and closer to current */
        } else {
            /* match is larger than current */
            *largerPtr = matchIndex;
            commonLengthLarger = matchLength;
            if (matchIndex <= btLow) { largerPtr=&dummy32; break; }   /* beyond tree size, stop searching */
            DEBUGLOG(8, "ZSTD_insertDUBT1: %u (>btLow=%u) is larger => %u",
                        matchIndex, btLow, nextPtr[0]);
            largerPtr = nextPtr;
            matchIndex = nextPtr[0];
    }   }

    *smallerPtr = *largerPtr = 0;
}
static size_t
ZSTD_DUBT_findBetterDictMatch (
        ZSTD_matchState_t* ms,
        const BYTE* const ip, const BYTE* const iend,
        size_t* offsetPtr,
        size_t bestLength,
        U32 nbCompares,
        U32 const mls,
        const ZSTD_dictMode_e dictMode)
{
    const ZSTD_matchState_t * const dms = ms->dictMatchState;
    const ZSTD_compressionParameters* const dmsCParams = &dms->cParams;
    const U32 * const dictHashTable = dms->hashTable;
    U32 const hashLog = dmsCParams->hashLog;
    size_t const h  = ZSTD_hashPtr(ip, hashLog, mls);
    U32 dictMatchIndex = dictHashTable[h];

    const BYTE* const base = ms->window.base;
    const BYTE* const prefixStart = base + ms->window.dictLimit;
    U32 const current = (U32)(ip-base);
    const BYTE* const dictBase = dms->window.base;
    const BYTE* const dictEnd = dms->window.nextSrc;
    U32 const dictHighLimit = (U32)(dms->window.nextSrc - dms->window.base);
    U32 const dictLowLimit = dms->window.lowLimit;
    U32 const dictIndexDelta = ms->window.lowLimit - dictHighLimit;

    U32* const dictBt = dms->chainTable;
    U32 const btLog  = dmsCParams->chainLog - 1;
    U32 const btMask = (1 << btLog) - 1;
    U32 const btLow = (btMask >= dictHighLimit - dictLowLimit) ? dictLowLimit : dictHighLimit - btMask;

    size_t commonLengthSmaller=0, commonLengthLarger=0;

    (void)dictMode;
    assert(dictMode == ZSTD_dictMatchState);

    while (nbCompares-- && (dictMatchIndex > dictLowLimit)) {
        U32* const nextPtr = dictBt + 2*(dictMatchIndex & btMask);
        size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
        const BYTE* match = dictBase + dictMatchIndex;
        matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
        if (dictMatchIndex+matchLength >= dictHighLimit)
            match = base + dictMatchIndex + dictIndexDelta;   /* to prepare for next usage of match[matchLength] */

        if (matchLength > bestLength) {
            U32 matchIndex = dictMatchIndex + dictIndexDelta;
            if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(current-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) ) {
                DEBUGLOG(9, "ZSTD_DUBT_findBetterDictMatch(%u) : found better match length %u -> %u and offsetCode %u -> %u (dictMatchIndex %u, matchIndex %u)",
                    current, (U32)bestLength, (U32)matchLength, (U32)*offsetPtr, ZSTD_REP_MOVE + current - matchIndex, dictMatchIndex, matchIndex);
                bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + current - matchIndex;
            }
            if (ip+matchLength == iend) {   /* reached end of input : ip[matchLength] is not valid, no way to know if it's larger or smaller than match */
                break;   /* drop, to guarantee consistency (miss a little bit of compression) */
            }
        }

        if (match[matchLength] < ip[matchLength]) {
            if (dictMatchIndex <= btLow) { break; }   /* beyond tree size, stop the search */
            commonLengthSmaller = matchLength;   /* all smaller will now have at least this guaranteed common length */
            dictMatchIndex = nextPtr[1];         /* new matchIndex larger than previous (closer to current) */
        } else {
            /* match is larger than current */
            if (dictMatchIndex <= btLow) { break; }   /* beyond tree size, stop the search */
            commonLengthLarger = matchLength;
            dictMatchIndex = nextPtr[0];
        }
    }

    if (bestLength >= MINMATCH) {
        U32 const mIndex = current - ((U32)*offsetPtr - ZSTD_REP_MOVE); (void)mIndex;
        DEBUGLOG(8, "ZSTD_DUBT_findBetterDictMatch(%u) : found match of length %u and offsetCode %u (pos %u)",
                    current, (U32)bestLength, (U32)*offsetPtr, mIndex);
    }
    return bestLength;
}
static size_t
ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms,
                        const BYTE* const ip, const BYTE* const iend,
                        size_t* offsetPtr,
                        U32 const mls,
                        const ZSTD_dictMode_e dictMode)
{
    const ZSTD_compressionParameters* const cParams = &ms->cParams;
    U32* const hashTable = ms->hashTable;
    U32 const hashLog = cParams->hashLog;
    size_t const h  = ZSTD_hashPtr(ip, hashLog, mls);
    U32 matchIndex  = hashTable[h];

    const BYTE* const base = ms->window.base;
    U32 const current = (U32)(ip-base);
    U32 const windowLow = ZSTD_getLowestMatchIndex(ms, current, cParams->windowLog);

    U32* const bt = ms->chainTable;
    U32 const btLog  = cParams->chainLog - 1;
    U32 const btMask = (1 << btLog) - 1;
    U32 const btLow = (btMask >= current) ? 0 : current - btMask;
    U32 const unsortLimit = MAX(btLow, windowLow);

    U32* nextCandidate = bt + 2*(matchIndex&btMask);
    U32* unsortedMark = bt + 2*(matchIndex&btMask) + 1;
    U32 nbCompares = 1U << cParams->searchLog;
    U32 nbCandidates = nbCompares;
    U32 previousCandidate = 0;

    DEBUGLOG(7, "ZSTD_DUBT_findBestMatch (%u) ", current);
    assert(ip <= iend-8);   /* required for h calculation */

    /* reach end of unsorted candidates list */
    while ( (matchIndex > unsortLimit)
         && (*unsortedMark == ZSTD_DUBT_UNSORTED_MARK)
         && (nbCandidates > 1) ) {
        DEBUGLOG(8, "ZSTD_DUBT_findBestMatch: candidate %u is unsorted",
                    matchIndex);
        *unsortedMark = previousCandidate;  /* the unsortedMark becomes a reversed chain, to move back up to the original position */
        previousCandidate = matchIndex;
        matchIndex = *nextCandidate;
        nextCandidate = bt + 2*(matchIndex&btMask);
        unsortedMark = bt + 2*(matchIndex&btMask) + 1;
        nbCandidates --;
    }

    /* nullify last candidate if it's still unsorted
     * simplification, detrimental to compression ratio, beneficial for speed */
    if ( (matchIndex > unsortLimit)
      && (*unsortedMark==ZSTD_DUBT_UNSORTED_MARK) ) {
        DEBUGLOG(7, "ZSTD_DUBT_findBestMatch: nullify last unsorted candidate %u",
                    matchIndex);
        *nextCandidate = *unsortedMark = 0;
    }

    /* batch sort stacked candidates */
    matchIndex = previousCandidate;
    while (matchIndex) {  /* will end on matchIndex == 0 */
        U32* const nextCandidateIdxPtr = bt + 2*(matchIndex&btMask) + 1;
        U32 const nextCandidateIdx = *nextCandidateIdxPtr;
        ZSTD_insertDUBT1(ms, matchIndex, iend,
                         nbCandidates, unsortLimit, dictMode);
        matchIndex = nextCandidateIdx;
        nbCandidates++;
    }

    /* find longest match */
    {   size_t commonLengthSmaller = 0, commonLengthLarger = 0;
        const BYTE* const dictBase = ms->window.dictBase;
        const U32 dictLimit = ms->window.dictLimit;
        const BYTE* const dictEnd = dictBase + dictLimit;
        const BYTE* const prefixStart = base + dictLimit;
        U32* smallerPtr = bt + 2*(current&btMask);
        U32* largerPtr  = bt + 2*(current&btMask) + 1;
        U32 matchEndIdx = current + 8 + 1;
        U32 dummy32;   /* to be nullified at the end */
        size_t bestLength = 0;

        matchIndex  = hashTable[h];
        hashTable[h] = current;   /* Update Hash Table */

        while (nbCompares-- && (matchIndex > windowLow)) {
            U32* const nextPtr = bt + 2*(matchIndex & btMask);
            size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
            const BYTE* match;

            if ((dictMode != ZSTD_extDict) || (matchIndex+matchLength >= dictLimit)) {
                match = base + matchIndex;
                matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend);
            } else {
                match = dictBase + matchIndex;
                matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
                if (matchIndex+matchLength >= dictLimit)
                    match = base + matchIndex;   /* to prepare for next usage of match[matchLength] */
            }

            if (matchLength > bestLength) {
                if (matchLength > matchEndIdx - matchIndex)
                    matchEndIdx = matchIndex + (U32)matchLength;
                if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(current-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) )
                    bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + current - matchIndex;
                if (ip+matchLength == iend) {   /* equal : no way to know if inf or sup */
                    if (dictMode == ZSTD_dictMatchState) {
                        nbCompares = 0; /* in addition to avoiding checking any
                                         * further in this loop, make sure we
                                         * skip checking in the dictionary. */
                    }
                    break;   /* drop, to guarantee consistency (miss a little bit of compression) */
                }
            }

            if (match[matchLength] < ip[matchLength]) {
                /* match is smaller than current */
                *smallerPtr = matchIndex;             /* update smaller idx */
                commonLengthSmaller = matchLength;    /* all smaller will now have at least this guaranteed common length */
                if (matchIndex <= btLow) { smallerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
                smallerPtr = nextPtr+1;               /* new "smaller" => larger of match */
                matchIndex = nextPtr[1];              /* new matchIndex larger than previous (closer to current) */
            } else {
                /* match is larger than current */
                *largerPtr = matchIndex;
                commonLengthLarger = matchLength;
                if (matchIndex <= btLow) { largerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
                largerPtr = nextPtr;
                matchIndex = nextPtr[0];
        }   }

        *smallerPtr = *largerPtr = 0;

        if (dictMode == ZSTD_dictMatchState && nbCompares) {
            bestLength = ZSTD_DUBT_findBetterDictMatch(
                    ms, ip, iend,
                    offsetPtr, bestLength, nbCompares,
                    mls, dictMode);
        }

        assert(matchEndIdx > current+8); /* ensure nextToUpdate is increased */
        ms->nextToUpdate = matchEndIdx - 8;   /* skip repetitive patterns */
        if (bestLength >= MINMATCH) {
            U32 const mIndex = current - ((U32)*offsetPtr - ZSTD_REP_MOVE); (void)mIndex;
            DEBUGLOG(8, "ZSTD_DUBT_findBestMatch(%u) : found match of length %u and offsetCode %u (pos %u)",
                        current, (U32)bestLength, (U32)*offsetPtr, mIndex);
        }
        return bestLength;
    }
}
/** ZSTD_BtFindBestMatch() : Tree updater, providing best match */
FORCE_INLINE_TEMPLATE size_t
ZSTD_BtFindBestMatch( ZSTD_matchState_t* ms,
                const BYTE* const ip, const BYTE* const iLimit,
                      size_t* offsetPtr,
                const U32 mls /* template */,
                const ZSTD_dictMode_e dictMode)
{
    DEBUGLOG(7, "ZSTD_BtFindBestMatch");
    if (ip < ms->window.base + ms->nextToUpdate) return 0;   /* skipped area */
    ZSTD_updateDUBT(ms, ip, iLimit, mls);
    return ZSTD_DUBT_findBestMatch(ms, ip, iLimit, offsetPtr, mls, dictMode);
}
static size_t
ZSTD_BtFindBestMatch_selectMLS (  ZSTD_matchState_t* ms,
                            const BYTE* ip, const BYTE* const iLimit,
                                  size_t* offsetPtr)
{
    switch(ms->cParams.minMatch)
    {
    default : /* includes case 3 */
    case 4 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 4, ZSTD_noDict);
    case 5 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 5, ZSTD_noDict);
    case 7 :
    case 6 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 6, ZSTD_noDict);
    }
}

static size_t ZSTD_BtFindBestMatch_dictMatchState_selectMLS (
                        ZSTD_matchState_t* ms,
                        const BYTE* ip, const BYTE* const iLimit,
                        size_t* offsetPtr)
{
    switch(ms->cParams.minMatch)
    {
    default : /* includes case 3 */
    case 4 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 4, ZSTD_dictMatchState);
    case 5 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 5, ZSTD_dictMatchState);
    case 7 :
    case 6 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 6, ZSTD_dictMatchState);
    }
}

static size_t ZSTD_BtFindBestMatch_extDict_selectMLS (
                        ZSTD_matchState_t* ms,
                        const BYTE* ip, const BYTE* const iLimit,
                        size_t* offsetPtr)
{
    switch(ms->cParams.minMatch)
    {
    default : /* includes case 3 */
    case 4 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 4, ZSTD_extDict);
    case 5 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 5, ZSTD_extDict);
    case 7 :
    case 6 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 6, ZSTD_extDict);
    }
}
/* *********************************
*  Hash Chain
***********************************/
#define NEXT_IN_CHAIN(d, mask)   chainTable[(d) & (mask)]
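
/* chainTable[idx & chainMask] stores the previous position sharing the same
 * hash as position idx ; hashTable[h] always holds the most recent position.
 * Following NEXT_IN_CHAIN() therefore enumerates candidates from nearest to
 * farthest, until the chain falls below the valid window. */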
/* Update chains up to ip (excluded)
   Assumption : always within prefix (i.e. not within extDict) */
static U32 ZSTD_insertAndFindFirstIndex_internal(
                        ZSTD_matchState_t* ms,
                        const ZSTD_compressionParameters* const cParams,
                        const BYTE* ip, U32 const mls)
{
    U32* const hashTable  = ms->hashTable;
    const U32 hashLog = cParams->hashLog;
    U32* const chainTable = ms->chainTable;
    const U32 chainMask = (1 << cParams->chainLog) - 1;
    const BYTE* const base = ms->window.base;
    const U32 target = (U32)(ip - base);
    U32 idx = ms->nextToUpdate;

    while(idx < target) { /* catch up */
        size_t const h = ZSTD_hashPtr(base+idx, hashLog, mls);
        NEXT_IN_CHAIN(idx, chainMask) = hashTable[h];
        hashTable[h] = idx;
        idx++;
    }

    ms->nextToUpdate = target;
    return hashTable[ZSTD_hashPtr(ip, hashLog, mls)];
}

U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip) {
    const ZSTD_compressionParameters* const cParams = &ms->cParams;
    return ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, ms->cParams.minMatch);
}
/* inlining is important to hardwire a hot branch (template emulation) */
FORCE_INLINE_TEMPLATE
size_t ZSTD_HcFindBestMatch_generic (
                        ZSTD_matchState_t* ms,
                        const BYTE* const ip, const BYTE* const iLimit,
                        size_t* offsetPtr,
                        const U32 mls, const ZSTD_dictMode_e dictMode)
{
    const ZSTD_compressionParameters* const cParams = &ms->cParams;
    U32* const chainTable = ms->chainTable;
    const U32 chainSize = (1 << cParams->chainLog);
    const U32 chainMask = chainSize-1;
    const BYTE* const base = ms->window.base;
    const BYTE* const dictBase = ms->window.dictBase;
    const U32 dictLimit = ms->window.dictLimit;
    const BYTE* const prefixStart = base + dictLimit;
    const BYTE* const dictEnd = dictBase + dictLimit;
    const U32 current = (U32)(ip-base);
    const U32 maxDistance = 1U << cParams->windowLog;
    const U32 lowestValid = ms->window.lowLimit;
    const U32 withinMaxDistance = (current - lowestValid > maxDistance) ? current - maxDistance : lowestValid;
    const U32 isDictionary = (ms->loadedDictEnd != 0);
    const U32 lowLimit = isDictionary ? lowestValid : withinMaxDistance;
    const U32 minChain = current > chainSize ? current - chainSize : 0;
    U32 nbAttempts = 1U << cParams->searchLog;
    size_t ml=4-1;   /* minimum match length is 4 : start just below it, so any valid match improves ml */

    /* HC4 match finder */
    U32 matchIndex = ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, mls);

    for ( ; (matchIndex>lowLimit) & (nbAttempts>0) ; nbAttempts--) {
        size_t currentMl=0;
        if ((dictMode != ZSTD_extDict) || matchIndex >= dictLimit) {
            const BYTE* const match = base + matchIndex;
            assert(matchIndex >= dictLimit);   /* ensures this is true if dictMode != ZSTD_extDict */
            if (match[ml] == ip[ml])   /* potentially better */
                currentMl = ZSTD_count(ip, match, iLimit);
        } else {
            const BYTE* const match = dictBase + matchIndex;
            assert(match+4 <= dictEnd);
            if (MEM_read32(match) == MEM_read32(ip))   /* assumption : matchIndex <= dictLimit-4 (by table construction) */
                currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, dictEnd, prefixStart) + 4;
        }

        /* save best solution */
        if (currentMl > ml) {
            ml = currentMl;
            *offsetPtr = current - matchIndex + ZSTD_REP_MOVE;
            if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */
        }

        if (matchIndex <= minChain) break;
        matchIndex = NEXT_IN_CHAIN(matchIndex, chainMask);
    }

    if (dictMode == ZSTD_dictMatchState) {
        const ZSTD_matchState_t* const dms = ms->dictMatchState;
        const U32* const dmsChainTable = dms->chainTable;
        const U32 dmsChainSize         = (1 << dms->cParams.chainLog);
        const U32 dmsChainMask         = dmsChainSize - 1;
        const U32 dmsLowestIndex       = dms->window.dictLimit;
        const BYTE* const dmsBase      = dms->window.base;
        const BYTE* const dmsEnd       = dms->window.nextSrc;
        const U32 dmsSize              = (U32)(dmsEnd - dmsBase);
        const U32 dmsIndexDelta        = dictLimit - dmsSize;
        const U32 dmsMinChain = dmsSize > dmsChainSize ? dmsSize - dmsChainSize : 0;

        matchIndex = dms->hashTable[ZSTD_hashPtr(ip, dms->cParams.hashLog, mls)];

        for ( ; (matchIndex>dmsLowestIndex) & (nbAttempts>0) ; nbAttempts--) {
            size_t currentMl=0;
            const BYTE* const match = dmsBase + matchIndex;
            assert(match+4 <= dmsEnd);
            if (MEM_read32(match) == MEM_read32(ip))   /* assumption : matchIndex <= dictLimit-4 (by table construction) */
                currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, dmsEnd, prefixStart) + 4;

            /* save best solution */
            if (currentMl > ml) {
                ml = currentMl;
                *offsetPtr = current - (matchIndex + dmsIndexDelta) + ZSTD_REP_MOVE;
                if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */
            }

            if (matchIndex <= dmsMinChain) break;
            matchIndex = dmsChainTable[matchIndex & dmsChainMask];
        }
    }

    return ml;
}
FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_selectMLS (
                        ZSTD_matchState_t* ms,
                        const BYTE* ip, const BYTE* const iLimit,
                        size_t* offsetPtr)
{
    switch(ms->cParams.minMatch)
    {
    default : /* includes case 3 */
    case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_noDict);
    case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_noDict);
    case 7 :
    case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_noDict);
    }
}

static size_t ZSTD_HcFindBestMatch_dictMatchState_selectMLS (
                        ZSTD_matchState_t* ms,
                        const BYTE* ip, const BYTE* const iLimit,
                        size_t* offsetPtr)
{
    switch(ms->cParams.minMatch)
    {
    default : /* includes case 3 */
    case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_dictMatchState);
    case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_dictMatchState);
    case 7 :
    case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_dictMatchState);
    }
}

FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_extDict_selectMLS (
                        ZSTD_matchState_t* ms,
                        const BYTE* ip, const BYTE* const iLimit,
                        size_t* offsetPtr)
{
    switch(ms->cParams.minMatch)
    {
    default : /* includes case 3 */
    case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_extDict);
    case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_extDict);
    case 7 :
    case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_extDict);
    }
}
/* *******************************
*  Common parser - lazy strategy
*********************************/
typedef enum { search_hashChain, search_binaryTree } searchMethod_e;
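
/* The parser below is shared by all lazy strategies ; the strategy is
 * selected by two template parameters :
 * - searchMethod : hash chain (greedy, lazy, lazy2) or binary tree (btlazy2)
 * - depth : how many extra positions may be tried before a match is emitted
 *   (0 = greedy : take the first acceptable match ;
 *    1 = lazy   : also consider a match starting at ip+1 ;
 *    2 = lazy2  : also consider a match starting at ip+2). */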
FORCE_INLINE_TEMPLATE size_t
ZSTD_compressBlock_lazy_generic(
                        ZSTD_matchState_t* ms, seqStore_t* seqStore,
                        U32 rep[ZSTD_REP_NUM],
                        const void* src, size_t srcSize,
                        const searchMethod_e searchMethod, const U32 depth,
                        ZSTD_dictMode_e const dictMode)
{
    const BYTE* const istart = (const BYTE*)src;
    const BYTE* ip = istart;
    const BYTE* anchor = istart;
    const BYTE* const iend = istart + srcSize;
    const BYTE* const ilimit = iend - 8;
    const BYTE* const base = ms->window.base;
    const U32 prefixLowestIndex = ms->window.dictLimit;
    const BYTE* const prefixLowest = base + prefixLowestIndex;

    typedef size_t (*searchMax_f)(
                        ZSTD_matchState_t* ms,
                        const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr);
    searchMax_f const searchMax = dictMode == ZSTD_dictMatchState ?
        (searchMethod==search_binaryTree ? ZSTD_BtFindBestMatch_dictMatchState_selectMLS
                                         : ZSTD_HcFindBestMatch_dictMatchState_selectMLS) :
        (searchMethod==search_binaryTree ? ZSTD_BtFindBestMatch_selectMLS
                                         : ZSTD_HcFindBestMatch_selectMLS);
    U32 offset_1 = rep[0], offset_2 = rep[1], savedOffset=0;

    const ZSTD_matchState_t* const dms = ms->dictMatchState;
    const U32 dictLowestIndex      = dictMode == ZSTD_dictMatchState ?
                                     dms->window.dictLimit : 0;
    const BYTE* const dictBase     = dictMode == ZSTD_dictMatchState ?
                                     dms->window.base : NULL;
    const BYTE* const dictLowest   = dictMode == ZSTD_dictMatchState ?
                                     dictBase + dictLowestIndex : NULL;
    const BYTE* const dictEnd      = dictMode == ZSTD_dictMatchState ?
                                     dms->window.nextSrc : NULL;
    const U32 dictIndexDelta       = dictMode == ZSTD_dictMatchState ?
                                     prefixLowestIndex - (U32)(dictEnd - dictBase) :
                                     0;
    const U32 dictAndPrefixLength = (U32)((ip - prefixLowest) + (dictEnd - dictLowest));

    DEBUGLOG(5, "ZSTD_compressBlock_lazy_generic (dictMode=%u)", (U32)dictMode);

    /* init */
    ip += (dictAndPrefixLength == 0);
    if (dictMode == ZSTD_noDict) {
        U32 const current = (U32)(ip - base);
        U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, current, ms->cParams.windowLog);
        U32 const maxRep = current - windowLow;
        if (offset_2 > maxRep) savedOffset = offset_2, offset_2 = 0;
        if (offset_1 > maxRep) savedOffset = offset_1, offset_1 = 0;
    }
    if (dictMode == ZSTD_dictMatchState) {
        /* dictMatchState repCode checks don't currently handle repCode == 0
         * disabling. */
        assert(offset_1 <= dictAndPrefixLength);
        assert(offset_2 <= dictAndPrefixLength);
    }

    /* Match Loop */
#if defined(__GNUC__) && defined(__x86_64__)
    /* I've measured a random 5% speed loss on levels 5 & 6 (greedy) when the
     * code alignment is perturbed. To fix the instability, align the loop on 32 bytes.
     */
    __asm__(".p2align 5");
#endif
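    /* Offset convention inside this loop :
     * offset == 0 means "repcode match" (distance == offset_1),
     * otherwise offset == match distance + ZSTD_REP_MOVE,
     * which is the offsetCode expected by ZSTD_storeSeq(). */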
    while (ip < ilimit) {
        size_t matchLength=0;
        size_t offset=0;
        const BYTE* start=ip+1;

        /* check repCode */
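        /* The validity test below uses unsigned arithmetic on purpose :
         * (U32)((prefixLowestIndex-1) - repIndex) >= 3 rejects only the
         * repIndex values in [prefixLowestIndex-3, prefixLowestIndex-1],
         * where a 4-byte read would straddle the dictionary/prefix boundary ;
         * any repIndex >= prefixLowestIndex underflows to a large value and passes. */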
        if (dictMode == ZSTD_dictMatchState) {
            const U32 repIndex = (U32)(ip - base) + 1 - offset_1;
            const BYTE* repMatch = (dictMode == ZSTD_dictMatchState
                                && repIndex < prefixLowestIndex) ?
                                   dictBase + (repIndex - dictIndexDelta) :
                                   base + repIndex;
            if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */)
                && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
                const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
                matchLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
                if (depth==0) goto _storeSequence;
            }
        }
        if ( dictMode == ZSTD_noDict
          && ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1)))) {
            matchLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
            if (depth==0) goto _storeSequence;
        }

        /* first search (depth 0) */
        {   size_t offsetFound = 999999999;
            size_t const ml2 = searchMax(ms, ip, iend, &offsetFound);
            if (ml2 > matchLength)
                matchLength = ml2, start = ip, offset=offsetFound;
        }

        if (matchLength < 4) {
            ip += ((ip-anchor) >> kSearchStrength) + 1;   /* jump faster over incompressible sections */
            continue;
        }

        /* let's try to find a better solution */
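        /* Cost model for the comparisons below : a candidate is valued at
         * matchLength*4 minus ZSTD_highbit32(offset+1), a rough estimate of
         * bytes saved minus the cost of encoding the offset. Repcode
         * candidates pay no offset cost (the *3 variants), and the small
         * constants (+1, +4, +7) bias the comparison toward the match already
         * selected, so a challenger must win clearly to replace it. */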
        if (depth>=1)
        while (ip<ilimit) {
            ip ++;
            if ( (dictMode == ZSTD_noDict)
              && (offset) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) {
                size_t const mlRep = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4;
                int const gain2 = (int)(mlRep * 3);
                int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1);
                if ((mlRep >= 4) && (gain2 > gain1))
                    matchLength = mlRep, offset = 0, start = ip;
            }
            if (dictMode == ZSTD_dictMatchState) {
                const U32 repIndex = (U32)(ip - base) - offset_1;
                const BYTE* repMatch = repIndex < prefixLowestIndex ?
                               dictBase + (repIndex - dictIndexDelta) :
                               base + repIndex;
                if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */)
                    && (MEM_read32(repMatch) == MEM_read32(ip)) ) {
                    const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
                    size_t const mlRep = ZSTD_count_2segments(ip+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
                    int const gain2 = (int)(mlRep * 3);
                    int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1);
                    if ((mlRep >= 4) && (gain2 > gain1))
                        matchLength = mlRep, offset = 0, start = ip;
                }
            }
            {   size_t offset2=999999999;
                size_t const ml2 = searchMax(ms, ip, iend, &offset2);
                int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1));   /* raw approx */
                int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 4);
                if ((ml2 >= 4) && (gain2 > gain1)) {
                    matchLength = ml2, offset = offset2, start = ip;
                    continue;   /* search a better one */
            }   }

            /* let's find an even better one */
            if ((depth==2) && (ip<ilimit)) {
                ip ++;
                if ( (dictMode == ZSTD_noDict)
                  && (offset) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) {
                    size_t const mlRep = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4;
                    int const gain2 = (int)(mlRep * 4);
                    int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1);
                    if ((mlRep >= 4) && (gain2 > gain1))
                        matchLength = mlRep, offset = 0, start = ip;
                }
                if (dictMode == ZSTD_dictMatchState) {
                    const U32 repIndex = (U32)(ip - base) - offset_1;
                    const BYTE* repMatch = repIndex < prefixLowestIndex ?
                                   dictBase + (repIndex - dictIndexDelta) :
                                   base + repIndex;
                    if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */)
                        && (MEM_read32(repMatch) == MEM_read32(ip)) ) {
                        const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
                        size_t const mlRep = ZSTD_count_2segments(ip+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
                        int const gain2 = (int)(mlRep * 4);
                        int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1);
                        if ((mlRep >= 4) && (gain2 > gain1))
                            matchLength = mlRep, offset = 0, start = ip;
                    }
                }
                {   size_t offset2=999999999;
                    size_t const ml2 = searchMax(ms, ip, iend, &offset2);
                    int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1));   /* raw approx */
                    int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 7);
                    if ((ml2 >= 4) && (gain2 > gain1)) {
                        matchLength = ml2, offset = offset2, start = ip;
                        continue;
            }   }   }
            break;  /* nothing found : store previous solution */
        }

        /* NOTE:
         * start[-offset+ZSTD_REP_MOVE-1] is undefined behavior.
         * (-offset+ZSTD_REP_MOVE-1) is unsigned, and is added to start, which
         * overflows the pointer, which is undefined behavior.
         */
        /* catch up */
        if (offset) {
            if (dictMode == ZSTD_noDict) {
                while ( ((start > anchor) & (start - (offset-ZSTD_REP_MOVE) > prefixLowest))
                     && (start[-1] == (start-(offset-ZSTD_REP_MOVE))[-1]) )  /* only search for offset within prefix */
                    { start--; matchLength++; }
            }
            if (dictMode == ZSTD_dictMatchState) {
                U32 const matchIndex = (U32)((start-base) - (offset - ZSTD_REP_MOVE));
                const BYTE* match = (matchIndex < prefixLowestIndex) ? dictBase + matchIndex - dictIndexDelta : base + matchIndex;
                const BYTE* const mStart = (matchIndex < prefixLowestIndex) ? dictLowest : prefixLowest;
                while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; }  /* catch up */
            }
            offset_2 = offset_1; offset_1 = (U32)(offset - ZSTD_REP_MOVE);
        }
        /* store sequence */
_storeSequence:
        {   size_t const litLength = start - anchor;
            ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offset, matchLength-MINMATCH);
            anchor = ip = start + matchLength;
        }

        /* check immediate repcode */
        if (dictMode == ZSTD_dictMatchState) {
            while (ip <= ilimit) {
                U32 const current2 = (U32)(ip-base);
                U32 const repIndex = current2 - offset_2;
                const BYTE* repMatch = dictMode == ZSTD_dictMatchState
                    && repIndex < prefixLowestIndex ?
                        dictBase - dictIndexDelta + repIndex :
                        base + repIndex;
                if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex) >= 3 /* intentional overflow */)
                   && (MEM_read32(repMatch) == MEM_read32(ip)) ) {
                    const BYTE* const repEnd2 = repIndex < prefixLowestIndex ? dictEnd : iend;
                    matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd2, prefixLowest) + 4;
                    offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset;   /* swap offset_2 <=> offset_1 */
                    ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH);
                    ip += matchLength;
                    anchor = ip;
                    continue;
                }
                break;
            }
        }

        if (dictMode == ZSTD_noDict) {
            while ( ((ip <= ilimit) & (offset_2>0))
                 && (MEM_read32(ip) == MEM_read32(ip - offset_2)) ) {
                /* store sequence */
                matchLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
                offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset;   /* swap repcodes */
                ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH);
                ip += matchLength;
                anchor = ip;
                continue;   /* faster when present ... (?) */
    }   }   }

    /* Save reps for next block */
    rep[0] = offset_1 ? offset_1 : savedOffset;
    rep[1] = offset_2 ? offset_2 : savedOffset;

    /* Return the last literals size */
    return (size_t)(iend - anchor);
}
size_t ZSTD_compressBlock_btlazy2(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize)
{
    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_noDict);
}

size_t ZSTD_compressBlock_lazy2(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize)
{
    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_noDict);
}

size_t ZSTD_compressBlock_lazy(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize)
{
    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_noDict);
}

size_t ZSTD_compressBlock_greedy(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize)
{
    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_noDict);
}

size_t ZSTD_compressBlock_btlazy2_dictMatchState(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize)
{
    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_dictMatchState);
}

size_t ZSTD_compressBlock_lazy2_dictMatchState(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize)
{
    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_dictMatchState);
}

size_t ZSTD_compressBlock_lazy_dictMatchState(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize)
{
    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_dictMatchState);
}

size_t ZSTD_compressBlock_greedy_dictMatchState(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize)
{
    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_dictMatchState);
}
FORCE_INLINE_TEMPLATE
size_t ZSTD_compressBlock_lazy_extDict_generic(
                        ZSTD_matchState_t* ms, seqStore_t* seqStore,
                        U32 rep[ZSTD_REP_NUM],
                        const void* src, size_t srcSize,
                        const searchMethod_e searchMethod, const U32 depth)
{
    const BYTE* const istart = (const BYTE*)src;
    const BYTE* ip = istart;
    const BYTE* anchor = istart;
    const BYTE* const iend = istart + srcSize;
    const BYTE* const ilimit = iend - 8;
    const BYTE* const base = ms->window.base;
    const U32 dictLimit = ms->window.dictLimit;
    const BYTE* const prefixStart = base + dictLimit;
    const BYTE* const dictBase = ms->window.dictBase;
    const BYTE* const dictEnd  = dictBase + dictLimit;
    const BYTE* const dictStart  = dictBase + ms->window.lowLimit;
    const U32 windowLog = ms->cParams.windowLog;

    typedef size_t (*searchMax_f)(
                        ZSTD_matchState_t* ms,
                        const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr);
    searchMax_f searchMax = searchMethod==search_binaryTree ? ZSTD_BtFindBestMatch_extDict_selectMLS : ZSTD_HcFindBestMatch_extDict_selectMLS;

    U32 offset_1 = rep[0], offset_2 = rep[1];

    DEBUGLOG(5, "ZSTD_compressBlock_lazy_extDict_generic");

    /* init */
    ip += (ip == prefixStart);

    /* Match Loop */
#if defined(__GNUC__) && defined(__x86_64__)
    /* I've measured a random 5% speed loss on levels 5 & 6 (greedy) when the
     * code alignment is perturbed. To fix the instability, align the loop on 32 bytes.
     */
    __asm__(".p2align 5");
#endif
    while (ip < ilimit) {
        size_t matchLength=0;
        size_t offset=0;
        const BYTE* start=ip+1;
        U32 current = (U32)(ip-base);

        /* check repCode */
        {   const U32 windowLow = ZSTD_getLowestMatchIndex(ms, current+1, windowLog);
            const U32 repIndex = (U32)(current+1 - offset_1);
            const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
            const BYTE* const repMatch = repBase + repIndex;
            if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow))   /* intentional overflow */
            if (MEM_read32(ip+1) == MEM_read32(repMatch)) {
                /* repcode detected, we should take it */
                const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
                matchLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repEnd, prefixStart) + 4;
                if (depth==0) goto _storeSequence;
        }   }

        /* first search (depth 0) */
        {   size_t offsetFound = 999999999;
            size_t const ml2 = searchMax(ms, ip, iend, &offsetFound);
            if (ml2 > matchLength)
                matchLength = ml2, start = ip, offset=offsetFound;
        }

        if (matchLength < 4) {
            ip += ((ip-anchor) >> kSearchStrength) + 1;   /* jump faster over incompressible sections */
            continue;
        }

        /* let's try to find a better solution */
        if (depth>=1)
        while (ip<ilimit) {
            ip ++;
            current++;
            /* check repCode */
            if (offset) {
                const U32 windowLow = ZSTD_getLowestMatchIndex(ms, current, windowLog);
                const U32 repIndex = (U32)(current - offset_1);
                const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
                const BYTE* const repMatch = repBase + repIndex;
                if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow))  /* intentional overflow */
                if (MEM_read32(ip) == MEM_read32(repMatch)) {
                    /* repcode detected */
                    const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
                    size_t const repLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;
                    int const gain2 = (int)(repLength * 3);
                    int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1);
                    if ((repLength >= 4) && (gain2 > gain1))
                        matchLength = repLength, offset = 0, start = ip;
            }   }

            /* search match, depth 1 */
            {   size_t offset2=999999999;
                size_t const ml2 = searchMax(ms, ip, iend, &offset2);
                int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1));   /* raw approx */
                int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 4);
                if ((ml2 >= 4) && (gain2 > gain1)) {
                    matchLength = ml2, offset = offset2, start = ip;
                    continue;   /* search a better one */
            }   }

            /* let's find an even better one */
            if ((depth==2) && (ip<ilimit)) {
                ip ++;
                current++;
                /* check repCode */
                if (offset) {
                    const U32 windowLow = ZSTD_getLowestMatchIndex(ms, current, windowLog);
                    const U32 repIndex = (U32)(current - offset_1);
                    const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
                    const BYTE* const repMatch = repBase + repIndex;
                    if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow))  /* intentional overflow */
                    if (MEM_read32(ip) == MEM_read32(repMatch)) {
                        /* repcode detected */
                        const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
                        size_t const repLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;
                        int const gain2 = (int)(repLength * 4);
                        int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1);
                        if ((repLength >= 4) && (gain2 > gain1))
                            matchLength = repLength, offset = 0, start = ip;
                }   }

                /* search match, depth 2 */
                {   size_t offset2=999999999;
                    size_t const ml2 = searchMax(ms, ip, iend, &offset2);
                    int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1));   /* raw approx */
                    int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 7);
                    if ((ml2 >= 4) && (gain2 > gain1)) {
                        matchLength = ml2, offset = offset2, start = ip;
                        continue;
            }   }   }
            break;  /* nothing found : store previous solution */
        }

        /* catch up */
        if (offset) {
            U32 const matchIndex = (U32)((start-base) - (offset - ZSTD_REP_MOVE));
            const BYTE* match = (matchIndex < dictLimit) ? dictBase + matchIndex : base + matchIndex;
            const BYTE* const mStart = (matchIndex < dictLimit) ? dictStart : prefixStart;
            while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; }  /* catch up */
            offset_2 = offset_1; offset_1 = (U32)(offset - ZSTD_REP_MOVE);
        }

        /* store sequence */
_storeSequence:
        {   size_t const litLength = start - anchor;
            ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offset, matchLength-MINMATCH);
            anchor = ip = start + matchLength;
        }

        /* check immediate repcode */
        while (ip <= ilimit) {
            const U32 repCurrent = (U32)(ip-base);
            const U32 windowLow = ZSTD_getLowestMatchIndex(ms, repCurrent, windowLog);
            const U32 repIndex = repCurrent - offset_2;
            const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
            const BYTE* const repMatch = repBase + repIndex;
            if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow))  /* intentional overflow */
            if (MEM_read32(ip) == MEM_read32(repMatch)) {
                /* repcode detected, we should take it */
                const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
                matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;
                offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset;   /* swap offset history */
                ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH);
                ip += matchLength;
                anchor = ip;
                continue;   /* faster when present ... (?) */
            }
            break;
    }   }

    /* Save reps for next block */
    rep[0] = offset_1;
    rep[1] = offset_2;

    /* Return the last literals size */
    return (size_t)(iend - anchor);
}
size_t ZSTD_compressBlock_greedy_extDict(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize)
{
    return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0);
}

size_t ZSTD_compressBlock_lazy_extDict(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize)
{
    return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1);
}

size_t ZSTD_compressBlock_lazy2_extDict(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize)
{
    return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2);
}

size_t ZSTD_compressBlock_btlazy2_extDict(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize)
{
    return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2);
}