@@ -25,7 +25,7 @@
| t1ha       | ?       | Zlib     | NO  |                     |
| uthash     | 1.9.8   | BSD      | YES |                     |
| xxhash     | 0.8.1   | BSD      | NO  |                     |
| zstd       | 1.4.5   | BSD      | NO  |                     |
| zstd       | 1.5.4   | BSD      | YES | build fixes only    |
| google-ced | 37529e6 | Apache 2 | YES | build fixes         |
| kann       | ?       | MIT      | YES | blas/lapack changes |
| fpconv     | ?       | Boost    | YES | many changes        |
@@ -9,7 +9,6 @@ SET(ZSTDSRC
        huf_compress.c
        huf_decompress.c
        pool.c
        threading.c
        zstd_common.c
        zstd_compress.c
        zstd_compress_literals.c
@@ -25,4 +24,4 @@ SET(ZSTDSRC
        zstd_opt.c)
ADD_LIBRARY(rspamd-zstd STATIC ${ZSTDSRC})
ADD_DEFINITIONS(-DZSTD_STATIC_LINKING_ONLY)
ADD_DEFINITIONS(-DZSTD_DISABLE_ASM)
@@ -0,0 +1,175 @@
/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 * All rights reserved.
 *
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 * You may select, at your option, one of the above-listed licenses.
 */

#ifndef ZSTD_BITS_H
#define ZSTD_BITS_H

#include "mem.h"

MEM_STATIC unsigned ZSTD_countTrailingZeros32_fallback(U32 val)
{
    assert(val != 0);
    {
        static const int DeBruijnBytePos[32] = {0, 1, 28, 2, 29, 14, 24, 3,
                                                30, 22, 20, 15, 25, 17, 4, 8,
                                                31, 27, 13, 23, 21, 19, 16, 7,
                                                26, 12, 18, 6, 11, 5, 10, 9};
        return DeBruijnBytePos[((U32) ((val & -(S32) val) * 0x077CB531U)) >> 27];
    }
}
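/* Editor's note (illustrative, not part of the patch): (val & -val) isolates
 * the lowest set bit, and multiplying by the De Bruijn constant 0x077CB531
 * places a distinct 5-bit pattern in the top bits for each of the 32 possible
 * single-bit values, so the table lookup recovers the bit index.
 * Worked example: val = 0b01100 -> val & -val = 0b00100 (bit 2), and
 * (0x4 * 0x077CB531U) >> 27 == 3, with DeBruijnBytePos[3] == 2. */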
MEM_STATIC unsigned ZSTD_countTrailingZeros32(U32 val)
{
    assert(val != 0);
#  if defined(_MSC_VER)
#    if STATIC_BMI2 == 1
    return _tzcnt_u32(val);
#    else
    if (val != 0) {
        unsigned long r;
        _BitScanForward(&r, val);
        return (unsigned)r;
    } else {
        /* Should not reach this code path */
        __assume(0);
    }
#    endif
#  elif defined(__GNUC__) && (__GNUC__ >= 4)
    return (unsigned)__builtin_ctz(val);
#  else
    return ZSTD_countTrailingZeros32_fallback(val);
#  endif
}

MEM_STATIC unsigned ZSTD_countLeadingZeros32_fallback(U32 val) {
    assert(val != 0);
    {
        static const U32 DeBruijnClz[32] = {0, 9, 1, 10, 13, 21, 2, 29,
                                            11, 14, 16, 18, 22, 25, 3, 30,
                                            8, 12, 20, 28, 15, 17, 24, 7,
                                            19, 27, 23, 6, 26, 5, 4, 31};
        val |= val >> 1;
        val |= val >> 2;
        val |= val >> 4;
        val |= val >> 8;
        val |= val >> 16;
        return 31 - DeBruijnClz[(val * 0x07C4ACDDU) >> 27];
    }
}

MEM_STATIC unsigned ZSTD_countLeadingZeros32(U32 val)
{
    assert(val != 0);
#  if defined(_MSC_VER)
#    if STATIC_BMI2 == 1
    return _lzcnt_u32(val);
#    else
    if (val != 0) {
        unsigned long r;
        _BitScanReverse(&r, val);
        return (unsigned)(31 - r);
    } else {
        /* Should not reach this code path */
        __assume(0);
    }
#    endif
#  elif defined(__GNUC__) && (__GNUC__ >= 4)
    return (unsigned)__builtin_clz(val);
#  else
    return ZSTD_countLeadingZeros32_fallback(val);
#  endif
}

MEM_STATIC unsigned ZSTD_countTrailingZeros64(U64 val)
{
    assert(val != 0);
#  if defined(_MSC_VER) && defined(_WIN64)
#    if STATIC_BMI2 == 1
    return _tzcnt_u64(val);
#    else
    if (val != 0) {
        unsigned long r;
        _BitScanForward64(&r, val);
        return (unsigned)r;
    } else {
        /* Should not reach this code path */
        __assume(0);
    }
#    endif
#  elif defined(__GNUC__) && (__GNUC__ >= 4) && defined(__LP64__)
    return (unsigned)__builtin_ctzll(val);
#  else
    {
        U32 mostSignificantWord = (U32)(val >> 32);
        U32 leastSignificantWord = (U32)val;
        if (leastSignificantWord == 0) {
            return 32 + ZSTD_countTrailingZeros32(mostSignificantWord);
        } else {
            return ZSTD_countTrailingZeros32(leastSignificantWord);
        }
    }
#  endif
}

MEM_STATIC unsigned ZSTD_countLeadingZeros64(U64 val)
{
    assert(val != 0);
#  if defined(_MSC_VER) && defined(_WIN64)
#    if STATIC_BMI2 == 1
    return _lzcnt_u64(val);
#    else
    if (val != 0) {
        unsigned long r;
        _BitScanReverse64(&r, val);
        return (unsigned)(63 - r);
    } else {
        /* Should not reach this code path */
        __assume(0);
    }
#    endif
#  elif defined(__GNUC__) && (__GNUC__ >= 4)
    return (unsigned)(__builtin_clzll(val));
#  else
    {
        U32 mostSignificantWord = (U32)(val >> 32);
        U32 leastSignificantWord = (U32)val;
        if (mostSignificantWord == 0) {
            return 32 + ZSTD_countLeadingZeros32(leastSignificantWord);
        } else {
            return ZSTD_countLeadingZeros32(mostSignificantWord);
        }
    }
#  endif
}

MEM_STATIC unsigned ZSTD_NbCommonBytes(size_t val)
{
    if (MEM_isLittleEndian()) {
        if (MEM_64bits()) {
            return ZSTD_countTrailingZeros64((U64)val) >> 3;
        } else {
            return ZSTD_countTrailingZeros32((U32)val) >> 3;
        }
    } else {  /* Big Endian CPU */
        if (MEM_64bits()) {
            return ZSTD_countLeadingZeros64((U64)val) >> 3;
        } else {
            return ZSTD_countLeadingZeros32((U32)val) >> 3;
        }
    }
}
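/* Editor's note (illustrative, not part of the patch): the match finders XOR
 * two machine words and hand the difference to ZSTD_NbCommonBytes() to learn
 * how many leading bytes matched, along the lines of (MEM_readST() as
 * declared in mem.h):
 *
 *     size_t const diff = MEM_readST(pIn) ^ MEM_readST(pMatch);
 *     if (diff) return matched + ZSTD_NbCommonBytes(diff);
 */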
MEM_STATIC unsigned ZSTD_highbit32(U32 val)   /* compress, dictBuilder, decodeCorpus */
{
    assert(val != 0);
    return 31 - ZSTD_countLeadingZeros32(val);
}

#endif /* ZSTD_BITS_H */
@@ -1,7 +1,7 @@
/* ******************************************************************
 * bitstream
 * Part of FSE library
 * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 *
 * You can contact the author at :
 * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
@@ -17,7 +17,6 @@
#if defined (__cplusplus)
extern "C" {
#endif

/*
 * This API consists of small unitary functions, which must be inlined for best performance.
 * Since link-time-optimization is not available for all compilers,
@@ -31,15 +30,18 @@ extern "C" {
#include "compiler.h"       /* UNLIKELY() */
#include "debug.h"          /* assert(), DEBUGLOG(), RAWLOG() */
#include "error_private.h"  /* error codes and messages */
#include "bits.h"           /* ZSTD_highbit32 */

/*=========================================
 * Target specific
 =========================================*/
#if defined(__BMI__) && defined(__GNUC__)
#  include <immintrin.h>   /* support for bextr (experimental) */
#elif defined(__ICCARM__)
#  include <intrinsics.h>
#ifndef ZSTD_NO_INTRINSICS
#  if (defined(__BMI__) || defined(__BMI2__)) && defined(__GNUC__)
#    include <immintrin.h>   /* support for bextr (experimental)/bzhi */
#  elif defined(__ICCARM__)
#    include <intrinsics.h>
#  endif
#endif

#define STREAM_ACCUMULATOR_MIN_32  25
@@ -131,38 +133,6 @@ MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC);
MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits);
/* faster, but works only if nbBits >= 1 */

/*-**************************************************************
 *  Internal functions
 ****************************************************************/
MEM_STATIC unsigned BIT_highbit32 (U32 val)
{
    assert(val != 0);
    {
#  if defined(_MSC_VER)   /* Visual */
        unsigned long r=0;
        return _BitScanReverse ( &r, val ) ? (unsigned)r : 0;
#  elif defined(__GNUC__) && (__GNUC__ >= 3)   /* Use GCC Intrinsic */
        return __builtin_clz (val) ^ 31;
#  elif defined(__ICCARM__)    /* IAR Intrinsic */
        return 31 - __CLZ(val);
#  else   /* Software version */
        static const unsigned DeBruijnClz[32] = { 0,  9,  1, 10, 13, 21,  2, 29,
                                                 11, 14, 16, 18, 22, 25,  3, 30,
                                                  8, 12, 20, 28, 15, 17, 24,  7,
                                                 19, 27, 23,  6, 26,  5,  4, 31 };
        U32 v = val;
        v |= v >> 1;
        v |= v >> 2;
        v |= v >> 4;
        v |= v >> 8;
        v |= v >> 16;
        return DeBruijnClz[ (U32) (v * 0x07C4ACDDU) >> 27];
#  endif
    }
}

/*===== Local Constants =====*/
static const unsigned BIT_mask[] = {
    0,          1,         3,         7,         0xF,       0x1F,
@@ -192,16 +162,26 @@ MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC,
    return 0;
}

MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
{
#if defined(STATIC_BMI2) && STATIC_BMI2 == 1 && !defined(ZSTD_NO_INTRINSICS)
    return _bzhi_u64(bitContainer, nbBits);
#else
    assert(nbBits < BIT_MASK_SIZE);
    return bitContainer & BIT_mask[nbBits];
#endif
}

/*! BIT_addBits() :
 *  can add up to 31 bits into `bitC`.
 *  Note : does not check for register overflow ! */
MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC,
                            size_t value, unsigned nbBits)
{
    MEM_STATIC_ASSERT(BIT_MASK_SIZE == 32);
    DEBUG_STATIC_ASSERT(BIT_MASK_SIZE == 32);
    assert(nbBits < BIT_MASK_SIZE);
    assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8);
    bitC->bitContainer |= (value & BIT_mask[nbBits]) << bitC->bitPos;
    bitC->bitContainer |= BIT_getLowerBits(value, nbBits) << bitC->bitPos;
    bitC->bitPos += nbBits;
}

@@ -271,7 +251,7 @@ MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC)
 */
MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize)
{
    if (srcSize < 1) { memset(bitD, 0, sizeof(*bitD)); return ERROR(srcSize_wrong); }
    if (srcSize < 1) { ZSTD_memset(bitD, 0, sizeof(*bitD)); return ERROR(srcSize_wrong); }

    bitD->start = (const char*)srcBuffer;
    bitD->limitPtr = bitD->start + sizeof(bitD->bitContainer);
@@ -280,7 +260,7 @@ MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, si
        bitD->ptr   = (const char*)srcBuffer + srcSize - sizeof(bitD->bitContainer);
        bitD->bitContainer = MEM_readLEST(bitD->ptr);
        { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1];
          bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0;  /* ensures bitsConsumed is always set */
          bitD->bitsConsumed = lastByte ? 8 - ZSTD_highbit32(lastByte) : 0;  /* ensures bitsConsumed is always set */
          if (lastByte == 0) return ERROR(GENERIC); /* endMark not present */ }
    } else {
        bitD->ptr   = bitD->start;
@@ -288,27 +268,27 @@ MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, si
        switch(srcSize)
        {
        case 7: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[6]) << (sizeof(bitD->bitContainer)*8 - 16);
                /* fall-through */
                ZSTD_FALLTHROUGH;
        case 6: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[5]) << (sizeof(bitD->bitContainer)*8 - 24);
                /* fall-through */
                ZSTD_FALLTHROUGH;
        case 5: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[4]) << (sizeof(bitD->bitContainer)*8 - 32);
                /* fall-through */
                ZSTD_FALLTHROUGH;
        case 4: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[3]) << 24;
                /* fall-through */
                ZSTD_FALLTHROUGH;
        case 3: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[2]) << 16;
                /* fall-through */
                ZSTD_FALLTHROUGH;
        case 2: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[1]) << 8;
                /* fall-through */
                ZSTD_FALLTHROUGH;
        default: break;
        }
        { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1];
          bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0;
          bitD->bitsConsumed = lastByte ? 8 - ZSTD_highbit32(lastByte) : 0;
          if (lastByte == 0) return ERROR(corruption_detected);  /* endMark not present */
        }
        bitD->bitsConsumed += (U32)(sizeof(bitD->bitContainer) - srcSize)*8;
@@ -317,23 +297,26 @@ MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, si
    return srcSize;
}

MEM_STATIC size_t BIT_getUpperBits(size_t bitContainer, U32 const start)
MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getUpperBits(size_t bitContainer, U32 const start)
{
    return bitContainer >> start;
}

MEM_STATIC size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 const nbBits)
MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 const nbBits)
{
    U32 const regMask = sizeof(bitContainer)*8 - 1;
    /* if start > regMask, bitstream is corrupted, and result is undefined */
    assert(nbBits < BIT_MASK_SIZE);
    /* x86 transform & ((1 << nbBits) - 1) to bzhi instruction, it is better
     * than accessing memory. When bmi2 instruction is not present, we consider
     * such cpus old (pre-Haswell, 2013) and their performance is not of that
     * importance.
     */
#if defined(__x86_64__) || defined(_M_X86)
    return (bitContainer >> (start & regMask)) & ((((U64)1) << nbBits) - 1);
#else
    return (bitContainer >> (start & regMask)) & BIT_mask[nbBits];
}

MEM_STATIC size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
{
    assert(nbBits < BIT_MASK_SIZE);
    return bitContainer & BIT_mask[nbBits];
#endif
}
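/* Editor's note, worked example (not part of the patch): with
 * bitContainer = 0b101101, start = 2 and nbBits = 3, both branches return
 * (0b101101 >> 2) & 0b111 = 0b011 = 3; the ((1 << nbBits) - 1) form merely
 * lets x86-64 compilers emit bzhi instead of loading BIT_mask[] from memory. */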
/*! BIT_lookBits() :
@@ -342,7 +325,7 @@ MEM_STATIC size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
 *  On 32-bits, maxNbBits==24.
 *  On 64-bits, maxNbBits==56.
 * @return : value extracted */
MEM_STATIC size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits)
MEM_STATIC FORCE_INLINE_ATTR size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits)
{
    /* arbitrate between double-shift and shift+mask */
#if 1
@@ -365,7 +348,7 @@ MEM_STATIC size_t BIT_lookBitsFast(const BIT_DStream_t* bitD, U32 nbBits)
    return (bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> (((regMask+1)-nbBits) & regMask);
}

MEM_STATIC void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)
MEM_STATIC FORCE_INLINE_ATTR void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)
{
    bitD->bitsConsumed += nbBits;
}

@@ -374,7 +357,7 @@ MEM_STATIC void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)
 *  Read (consume) next n bits from local register and update.
 *  Pay attention to not read more than nbBits contained into local register.
 * @return : extracted value. */
MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits)
MEM_STATIC FORCE_INLINE_ATTR size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits)
{
    size_t const value = BIT_lookBits(bitD, nbBits);
    BIT_skipBits(bitD, nbBits);
@@ -382,7 +365,7 @@ MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits)
}

/*! BIT_readBitsFast() :
 *  unsafe version; only works only if nbBits >= 1 */
 *  unsafe version; only works if nbBits >= 1 */
MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits)
{
    size_t const value = BIT_lookBitsFast(bitD, nbBits);
@@ -0,0 +1,134 @@
/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 * All rights reserved.
 *
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 * You may select, at your option, one of the above-listed licenses.
 */

#ifndef ZSTD_CLEVELS_H
#define ZSTD_CLEVELS_H

#define ZSTD_STATIC_LINKING_ONLY  /* ZSTD_compressionParameters */
#include "zstd.h"

/*-=====  Pre-defined compression levels  =====-*/

#define ZSTD_MAX_CLEVEL 22

#ifdef __GNUC__
__attribute__((__unused__))
#endif

static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEVEL+1] = {
{   /* "default" - for any srcSize > 256 KB */
    /* W,  C,  H,  S,  L, TL, strat */
    { 19, 12, 13,  1,  6,  1, ZSTD_fast    },  /* base for negative levels */
    { 19, 13, 14,  1,  7,  0, ZSTD_fast    },  /* level  1 */
    { 20, 15, 16,  1,  6,  0, ZSTD_fast    },  /* level  2 */
    { 21, 16, 17,  1,  5,  0, ZSTD_dfast   },  /* level  3 */
    { 21, 18, 18,  1,  5,  0, ZSTD_dfast   },  /* level  4 */
    { 21, 18, 19,  3,  5,  2, ZSTD_greedy  },  /* level  5 */
    { 21, 18, 19,  3,  5,  4, ZSTD_lazy    },  /* level  6 */
    { 21, 19, 20,  4,  5,  8, ZSTD_lazy    },  /* level  7 */
    { 21, 19, 20,  4,  5, 16, ZSTD_lazy2   },  /* level  8 */
    { 22, 20, 21,  4,  5, 16, ZSTD_lazy2   },  /* level  9 */
    { 22, 21, 22,  5,  5, 16, ZSTD_lazy2   },  /* level 10 */
    { 22, 21, 22,  6,  5, 16, ZSTD_lazy2   },  /* level 11 */
    { 22, 22, 23,  6,  5, 32, ZSTD_lazy2   },  /* level 12 */
    { 22, 22, 22,  4,  5, 32, ZSTD_btlazy2 },  /* level 13 */
    { 22, 22, 23,  5,  5, 32, ZSTD_btlazy2 },  /* level 14 */
    { 22, 23, 23,  6,  5, 32, ZSTD_btlazy2 },  /* level 15 */
    { 22, 22, 22,  5,  5, 48, ZSTD_btopt   },  /* level 16 */
    { 23, 23, 22,  5,  4, 64, ZSTD_btopt   },  /* level 17 */
    { 23, 23, 22,  6,  3, 64, ZSTD_btultra },  /* level 18 */
    { 23, 24, 22,  7,  3,256, ZSTD_btultra2},  /* level 19 */
    { 25, 25, 23,  7,  3,256, ZSTD_btultra2},  /* level 20 */
    { 26, 26, 24,  7,  3,512, ZSTD_btultra2},  /* level 21 */
    { 27, 27, 25,  9,  3,999, ZSTD_btultra2},  /* level 22 */
},
{   /* for srcSize <= 256 KB */
    /* W,  C,  H,  S,  L,  T, strat */
    { 18, 12, 13,  1,  5,  1, ZSTD_fast    },  /* base for negative levels */
    { 18, 13, 14,  1,  6,  0, ZSTD_fast    },  /* level  1 */
    { 18, 14, 14,  1,  5,  0, ZSTD_dfast   },  /* level  2 */
    { 18, 16, 16,  1,  4,  0, ZSTD_dfast   },  /* level  3 */
    { 18, 16, 17,  3,  5,  2, ZSTD_greedy  },  /* level  4.*/
    { 18, 17, 18,  5,  5,  2, ZSTD_greedy  },  /* level  5.*/
    { 18, 18, 19,  3,  5,  4, ZSTD_lazy    },  /* level  6.*/
    { 18, 18, 19,  4,  4,  4, ZSTD_lazy    },  /* level  7 */
    { 18, 18, 19,  4,  4,  8, ZSTD_lazy2   },  /* level  8 */
    { 18, 18, 19,  5,  4,  8, ZSTD_lazy2   },  /* level  9 */
    { 18, 18, 19,  6,  4,  8, ZSTD_lazy2   },  /* level 10 */
    { 18, 18, 19,  5,  4, 12, ZSTD_btlazy2 },  /* level 11.*/
    { 18, 19, 19,  7,  4, 12, ZSTD_btlazy2 },  /* level 12.*/
    { 18, 18, 19,  4,  4, 16, ZSTD_btopt   },  /* level 13 */
    { 18, 18, 19,  4,  3, 32, ZSTD_btopt   },  /* level 14.*/
    { 18, 18, 19,  6,  3,128, ZSTD_btopt   },  /* level 15.*/
    { 18, 19, 19,  6,  3,128, ZSTD_btultra },  /* level 16.*/
    { 18, 19, 19,  8,  3,256, ZSTD_btultra },  /* level 17.*/
    { 18, 19, 19,  6,  3,128, ZSTD_btultra2},  /* level 18.*/
    { 18, 19, 19,  8,  3,256, ZSTD_btultra2},  /* level 19.*/
    { 18, 19, 19, 10,  3,512, ZSTD_btultra2},  /* level 20.*/
    { 18, 19, 19, 12,  3,512, ZSTD_btultra2},  /* level 21.*/
    { 18, 19, 19, 13,  3,999, ZSTD_btultra2},  /* level 22.*/
},
{   /* for srcSize <= 128 KB */
    /* W,  C,  H,  S,  L,  T, strat */
    { 17, 12, 12,  1,  5,  1, ZSTD_fast    },  /* base for negative levels */
    { 17, 12, 13,  1,  6,  0, ZSTD_fast    },  /* level  1 */
    { 17, 13, 15,  1,  5,  0, ZSTD_fast    },  /* level  2 */
    { 17, 15, 16,  2,  5,  0, ZSTD_dfast   },  /* level  3 */
    { 17, 17, 17,  2,  4,  0, ZSTD_dfast   },  /* level  4 */
    { 17, 16, 17,  3,  4,  2, ZSTD_greedy  },  /* level  5 */
    { 17, 16, 17,  3,  4,  4, ZSTD_lazy    },  /* level  6 */
    { 17, 16, 17,  3,  4,  8, ZSTD_lazy2   },  /* level  7 */
    { 17, 16, 17,  4,  4,  8, ZSTD_lazy2   },  /* level  8 */
    { 17, 16, 17,  5,  4,  8, ZSTD_lazy2   },  /* level  9 */
    { 17, 16, 17,  6,  4,  8, ZSTD_lazy2   },  /* level 10 */
    { 17, 17, 17,  5,  4,  8, ZSTD_btlazy2 },  /* level 11 */
    { 17, 18, 17,  7,  4, 12, ZSTD_btlazy2 },  /* level 12 */
    { 17, 18, 17,  3,  4, 12, ZSTD_btopt   },  /* level 13.*/
    { 17, 18, 17,  4,  3, 32, ZSTD_btopt   },  /* level 14.*/
    { 17, 18, 17,  6,  3,256, ZSTD_btopt   },  /* level 15.*/
    { 17, 18, 17,  6,  3,128, ZSTD_btultra },  /* level 16.*/
    { 17, 18, 17,  8,  3,256, ZSTD_btultra },  /* level 17.*/
    { 17, 18, 17, 10,  3,512, ZSTD_btultra },  /* level 18.*/
    { 17, 18, 17,  5,  3,256, ZSTD_btultra2},  /* level 19.*/
    { 17, 18, 17,  7,  3,512, ZSTD_btultra2},  /* level 20.*/
    { 17, 18, 17,  9,  3,512, ZSTD_btultra2},  /* level 21.*/
    { 17, 18, 17, 11,  3,999, ZSTD_btultra2},  /* level 22.*/
},
{   /* for srcSize <= 16 KB */
    /* W,  C,  H,  S,  L,  T, strat */
    { 14, 12, 13,  1,  5,  1, ZSTD_fast    },  /* base for negative levels */
    { 14, 14, 15,  1,  5,  0, ZSTD_fast    },  /* level  1 */
    { 14, 14, 15,  1,  4,  0, ZSTD_fast    },  /* level  2 */
    { 14, 14, 15,  2,  4,  0, ZSTD_dfast   },  /* level  3 */
    { 14, 14, 14,  4,  4,  2, ZSTD_greedy  },  /* level  4 */
    { 14, 14, 14,  3,  4,  4, ZSTD_lazy    },  /* level  5.*/
    { 14, 14, 14,  4,  4,  8, ZSTD_lazy2   },  /* level  6 */
    { 14, 14, 14,  6,  4,  8, ZSTD_lazy2   },  /* level  7 */
    { 14, 14, 14,  8,  4,  8, ZSTD_lazy2   },  /* level  8.*/
    { 14, 15, 14,  5,  4,  8, ZSTD_btlazy2 },  /* level  9.*/
    { 14, 15, 14,  9,  4,  8, ZSTD_btlazy2 },  /* level 10.*/
    { 14, 15, 14,  3,  4, 12, ZSTD_btopt   },  /* level 11.*/
    { 14, 15, 14,  4,  3, 24, ZSTD_btopt   },  /* level 12.*/
    { 14, 15, 14,  5,  3, 32, ZSTD_btultra },  /* level 13.*/
    { 14, 15, 15,  6,  3, 64, ZSTD_btultra },  /* level 14.*/
    { 14, 15, 15,  7,  3,256, ZSTD_btultra },  /* level 15.*/
    { 14, 15, 15,  5,  3, 48, ZSTD_btultra2},  /* level 16.*/
    { 14, 15, 15,  6,  3,128, ZSTD_btultra2},  /* level 17.*/
    { 14, 15, 15,  7,  3,256, ZSTD_btultra2},  /* level 18.*/
    { 14, 15, 15,  8,  3,256, ZSTD_btultra2},  /* level 19.*/
    { 14, 15, 15,  8,  3,512, ZSTD_btultra2},  /* level 20.*/
    { 14, 15, 15,  9,  3,512, ZSTD_btultra2},  /* level 21.*/
    { 14, 15, 15, 10,  3,999, ZSTD_btultra2},  /* level 22.*/
},
};

#endif /* ZSTD_CLEVELS_H */
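/* Editor's note: a minimal sketch (not part of the patch) of how the tables
 * above surface through the public API. ZSTD_getCParams() is declared in
 * zstd.h under ZSTD_STATIC_LINKING_ONLY (which this build defines); it picks
 * the row set by estimated source size, then adjusts for the dictionary size.
 * Assumes compiling and linking against the bundled zstd. */

#define ZSTD_STATIC_LINKING_ONLY
#include <stdio.h>
#include "zstd.h"

int main(void)
{
    /* a 100 KB source selects the "srcSize <= 128 KB" row set above */
    ZSTD_compressionParameters const cp = ZSTD_getCParams(3, 100 * 1024, /* dictSize */ 0);
    printf("level 3 @ 100 KB: wlog=%u clog=%u hlog=%u slog=%u mml=%u tlen=%u strat=%d\n",
           cp.windowLog, cp.chainLog, cp.hashLog, cp.searchLog,
           cp.minMatch, cp.targetLength, (int)cp.strategy);
    return 0;
}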
@@ -1,5 +1,5 @@
/*
 * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 * All rights reserved.
 *
 * This source code is licensed under both the BSD-style license (found in the
@@ -11,6 +11,8 @@
#ifndef ZSTD_COMPILER_H
#define ZSTD_COMPILER_H

#include "portability_macros.h"

/*-*******************************************************
 *  Compiler specifics
 *********************************************************/
@@ -38,6 +40,17 @@
#endif

/**
  On MSVC qsort requires that functions passed into it use the __cdecl calling convention (CC).
  This explicitly marks such functions as __cdecl so that the code will still compile
  if a CC other than __cdecl has been made the default.
*/
#if defined(_MSC_VER)
#  define WIN_CDECL __cdecl
#else
#  define WIN_CDECL
#endif

/**
 * FORCE_INLINE_TEMPLATE is used to define C "templates", which take constant
 * parameters. They must be inlined for the compiler to eliminate the constant
@@ -79,30 +92,19 @@
#  endif
#endif

/* target attribute */
#ifndef __has_attribute
#  define __has_attribute(x) 0  /* Compatibility with non-clang compilers. */
#endif
#if defined(__GNUC__) || defined(__ICCARM__)
#  define TARGET_ATTRIBUTE(target) __attribute__((__target__(target)))
#else
#  define TARGET_ATTRIBUTE(target)
#endif

/* Enable runtime BMI2 dispatch based on the CPU.
 * Enabled for clang & gcc >=4.8 on x86 when BMI2 isn't enabled by default.
/* Target attribute for BMI2 dynamic dispatch.
 * Enable lzcnt, bmi, and bmi2.
 * We test for bmi1 & bmi2. lzcnt is included in bmi1.
 */
#ifndef DYNAMIC_BMI2
#  if ((defined(__clang__) && __has_attribute(__target__)) \
      || (defined(__GNUC__) \
          && (__GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)))) \
      && (defined(__x86_64__) || defined(_M_X86)) \
      && !defined(__BMI2__)
#    define DYNAMIC_BMI2 1
#  else
#    define DYNAMIC_BMI2 0
#  endif
#endif
#define BMI2_TARGET_ATTRIBUTE TARGET_ATTRIBUTE("lzcnt,bmi,bmi2")

/* prefetch
 * can be disabled, by declaring NO_PREFETCH build macro */
@@ -114,12 +116,12 @@
#    include <mmintrin.h>   /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */
#    define PREFETCH_L1(ptr)  _mm_prefetch((const char*)(ptr), _MM_HINT_T0)
#    define PREFETCH_L2(ptr)  _mm_prefetch((const char*)(ptr), _MM_HINT_T1)
#  elif defined(__aarch64__)
#    define PREFETCH_L1(ptr)  __asm__ __volatile__("prfm pldl1keep, %0" ::"Q"(*(ptr)))
#    define PREFETCH_L2(ptr)  __asm__ __volatile__("prfm pldl2keep, %0" ::"Q"(*(ptr)))
#  elif defined(__GNUC__) && ( (__GNUC__ >= 4) || ( (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1) ) )
#    define PREFETCH_L1(ptr)  __builtin_prefetch((ptr), 0 /* rw==read */, 3 /* locality */)
#    define PREFETCH_L2(ptr)  __builtin_prefetch((ptr), 0 /* rw==read */, 2 /* locality */)
#  elif defined(__aarch64__)
#    define PREFETCH_L1(ptr)  __asm__ __volatile__("prfm pldl1keep, %0" ::"Q"(*(ptr)))
#    define PREFETCH_L2(ptr)  __asm__ __volatile__("prfm pldl2keep, %0" ::"Q"(*(ptr)))
#  else
#    define PREFETCH_L1(ptr) (void)(ptr)  /* disabled */
#    define PREFETCH_L2(ptr) (void)(ptr)  /* disabled */
@@ -138,8 +140,9 @@
}

/* vectorization
 * older GCC (pre gcc-4.3 picked as the cutoff) uses a different syntax */
#if !defined(__INTEL_COMPILER) && !defined(__clang__) && defined(__GNUC__)
 * older GCC (pre gcc-4.3 picked as the cutoff) uses a different syntax,
 * and some compilers, like Intel ICC and MCST LCC, do not support it at all. */
#if !defined(__INTEL_COMPILER) && !defined(__clang__) && defined(__GNUC__) && !defined(__LCC__)
#  if (__GNUC__ == 4 && __GNUC_MINOR__ > 3) || (__GNUC__ >= 5)
#    define DONT_VECTORIZE __attribute__((optimize("no-tree-vectorize")))
#  else
@@ -162,6 +165,12 @@
#define UNLIKELY(x) (x)
#endif

#if __has_builtin(__builtin_unreachable) || (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5)))
#  define ZSTD_UNREACHABLE { assert(0), __builtin_unreachable(); }
#else
#  define ZSTD_UNREACHABLE { assert(0); }
#endif

/* disable warnings */
#ifdef _MSC_VER    /* Visual Studio */
#  include <intrin.h>                    /* For Visual 2005 */
@@ -172,4 +181,174 @@
#  pragma warning(disable : 4324)        /* disable: C4324: padded structure */
#endif

/* Like DYNAMIC_BMI2 but for compile time determination of BMI2 support */
#ifndef STATIC_BMI2
#  if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86))
#    ifdef __AVX2__  /* MSVC does not have a BMI2 specific flag, but every CPU that supports AVX2 also supports BMI2 */
#      define STATIC_BMI2 1
#    endif
#  elif defined(__BMI2__) && defined(__x86_64__) && defined(__GNUC__)
#    define STATIC_BMI2 1
#  endif
#endif

#ifndef STATIC_BMI2
#  define STATIC_BMI2 0
#endif

/* compile time determination of SIMD support */
#if !defined(ZSTD_NO_INTRINSICS)
#  if defined(__SSE2__) || defined(_M_AMD64) || (defined (_M_IX86) && defined(_M_IX86_FP) && (_M_IX86_FP >= 2))
#    define ZSTD_ARCH_X86_SSE2
#  endif
#  if defined(__ARM_NEON) || defined(_M_ARM64)
#    define ZSTD_ARCH_ARM_NEON
#  endif
#
#  if defined(ZSTD_ARCH_X86_SSE2)
#    include <emmintrin.h>
#  elif defined(ZSTD_ARCH_ARM_NEON)
#    include <arm_neon.h>
#  endif
#endif

/* C-language Attributes are added in C23. */
#if defined(__STDC_VERSION__) && (__STDC_VERSION__ > 201710L) && defined(__has_c_attribute)
#  define ZSTD_HAS_C_ATTRIBUTE(x) __has_c_attribute(x)
#else
#  define ZSTD_HAS_C_ATTRIBUTE(x) 0
#endif

/* Only use C++ attributes in C++. Some compilers report support for C++
 * attributes when compiling with C.
 */
#if defined(__cplusplus) && defined(__has_cpp_attribute)
#  define ZSTD_HAS_CPP_ATTRIBUTE(x) __has_cpp_attribute(x)
#else
#  define ZSTD_HAS_CPP_ATTRIBUTE(x) 0
#endif

/* Define ZSTD_FALLTHROUGH macro for annotating switch case with the 'fallthrough' attribute.
 * - C23: https://en.cppreference.com/w/c/language/attributes/fallthrough
 * - CPP17: https://en.cppreference.com/w/cpp/language/attributes/fallthrough
 * - Else: __attribute__((__fallthrough__))
 */
#ifndef ZSTD_FALLTHROUGH
#  if ZSTD_HAS_C_ATTRIBUTE(fallthrough)
#    define ZSTD_FALLTHROUGH [[fallthrough]]
#  elif ZSTD_HAS_CPP_ATTRIBUTE(fallthrough)
#    define ZSTD_FALLTHROUGH [[fallthrough]]
#  elif __has_attribute(__fallthrough__)
/* Leading semicolon is to satisfy gcc-11 with -pedantic. Without the semicolon
 * gcc complains about: a label can only be part of a statement and a declaration is not a statement.
 */
#    define ZSTD_FALLTHROUGH ; __attribute__((__fallthrough__))
#  else
#    define ZSTD_FALLTHROUGH
#  endif
#endif
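/* Editor's note, usage sketch (not part of the patch): annotating a
 * deliberate fallthrough keeps -Wimplicit-fallthrough quiet on every
 * supported compiler, e.g.:
 *
 *     switch (srcSize & 3) {
 *     case 3: sum += src[2];
 *             ZSTD_FALLTHROUGH;
 *     case 2: sum += src[1];
 *             ZSTD_FALLTHROUGH;
 *     case 1: sum += src[0];
 *             break;
 *     default: break;
 *     }
 */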
/*-**************************************************************
 *  Alignment check
 *****************************************************************/

/* this test was initially positioned in mem.h,
 * but this file is removed (or replaced) for linux kernel
 * so it's now hosted in compiler.h,
 * which remains valid for both user & kernel spaces.
 */

#ifndef ZSTD_ALIGNOF
#  if defined(__GNUC__) || defined(_MSC_VER)
/* covers gcc, clang & MSVC */
/* note : this section must come first, before C11,
 * due to a limitation in the kernel source generator */
#    define ZSTD_ALIGNOF(T) __alignof(T)
#  elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)
/* C11 support */
#    include <stdalign.h>
#    define ZSTD_ALIGNOF(T) alignof(T)
#  else
/* No known support for alignof() - imperfect backup */
#    define ZSTD_ALIGNOF(T) (sizeof(void*) < sizeof(T) ? sizeof(void*) : sizeof(T))
#  endif
#endif /* ZSTD_ALIGNOF */
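/* Editor's note, usage sketch (not part of the patch): callers round
 * allocation cursors up to the alignment reported here, e.g.:
 *
 *     size_t const mask = ZSTD_ALIGNOF(U64) - 1;
 *     size_t const aligned = ((size_t)cursor + mask) & ~mask;
 *
 * which is valid on the compiler-supported branches, where the result of
 * alignof is always a power of two. */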
/*-**************************************************************
 *  Sanitizer
 *****************************************************************/

/* Issue #3240 reports an ASAN failure on an llvm-mingw build. Out of an
 * abundance of caution, disable our custom poisoning on mingw. */
#ifdef __MINGW32__
#  ifndef ZSTD_ASAN_DONT_POISON_WORKSPACE
#    define ZSTD_ASAN_DONT_POISON_WORKSPACE 1
#  endif
#  ifndef ZSTD_MSAN_DONT_POISON_WORKSPACE
#    define ZSTD_MSAN_DONT_POISON_WORKSPACE 1
#  endif
#endif

#if ZSTD_MEMORY_SANITIZER && !defined(ZSTD_MSAN_DONT_POISON_WORKSPACE)
/* Not all platforms that support msan provide sanitizers/msan_interface.h.
 * We therefore declare the functions we need ourselves, rather than trying to
 * include the header file... */
#include <stddef.h>  /* size_t */
#define ZSTD_DEPS_NEED_STDINT
#include "zstd_deps.h"  /* intptr_t */

/* Make memory region fully initialized (without changing its contents). */
void __msan_unpoison(const volatile void *a, size_t size);

/* Make memory region fully uninitialized (without changing its contents).
   This is a legacy interface that does not update origin information. Use
   __msan_allocated_memory() instead. */
void __msan_poison(const volatile void *a, size_t size);

/* Returns the offset of the first (at least partially) poisoned byte in the
   memory range, or -1 if the whole range is good. */
intptr_t __msan_test_shadow(const volatile void *x, size_t size);
#endif

#if ZSTD_ADDRESS_SANITIZER && !defined(ZSTD_ASAN_DONT_POISON_WORKSPACE)
/* Not all platforms that support asan provide sanitizers/asan_interface.h.
 * We therefore declare the functions we need ourselves, rather than trying to
 * include the header file... */
#include <stddef.h>  /* size_t */

/**
 * Marks a memory region (<c>[addr, addr+size)</c>) as unaddressable.
 *
 * This memory must be previously allocated by your program. Instrumented
 * code is forbidden from accessing addresses in this region until it is
 * unpoisoned. This function is not guaranteed to poison the entire region -
 * it could poison only a subregion of <c>[addr, addr+size)</c> due to ASan
 * alignment restrictions.
 *
 * \note This function is not thread-safe because no two threads can poison or
 * unpoison memory in the same memory region simultaneously.
 *
 * \param addr Start of memory region.
 * \param size Size of memory region. */
void __asan_poison_memory_region(void const volatile *addr, size_t size);

/**
 * Marks a memory region (<c>[addr, addr+size)</c>) as addressable.
 *
 * This memory must be previously allocated by your program. Accessing
 * addresses in this region is allowed until this region is poisoned again.
 * This function could unpoison a super-region of <c>[addr, addr+size)</c> due
 * to ASan alignment restrictions.
 *
 * \note This function is not thread-safe because no two threads can
 * poison or unpoison memory in the same memory region simultaneously.
 *
 * \param addr Start of memory region.
 * \param size Size of memory region. */
void __asan_unpoison_memory_region(void const volatile *addr, size_t size);
#endif
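/* Editor's note, usage sketch (not part of the patch): conceptually, zstd's
 * workspace allocator pairs these hooks so that instrumented code traps on
 * any touch of memory it has handed back:
 *
 *     __asan_poison_memory_region(base, capacity);     poison on reset/free
 *     __asan_unpoison_memory_region(obj, objSize);     unpoison on each alloc
 */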
#endif /* ZSTD_COMPILER_H */
@@ -1,5 +1,5 @@
/*
 * Copyright (c) 2018-2020, Facebook, Inc.
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 * All rights reserved.
 *
 * This source code is licensed under both the BSD-style license (found in the
@@ -16,8 +16,6 @@
 * https://github.com/facebook/folly/blob/master/folly/CpuId.h
 */

#include <string.h>
#include "mem.h"

#ifdef _MSC_VER
@@ -1,7 +1,7 @@
/* ******************************************************************
 * debug
 * Part of FSE library
 * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 *
 * You can contact the author at :
 * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
@@ -1,7 +1,7 @@
/* ******************************************************************
 * debug
 * Part of FSE library
 * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 *
 * You can contact the author at :
 * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
@@ -51,15 +51,6 @@ extern "C" {
#endif

/* DEBUGFILE can be defined externally,
 * typically through compiler command line.
 * note : currently useless.
 * Value must be stderr or stdout */
#ifndef DEBUGFILE
#  define DEBUGFILE stderr
#endif

/* recommended values for DEBUGLEVEL :
 * 0 : release mode, no debug, all run-time checks disabled
 * 1 : enables assert() only, no display
@@ -76,7 +67,8 @@
 */
#if (DEBUGLEVEL>=1)
#  include <assert.h>
#  define ZSTD_DEPS_NEED_ASSERT
#  include "zstd_deps.h"
#else
#  ifndef assert   /* assert may be already defined, due to prior #include <assert.h> */
#    define assert(condition) ((void)0)   /* disable assert (default) */
@@ -84,7 +76,8 @@
#endif

#if (DEBUGLEVEL>=2)
#  include <stdio.h>
#  define ZSTD_DEPS_NEED_IO
#  include "zstd_deps.h"
extern int g_debuglevel; /* the variable is only declared,
                            it actually lives in debug.c,
                            and is shared by the whole process.
@@ -92,14 +85,14 @@ extern int g_debuglevel; /* the variable is only declared,
                            It's useful when enabling very verbose levels
                            on selective conditions (such as position in src) */

#  define RAWLOG(l, ...) {                          \
       if (l<=g_debuglevel) {                       \
           fprintf(stderr, __VA_ARGS__);            \
#  define RAWLOG(l, ...) {                          \
       if (l<=g_debuglevel) {                       \
           ZSTD_DEBUG_PRINT(__VA_ARGS__);           \
       }   }
#  define DEBUGLOG(l, ...) {                               \
       if (l<=g_debuglevel) {                              \
           fprintf(stderr, __FILE__ ": " __VA_ARGS__);     \
           fprintf(stderr, " \n");                         \
#  define DEBUGLOG(l, ...) {                               \
       if (l<=g_debuglevel) {                              \
           ZSTD_DEBUG_PRINT(__FILE__ ": " __VA_ARGS__);    \
           ZSTD_DEBUG_PRINT(" \n");                        \
       }   }
#else
#  define RAWLOG(l, ...)   {}   /* disabled */
@@ -1,6 +1,6 @@
/* ******************************************************************
 * Common functions of New Generation Entropy library
 * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 *
 * You can contact the author at :
 * - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
@@ -19,8 +19,8 @@
#include "error_private.h"       /* ERR_*, ERROR */
#define FSE_STATIC_LINKING_ONLY  /* FSE_MIN_TABLELOG */
#include "fse.h"
#define HUF_STATIC_LINKING_ONLY  /* HUF_TABLELOG_ABSOLUTEMAX */
#include "huf.h"
#include "bits.h" /* ZSDT_highbit32, ZSTD_countTrailingZeros32 */ | |||
/*=== Version ===*/
@@ -38,8 +38,9 @@ const char* HUF_getErrorName(size_t code) { return ERR_getErrorName(code); }
/*-**************************************************************
 *  FSE NCount encoding-decoding
 ****************************************************************/
size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
                       const void* headerBuffer, size_t hbSize)
FORCE_INLINE_TEMPLATE
size_t FSE_readNCount_body(short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
                           const void* headerBuffer, size_t hbSize)
{
    const BYTE* const istart = (const BYTE*) headerBuffer;
    const BYTE* const iend = istart + hbSize;
@@ -50,23 +51,23 @@ size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* t
    U32 bitStream;
    int bitCount;
    unsigned charnum = 0;
    unsigned const maxSV1 = *maxSVPtr + 1;
    int previous0 = 0;

    if (hbSize < 4) {
        /* This function only works when hbSize >= 4 */
        char buffer[4];
        memset(buffer, 0, sizeof(buffer));
        memcpy(buffer, headerBuffer, hbSize);
    if (hbSize < 8) {
        /* This function only works when hbSize >= 8 */
        char buffer[8] = {0};
        ZSTD_memcpy(buffer, headerBuffer, hbSize);
        {   size_t const countSize = FSE_readNCount(normalizedCounter, maxSVPtr, tableLogPtr,
                                                    buffer, sizeof(buffer));
            if (FSE_isError(countSize)) return countSize;
            if (countSize > hbSize) return ERROR(corruption_detected);
            return countSize;
    }   }
    assert(hbSize >= 4);
    assert(hbSize >= 8);

    /* init */
    memset(normalizedCounter, 0, (*maxSVPtr+1) * sizeof(normalizedCounter[0]));   /* all symbols not present in NCount have a frequency of 0 */
    ZSTD_memset(normalizedCounter, 0, (*maxSVPtr+1) * sizeof(normalizedCounter[0]));   /* all symbols not present in NCount have a frequency of 0 */
    bitStream = MEM_readLE32(ip);
    nbBits = (bitStream & 0xF) + FSE_MIN_TABLELOG;   /* extract tableLog */
    if (nbBits > FSE_TABLELOG_ABSOLUTE_MAX) return ERROR(tableLog_tooLarge);
@@ -77,36 +78,58 @@ size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* t
    threshold = 1<<nbBits;
    nbBits++;

    while ((remaining>1) & (charnum<=*maxSVPtr)) {
    for (;;) {
        if (previous0) {
            unsigned n0 = charnum;
            while ((bitStream & 0xFFFF) == 0xFFFF) {
                n0 += 24;
                if (ip < iend-5) {
                    ip += 2;
                    bitStream = MEM_readLE32(ip) >> bitCount;
            /* Count the number of repeats. Each time the
             * 2-bit repeat code is 0b11 there is another
             * repeat.
             * Avoid UB by setting the high bit to 1.
             */
            int repeats = ZSTD_countTrailingZeros32(~bitStream | 0x80000000) >> 1;
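            /* Editor's note, worked example (not part of the patch):
             * countTrailingZeros32 on ~bitStream counts the trailing 1-bits
             * of bitStream, and every full pair of 1s is one 0b11 repeat
             * code; e.g. low bits ...0_1111111 give 7 trailing ones, and
             * 7 >> 1 == 3 repeats. OR-ing in 0x80000000 keeps the argument
             * non-zero, since ctz(0) would be undefined. */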
            while (repeats >= 12) {
                charnum += 3 * 12;
                if (LIKELY(ip <= iend-7)) {
                    ip += 3;
                } else {
                    bitStream >>= 16;
                    bitCount   += 16;
            }   }
            while ((bitStream & 3) == 3) {
                n0 += 3;
                bitStream >>= 2;
                bitCount += 2;
                    bitCount -= (int)(8 * (iend - 7 - ip));
                    bitCount &= 31;
                    ip = iend - 4;
                }
                bitStream = MEM_readLE32(ip) >> bitCount;
                repeats = ZSTD_countTrailingZeros32(~bitStream | 0x80000000) >> 1;
            }
            n0 += bitStream & 3;
            charnum += 3 * repeats;
            bitStream >>= 2 * repeats;
            bitCount += 2 * repeats;

            /* Add the final repeat which isn't 0b11. */
            assert((bitStream & 3) < 3);
            charnum += bitStream & 3;
            bitCount += 2;
            if (n0 > *maxSVPtr) return ERROR(maxSymbolValue_tooSmall);
            while (charnum < n0) normalizedCounter[charnum++] = 0;
            if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) {

            /* This is an error, but break and return an error
             * at the end, because returning out of a loop makes
             * it harder for the compiler to optimize.
             */
            if (charnum >= maxSV1) break;

            /* We don't need to set the normalized count to 0
             * because we already memset the whole buffer to 0.
             */
            if (LIKELY(ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) {
                assert((bitCount >> 3) <= 3); /* For first condition to work */
                ip += bitCount>>3;
                bitCount &= 7;
                bitStream = MEM_readLE32(ip) >> bitCount;
            } else {
                bitStream >>= 2;
        }   }
        {   int const max = (2*threshold-1) - remaining;
                bitCount -= (int)(8 * (iend - 4 - ip));
                bitCount &= 31;
                ip = iend - 4;
            }
            bitStream = MEM_readLE32(ip) >> bitCount;
        }
        {
            int const max = (2*threshold-1) - remaining;
            int count;

            if ((bitStream & (threshold-1)) < (U32)max) {
@@ -119,24 +142,43 @@ size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* t
            }

            count--;   /* extra accuracy */
            remaining -= count < 0 ? -count : count;   /* -1 means +1 */
            /* When it matters (small blocks), this is a
             * predictable branch, because we don't use -1.
             */
            if (count >= 0) {
                remaining -= count;
            } else {
                assert(count == -1);
                remaining += count;
            }
            normalizedCounter[charnum++] = (short)count;
            previous0 = !count;
            while (remaining < threshold) {
                nbBits--;
                threshold >>= 1;
            assert(threshold > 1);
            if (remaining < threshold) {
                /* This branch can be folded into the
                 * threshold update condition because we
                 * know that threshold > 1.
                 */
                if (remaining <= 1) break;
                nbBits = ZSTD_highbit32(remaining) + 1;
                threshold = 1 << (nbBits - 1);
            }
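            /* Editor's note (not part of the patch): this replaces the old
             *     while (remaining < threshold) { nbBits--; threshold >>= 1; }
             * loop above with a direct computation; both leave threshold at
             * the largest power of two <= remaining, well-defined here
             * because the remaining <= 1 case already broke out. */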
            if (charnum >= maxSV1) break;

            if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) {
            if (LIKELY(ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) {
                ip += bitCount>>3;
                bitCount &= 7;
            } else {
                bitCount -= (int)(8 * (iend - 4 - ip));
                bitCount &= 31;
                ip = iend - 4;
            }
            bitStream = MEM_readLE32(ip) >> (bitCount & 31);
    }   }   /* while ((remaining>1) & (charnum<=*maxSVPtr)) */
            bitStream = MEM_readLE32(ip) >> bitCount;
    }   }
    if (remaining != 1) return ERROR(corruption_detected);
    /* Only possible when there are too many zeros. */
    if (charnum > maxSV1) return ERROR(maxSymbolValue_tooSmall);
    if (bitCount > 32) return ERROR(corruption_detected);
    *maxSVPtr = charnum-1;

@@ -144,6 +186,43 @@ size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* t
    return ip-istart;
}

/* Avoids the FORCE_INLINE of the _body() function. */
static size_t FSE_readNCount_body_default(
        short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
        const void* headerBuffer, size_t hbSize)
{
    return FSE_readNCount_body(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize);
}

#if DYNAMIC_BMI2
BMI2_TARGET_ATTRIBUTE static size_t FSE_readNCount_body_bmi2(
        short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
        const void* headerBuffer, size_t hbSize)
{
    return FSE_readNCount_body(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize);
}
#endif

size_t FSE_readNCount_bmi2(
        short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
        const void* headerBuffer, size_t hbSize, int bmi2)
{
#if DYNAMIC_BMI2
    if (bmi2) {
        return FSE_readNCount_body_bmi2(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize);
    }
#endif
    (void)bmi2;
    return FSE_readNCount_body_default(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize);
}

size_t FSE_readNCount(
        short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
        const void* headerBuffer, size_t hbSize)
{
    return FSE_readNCount_bmi2(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize, /* bmi2 */ 0);
}

/*! HUF_readStats() :
    Read compact Huffman tree, saved by HUF_writeCTable().
@@ -155,6 +234,17 @@ size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* t
size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats,
                     U32* nbSymbolsPtr, U32* tableLogPtr,
                     const void* src, size_t srcSize)
{
    U32 wksp[HUF_READ_STATS_WORKSPACE_SIZE_U32];
    return HUF_readStats_wksp(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, wksp, sizeof(wksp), /* flags */ 0);
}

FORCE_INLINE_TEMPLATE size_t
HUF_readStats_body(BYTE* huffWeight, size_t hwSize, U32* rankStats,
                   U32* nbSymbolsPtr, U32* tableLogPtr,
                   const void* src, size_t srcSize,
                   void* workSpace, size_t wkspSize,
                   int bmi2)
{
    U32 weightTotal;
    const BYTE* ip = (const BYTE*) src;
@@ -163,7 +253,7 @@ size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats,
    if (!srcSize) return ERROR(srcSize_wrong);
    iSize = ip[0];
    /* memset(huffWeight, 0, hwSize); *//* is not necessary, even though some analyzer complain ... */
    /* ZSTD_memset(huffWeight, 0, hwSize); *//* is not necessary, even though some analyzer complain ... */

    if (iSize >= 128) {  /* special header */
        oSize = iSize - 127;
@@ -177,31 +267,31 @@ size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats,
                huffWeight[n+1] = ip[n/2] & 15;
    }   }   }
    else  {   /* header compressed with FSE (normal case) */
        FSE_DTable fseWorkspace[FSE_DTABLE_SIZE_U32(6)];  /* 6 is max possible tableLog for HUF header (maybe even 5, to be tested) */
        if (iSize+1 > srcSize) return ERROR(srcSize_wrong);
        oSize = FSE_decompress_wksp(huffWeight, hwSize-1, ip+1, iSize, fseWorkspace, 6);   /* max (hwSize-1) values decoded, as last one is implied */
        /* max (hwSize-1) values decoded, as last one is implied */
        oSize = FSE_decompress_wksp_bmi2(huffWeight, hwSize-1, ip+1, iSize, 6, workSpace, wkspSize, bmi2);
        if (FSE_isError(oSize)) return oSize;
    }

    /* collect weight stats */
    memset(rankStats, 0, (HUF_TABLELOG_MAX + 1) * sizeof(U32));
    ZSTD_memset(rankStats, 0, (HUF_TABLELOG_MAX + 1) * sizeof(U32));
    weightTotal = 0;
    {   U32 n; for (n=0; n<oSize; n++) {
            if (huffWeight[n] >= HUF_TABLELOG_MAX) return ERROR(corruption_detected);
            if (huffWeight[n] > HUF_TABLELOG_MAX) return ERROR(corruption_detected);
            rankStats[huffWeight[n]]++;
            weightTotal += (1 << huffWeight[n]) >> 1;
    }   }
    if (weightTotal == 0) return ERROR(corruption_detected);

    /* get last non-null symbol weight (implied, total must be 2^n) */
    {   U32 const tableLog = BIT_highbit32(weightTotal) + 1;
    {   U32 const tableLog = ZSTD_highbit32(weightTotal) + 1;
        if (tableLog > HUF_TABLELOG_MAX) return ERROR(corruption_detected);
        *tableLogPtr = tableLog;
        /* determine last weight */
        {   U32 const total = 1 << tableLog;
            U32 const rest = total - weightTotal;
            U32 const verif = 1 << BIT_highbit32(rest);
            U32 const lastWeight = BIT_highbit32(rest) + 1;
            U32 const verif = 1 << ZSTD_highbit32(rest);
            U32 const lastWeight = ZSTD_highbit32(rest) + 1;
            if (verif != rest) return ERROR(corruption_detected);    /* last value must be a clean power of 2 */
            huffWeight[oSize] = (BYTE)lastWeight;
            rankStats[lastWeight]++;
@@ -214,3 +304,37 @@ size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats,
    *nbSymbolsPtr = (U32)(oSize+1);
    return iSize+1;
}

/* Avoids the FORCE_INLINE of the _body() function. */
static size_t HUF_readStats_body_default(BYTE* huffWeight, size_t hwSize, U32* rankStats,
                                         U32* nbSymbolsPtr, U32* tableLogPtr,
                                         const void* src, size_t srcSize,
                                         void* workSpace, size_t wkspSize)
{
    return HUF_readStats_body(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize, 0);
}

#if DYNAMIC_BMI2
static BMI2_TARGET_ATTRIBUTE size_t HUF_readStats_body_bmi2(BYTE* huffWeight, size_t hwSize, U32* rankStats,
                                                            U32* nbSymbolsPtr, U32* tableLogPtr,
                                                            const void* src, size_t srcSize,
                                                            void* workSpace, size_t wkspSize)
{
    return HUF_readStats_body(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize, 1);
}
#endif

size_t HUF_readStats_wksp(BYTE* huffWeight, size_t hwSize, U32* rankStats,
                          U32* nbSymbolsPtr, U32* tableLogPtr,
                          const void* src, size_t srcSize,
                          void* workSpace, size_t wkspSize,
                          int flags)
{
#if DYNAMIC_BMI2
    if (flags & HUF_flags_bmi2) {
        return HUF_readStats_body_bmi2(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize);
    }
#endif
    (void)flags;
    return HUF_readStats_body_default(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize);
}
@@ -1,5 +1,5 @@
/*
 * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 * All rights reserved.
 *
 * This source code is licensed under both the BSD-style license (found in the
@@ -27,9 +27,11 @@ const char* ERR_getErrorString(ERR_enum code)
    case PREFIX(version_unsupported): return "Version not supported";
    case PREFIX(frameParameter_unsupported): return "Unsupported frame parameter";
    case PREFIX(frameParameter_windowTooLarge): return "Frame requires too much memory for decoding";
    case PREFIX(corruption_detected): return "Corrupted block detected";
    case PREFIX(corruption_detected): return "Data corruption detected";
    case PREFIX(checksum_wrong): return "Restored data doesn't match checksum";
    case PREFIX(literals_headerWrong): return "Header of Literals' block doesn't respect format specification";
    case PREFIX(parameter_unsupported): return "Unsupported parameter";
    case PREFIX(parameter_combination_unsupported): return "Unsupported combination of parameters";
    case PREFIX(parameter_outOfBound): return "Parameter is out of bound";
    case PREFIX(init_missing): return "Context should be init first";
    case PREFIX(memory_allocation): return "Allocation error : not enough memory";
@@ -38,16 +40,22 @@ const char* ERR_getErrorString(ERR_enum code)
    case PREFIX(tableLog_tooLarge): return "tableLog requires too much memory : unsupported";
    case PREFIX(maxSymbolValue_tooLarge): return "Unsupported max Symbol Value : too large";
    case PREFIX(maxSymbolValue_tooSmall): return "Specified maxSymbolValue is too small";
    case PREFIX(stabilityCondition_notRespected): return "pledged buffer stability condition is not respected";
    case PREFIX(dictionary_corrupted): return "Dictionary is corrupted";
    case PREFIX(dictionary_wrong): return "Dictionary mismatch";
    case PREFIX(dictionaryCreation_failed): return "Cannot create Dictionary from provided samples";
    case PREFIX(dstSize_tooSmall): return "Destination buffer is too small";
    case PREFIX(srcSize_wrong): return "Src size is incorrect";
    case PREFIX(dstBuffer_null): return "Operation on NULL destination buffer";
    case PREFIX(noForwardProgress_destFull): return "Operation made no progress over multiple calls, due to output buffer being full";
    case PREFIX(noForwardProgress_inputEmpty): return "Operation made no progress over multiple calls, due to input being empty";
    /* following error codes are not stable and may be removed or changed in a future version */
    case PREFIX(frameIndex_tooLarge): return "Frame index is too large";
    case PREFIX(seekableIO): return "An I/O error occurred when reading/seeking";
    case PREFIX(dstBuffer_wrong): return "Destination buffer is wrong";
    case PREFIX(srcBuffer_wrong): return "Source buffer is wrong";
    case PREFIX(sequenceProducer_failed): return "Block-level external sequence producer returned an error code";
    case PREFIX(externalSequences_invalid): return "External sequences are not valid";
    case PREFIX(maxCode):
    default: return notErrorCode;
    }
@@ -1,5 +1,5 @@ | |||
/* | |||
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. | |||
* Copyright (c) Meta Platforms, Inc. and affiliates. | |||
* All rights reserved. | |||
* | |||
* This source code is licensed under both the BSD-style license (found in the | |||
@@ -21,8 +21,10 @@ extern "C" { | |||
/* **************************************** | |||
* Dependencies | |||
******************************************/ | |||
#include <stddef.h> /* size_t */ | |||
#include "zstd_errors.h" /* enum list */ | |||
#include "compiler.h" | |||
#include "debug.h" | |||
#include "zstd_deps.h" /* size_t */ | |||
/* **************************************** | |||
@@ -73,6 +75,83 @@ ERR_STATIC const char* ERR_getErrorName(size_t code) | |||
return ERR_getErrorString(ERR_getErrorCode(code)); | |||
} | |||
/** | |||
* Ignore: this is an internal helper. | |||
* | |||
* This is a helper function to help force C99-correctness during compilation. | |||
* Under strict compilation modes, variadic macro arguments can't be empty. | |||
* However, variadic function arguments can be. Using a function therefore lets | |||
* us statically check that at least one (string) argument was passed, | |||
* independent of the compilation flags. | |||
*/ | |||
static INLINE_KEYWORD UNUSED_ATTR | |||
void _force_has_format_string(const char *format, ...) { | |||
(void)format; | |||
} | |||
/** | |||
* Ignore: this is an internal helper. | |||
* | |||
* We want to force this function invocation to be syntactically correct, but | |||
* we don't want to force runtime evaluation of its arguments. | |||
*/ | |||
#define _FORCE_HAS_FORMAT_STRING(...) \ | |||
if (0) { \ | |||
_force_has_format_string(__VA_ARGS__); \ | |||
} | |||
#define ERR_QUOTE(str) #str | |||
/** | |||
* Return the specified error if the condition evaluates to true. | |||
* | |||
* In debug modes, prints additional information. | |||
* In order to do that (particularly, printing the conditional that failed), | |||
* this can't just wrap RETURN_ERROR(). | |||
*/ | |||
#define RETURN_ERROR_IF(cond, err, ...) \ | |||
if (cond) { \ | |||
RAWLOG(3, "%s:%d: ERROR!: check %s failed, returning %s", \ | |||
__FILE__, __LINE__, ERR_QUOTE(cond), ERR_QUOTE(ERROR(err))); \ | |||
_FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \ | |||
RAWLOG(3, ": " __VA_ARGS__); \ | |||
RAWLOG(3, "\n"); \ | |||
return ERROR(err); \ | |||
} | |||
/**
 * Unconditionally return the specified error.
 *
 * In debug modes, prints additional information.
 */
#define RETURN_ERROR(err, ...) \
    do { \
        RAWLOG(3, "%s:%d: ERROR!: unconditional check failed, returning %s", \
               __FILE__, __LINE__, ERR_QUOTE(ERROR(err))); \
        _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \
        RAWLOG(3, ": " __VA_ARGS__); \
        RAWLOG(3, "\n"); \
        return ERROR(err); \
    } while(0);

/**
 * If the provided expression evaluates to an error code, returns that error code.
 *
 * In debug modes, prints additional information.
 */
#define FORWARD_IF_ERROR(err, ...) \
    do { \
        size_t const err_code = (err); \
        if (ERR_isError(err_code)) { \
            RAWLOG(3, "%s:%d: ERROR!: forwarding error in %s: %s", \
                   __FILE__, __LINE__, ERR_QUOTE(err), ERR_getErrorName(err_code)); \
            _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \
            RAWLOG(3, ": " __VA_ARGS__); \
            RAWLOG(3, "\n"); \
            return err_code; \
        } \
    } while(0);
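
FORWARD_IF_ERROR composes naturally with RETURN_ERROR_IF when a function chains fallible stages; it evaluates its argument exactly once into err_code before testing it. A minimal sketch (inner_stage is a placeholder, not a real zstd symbol):

    static size_t outer_stage(void* dst, size_t dstCapacity)
    {
        /* returns early iff inner_stage() produced an error code */
        FORWARD_IF_ERROR(inner_stage(dst, dstCapacity), "inner stage failed");
        return 0;
    }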
#if defined (__cplusplus)
}
#endif
@@ -1,7 +1,7 @@
/* ******************************************************************
 * FSE : Finite State Entropy codec
 * Public Prototypes declaration
 * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 *
 * You can contact the author at :
 * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
@@ -23,7 +23,7 @@ extern "C" {

/*-*****************************************
*  Dependencies
******************************************/
#include <stddef.h>    /* size_t, ptrdiff_t */
#include "zstd_deps.h" /* size_t, ptrdiff_t */

/*-*****************************************
@@ -53,34 +53,6 @@ extern "C" {
FSE_PUBLIC_API unsigned FSE_versionNumber(void);   /**< library version number; to be used when checking dll version */

/*-****************************************
*  FSE simple functions
******************************************/
/*! FSE_compress() :
    Compress content of buffer 'src', of size 'srcSize', into destination buffer 'dst'.
    'dst' buffer must be already allocated. Compression runs faster if dstCapacity >= FSE_compressBound(srcSize).
    @return : size of compressed data (<= dstCapacity).
    Special values : if return == 0, srcData is not compressible => Nothing is stored within dst !!!
                     if return == 1, srcData is a single byte symbol * srcSize times. Use RLE compression instead.
                     if FSE_isError(return), compression failed (more details using FSE_getErrorName())
*/
FSE_PUBLIC_API size_t FSE_compress(void* dst, size_t dstCapacity,
                                   const void* src, size_t srcSize);

/*! FSE_decompress():
    Decompress FSE data from buffer 'cSrc', of size 'cSrcSize',
    into already allocated destination buffer 'dst', of size 'dstCapacity'.
    @return : size of regenerated data (<= dstCapacity),
              or an error code, which can be tested using FSE_isError() .

    ** Important ** : FSE_decompress() does not decompress non-compressible nor RLE data !!!
    Why ? : making this distinction requires a header.
    Header management is intentionally delegated to the user layer, which can better manage special cases.
*/
FSE_PUBLIC_API size_t FSE_decompress(void* dst, size_t dstCapacity,
                                     const void* cSrc, size_t cSrcSize);
/*-*****************************************
*  Tool functions
******************************************/
@@ -91,20 +63,6 @@ FSE_PUBLIC_API unsigned FSE_isError(size_t code);   /* tells if a return value is an error code */
FSE_PUBLIC_API const char* FSE_getErrorName(size_t code);   /* provides error code string (useful for debugging) */

/*-*****************************************
*  FSE advanced functions
******************************************/
/*! FSE_compress2() :
    Same as FSE_compress(), but allows the selection of 'maxSymbolValue' and 'tableLog'
    Both parameters can be defined as '0' to mean : use default value
    @return : size of compressed data
    Special values : if return == 0, srcData is not compressible => Nothing is stored within cSrc !!!
                     if return == 1, srcData is a single byte symbol * srcSize times. Use RLE compression.
                     if FSE_isError(return), it's an error code.
*/
FSE_PUBLIC_API size_t FSE_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog);

/*-*****************************************
*  FSE detailed API
******************************************/
@@ -137,10 +95,16 @@ FSE_PUBLIC_API unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize
/*! FSE_normalizeCount():
    normalize counts so that sum(count[]) == Power_of_2 (2^tableLog)
    'normalizedCounter' is a table of short, of minimum size (maxSymbolValue+1).
    useLowProbCount is a boolean parameter which trades off compressed size for
    faster header decoding. When it is set to 1, the compressed data will be slightly
    smaller. And when it is set to 0, FSE_readNCount() and FSE_buildDTable() will be
    faster. If you are compressing a small amount of data (< 2 KB) then useLowProbCount=0
    is a good default, since header deserialization makes a big speed difference.
    Otherwise, useLowProbCount=1 is a good default, since the speed difference is small.
    @return : tableLog,
              or an errorCode, which can be tested using FSE_isError() */
FSE_PUBLIC_API size_t FSE_normalizeCount(short* normalizedCounter, unsigned tableLog,
                    const unsigned* count, size_t srcSize, unsigned maxSymbolValue);
                    const unsigned* count, size_t srcSize, unsigned maxSymbolValue, unsigned useLowProbCount);
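
A hedged usage sketch of the new parameter, following the 2 KB guidance above (count, srcSize and maxSymbolValue are assumed to come from a prior HIST_count_wksp() pass):

    short norm[FSE_MAX_SYMBOL_VALUE + 1];
    unsigned const tableLog = FSE_optimalTableLog(0, srcSize, maxSymbolValue);
    /* small inputs: favor fast header decoding; larger inputs: favor ratio */
    size_t const r = FSE_normalizeCount(norm, tableLog, count, srcSize, maxSymbolValue,
                                        /* useLowProbCount */ srcSize >= 2048);
    if (FSE_isError(r)) { /* handle error */ }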
/*! FSE_NCountWriteBound():
    Provides the maximum possible size of an FSE normalized table, given 'maxSymbolValue' and 'tableLog'.
@@ -158,8 +122,6 @@ FSE_PUBLIC_API size_t FSE_writeNCount (void* buffer, size_t bufferSize,

/*! Constructor and Destructor of FSE_CTable.
    Note that FSE_CTable size depends on 'tableLog' and 'maxSymbolValue' */
typedef unsigned FSE_CTable;   /* don't allocate that. It's only meant to be more restrictive than void* */
FSE_PUBLIC_API FSE_CTable* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog);
FSE_PUBLIC_API void        FSE_freeCTable (FSE_CTable* ct);

/*! FSE_buildCTable():
    Builds `ct`, which must be already allocated, using FSE_createCTable().
@@ -228,23 +190,14 @@ FSE_PUBLIC_API size_t FSE_readNCount (short* normalizedCounter,
                           unsigned* maxSymbolValuePtr, unsigned* tableLogPtr,
                           const void* rBuffer, size_t rBuffSize);

/*! Constructor and Destructor of FSE_DTable.
    Note that its size depends on 'tableLog' */
/*! FSE_readNCount_bmi2():
 * Same as FSE_readNCount() but pass bmi2=1 when your CPU supports BMI2 and 0 otherwise.
 */
FSE_PUBLIC_API size_t FSE_readNCount_bmi2(short* normalizedCounter,
                           unsigned* maxSymbolValuePtr, unsigned* tableLogPtr,
                           const void* rBuffer, size_t rBuffSize, int bmi2);

typedef unsigned FSE_DTable;   /* don't allocate that. It's just a way to be more restrictive than void* */
FSE_PUBLIC_API FSE_DTable* FSE_createDTable(unsigned tableLog);
FSE_PUBLIC_API void        FSE_freeDTable(FSE_DTable* dt);

/*! FSE_buildDTable():
    Builds 'dt', which must be already allocated, using FSE_createDTable().
    return : 0, or an errorCode, which can be tested using FSE_isError() */
FSE_PUBLIC_API size_t FSE_buildDTable (FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog);

/*! FSE_decompress_usingDTable():
    Decompress compressed source `cSrc` of size `cSrcSize` using `dt`
    into `dst` which must be already allocated.
    @return : size of regenerated data (necessarily <= `dstCapacity`),
              or an errorCode, which can be tested using FSE_isError() */
FSE_PUBLIC_API size_t FSE_decompress_usingDTable(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, const FSE_DTable* dt);

/*!
Tutorial :
@@ -288,12 +241,12 @@ If there is an error, the function will return an error code, which can be tested
*******************************************/

/* FSE buffer bounds */
#define FSE_NCOUNTBOUND 512
#define FSE_BLOCKBOUND(size) (size + (size>>7) + 4 /* fse states */ + sizeof(size_t) /* bitContainer */)
#define FSE_BLOCKBOUND(size) ((size) + ((size)>>7) + 4 /* fse states */ + sizeof(size_t) /* bitContainer */)
#define FSE_COMPRESSBOUND(size) (FSE_NCOUNTBOUND + FSE_BLOCKBOUND(size))   /* Macro version, useful for static allocation */

/* It is possible to statically allocate FSE CTable/DTable as a table of FSE_CTable/FSE_DTable using below macros */
#define FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue)   (1 + (1<<(maxTableLog-1)) + ((maxSymbolValue+1)*2))
#define FSE_DTABLE_SIZE_U32(maxTableLog)                   (1 + (1<<maxTableLog))
#define FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue)   (1 + (1<<((maxTableLog)-1)) + (((maxSymbolValue)+1)*2))
#define FSE_DTABLE_SIZE_U32(maxTableLog)                   (1 + (1<<(maxTableLog)))

/* or use the size to malloc() space directly. Pay attention to alignment restrictions though */
#define FSE_CTABLE_SIZE(maxTableLog, maxSymbolValue)   (FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) * sizeof(FSE_CTable))
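
The added parentheses around macro parameters are not cosmetic: without them a composite argument can mis-associate. A small illustrative case (my example, not from the patch):

    /* old FSE_BLOCKBOUND: the (size>>7) subterm with size = n & 0xFF expands
     * to (n & 0xFF >> 7), which parses as n & (0xFF >> 7) == n & 1, because
     * '>>' binds tighter than '&'. The new ((size)>>7) form expands to
     * ((n & 0xFF) >> 7), as intended. */
    size_t const bound = FSE_BLOCKBOUND(n & 0xFF);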
@@ -307,33 +260,28 @@ If there is an error, the function will return an error code, which can be tested
unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus);
/**< same as FSE_optimalTableLog(), which used `minus==2` */

/* FSE_compress_wksp() :
 * Same as FSE_compress2(), but using an externally allocated scratch buffer (`workSpace`).
 * FSE_WKSP_SIZE_U32() provides the minimum size required for `workSpace` as a table of FSE_CTable.
 */
#define FSE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue)   ( FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) + ((maxTableLog > 12) ? (1 << (maxTableLog - 2)) : 1024) )
size_t FSE_compress_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize);

size_t FSE_buildCTable_raw (FSE_CTable* ct, unsigned nbBits);
/**< build a fake FSE_CTable, designed for a flat distribution, where each symbol uses nbBits */

size_t FSE_buildCTable_rle (FSE_CTable* ct, unsigned char symbolValue);
/**< build a fake FSE_CTable, designed to compress always the same symbolValue */

/* FSE_buildCTable_wksp() :
 * Same as FSE_buildCTable(), but using an externally allocated scratch buffer (`workSpace`).
 * `wkspSize` must be >= `(1<<tableLog)`.
 * `wkspSize` must be >= `FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(maxSymbolValue, tableLog)` of `unsigned`.
 * See FSE_buildCTable_wksp() for breakdown of workspace usage.
 */
#define FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(maxSymbolValue, tableLog) (((maxSymbolValue + 2) + (1ull << (tableLog)))/2 + sizeof(U64)/sizeof(U32) /* additional 8 bytes for potential table overwrite */)
#define FSE_BUILD_CTABLE_WORKSPACE_SIZE(maxSymbolValue, tableLog) (sizeof(unsigned) * FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(maxSymbolValue, tableLog))
size_t FSE_buildCTable_wksp(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize);

size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits);
/**< build a fake FSE_DTable, designed to read a flat distribution where each symbol uses nbBits */

#define FSE_BUILD_DTABLE_WKSP_SIZE(maxTableLog, maxSymbolValue) (sizeof(short) * (maxSymbolValue + 1) + (1ULL << maxTableLog) + 8)
#define FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) ((FSE_BUILD_DTABLE_WKSP_SIZE(maxTableLog, maxSymbolValue) + sizeof(unsigned) - 1) / sizeof(unsigned))
FSE_PUBLIC_API size_t FSE_buildDTable_wksp(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize);
/**< Same as FSE_buildDTable(), using an externally allocated `workspace` produced with `FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxSymbolValue)` */

size_t FSE_buildDTable_rle (FSE_DTable* dt, unsigned char symbolValue);
/**< build a fake FSE_DTable, designed to always generate the same symbolValue */

size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, FSE_DTable* workSpace, unsigned maxLog);
/**< same as FSE_decompress(), using an externally allocated `workSpace` produced with `FSE_DTABLE_SIZE_U32(maxLog)` */

#define FSE_DECOMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) (FSE_DTABLE_SIZE_U32(maxTableLog) + 1 + FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) + (FSE_MAX_SYMBOL_VALUE + 1) / 2 + 1)
#define FSE_DECOMPRESS_WKSP_SIZE(maxTableLog, maxSymbolValue) (FSE_DECOMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) * sizeof(unsigned))
size_t FSE_decompress_wksp_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize, int bmi2);
/**< same as FSE_decompress(), using an externally allocated `workSpace` produced with `FSE_DECOMPRESS_WKSP_SIZE_U32(maxLog, maxSymbolValue)`.
 * Set bmi2 to 1 if your CPU supports BMI2 or 0 if it doesn't */

typedef enum {
   FSE_repeat_none,   /**< Cannot use the previous table */
@@ -529,7 +477,7 @@ MEM_STATIC void FSE_flushCState(BIT_CStream_t* bitC, const FSE_CState_t* statePtr)

/* FSE_getMaxNbBits() :
 * Approximate maximum cost of a symbol, in bits.
 * Fractional get rounded up (i.e : a symbol with a normalized frequency of 3 gives the same result as a frequency of 2)
 * Fractional get rounded up (i.e. a symbol with a normalized frequency of 3 gives the same result as a frequency of 2)
 * note 1 : assume symbolValue is valid (<= maxSymbolValue)
 * note 2 : if freq[symbolValue]==0, @return a fake cost of tableLog+1 bits */
MEM_STATIC U32 FSE_getMaxNbBits(const void* symbolTTPtr, U32 symbolValue)
@@ -644,6 +592,9 @@ MEM_STATIC unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr)
#ifndef FSE_DEFAULT_MEMORY_USAGE
#  define FSE_DEFAULT_MEMORY_USAGE 13
#endif
#if (FSE_DEFAULT_MEMORY_USAGE > FSE_MAX_MEMORY_USAGE)
#  error "FSE_DEFAULT_MEMORY_USAGE must be <= FSE_MAX_MEMORY_USAGE"
#endif

/*!FSE_MAX_SYMBOL_VALUE :
*  Maximum symbol value authorized.
@@ -677,7 +628,7 @@ MEM_STATIC unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr)
#  error "FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX is not supported"
#endif

#define FSE_TABLESTEP(tableSize) ((tableSize>>1) + (tableSize>>3) + 3)
#define FSE_TABLESTEP(tableSize) (((tableSize)>>1) + ((tableSize)>>3) + 3)

#endif /* FSE_STATIC_LINKING_ONLY */
@@ -1,6 +1,6 @@
/* ******************************************************************
 * FSE : Finite State Entropy encoder
 * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 *
 * You can contact the author at :
 * - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
@@ -15,8 +15,6 @@
/* **************************************************************
*  Includes
****************************************************************/
#include <stdlib.h>     /* malloc, free, qsort */
#include <string.h>     /* memcpy, memset */
#include "compiler.h"
#include "mem.h"        /* U32, U16, etc. */
#include "debug.h"      /* assert, DEBUGLOG */
@@ -25,6 +23,10 @@
#define FSE_STATIC_LINKING_ONLY
#include "fse.h"
#include "error_private.h"
#define ZSTD_DEPS_NEED_MALLOC
#define ZSTD_DEPS_NEED_MATH64
#include "zstd_deps.h"  /* ZSTD_malloc, ZSTD_free, ZSTD_memcpy, ZSTD_memset */
#include "bits.h"       /* ZSTD_highbit32 */

/* **************************************************************
@@ -74,41 +76,85 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct,
    void* const FSCT = ((U32*)ptr) + 1 /* header */ + (tableLog ? tableSize>>1 : 1) ;
    FSE_symbolCompressionTransform* const symbolTT = (FSE_symbolCompressionTransform*) (FSCT);
    U32 const step = FSE_TABLESTEP(tableSize);
    U32 cumul[FSE_MAX_SYMBOL_VALUE+2];
    U32 const maxSV1 = maxSymbolValue+1;
    U16* cumul = (U16*)workSpace;   /* size = maxSV1 */
    FSE_FUNCTION_TYPE* const tableSymbol = (FSE_FUNCTION_TYPE*)(cumul + (maxSV1+1));   /* size = tableSize */

    FSE_FUNCTION_TYPE* const tableSymbol = (FSE_FUNCTION_TYPE*)workSpace;
    U32 highThreshold = tableSize-1;

    assert(((size_t)workSpace & 1) == 0);   /* Must be 2 bytes-aligned */
    if (FSE_BUILD_CTABLE_WORKSPACE_SIZE(maxSymbolValue, tableLog) > wkspSize) return ERROR(tableLog_tooLarge);
    /* CTable header */
    if (((size_t)1 << tableLog) * sizeof(FSE_FUNCTION_TYPE) > wkspSize) return ERROR(tableLog_tooLarge);
    tableU16[-2] = (U16) tableLog;
    tableU16[-1] = (U16) maxSymbolValue;
    assert(tableLog < 16);   /* required for threshold strategy to work */

    /* For explanations on how to distribute symbol values over the table :
     * http://fastcompression.blogspot.fr/2014/02/fse-distributing-symbol-values.html */
     * https://fastcompression.blogspot.fr/2014/02/fse-distributing-symbol-values.html */

    #ifdef __clang_analyzer__
    memset(tableSymbol, 0, sizeof(*tableSymbol) * tableSize);   /* useless initialization, just to keep scan-build happy */
    ZSTD_memset(tableSymbol, 0, sizeof(*tableSymbol) * tableSize);   /* useless initialization, just to keep scan-build happy */
    #endif

    /* symbol start positions */
    {   U32 u;
        cumul[0] = 0;
        for (u=1; u <= maxSymbolValue+1; u++) {
        for (u=1; u <= maxSV1; u++) {
            if (normalizedCounter[u-1]==-1) {   /* Low proba symbol */
                cumul[u] = cumul[u-1] + 1;
                tableSymbol[highThreshold--] = (FSE_FUNCTION_TYPE)(u-1);
            } else {
                cumul[u] = cumul[u-1] + normalizedCounter[u-1];
                assert(normalizedCounter[u-1] >= 0);
                cumul[u] = cumul[u-1] + (U16)normalizedCounter[u-1];
                assert(cumul[u] >= cumul[u-1]);   /* no overflow */
        }   }
        cumul[maxSymbolValue+1] = tableSize+1;
        cumul[maxSV1] = (U16)(tableSize+1);
    }

    /* Spread symbols */
    {   U32 position = 0;
    if (highThreshold == tableSize - 1) {
        /* Case for no low prob count symbols. Lay down 8 bytes at a time
         * to reduce branch misses since we are operating on a small block
         */
        BYTE* const spread = tableSymbol + tableSize;   /* size = tableSize + 8 (may write beyond tableSize) */
        {   U64 const add = 0x0101010101010101ull;
            size_t pos = 0;
            U64 sv = 0;
            U32 s;
            for (s=0; s<maxSV1; ++s, sv += add) {
                int i;
                int const n = normalizedCounter[s];
                MEM_write64(spread + pos, sv);
                for (i = 8; i < n; i += 8) {
                    MEM_write64(spread + pos + i, sv);
                }
                assert(n>=0);
                pos += (size_t)n;
            }
        }
        /* Spread symbols across the table. Lack of lowprob symbols means that
         * we don't need variable sized inner loop, so we can unroll the loop and
         * reduce branch misses.
         */
        {   size_t position = 0;
            size_t s;
            size_t const unroll = 2;   /* Experimentally determined optimal unroll */
            assert(tableSize % unroll == 0);   /* FSE_MIN_TABLELOG is 5 */
            for (s = 0; s < (size_t)tableSize; s += unroll) {
                size_t u;
                for (u = 0; u < unroll; ++u) {
                    size_t const uPosition = (position + (u * step)) & tableMask;
                    tableSymbol[uPosition] = spread[s + u];
                }
                position = (position + (unroll * step)) & tableMask;
            }
            assert(position == 0);   /* Must have initialized all positions */
        }
    } else {
        U32 position = 0;
        U32 symbol;
        for (symbol=0; symbol<=maxSymbolValue; symbol++) {
        for (symbol=0; symbol<maxSV1; symbol++) {
            int nbOccurrences;
            int const freq = normalizedCounter[symbol];
            for (nbOccurrences=0; nbOccurrences<freq; nbOccurrences++) {
@@ -117,7 +163,6 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct,
            while (position > highThreshold)
                position = (position + step) & tableMask;   /* Low proba area */
    }   }
        assert(position==0);   /* Must have initialized all positions */
    }

@@ -141,16 +186,17 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct,
        case -1:
        case  1:
            symbolTT[s].deltaNbBits = (tableLog << 16) - (1<<tableLog);
            symbolTT[s].deltaFindState = total - 1;
            assert(total <= INT_MAX);
            symbolTT[s].deltaFindState = (int)(total - 1);
            total ++;
            break;
        default :
            {
                U32 const maxBitsOut = tableLog - BIT_highbit32 (normalizedCounter[s]-1);
                U32 const minStatePlus = normalizedCounter[s] << maxBitsOut;
            assert(normalizedCounter[s] > 1);
            {   U32 const maxBitsOut = tableLog - ZSTD_highbit32 ((U32)normalizedCounter[s]-1);
                U32 const minStatePlus = (U32)normalizedCounter[s] << maxBitsOut;
                symbolTT[s].deltaNbBits = (maxBitsOut << 16) - minStatePlus;
                symbolTT[s].deltaFindState = total - normalizedCounter[s];
                total += normalizedCounter[s];
                symbolTT[s].deltaFindState = (int)(total - (unsigned)normalizedCounter[s]);
                total += (unsigned)normalizedCounter[s];
    }   }   }   }

#if 0   /* debug : symbol costs */
@@ -161,31 +207,26 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct,
                symbol, normalizedCounter[symbol],
                FSE_getMaxNbBits(symbolTT, symbol),
                (double)FSE_bitCost(symbolTT, tableLog, symbol, 8) / 256);
        }
    }
    }   }
#endif

    return 0;
}

size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
{
    FSE_FUNCTION_TYPE tableSymbol[FSE_MAX_TABLESIZE];   /* memset() is not necessary, even if static analyzer complains about it */
    return FSE_buildCTable_wksp(ct, normalizedCounter, maxSymbolValue, tableLog, tableSymbol, sizeof(tableSymbol));
}
#ifndef FSE_COMMONDEFS_ONLY

/*-**************************************************************
*  FSE NCount encoding
****************************************************************/
size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog)
{
    size_t const maxHeaderSize = (((maxSymbolValue+1) * tableLog) >> 3) + 3;
    size_t const maxHeaderSize = (((maxSymbolValue+1) * tableLog
                                   + 4 /* bitCount initialized at 4 */
                                   + 2 /* first two symbols may use one additional bit each */) / 8)
                                   + 1 /* round up to whole nb bytes */
                                   + 2 /* additional two bytes for bitstream flush */;
    return maxSymbolValue ? maxHeaderSize : FSE_NCOUNTBOUND;   /* maxSymbolValue==0 ? use default */
}
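
As a sanity check on the revised bound (my arithmetic, not from the patch): for maxSymbolValue=255 and tableLog=11, the old formula gives ((256*11)>>3) + 3 = 352 + 3 = 355 bytes, and the new one gives (256*11 + 4 + 2)/8 + 1 + 2 = 352 + 3 = 355 bytes as well. The two differ only near the rounding edges, where the old expression could undercount the extra start bits and flush bytes the new one accounts for explicitly.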
@@ -302,21 +343,11 @@ size_t FSE_writeNCount (void* buffer, size_t bufferSize,
*  FSE Compression Code
****************************************************************/

FSE_CTable* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog)
{
    size_t size;
    if (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) tableLog = FSE_TABLELOG_ABSOLUTE_MAX;
    size = FSE_CTABLE_SIZE_U32 (tableLog, maxSymbolValue) * sizeof(U32);
    return (FSE_CTable*)malloc(size);
}

void FSE_freeCTable (FSE_CTable* ct) { free(ct); }

/* provides the minimum logSize to safely represent a distribution */
static unsigned FSE_minTableLog(size_t srcSize, unsigned maxSymbolValue)
{
    U32 minBitsSrc = BIT_highbit32((U32)(srcSize)) + 1;
    U32 minBitsSymbols = BIT_highbit32(maxSymbolValue) + 2;
    U32 minBitsSrc = ZSTD_highbit32((U32)(srcSize)) + 1;
    U32 minBitsSymbols = ZSTD_highbit32(maxSymbolValue) + 2;
    U32 minBits = minBitsSrc < minBitsSymbols ? minBitsSrc : minBitsSymbols;
    assert(srcSize > 1);   /* Not supported, RLE should be used instead */
    return minBits;
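
Worked example (my numbers, for orientation): with srcSize=1000 and maxSymbolValue=255, minBitsSrc = highbit32(1000)+1 = 9+1 = 10 and minBitsSymbols = highbit32(255)+2 = 7+2 = 9, so the function returns min(10, 9) = 9: a 2^9 = 512-entry table is already enough to give each of the 256 possible symbols a non-zero normalized count.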
@@ -324,7 +355,7 @@ static unsigned FSE_minTableLog(size_t srcSize, unsigned maxSymbolValue)

unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus)
{
    U32 maxBitsSrc = BIT_highbit32((U32)(srcSize - 1)) - minus;
    U32 maxBitsSrc = ZSTD_highbit32((U32)(srcSize - 1)) - minus;
    U32 tableLog = maxTableLog;
    U32 minBits = FSE_minTableLog(srcSize, maxSymbolValue);
    assert(srcSize > 1);   /* Not supported, RLE should be used instead */
@@ -341,11 +372,10 @@ unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue)
    return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 2);
}

/* Secondary normalization method.
   To be used when primary method fails. */
static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count, size_t total, U32 maxSymbolValue)
static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count, size_t total, U32 maxSymbolValue, short lowProbCount)
{
    short const NOT_YET_ASSIGNED = -2;
    U32 s;
@@ -362,7 +392,7 @@ static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count,
            continue;
        }
        if (count[s] <= lowThreshold) {
            norm[s] = -1;
            norm[s] = lowProbCount;
            distributed++;
            total -= count[s];
            continue;
@@ -414,7 +444,7 @@ static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count,
    {   U64 const vStepLog = 62 - tableLog;
        U64 const mid = (1ULL << (vStepLog-1)) - 1;
        U64 const rStep = ((((U64)1<<vStepLog) * ToDistribute) + mid) / total;   /* scale on remaining */
        U64 const rStep = ZSTD_div64((((U64)1<<vStepLog) * ToDistribute) + mid, (U32)total);   /* scale on remaining */
        U64 tmpTotal = mid;
        for (s=0; s<=maxSymbolValue; s++) {
            if (norm[s]==NOT_YET_ASSIGNED) {
@@ -431,10 +461,9 @@ static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count,
    return 0;
}

size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog,
                           const unsigned* count, size_t total,
                           unsigned maxSymbolValue)
                           unsigned maxSymbolValue, unsigned useLowProbCount)
{
    /* Sanity checks */
    if (tableLog==0) tableLog = FSE_DEFAULT_TABLELOG;
@@ -443,8 +472,9 @@ size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog,
    if (tableLog < FSE_minTableLog(total, maxSymbolValue)) return ERROR(GENERIC);   /* Too small tableLog, compression potentially impossible */

    {   static U32 const rtbTable[] = { 0, 473195, 504333, 520860, 550000, 700000, 750000, 830000 };
        short const lowProbCount = useLowProbCount ? -1 : 1;
        U64 const scale = 62 - tableLog;
        U64 const step = ((U64)1<<62) / total;   /* <== here, one division ! */
        U64 const step = ZSTD_div64((U64)1<<62, (U32)total);   /* <== here, one division ! */
        U64 const vStep = 1ULL<<(scale-20);
        int stillToDistribute = 1<<tableLog;
        unsigned s;
@@ -456,7 +486,7 @@ size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog,
            if (count[s] == total) return 0;   /* rle special case */
            if (count[s] == 0) { normalizedCounter[s]=0; continue; }
            if (count[s] <= lowThreshold) {
                normalizedCounter[s] = -1;
                normalizedCounter[s] = lowProbCount;
                stillToDistribute--;
            } else {
                short proba = (short)((count[s]*step) >> scale);
@@ -470,7 +500,7 @@ size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog,
        }   }
        if (-stillToDistribute >= (normalizedCounter[largest] >> 1)) {
            /* corner case, need another normalization method */
            size_t const errorCode = FSE_normalizeM2(normalizedCounter, tableLog, count, total, maxSymbolValue);
            size_t const errorCode = FSE_normalizeM2(normalizedCounter, tableLog, count, total, maxSymbolValue, lowProbCount);
            if (FSE_isError(errorCode)) return errorCode;
        }
        else normalizedCounter[largest] += (short)stillToDistribute;
@@ -493,40 +523,6 @@ size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog,
    return tableLog;
}

/* fake FSE_CTable, for raw (uncompressed) input */
size_t FSE_buildCTable_raw (FSE_CTable* ct, unsigned nbBits)
{
    const unsigned tableSize = 1 << nbBits;
    const unsigned tableMask = tableSize - 1;
    const unsigned maxSymbolValue = tableMask;
    void* const ptr = ct;
    U16* const tableU16 = ( (U16*) ptr) + 2;
    void* const FSCT = ((U32*)ptr) + 1 /* header */ + (tableSize>>1);   /* assumption : tableLog >= 1 */
    FSE_symbolCompressionTransform* const symbolTT = (FSE_symbolCompressionTransform*) (FSCT);
    unsigned s;

    /* Sanity checks */
    if (nbBits < 1) return ERROR(GENERIC);   /* min size */

    /* header */
    tableU16[-2] = (U16) nbBits;
    tableU16[-1] = (U16) maxSymbolValue;

    /* Build table */
    for (s=0; s<tableSize; s++)
        tableU16[s] = (U16)(tableSize + s);

    /* Build Symbol Transformation Table */
    {   const U32 deltaNbBits = (nbBits << 16) - (1 << nbBits);
        for (s=0; s<=maxSymbolValue; s++) {
            symbolTT[s].deltaNbBits = deltaNbBits;
            symbolTT[s].deltaFindState = s-1;
    }   }

    return 0;
}

/* fake FSE_CTable, for rle input (always same symbol) */
size_t FSE_buildCTable_rle (FSE_CTable* ct, BYTE symbolValue)
{
@@ -625,74 +621,4 @@ size_t FSE_compress_usingCTable (void* dst, size_t dstSize,

size_t FSE_compressBound(size_t size) { return FSE_COMPRESSBOUND(size); }
/* FSE_compress_wksp() :
 * Same as FSE_compress2(), but using an externally allocated scratch buffer (`workSpace`).
 * `wkspSize` must be `(1<<tableLog)`.
 */
size_t FSE_compress_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize)
{
    BYTE* const ostart = (BYTE*) dst;
    BYTE* op = ostart;
    BYTE* const oend = ostart + dstSize;

    unsigned count[FSE_MAX_SYMBOL_VALUE+1];
    S16   norm[FSE_MAX_SYMBOL_VALUE+1];
    FSE_CTable* CTable = (FSE_CTable*)workSpace;
    size_t const CTableSize = FSE_CTABLE_SIZE_U32(tableLog, maxSymbolValue);
    void* scratchBuffer = (void*)(CTable + CTableSize);
    size_t const scratchBufferSize = wkspSize - (CTableSize * sizeof(FSE_CTable));

    /* init conditions */
    if (wkspSize < FSE_WKSP_SIZE_U32(tableLog, maxSymbolValue)) return ERROR(tableLog_tooLarge);
    if (srcSize <= 1) return 0;   /* Not compressible */
    if (!maxSymbolValue) maxSymbolValue = FSE_MAX_SYMBOL_VALUE;
    if (!tableLog) tableLog = FSE_DEFAULT_TABLELOG;

    /* Scan input and build symbol stats */
    {   CHECK_V_F(maxCount, HIST_count_wksp(count, &maxSymbolValue, src, srcSize, scratchBuffer, scratchBufferSize) );
        if (maxCount == srcSize) return 1;   /* only a single symbol in src : rle */
        if (maxCount == 1) return 0;         /* each symbol present maximum once => not compressible */
        if (maxCount < (srcSize >> 7)) return 0;   /* Heuristic : not compressible enough */
    }

    tableLog = FSE_optimalTableLog(tableLog, srcSize, maxSymbolValue);
    CHECK_F( FSE_normalizeCount(norm, tableLog, count, srcSize, maxSymbolValue) );

    /* Write table description header */
    {   CHECK_V_F(nc_err, FSE_writeNCount(op, oend-op, norm, maxSymbolValue, tableLog) );
        op += nc_err;
    }

    /* Compress */
    CHECK_F( FSE_buildCTable_wksp(CTable, norm, maxSymbolValue, tableLog, scratchBuffer, scratchBufferSize) );
    {   CHECK_V_F(cSize, FSE_compress_usingCTable(op, oend - op, src, srcSize, CTable) );
        if (cSize == 0) return 0;   /* not enough space for compressed data */
        op += cSize;
    }

    /* check compressibility */
    if ( (size_t)(op-ostart) >= srcSize-1 ) return 0;

    return op-ostart;
}

typedef struct {
    FSE_CTable CTable_max[FSE_CTABLE_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)];
    BYTE scratchBuffer[1 << FSE_MAX_TABLELOG];
} fseWkspMax_t;

size_t FSE_compress2 (void* dst, size_t dstCapacity, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog)
{
    fseWkspMax_t scratchBuffer;
    DEBUG_STATIC_ASSERT(sizeof(scratchBuffer) >= FSE_WKSP_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE));   /* compilation failures here means scratchBuffer is not large enough */
    if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge);
    return FSE_compress_wksp(dst, dstCapacity, src, srcSize, maxSymbolValue, tableLog, &scratchBuffer, sizeof(scratchBuffer));
}

size_t FSE_compress (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
{
    return FSE_compress2(dst, dstCapacity, src, srcSize, FSE_MAX_SYMBOL_VALUE, FSE_DEFAULT_TABLELOG);
}

#endif /* FSE_COMMONDEFS_ONLY */
@@ -1,6 +1,6 @@
/* ******************************************************************
 * FSE : Finite State Entropy decoder
 * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 *
 * You can contact the author at :
 * - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
@@ -16,13 +16,15 @@
/* **************************************************************
*  Includes
****************************************************************/
#include <stdlib.h>     /* malloc, free, qsort */
#include <string.h>     /* memcpy, memset */
#include "debug.h"      /* assert */
#include "bitstream.h"
#include "compiler.h"
#define FSE_STATIC_LINKING_ONLY
#include "fse.h"
#include "error_private.h"
#define ZSTD_DEPS_NEED_MALLOC
#include "zstd_deps.h"
#include "bits.h"       /* ZSTD_highbit32 */

/* **************************************************************
@@ -54,30 +56,19 @@
#define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y)
#define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y)

/* Function templates */
FSE_DTable* FSE_createDTable (unsigned tableLog)
{
    if (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) tableLog = FSE_TABLELOG_ABSOLUTE_MAX;
    return (FSE_DTable*)malloc( FSE_DTABLE_SIZE_U32(tableLog) * sizeof (U32) );
}

void FSE_freeDTable (FSE_DTable* dt)
{
    free(dt);
}

size_t FSE_buildDTable(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
static size_t FSE_buildDTable_internal(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize)
{
    void* const tdPtr = dt+1;   /* because *dt is unsigned, 32-bits aligned on 32-bits */
    FSE_DECODE_TYPE* const tableDecode = (FSE_DECODE_TYPE*) (tdPtr);
    U16 symbolNext[FSE_MAX_SYMBOL_VALUE+1];
    U16* symbolNext = (U16*)workSpace;
    BYTE* spread = (BYTE*)(symbolNext + maxSymbolValue + 1);

    U32 const maxSV1 = maxSymbolValue + 1;
    U32 const tableSize = 1 << tableLog;
    U32 highThreshold = tableSize-1;

    /* Sanity Checks */
    if (FSE_BUILD_DTABLE_WKSP_SIZE(tableLog, maxSymbolValue) > wkspSize) return ERROR(maxSymbolValue_tooLarge);
    if (maxSymbolValue > FSE_MAX_SYMBOL_VALUE) return ERROR(maxSymbolValue_tooLarge);
    if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge);
@@ -95,11 +86,57 @@ size_t FSE_buildDTable(FSE_DTable* dt, const short* normalizedCounter, unsigned
            if (normalizedCounter[s] >= largeLimit) DTableH.fastMode=0;
            symbolNext[s] = normalizedCounter[s];
    }   }   }
    memcpy(dt, &DTableH, sizeof(DTableH));
    ZSTD_memcpy(dt, &DTableH, sizeof(DTableH));
    }

    /* Spread symbols */
    {   U32 const tableMask = tableSize-1;
    if (highThreshold == tableSize - 1) {
        size_t const tableMask = tableSize-1;
        size_t const step = FSE_TABLESTEP(tableSize);
        /* First lay down the symbols in order.
         * We use a uint64_t to lay down 8 bytes at a time. This reduces branch
         * misses since small blocks generally have small table logs, so nearly
         * all symbols have counts <= 8. We ensure we have 8 bytes at the end of
         * our buffer to handle the over-write.
         */
        {   U64 const add = 0x0101010101010101ull;
            size_t pos = 0;
            U64 sv = 0;
            U32 s;
            for (s=0; s<maxSV1; ++s, sv += add) {
                int i;
                int const n = normalizedCounter[s];
                MEM_write64(spread + pos, sv);
                for (i = 8; i < n; i += 8) {
                    MEM_write64(spread + pos + i, sv);
                }
                pos += n;
            }
        }
        /* Now we spread those positions across the table.
         * The benefit of doing it in two stages is that we avoid the
         * variable size inner loop, which caused lots of branch misses.
         * Now we can run through all the positions without any branch misses.
         * We unroll the loop twice, since that is what empirically worked best.
         */
        {   size_t position = 0;
            size_t s;
            size_t const unroll = 2;
            assert(tableSize % unroll == 0);   /* FSE_MIN_TABLELOG is 5 */
            for (s = 0; s < (size_t)tableSize; s += unroll) {
                size_t u;
                for (u = 0; u < unroll; ++u) {
                    size_t const uPosition = (position + (u * step)) & tableMask;
                    tableDecode[uPosition].symbol = spread[s + u];
                }
                position = (position + (unroll * step)) & tableMask;
            }
            assert(position == 0);
        }
    } else {
        U32 const tableMask = tableSize-1;
        U32 const step = FSE_TABLESTEP(tableSize);
        U32 s, position = 0;
        for (s=0; s<maxSV1; s++) {
@@ -117,62 +154,24 @@ size_t FSE_buildDTable(FSE_DTable* dt, const short* normalizedCounter, unsigned
    for (u=0; u<tableSize; u++) {
        FSE_FUNCTION_TYPE const symbol = (FSE_FUNCTION_TYPE)(tableDecode[u].symbol);
        U32 const nextState = symbolNext[symbol]++;
        tableDecode[u].nbBits = (BYTE) (tableLog - BIT_highbit32(nextState) );
        tableDecode[u].nbBits = (BYTE) (tableLog - ZSTD_highbit32(nextState) );
        tableDecode[u].newState = (U16) ( (nextState << tableDecode[u].nbBits) - tableSize);
    }   }

    return 0;
}

size_t FSE_buildDTable_wksp(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize)
{
    return FSE_buildDTable_internal(dt, normalizedCounter, maxSymbolValue, tableLog, workSpace, wkspSize);
}

#ifndef FSE_COMMONDEFS_ONLY

/*-*******************************************************
*  Decompression (Byte symbols)
*********************************************************/

size_t FSE_buildDTable_rle (FSE_DTable* dt, BYTE symbolValue)
{
    void* ptr = dt;
    FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr;
    void* dPtr = dt + 1;
    FSE_decode_t* const cell = (FSE_decode_t*)dPtr;

    DTableH->tableLog = 0;
    DTableH->fastMode = 0;

    cell->newState = 0;
    cell->symbol = symbolValue;
    cell->nbBits = 0;

    return 0;
}

size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits)
{
    void* ptr = dt;
    FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr;
    void* dPtr = dt + 1;
    FSE_decode_t* const dinfo = (FSE_decode_t*)dPtr;
    const unsigned tableSize = 1 << nbBits;
    const unsigned tableMask = tableSize - 1;
    const unsigned maxSV1 = tableMask+1;
    unsigned s;

    /* Sanity checks */
    if (nbBits < 1) return ERROR(GENERIC);   /* min size */

    /* Build Decoding Table */
    DTableH->tableLog = (U16)nbBits;
    DTableH->fastMode = 1;
    for (s=0; s<maxSV1; s++) {
        dinfo[s].newState = 0;
        dinfo[s].symbol = (BYTE)s;
        dinfo[s].nbBits = (BYTE)nbBits;
    }

    return 0;
}

FORCE_INLINE_TEMPLATE size_t FSE_decompress_usingDTable_generic(
          void* dst, size_t maxDstSize,
@@ -236,51 +235,77 @@ FORCE_INLINE_TEMPLATE size_t FSE_decompress_usingDTable_generic(
    return op-ostart;
}

typedef struct {
    short ncount[FSE_MAX_SYMBOL_VALUE + 1];
    FSE_DTable dtable[1];   /* Dynamically sized */
} FSE_DecompressWksp;

size_t FSE_decompress_usingDTable(void* dst, size_t originalSize,
                                  const void* cSrc, size_t cSrcSize,
                                  const FSE_DTable* dt)
{
    const void* ptr = dt;
    const FSE_DTableHeader* DTableH = (const FSE_DTableHeader*)ptr;
    const U32 fastMode = DTableH->fastMode;

    /* select fast mode (static) */
    if (fastMode) return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 1);
    return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 0);
}

size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, FSE_DTable* workSpace, unsigned maxLog)
FORCE_INLINE_TEMPLATE size_t FSE_decompress_wksp_body(
        void* dst, size_t dstCapacity,
        const void* cSrc, size_t cSrcSize,
        unsigned maxLog, void* workSpace, size_t wkspSize,
        int bmi2)
{
    const BYTE* const istart = (const BYTE*)cSrc;
    const BYTE* ip = istart;
    short counting[FSE_MAX_SYMBOL_VALUE+1];
    unsigned tableLog;
    unsigned maxSymbolValue = FSE_MAX_SYMBOL_VALUE;
    FSE_DecompressWksp* const wksp = (FSE_DecompressWksp*)workSpace;

    DEBUG_STATIC_ASSERT((FSE_MAX_SYMBOL_VALUE + 1) % 2 == 0);
    if (wkspSize < sizeof(*wksp)) return ERROR(GENERIC);

    /* normal FSE decoding mode */
    size_t const NCountLength = FSE_readNCount (counting, &maxSymbolValue, &tableLog, istart, cSrcSize);
    if (FSE_isError(NCountLength)) return NCountLength;
    /* if (NCountLength >= cSrcSize) return ERROR(srcSize_wrong); */   /* too small input size; supposed to be already checked in NCountLength, only remaining case : NCountLength==cSrcSize */
    if (tableLog > maxLog) return ERROR(tableLog_tooLarge);
    ip += NCountLength;
    cSrcSize -= NCountLength;
    {
        size_t const NCountLength = FSE_readNCount_bmi2(wksp->ncount, &maxSymbolValue, &tableLog, istart, cSrcSize, bmi2);
        if (FSE_isError(NCountLength)) return NCountLength;
        if (tableLog > maxLog) return ERROR(tableLog_tooLarge);
        assert(NCountLength <= cSrcSize);
        ip += NCountLength;
        cSrcSize -= NCountLength;
    }

    CHECK_F( FSE_buildDTable (workSpace, counting, maxSymbolValue, tableLog) );
    if (FSE_DECOMPRESS_WKSP_SIZE(tableLog, maxSymbolValue) > wkspSize) return ERROR(tableLog_tooLarge);
    assert(sizeof(*wksp) + FSE_DTABLE_SIZE(tableLog) <= wkspSize);
    workSpace = (BYTE*)workSpace + sizeof(*wksp) + FSE_DTABLE_SIZE(tableLog);
    wkspSize -= sizeof(*wksp) + FSE_DTABLE_SIZE(tableLog);

    return FSE_decompress_usingDTable (dst, dstCapacity, ip, cSrcSize, workSpace);   /* always return, even if it is an error code */
}

    CHECK_F( FSE_buildDTable_internal(wksp->dtable, wksp->ncount, maxSymbolValue, tableLog, workSpace, wkspSize) );

    {
        const void* ptr = wksp->dtable;
        const FSE_DTableHeader* DTableH = (const FSE_DTableHeader*)ptr;
        const U32 fastMode = DTableH->fastMode;

typedef FSE_DTable DTable_max_t[FSE_DTABLE_SIZE_U32(FSE_MAX_TABLELOG)];
        /* select fast mode (static) */
        if (fastMode) return FSE_decompress_usingDTable_generic(dst, dstCapacity, ip, cSrcSize, wksp->dtable, 1);
        return FSE_decompress_usingDTable_generic(dst, dstCapacity, ip, cSrcSize, wksp->dtable, 0);
    }
}

size_t FSE_decompress(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize)
/* Avoids the FORCE_INLINE of the _body() function. */
static size_t FSE_decompress_wksp_body_default(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize)
{
    DTable_max_t dt;   /* Static analyzer seems unable to understand this table will be properly initialized later */
    return FSE_decompress_wksp(dst, dstCapacity, cSrc, cSrcSize, dt, FSE_MAX_TABLELOG);
    return FSE_decompress_wksp_body(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize, 0);
}

#if DYNAMIC_BMI2
BMI2_TARGET_ATTRIBUTE static size_t FSE_decompress_wksp_body_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize)
{
    return FSE_decompress_wksp_body(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize, 1);
}
#endif

size_t FSE_decompress_wksp_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize, int bmi2)
{
#if DYNAMIC_BMI2
    if (bmi2) {
        return FSE_decompress_wksp_body_bmi2(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize);
    }
#endif
    (void)bmi2;
    return FSE_decompress_wksp_body_default(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize);
}

#endif /* FSE_COMMONDEFS_ONLY */
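
For orientation, a hedged sketch of how a caller drives the new workspace-based decoder (dst/cSrc buffers and their sizes are assumed to exist; the FSE_MAX_* constants are the library's):

    unsigned wksp[FSE_DECOMPRESS_WKSP_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)];
    size_t const r = FSE_decompress_wksp_bmi2(dst, dstCapacity, cSrc, cSrcSize,
                                              FSE_MAX_TABLELOG, wksp, sizeof(wksp),
                                              /* bmi2 */ 0);   /* pass 1 only when the CPU supports BMI2 */
    if (FSE_isError(r)) { /* handle error */ }

Passing 0 for bmi2 is always safe; the DYNAMIC_BMI2 dispatch above simply falls through to the default body.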
@@ -1,7 +1,7 @@
/* ******************************************************************
 * hist : Histogram functions
 * part of Finite State Entropy project
 * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 *
 * You can contact the author at :
 * - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
@@ -34,7 +34,7 @@ unsigned HIST_count_simple(unsigned* count, unsigned* maxSymbolValuePtr,
    unsigned maxSymbolValue = *maxSymbolValuePtr;
    unsigned largestCount=0;

    memset(count, 0, (maxSymbolValue+1) * sizeof(*count));
    ZSTD_memset(count, 0, (maxSymbolValue+1) * sizeof(*count));
    if (srcSize==0) { *maxSymbolValuePtr = 0; return 0; }

    while (ip<end) {
@@ -60,9 +60,9 @@ typedef enum { trustInput, checkMaxSymbolValue } HIST_checkInput_e;
 * this design makes better use of OoO cpus,
 * and is noticeably faster when some values are heavily repeated.
 * But it needs some additional workspace for intermediate tables.
 * `workSpace` size must be a table of size >= HIST_WKSP_SIZE_U32.
 * `workSpace` must be a U32 table of size >= HIST_WKSP_SIZE_U32.
 * @return : largest histogram frequency,
 *           or an error code (notably when histogram would be larger than *maxSymbolValuePtr). */
 *           or an error code (notably when histogram's alphabet is larger than *maxSymbolValuePtr) */
static size_t HIST_count_parallel_wksp(
                                unsigned* count, unsigned* maxSymbolValuePtr,
                                const void* source, size_t sourceSize,
@@ -71,22 +71,21 @@ static size_t HIST_count_parallel_wksp(
{
    const BYTE* ip = (const BYTE*)source;
    const BYTE* const iend = ip+sourceSize;
    unsigned maxSymbolValue = *maxSymbolValuePtr;
    size_t const countSize = (*maxSymbolValuePtr + 1) * sizeof(*count);
    unsigned max=0;
    U32* const Counting1 = workSpace;
    U32* const Counting2 = Counting1 + 256;
    U32* const Counting3 = Counting2 + 256;
    U32* const Counting4 = Counting3 + 256;

    memset(workSpace, 0, 4*256*sizeof(unsigned));

    /* safety checks */
    assert(*maxSymbolValuePtr <= 255);
    if (!sourceSize) {
        memset(count, 0, maxSymbolValue + 1);
        ZSTD_memset(count, 0, countSize);
        *maxSymbolValuePtr = 0;
        return 0;
    }
    if (!maxSymbolValue) maxSymbolValue = 255;   /* 0 == default */
    ZSTD_memset(workSpace, 0, 4*256*sizeof(unsigned));

    /* by stripes of 16 bytes */
    {   U32 cached = MEM_read32(ip); ip += 4;
@@ -118,21 +117,18 @@ static size_t HIST_count_parallel_wksp(
    /* finish last symbols */
    while (ip<iend) Counting1[*ip++]++;

    if (check) {   /* verify stats will fit into destination table */
        U32 s; for (s=255; s>maxSymbolValue; s--) {
            Counting1[s] += Counting2[s] + Counting3[s] + Counting4[s];
            if (Counting1[s]) return ERROR(maxSymbolValue_tooSmall);
    }   }

    {   U32 s;
        if (maxSymbolValue > 255) maxSymbolValue = 255;
        for (s=0; s<=maxSymbolValue; s++) {
            count[s] = Counting1[s] + Counting2[s] + Counting3[s] + Counting4[s];
            if (count[s] > max) max = count[s];
        for (s=0; s<256; s++) {
            Counting1[s] += Counting2[s] + Counting3[s] + Counting4[s];
            if (Counting1[s] > max) max = Counting1[s];
    }   }

    while (!count[maxSymbolValue]) maxSymbolValue--;
    *maxSymbolValuePtr = maxSymbolValue;
    {   unsigned maxSymbolValue = 255;
        while (!Counting1[maxSymbolValue]) maxSymbolValue--;
        if (check && maxSymbolValue > *maxSymbolValuePtr) return ERROR(maxSymbolValue_tooSmall);
        *maxSymbolValuePtr = maxSymbolValue;
        ZSTD_memmove(count, Counting1, countSize);   /* in case count & Counting1 are overlapping */
    }
    return (size_t)max;
}
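
The four separate counting tables are the heart of the speedup: consecutive bytes update different tables, so a run of identical bytes no longer serializes on a single counter's store-to-load dependency. A minimal standalone sketch of the idea (not the patched function, which additionally reads 16-byte stripes and does the safety bookkeeping above):

    #include <stddef.h>

    static void hist4(unsigned count[256], const unsigned char* p, size_t n)
    {
        unsigned c1[256] = {0}, c2[256] = {0}, c3[256] = {0}, c4[256] = {0};
        size_t i = 0;
        for (; i + 4 <= n; i += 4) {   /* 4 independent lanes keep an OoO core busy */
            c1[p[i]]++; c2[p[i+1]]++; c3[p[i+2]]++; c4[p[i+3]]++;
        }
        for (; i < n; i++) c1[p[i]]++;   /* tail bytes */
        for (i = 0; i < 256; i++) count[i] = c1[i] + c2[i] + c3[i] + c4[i];
    }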
@@ -152,14 +148,6 @@ size_t HIST_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
    return HIST_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, trustInput, (U32*)workSpace);
}

/* fast variant (unsafe : won't check if src contains values beyond count[] limit) */
size_t HIST_countFast(unsigned* count, unsigned* maxSymbolValuePtr,
                      const void* source, size_t sourceSize)
{
    unsigned tmpCounters[HIST_WKSP_SIZE_U32];
    return HIST_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, tmpCounters, sizeof(tmpCounters));
}

/* HIST_count_wksp() :
 * Same as HIST_count(), but using an externally provided scratch buffer.
 * `workSpace` size must be table of >= HIST_WKSP_SIZE_U32 unsigned */
@@ -175,9 +163,19 @@ size_t HIST_count_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
    return HIST_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, workSpace, workSpaceSize);
}

#ifndef ZSTD_NO_UNUSED_FUNCTIONS
/* fast variant (unsafe : won't check if src contains values beyond count[] limit) */
size_t HIST_countFast(unsigned* count, unsigned* maxSymbolValuePtr,
                      const void* source, size_t sourceSize)
{
    unsigned tmpCounters[HIST_WKSP_SIZE_U32];
    return HIST_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, tmpCounters, sizeof(tmpCounters));
}

size_t HIST_count(unsigned* count, unsigned* maxSymbolValuePtr,
                  const void* src, size_t srcSize)
{
    unsigned tmpCounters[HIST_WKSP_SIZE_U32];
    return HIST_count_wksp(count, maxSymbolValuePtr, src, srcSize, tmpCounters, sizeof(tmpCounters));
}
#endif
@@ -1,7 +1,7 @@
/* ******************************************************************
 * hist : Histogram functions
 * part of Finite State Entropy project
 * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 *
 * You can contact the author at :
 * - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
@@ -14,7 +14,7 @@
****************************************************************** */

/* --- dependencies --- */
#include <stddef.h>      /* size_t */
#include "zstd_deps.h"   /* size_t */

/* --- simple histogram functions --- */
@@ -1,7 +1,7 @@ | |||
/* ****************************************************************** | |||
* huff0 huffman codec, | |||
* part of Finite State Entropy library | |||
* Copyright (c) 2013-2020, Yann Collet, Facebook, Inc. | |||
* Copyright (c) Meta Platforms, Inc. and affiliates. | |||
* | |||
* You can contact the author at : | |||
* - Source repository : https://github.com/Cyan4973/FiniteStateEntropy | |||
@@ -20,105 +20,30 @@ extern "C" { | |||
#define HUF_H_298734234 | |||
/* *** Dependencies *** */ | |||
#include <stddef.h> /* size_t */ | |||
/* *** library symbols visibility *** */ | |||
/* Note : when linking with -fvisibility=hidden on gcc, or by default on Visual, | |||
* HUF symbols remain "private" (internal symbols for library only). | |||
* Set macro FSE_DLL_EXPORT to 1 if you want HUF symbols visible on DLL interface */ | |||
#if defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1) && defined(__GNUC__) && (__GNUC__ >= 4) | |||
# define HUF_PUBLIC_API __attribute__ ((visibility ("default"))) | |||
#elif defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1) /* Visual expected */ | |||
# define HUF_PUBLIC_API __declspec(dllexport) | |||
#elif defined(FSE_DLL_IMPORT) && (FSE_DLL_IMPORT==1) | |||
# define HUF_PUBLIC_API __declspec(dllimport) /* not required, just to generate faster code (saves a function pointer load from IAT and an indirect jump) */ | |||
#else | |||
# define HUF_PUBLIC_API | |||
#endif | |||
/* ========================== */ | |||
/* *** simple functions *** */ | |||
/* ========================== */ | |||
/** HUF_compress() : | |||
* Compress content from buffer 'src', of size 'srcSize', into buffer 'dst'. | |||
* 'dst' buffer must be already allocated. | |||
* Compression runs faster if `dstCapacity` >= HUF_compressBound(srcSize). | |||
* `srcSize` must be <= `HUF_BLOCKSIZE_MAX` == 128 KB. | |||
* @return : size of compressed data (<= `dstCapacity`). | |||
* Special values : if return == 0, srcData is not compressible => Nothing is stored within dst !!! | |||
* if HUF_isError(return), compression failed (more details using HUF_getErrorName()) | |||
*/ | |||
HUF_PUBLIC_API size_t HUF_compress(void* dst, size_t dstCapacity, | |||
const void* src, size_t srcSize); | |||
/** HUF_decompress() : | |||
* Decompress HUF data from buffer 'cSrc', of size 'cSrcSize', | |||
* into already allocated buffer 'dst', of minimum size 'dstSize'. | |||
* `originalSize` : **must** be the ***exact*** size of original (uncompressed) data. | |||
* Note : in contrast with FSE, HUF_decompress can regenerate | |||
* RLE (cSrcSize==1) and uncompressed (cSrcSize==dstSize) data, | |||
* because it knows size to regenerate (originalSize). | |||
* @return : size of regenerated data (== originalSize), | |||
* or an error code, which can be tested using HUF_isError() | |||
*/ | |||
HUF_PUBLIC_API size_t HUF_decompress(void* dst, size_t originalSize, | |||
const void* cSrc, size_t cSrcSize); | |||
#include "zstd_deps.h" /* size_t */ | |||
#include "mem.h" /* U32 */ | |||
#define FSE_STATIC_LINKING_ONLY | |||
#include "fse.h" | |||
/* *** Tool functions *** */ | |||
#define HUF_BLOCKSIZE_MAX (128 * 1024) /**< maximum input size for a single block compressed with HUF_compress */ | |||
HUF_PUBLIC_API size_t HUF_compressBound(size_t size); /**< maximum compressed size (worst case) */ | |||
#define HUF_BLOCKSIZE_MAX (128 * 1024) /**< maximum input size for a single block compressed with HUF_compress */ | |||
size_t HUF_compressBound(size_t size); /**< maximum compressed size (worst case) */ | |||
/* Error Management */ | |||
HUF_PUBLIC_API unsigned HUF_isError(size_t code); /**< tells if a return value is an error code */ | |||
HUF_PUBLIC_API const char* HUF_getErrorName(size_t code); /**< provides error code string (useful for debugging) */ | |||
/* *** Advanced function *** */ | |||
/** HUF_compress2() : | |||
* Same as HUF_compress(), but offers control over `maxSymbolValue` and `tableLog`. | |||
* `maxSymbolValue` must be <= HUF_SYMBOLVALUE_MAX . | |||
* `tableLog` must be `<= HUF_TABLELOG_MAX` . */ | |||
HUF_PUBLIC_API size_t HUF_compress2 (void* dst, size_t dstCapacity, | |||
const void* src, size_t srcSize, | |||
unsigned maxSymbolValue, unsigned tableLog); | |||
/** HUF_compress4X_wksp() : | |||
* Same as HUF_compress2(), but uses externally allocated `workSpace`. | |||
* `workspace` must have minimum alignment of 4, and be at least as large as HUF_WORKSPACE_SIZE */ | |||
#define HUF_WORKSPACE_SIZE ((6 << 10) + 256) | |||
#define HUF_WORKSPACE_SIZE_U32 (HUF_WORKSPACE_SIZE / sizeof(U32)) | |||
HUF_PUBLIC_API size_t HUF_compress4X_wksp (void* dst, size_t dstCapacity, | |||
const void* src, size_t srcSize, | |||
unsigned maxSymbolValue, unsigned tableLog, | |||
void* workSpace, size_t wkspSize); | |||
unsigned HUF_isError(size_t code); /**< tells if a return value is an error code */ | |||
const char* HUF_getErrorName(size_t code); /**< provides error code string (useful for debugging) */ | |||
#endif /* HUF_H_298734234 */ | |||
/* ****************************************************************** | |||
* WARNING !! | |||
* The following section contains advanced and experimental definitions | |||
* which shall never be used in the context of a dynamic library, | |||
* because they are not guaranteed to remain stable in the future. | |||
* Only consider them in association with static linking. | |||
* *****************************************************************/ | |||
#if defined(HUF_STATIC_LINKING_ONLY) && !defined(HUF_H_HUF_STATIC_LINKING_ONLY) | |||
#define HUF_H_HUF_STATIC_LINKING_ONLY | |||
/* *** Dependencies *** */ | |||
#include "mem.h" /* U32 */ | |||
#define HUF_WORKSPACE_SIZE ((8 << 10) + 512 /* sorting scratch space */) | |||
#define HUF_WORKSPACE_SIZE_U64 (HUF_WORKSPACE_SIZE / sizeof(U64)) | |||
/* *** Constants *** */ | |||
#define HUF_TABLELOG_MAX 12 /* max runtime value of tableLog (due to static allocation); can be modified up to HUF_ABSOLUTEMAX_TABLELOG */ | |||
#define HUF_TABLELOG_MAX 12 /* max runtime value of tableLog (due to static allocation); can be modified up to HUF_TABLELOG_ABSOLUTEMAX */ | |||
#define HUF_TABLELOG_DEFAULT 11 /* default tableLog value when none specified */ | |||
#define HUF_SYMBOLVALUE_MAX 255 | |||
#define HUF_TABLELOG_ABSOLUTEMAX 15 /* absolute limit of HUF_MAX_TABLELOG. Beyond that value, code does not work */ | |||
#define HUF_TABLELOG_ABSOLUTEMAX 12 /* absolute limit of HUF_MAX_TABLELOG. Beyond that value, code does not work */ | |||
#if (HUF_TABLELOG_MAX > HUF_TABLELOG_ABSOLUTEMAX) | |||
# error "HUF_TABLELOG_MAX is too large !" | |||
#endif | |||
@@ -133,12 +58,12 @@ HUF_PUBLIC_API size_t HUF_compress4X_wksp (void* dst, size_t dstCapacity, | |||
#define HUF_COMPRESSBOUND(size) (HUF_CTABLEBOUND + HUF_BLOCKBOUND(size)) /* Macro version, useful for static allocation */ | |||
/* static allocation of HUF's Compression Table */ | |||
#define HUF_CTABLE_SIZE_U32(maxSymbolValue) ((maxSymbolValue)+1) /* Use tables of U32, for proper alignment */ | |||
#define HUF_CTABLE_SIZE(maxSymbolValue) (HUF_CTABLE_SIZE_U32(maxSymbolValue) * sizeof(U32)) | |||
/* this is a private definition, just exposed for allocation and strict aliasing purpose. never EVER access its members directly */ | |||
typedef size_t HUF_CElt; /* consider it an incomplete type */ | |||
#define HUF_CTABLE_SIZE_ST(maxSymbolValue) ((maxSymbolValue)+2) /* Use tables of size_t, for proper alignment */ | |||
#define HUF_CTABLE_SIZE(maxSymbolValue) (HUF_CTABLE_SIZE_ST(maxSymbolValue) * sizeof(size_t)) | |||
#define HUF_CREATE_STATIC_CTABLE(name, maxSymbolValue) \ | |||
U32 name##hb[HUF_CTABLE_SIZE_U32(maxSymbolValue)]; \ | |||
void* name##hv = &(name##hb); \ | |||
HUF_CElt* name = (HUF_CElt*)(name##hv) /* no final ; */ | |||
HUF_CElt name[HUF_CTABLE_SIZE_ST(maxSymbolValue)] /* no final ; */ | |||
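The reworked macro expands to a single HUF_CElt array declaration; a hedged usage sketch (the table name is illustrative):

/* Stack-allocates a CTable of opaque size_t cells covering the full
 * symbol range; note the macro deliberately omits the trailing semicolon. */
HUF_CREATE_STATIC_CTABLE(exampleCTable, HUF_SYMBOLVALUE_MAX);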
/* static allocation of HUF's DTable */ | |||
typedef U32 HUF_DTable; | |||
@@ -152,25 +77,49 @@ typedef U32 HUF_DTable; | |||
/* **************************************** | |||
* Advanced decompression functions | |||
******************************************/ | |||
size_t HUF_decompress4X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */ | |||
#ifndef HUF_FORCE_DECOMPRESS_X1 | |||
size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */ | |||
#endif | |||
size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< decodes RLE and uncompressed */ | |||
size_t HUF_decompress4X_hufOnly(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< considers RLE and uncompressed as errors */ | |||
size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< considers RLE and uncompressed as errors */ | |||
size_t HUF_decompress4X1_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */ | |||
size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< single-symbol decoder */ | |||
#ifndef HUF_FORCE_DECOMPRESS_X1 | |||
size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */ | |||
size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< double-symbols decoder */ | |||
#endif | |||
/** | |||
* Huffman flags bitset. | |||
* For all flags, 0 is the default value. | |||
*/ | |||
typedef enum { | |||
/** | |||
* If compiled with DYNAMIC_BMI2: Set flag only if the CPU supports BMI2 at runtime. | |||
* Otherwise: Ignored. | |||
*/ | |||
HUF_flags_bmi2 = (1 << 0), | |||
/** | |||
* If set: Test possible table depths to find the one that produces the smallest header + encoded size. | |||
* If unset: Use heuristic to find the table depth. | |||
*/ | |||
HUF_flags_optimalDepth = (1 << 1), | |||
/** | |||
* If set: If the previous table can encode the input, always reuse the previous table. | |||
* If unset: If the previous table can encode the input, reuse the previous table if it results in a smaller output. | |||
*/ | |||
HUF_flags_preferRepeat = (1 << 2), | |||
/** | |||
* If set: Sample the input and check if the sample is uncompressible; if so, don't attempt to compress. | |||
* If unset: Always histogram the entire input. | |||
*/ | |||
HUF_flags_suspectUncompressible = (1 << 3), | |||
/** | |||
* If set: Don't use assembly implementations | |||
* If unset: Allow using assembly implementations | |||
*/ | |||
HUF_flags_disableAsm = (1 << 4), | |||
/** | |||
* If set: Don't use the fast decoding loop, always use the fallback decoding loop. | |||
* If unset: Use the fast decoding loop when possible. | |||
*/ | |||
HUF_flags_disableFast = (1 << 5) | |||
} HUF_flags_e; | |||
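Since every flag defaults to 0, call sites build the flags argument by OR-ing enumerators together; a minimal sketch (the variable name is illustrative):

/* Request BMI2 dispatch (honored only when compiled with DYNAMIC_BMI2)
 * plus the incompressibility sampling heuristic; 0 keeps all defaults. */
int const hufFlags = HUF_flags_bmi2 | HUF_flags_suspectUncompressible;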
/* **************************************** | |||
* HUF detailed API | |||
* ****************************************/ | |||
#define HUF_OPTIMAL_DEPTH_THRESHOLD ZSTD_btultra | |||
/*! HUF_compress() does the following: | |||
* 1. count symbol occurrence from source[] into table count[] using FSE_count() (exposed within "fse.h") | |||
@@ -183,11 +132,12 @@ size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, | |||
* For example, it's possible to compress several blocks using the same 'CTable', | |||
* or to save and regenerate 'CTable' using external methods. | |||
*/ | |||
unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue); | |||
typedef struct HUF_CElt_s HUF_CElt; /* incomplete type */ | |||
size_t HUF_buildCTable (HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue, unsigned maxNbBits); /* @return : maxNbBits; CTable and count can overlap. In which case, CTable will overwrite count content */ | |||
size_t HUF_writeCTable (void* dst, size_t maxDstSize, const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog); | |||
size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable); | |||
unsigned HUF_minTableLog(unsigned symbolCardinality); | |||
unsigned HUF_cardinality(const unsigned* count, unsigned maxSymbolValue); | |||
unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, void* workSpace, | |||
size_t wkspSize, HUF_CElt* table, const unsigned* count, int flags); /* table is used as scratch space for building and testing tables, not a return value */ | |||
size_t HUF_writeCTable_wksp(void* dst, size_t maxDstSize, const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog, void* workspace, size_t workspaceSize); | |||
size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int flags); | |||
size_t HUF_estimateCompressedSize(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue); | |||
int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue); | |||
@@ -196,22 +146,24 @@ typedef enum { | |||
HUF_repeat_check, /**< Can use the previous table but it must be checked. Note : The previous table must have been constructed by HUF_compress{1, 4}X_repeat */ | |||
HUF_repeat_valid /**< Can use the previous table and it is assumed to be valid */ | |||
} HUF_repeat; | |||
/** HUF_compress4X_repeat() : | |||
* Same as HUF_compress4X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none. | |||
* If it uses hufTable it does not modify hufTable or repeat. | |||
* If it doesn't, it sets *repeat = HUF_repeat_none, and it sets hufTable to the table used. | |||
* If preferRepeat then the old table will always be used if valid. */ | |||
* If preferRepeat then the old table will always be used if valid. | |||
* If suspectUncompressible then some sampling checks will be run to potentially skip huffman coding */ | |||
size_t HUF_compress4X_repeat(void* dst, size_t dstSize, | |||
const void* src, size_t srcSize, | |||
unsigned maxSymbolValue, unsigned tableLog, | |||
void* workSpace, size_t wkspSize, /**< `workSpace` must be aligned on 4-bytes boundaries, `wkspSize` must be >= HUF_WORKSPACE_SIZE */ | |||
HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2); | |||
HUF_CElt* hufTable, HUF_repeat* repeat, int flags); | |||
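A minimal sketch of the repeat-table flow under the new flags-based signature, assuming caller-provided buffers and a workspace of at least HUF_WORKSPACE_SIZE bytes; for actual table reuse, ctable and repeat would persist across blocks rather than living on the stack:

static size_t exampleCompressBlock(void* dst, size_t dstCapacity,
                                   const void* src, size_t srcSize,
                                   void* wksp /* >= HUF_WORKSPACE_SIZE bytes */)
{
    HUF_CREATE_STATIC_CTABLE(ctable, HUF_SYMBOLVALUE_MAX);
    HUF_repeat repeat = HUF_repeat_none;  /* no previous table yet */
    /* result is a compressed size, 0 (incompressible), or an error code
     * to be tested with HUF_isError() */
    return HUF_compress4X_repeat(dst, dstCapacity, src, srcSize,
                                 HUF_SYMBOLVALUE_MAX, HUF_TABLELOG_DEFAULT,
                                 wksp, HUF_WORKSPACE_SIZE,
                                 ctable, &repeat, /*flags*/ 0);
}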
/** HUF_buildCTable_wksp() : | |||
* Same as HUF_buildCTable(), but using externally allocated scratch buffer. | |||
* `workSpace` must be aligned on 4-bytes boundaries, and its size must be >= HUF_CTABLE_WORKSPACE_SIZE. | |||
*/ | |||
#define HUF_CTABLE_WORKSPACE_SIZE_U32 (2*HUF_SYMBOLVALUE_MAX +1 +1) | |||
#define HUF_CTABLE_WORKSPACE_SIZE_U32 ((4 * (HUF_SYMBOLVALUE_MAX + 1)) + 192) | |||
#define HUF_CTABLE_WORKSPACE_SIZE (HUF_CTABLE_WORKSPACE_SIZE_U32 * sizeof(unsigned)) | |||
size_t HUF_buildCTable_wksp (HUF_CElt* tree, | |||
const unsigned* count, U32 maxSymbolValue, U32 maxNbBits, | |||
@@ -226,15 +178,27 @@ size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, | |||
U32* rankStats, U32* nbSymbolsPtr, U32* tableLogPtr, | |||
const void* src, size_t srcSize); | |||
/*! HUF_readStats_wksp() : | |||
* Same as HUF_readStats() but takes an external workspace which must be | |||
* 4-byte aligned and its size must be >= HUF_READ_STATS_WORKSPACE_SIZE. | |||
* The trailing flags parameter carries the HUF flags bitset; include HUF_flags_bmi2 when the CPU supports BMI2. | |||
*/ | |||
#define HUF_READ_STATS_WORKSPACE_SIZE_U32 FSE_DECOMPRESS_WKSP_SIZE_U32(6, HUF_TABLELOG_MAX-1) | |||
#define HUF_READ_STATS_WORKSPACE_SIZE (HUF_READ_STATS_WORKSPACE_SIZE_U32 * sizeof(unsigned)) | |||
size_t HUF_readStats_wksp(BYTE* huffWeight, size_t hwSize, | |||
U32* rankStats, U32* nbSymbolsPtr, U32* tableLogPtr, | |||
const void* src, size_t srcSize, | |||
void* workspace, size_t wkspSize, | |||
int flags); | |||
/** HUF_readCTable() : | |||
* Loading a CTable saved with HUF_writeCTable() */ | |||
size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize, unsigned *hasZeroWeights); | |||
/** HUF_getNbBits() : | |||
/** HUF_getNbBitsFromCTable() : | |||
* Read nbBits from CTable symbolTable, for symbol `symbolValue` presumed <= HUF_SYMBOLVALUE_MAX | |||
* Note 1 : is not inlined, as HUF_CElt definition is private | |||
* Note 2 : const void* used, so that it can provide a statically allocated table as argument (which uses type U32) */ | |||
U32 HUF_getNbBits(const void* symbolTable, U32 symbolValue); | |||
* Note 1 : is not inlined, as HUF_CElt definition is private */ | |||
U32 HUF_getNbBitsFromCTable(const HUF_CElt* symbolTable, U32 symbolValue); | |||
/* | |||
* HUF_decompress() does the following: | |||
@@ -260,80 +224,49 @@ U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize); | |||
* a required workspace size greater than that specified in the following | |||
* macro. | |||
*/ | |||
#define HUF_DECOMPRESS_WORKSPACE_SIZE (2 << 10) | |||
#define HUF_DECOMPRESS_WORKSPACE_SIZE ((2 << 10) + (1 << 9)) | |||
#define HUF_DECOMPRESS_WORKSPACE_SIZE_U32 (HUF_DECOMPRESS_WORKSPACE_SIZE / sizeof(U32)) | |||
#ifndef HUF_FORCE_DECOMPRESS_X2 | |||
size_t HUF_readDTableX1 (HUF_DTable* DTable, const void* src, size_t srcSize); | |||
size_t HUF_readDTableX1_wksp (HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize); | |||
#endif | |||
#ifndef HUF_FORCE_DECOMPRESS_X1 | |||
size_t HUF_readDTableX2 (HUF_DTable* DTable, const void* src, size_t srcSize); | |||
size_t HUF_readDTableX2_wksp (HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize); | |||
#endif | |||
size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); | |||
#ifndef HUF_FORCE_DECOMPRESS_X2 | |||
size_t HUF_decompress4X1_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); | |||
#endif | |||
#ifndef HUF_FORCE_DECOMPRESS_X1 | |||
size_t HUF_decompress4X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); | |||
#endif | |||
/* ====================== */ | |||
/* single stream variants */ | |||
/* ====================== */ | |||
size_t HUF_compress1X (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog); | |||
size_t HUF_compress1X_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize); /**< `workSpace` must be a table of at least HUF_WORKSPACE_SIZE_U32 unsigned */ | |||
size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable); | |||
size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int flags); | |||
/** HUF_compress1X_repeat() : | |||
* Same as HUF_compress1X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none. | |||
* If it uses hufTable it does not modify hufTable or repeat. | |||
* If it doesn't, it sets *repeat = HUF_repeat_none, and it sets hufTable to the table used. | |||
* If preferRepeat then the old table will always be used if valid. */ | |||
* If preferRepeat then the old table will always be used if valid. | |||
* If suspectUncompressible then some sampling checks will be run to potentially skip huffman coding */ | |||
size_t HUF_compress1X_repeat(void* dst, size_t dstSize, | |||
const void* src, size_t srcSize, | |||
unsigned maxSymbolValue, unsigned tableLog, | |||
void* workSpace, size_t wkspSize, /**< `workSpace` must be aligned on 4-bytes boundaries, `wkspSize` must be >= HUF_WORKSPACE_SIZE */ | |||
HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2); | |||
HUF_CElt* hufTable, HUF_repeat* repeat, int flags); | |||
size_t HUF_decompress1X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* single-symbol decoder */ | |||
size_t HUF_decompress1X_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int flags); | |||
#ifndef HUF_FORCE_DECOMPRESS_X1 | |||
size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* double-symbol decoder */ | |||
size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int flags); /**< double-symbols decoder */ | |||
#endif | |||
size_t HUF_decompress1X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); | |||
size_t HUF_decompress1X_DCtx_wksp (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); | |||
/* BMI2 variants. | |||
* If the CPU has BMI2 support, pass bmi2=1, otherwise pass bmi2=0. | |||
*/ | |||
size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int flags); | |||
#ifndef HUF_FORCE_DECOMPRESS_X2 | |||
size_t HUF_decompress1X1_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */ | |||
size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< single-symbol decoder */ | |||
#endif | |||
#ifndef HUF_FORCE_DECOMPRESS_X1 | |||
size_t HUF_decompress1X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */ | |||
size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< double-symbols decoder */ | |||
size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int flags); | |||
#endif | |||
size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); /**< automatic selection of single or double symbol decoder, based on DTable */ | |||
size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int flags); | |||
size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int flags); | |||
#ifndef HUF_FORCE_DECOMPRESS_X2 | |||
size_t HUF_decompress1X1_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); | |||
size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int flags); | |||
#endif | |||
#ifndef HUF_FORCE_DECOMPRESS_X1 | |||
size_t HUF_decompress1X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); | |||
size_t HUF_readDTableX2_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int flags); | |||
#endif | |||
/* BMI2 variants. | |||
* If the CPU has BMI2 support, pass bmi2=1, otherwise pass bmi2=0. | |||
*/ | |||
size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2); | |||
#ifndef HUF_FORCE_DECOMPRESS_X2 | |||
size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2); | |||
#endif | |||
size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2); | |||
size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2); | |||
#endif /* HUF_STATIC_LINKING_ONLY */ | |||
#endif /* HUF_H_298734234 */ | |||
#if defined (__cplusplus) | |||
} |
@@ -1,5 +1,5 @@ | |||
/* | |||
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. | |||
* Copyright (c) Meta Platforms, Inc. and affiliates. | |||
* All rights reserved. | |||
* | |||
* This source code is licensed under both the BSD-style license (found in the | |||
@@ -18,8 +18,10 @@ extern "C" { | |||
/*-**************************************** | |||
* Dependencies | |||
******************************************/ | |||
#include <stddef.h> /* size_t, ptrdiff_t */ | |||
#include <string.h> /* memcpy */ | |||
#include <stddef.h> /* size_t, ptrdiff_t */ | |||
#include "compiler.h" /* __has_builtin */ | |||
#include "debug.h" /* DEBUG_STATIC_ASSERT */ | |||
#include "zstd_deps.h" /* ZSTD_memcpy */ | |||
/*-**************************************** | |||
@@ -39,94 +41,18 @@ extern "C" { | |||
# define MEM_STATIC static /* this version may generate warnings for unused static functions; disable the relevant warning */ | |||
#endif | |||
#ifndef __has_builtin | |||
# define __has_builtin(x) 0 /* compat. with non-clang compilers */ | |||
#endif | |||
/* code only tested on 32 and 64 bits systems */ | |||
#define MEM_STATIC_ASSERT(c) { enum { MEM_static_assert = 1/(int)(!!(c)) }; } | |||
MEM_STATIC void MEM_check(void) { MEM_STATIC_ASSERT((sizeof(size_t)==4) || (sizeof(size_t)==8)); } | |||
/* detects whether we are being compiled under msan */ | |||
#if defined (__has_feature) | |||
# if __has_feature(memory_sanitizer) | |||
# define MEMORY_SANITIZER 1 | |||
# endif | |||
#endif | |||
#if defined (MEMORY_SANITIZER) | |||
/* Not all platforms that support msan provide sanitizers/msan_interface.h. | |||
* We therefore declare the functions we need ourselves, rather than trying to | |||
* include the header file... */ | |||
#include <stdint.h> /* intptr_t */ | |||
/* Make memory region fully initialized (without changing its contents). */ | |||
void __msan_unpoison(const volatile void *a, size_t size); | |||
/* Make memory region fully uninitialized (without changing its contents). | |||
This is a legacy interface that does not update origin information. Use | |||
__msan_allocated_memory() instead. */ | |||
void __msan_poison(const volatile void *a, size_t size); | |||
/* Returns the offset of the first (at least partially) poisoned byte in the | |||
memory range, or -1 if the whole range is good. */ | |||
intptr_t __msan_test_shadow(const volatile void *x, size_t size); | |||
#endif | |||
/* detects whether we are being compiled under asan */ | |||
#if defined (__has_feature) | |||
# if __has_feature(address_sanitizer) | |||
# define ADDRESS_SANITIZER 1 | |||
# endif | |||
#elif defined(__SANITIZE_ADDRESS__) | |||
# define ADDRESS_SANITIZER 1 | |||
#endif | |||
#if defined (ADDRESS_SANITIZER) | |||
/* Not all platforms that support asan provide sanitizers/asan_interface.h. | |||
* We therefore declare the functions we need ourselves, rather than trying to | |||
* include the header file... */ | |||
/** | |||
* Marks a memory region (<c>[addr, addr+size)</c>) as unaddressable. | |||
* | |||
* This memory must be previously allocated by your program. Instrumented | |||
* code is forbidden from accessing addresses in this region until it is | |||
* unpoisoned. This function is not guaranteed to poison the entire region - | |||
* it could poison only a subregion of <c>[addr, addr+size)</c> due to ASan | |||
* alignment restrictions. | |||
* | |||
* \note This function is not thread-safe because no two threads can poison or | |||
* unpoison memory in the same memory region simultaneously. | |||
* | |||
* \param addr Start of memory region. | |||
* \param size Size of memory region. */ | |||
void __asan_poison_memory_region(void const volatile *addr, size_t size); | |||
/** | |||
* Marks a memory region (<c>[addr, addr+size)</c>) as addressable. | |||
* | |||
* This memory must be previously allocated by your program. Accessing | |||
* addresses in this region is allowed until this region is poisoned again. | |||
* This function could unpoison a super-region of <c>[addr, addr+size)</c> due | |||
* to ASan alignment restrictions. | |||
* | |||
* \note This function is not thread-safe because no two threads can | |||
* poison or unpoison memory in the same memory region simultaneously. | |||
* | |||
* \param addr Start of memory region. | |||
* \param size Size of memory region. */ | |||
void __asan_unpoison_memory_region(void const volatile *addr, size_t size); | |||
#endif | |||
/*-************************************************************** | |||
* Basic Types | |||
*****************************************************************/ | |||
#if !defined (__VMS) && (defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) ) | |||
# include <stdint.h> | |||
# if defined(_AIX) | |||
# include <inttypes.h> | |||
# else | |||
# include <stdint.h> /* intptr_t */ | |||
# endif | |||
typedef uint8_t BYTE; | |||
typedef uint8_t U8; | |||
typedef int8_t S8; | |||
typedef uint16_t U16; | |||
typedef int16_t S16; | |||
typedef uint32_t U32; | |||
@@ -139,6 +65,8 @@ void __asan_unpoison_memory_region(void const volatile *addr, size_t size); | |||
# error "this implementation requires char to be exactly 8-bit type" | |||
#endif | |||
typedef unsigned char BYTE; | |||
typedef unsigned char U8; | |||
typedef signed char S8; | |||
#if USHRT_MAX != 65535 | |||
# error "this implementation requires short to be exactly 16-bit type" | |||
#endif | |||
@@ -157,25 +85,63 @@ void __asan_unpoison_memory_region(void const volatile *addr, size_t size); | |||
/*-************************************************************** | |||
* Memory I/O | |||
* Memory I/O API | |||
*****************************************************************/ | |||
/*=== Static platform detection ===*/ | |||
MEM_STATIC unsigned MEM_32bits(void); | |||
MEM_STATIC unsigned MEM_64bits(void); | |||
MEM_STATIC unsigned MEM_isLittleEndian(void); | |||
/*=== Native unaligned read/write ===*/ | |||
MEM_STATIC U16 MEM_read16(const void* memPtr); | |||
MEM_STATIC U32 MEM_read32(const void* memPtr); | |||
MEM_STATIC U64 MEM_read64(const void* memPtr); | |||
MEM_STATIC size_t MEM_readST(const void* memPtr); | |||
MEM_STATIC void MEM_write16(void* memPtr, U16 value); | |||
MEM_STATIC void MEM_write32(void* memPtr, U32 value); | |||
MEM_STATIC void MEM_write64(void* memPtr, U64 value); | |||
/*=== Little endian unaligned read/write ===*/ | |||
MEM_STATIC U16 MEM_readLE16(const void* memPtr); | |||
MEM_STATIC U32 MEM_readLE24(const void* memPtr); | |||
MEM_STATIC U32 MEM_readLE32(const void* memPtr); | |||
MEM_STATIC U64 MEM_readLE64(const void* memPtr); | |||
MEM_STATIC size_t MEM_readLEST(const void* memPtr); | |||
MEM_STATIC void MEM_writeLE16(void* memPtr, U16 val); | |||
MEM_STATIC void MEM_writeLE24(void* memPtr, U32 val); | |||
MEM_STATIC void MEM_writeLE32(void* memPtr, U32 val32); | |||
MEM_STATIC void MEM_writeLE64(void* memPtr, U64 val64); | |||
MEM_STATIC void MEM_writeLEST(void* memPtr, size_t val); | |||
/*=== Big endian unaligned read/write ===*/ | |||
MEM_STATIC U32 MEM_readBE32(const void* memPtr); | |||
MEM_STATIC U64 MEM_readBE64(const void* memPtr); | |||
MEM_STATIC size_t MEM_readBEST(const void* memPtr); | |||
MEM_STATIC void MEM_writeBE32(void* memPtr, U32 val32); | |||
MEM_STATIC void MEM_writeBE64(void* memPtr, U64 val64); | |||
MEM_STATIC void MEM_writeBEST(void* memPtr, size_t val); | |||
/*=== Byteswap ===*/ | |||
MEM_STATIC U32 MEM_swap32(U32 in); | |||
MEM_STATIC U64 MEM_swap64(U64 in); | |||
MEM_STATIC size_t MEM_swapST(size_t in); | |||
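As a hedged illustration of the little-endian helpers declared above (the function name is hypothetical):

static U32 exampleRoundTrip(void)
{
    BYTE buf[4];
    MEM_writeLE32(buf, 0x12345678U);
    /* buf now holds 78 56 34 12; the read back is endian-independent */
    return MEM_readLE32(buf);   /* == 0x12345678U on any host */
}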
/*-************************************************************** | |||
* Memory I/O Implementation | |||
*****************************************************************/ | |||
/* MEM_FORCE_MEMORY_ACCESS : | |||
* By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable. | |||
* Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal. | |||
* The switch below allows selecting a different access method for improved performance. | |||
* Method 0 (default) : use `memcpy()`. Safe and portable. | |||
* Method 1 : `__packed` statement. It depends on compiler extension (i.e., not portable). | |||
* This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`. | |||
/* MEM_FORCE_MEMORY_ACCESS : For accessing unaligned memory: | |||
* Method 0 : always use `memcpy()`. Safe and portable. | |||
* Method 1 : Use compiler extension to set unaligned access. | |||
* Method 2 : direct access. This method is portable but violates the C standard. | |||
* It can generate buggy code on targets depending on alignment. | |||
* In some circumstances, it's the only known way to get the most performance (i.e. GCC + ARMv6) | |||
* See http://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details. | |||
* Prefer these methods in priority order (0 > 1 > 2) | |||
* Default : method 1 if supported, else method 0 | |||
*/ | |||
#ifndef MEM_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */ | |||
# if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) ) | |||
# define MEM_FORCE_MEMORY_ACCESS 2 | |||
# elif defined(__INTEL_COMPILER) || defined(__GNUC__) || defined(__ICCARM__) | |||
# ifdef __GNUC__ | |||
# define MEM_FORCE_MEMORY_ACCESS 1 | |||
# endif | |||
#endif | |||
@@ -185,8 +151,22 @@ MEM_STATIC unsigned MEM_64bits(void) { return sizeof(size_t)==8; } | |||
MEM_STATIC unsigned MEM_isLittleEndian(void) | |||
{ | |||
#if defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) | |||
return 1; | |||
#elif defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) | |||
return 0; | |||
#elif defined(__clang__) && __LITTLE_ENDIAN__ | |||
return 1; | |||
#elif defined(__clang__) && __BIG_ENDIAN__ | |||
return 0; | |||
#elif defined(_MSC_VER) && (_M_AMD64 || _M_IX86) | |||
return 1; | |||
#elif defined(__DMC__) && defined(_M_IX86) | |||
return 1; | |||
#else | |||
const union { U32 u; BYTE c[4]; } one = { 1 }; /* don't use static : performance detrimental */ | |||
return one.c[0]; | |||
#endif | |||
} | |||
#if defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==2) | |||
@@ -204,30 +184,19 @@ MEM_STATIC void MEM_write64(void* memPtr, U64 value) { *(U64*)memPtr = value; } | |||
#elif defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==1) | |||
/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */ | |||
/* currently only defined for gcc and icc */ | |||
#if defined(_MSC_VER) || (defined(__INTEL_COMPILER) && defined(WIN32)) | |||
__pragma( pack(push, 1) ) | |||
typedef struct { U16 v; } unalign16; | |||
typedef struct { U32 v; } unalign32; | |||
typedef struct { U64 v; } unalign64; | |||
typedef struct { size_t v; } unalignArch; | |||
__pragma( pack(pop) ) | |||
#else | |||
typedef struct { U16 v; } __attribute__((packed)) unalign16; | |||
typedef struct { U32 v; } __attribute__((packed)) unalign32; | |||
typedef struct { U64 v; } __attribute__((packed)) unalign64; | |||
typedef struct { size_t v; } __attribute__((packed)) unalignArch; | |||
#endif | |||
typedef __attribute__((aligned(1))) U16 unalign16; | |||
typedef __attribute__((aligned(1))) U32 unalign32; | |||
typedef __attribute__((aligned(1))) U64 unalign64; | |||
typedef __attribute__((aligned(1))) size_t unalignArch; | |||
MEM_STATIC U16 MEM_read16(const void* ptr) { return ((const unalign16*)ptr)->v; } | |||
MEM_STATIC U32 MEM_read32(const void* ptr) { return ((const unalign32*)ptr)->v; } | |||
MEM_STATIC U64 MEM_read64(const void* ptr) { return ((const unalign64*)ptr)->v; } | |||
MEM_STATIC size_t MEM_readST(const void* ptr) { return ((const unalignArch*)ptr)->v; } | |||
MEM_STATIC U16 MEM_read16(const void* ptr) { return *(const unalign16*)ptr; } | |||
MEM_STATIC U32 MEM_read32(const void* ptr) { return *(const unalign32*)ptr; } | |||
MEM_STATIC U64 MEM_read64(const void* ptr) { return *(const unalign64*)ptr; } | |||
MEM_STATIC size_t MEM_readST(const void* ptr) { return *(const unalignArch*)ptr; } | |||
MEM_STATIC void MEM_write16(void* memPtr, U16 value) { ((unalign16*)memPtr)->v = value; } | |||
MEM_STATIC void MEM_write32(void* memPtr, U32 value) { ((unalign32*)memPtr)->v = value; } | |||
MEM_STATIC void MEM_write64(void* memPtr, U64 value) { ((unalign64*)memPtr)->v = value; } | |||
MEM_STATIC void MEM_write16(void* memPtr, U16 value) { *(unalign16*)memPtr = value; } | |||
MEM_STATIC void MEM_write32(void* memPtr, U32 value) { *(unalign32*)memPtr = value; } | |||
MEM_STATIC void MEM_write64(void* memPtr, U64 value) { *(unalign64*)memPtr = value; } | |||
#else | |||
@@ -236,41 +205,49 @@ MEM_STATIC void MEM_write64(void* memPtr, U64 value) { ((unalign64*)memPtr)->v = | |||
MEM_STATIC U16 MEM_read16(const void* memPtr) | |||
{ | |||
U16 val; memcpy(&val, memPtr, sizeof(val)); return val; | |||
U16 val; ZSTD_memcpy(&val, memPtr, sizeof(val)); return val; | |||
} | |||
MEM_STATIC U32 MEM_read32(const void* memPtr) | |||
{ | |||
U32 val; memcpy(&val, memPtr, sizeof(val)); return val; | |||
U32 val; ZSTD_memcpy(&val, memPtr, sizeof(val)); return val; | |||
} | |||
MEM_STATIC U64 MEM_read64(const void* memPtr) | |||
{ | |||
U64 val; memcpy(&val, memPtr, sizeof(val)); return val; | |||
U64 val; ZSTD_memcpy(&val, memPtr, sizeof(val)); return val; | |||
} | |||
MEM_STATIC size_t MEM_readST(const void* memPtr) | |||
{ | |||
size_t val; memcpy(&val, memPtr, sizeof(val)); return val; | |||
size_t val; ZSTD_memcpy(&val, memPtr, sizeof(val)); return val; | |||
} | |||
MEM_STATIC void MEM_write16(void* memPtr, U16 value) | |||
{ | |||
memcpy(memPtr, &value, sizeof(value)); | |||
ZSTD_memcpy(memPtr, &value, sizeof(value)); | |||
} | |||
MEM_STATIC void MEM_write32(void* memPtr, U32 value) | |||
{ | |||
memcpy(memPtr, &value, sizeof(value)); | |||
ZSTD_memcpy(memPtr, &value, sizeof(value)); | |||
} | |||
MEM_STATIC void MEM_write64(void* memPtr, U64 value) | |||
{ | |||
memcpy(memPtr, &value, sizeof(value)); | |||
ZSTD_memcpy(memPtr, &value, sizeof(value)); | |||
} | |||
#endif /* MEM_FORCE_MEMORY_ACCESS */ | |||
MEM_STATIC U32 MEM_swap32_fallback(U32 in) | |||
{ | |||
return ((in << 24) & 0xff000000 ) | | |||
((in << 8) & 0x00ff0000 ) | | |||
((in >> 8) & 0x0000ff00 ) | | |||
((in >> 24) & 0x000000ff ); | |||
} | |||
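/* Worked example: MEM_swap32_fallback(0x0A0B0C0DU) == 0x0D0C0B0AU,
 * i.e. the four bytes come back in reverse order. */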
MEM_STATIC U32 MEM_swap32(U32 in) | |||
{ | |||
#if defined(_MSC_VER) /* Visual Studio */ | |||
@@ -279,22 +256,13 @@ MEM_STATIC U32 MEM_swap32(U32 in) | |||
|| (defined(__clang__) && __has_builtin(__builtin_bswap32)) | |||
return __builtin_bswap32(in); | |||
#else | |||
return ((in << 24) & 0xff000000 ) | | |||
((in << 8) & 0x00ff0000 ) | | |||
((in >> 8) & 0x0000ff00 ) | | |||
((in >> 24) & 0x000000ff ); | |||
return MEM_swap32_fallback(in); | |||
#endif | |||
} | |||
MEM_STATIC U64 MEM_swap64(U64 in) | |||
MEM_STATIC U64 MEM_swap64_fallback(U64 in) | |||
{ | |||
#if defined(_MSC_VER) /* Visual Studio */ | |||
return _byteswap_uint64(in); | |||
#elif (defined (__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__ >= 403)) \ | |||
|| (defined(__clang__) && __has_builtin(__builtin_bswap64)) | |||
return __builtin_bswap64(in); | |||
#else | |||
return ((in << 56) & 0xff00000000000000ULL) | | |||
return ((in << 56) & 0xff00000000000000ULL) | | |||
((in << 40) & 0x00ff000000000000ULL) | | |||
((in << 24) & 0x0000ff0000000000ULL) | | |||
((in << 8) & 0x000000ff00000000ULL) | | |||
@@ -302,6 +270,17 @@ MEM_STATIC U64 MEM_swap64(U64 in) | |||
((in >> 24) & 0x0000000000ff0000ULL) | | |||
((in >> 40) & 0x000000000000ff00ULL) | | |||
((in >> 56) & 0x00000000000000ffULL); | |||
} | |||
MEM_STATIC U64 MEM_swap64(U64 in) | |||
{ | |||
#if defined(_MSC_VER) /* Visual Studio */ | |||
return _byteswap_uint64(in); | |||
#elif (defined (__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__ >= 403)) \ | |||
|| (defined(__clang__) && __has_builtin(__builtin_bswap64)) | |||
return __builtin_bswap64(in); | |||
#else | |||
return MEM_swap64_fallback(in); | |||
#endif | |||
} | |||
@@ -338,7 +317,7 @@ MEM_STATIC void MEM_writeLE16(void* memPtr, U16 val) | |||
MEM_STATIC U32 MEM_readLE24(const void* memPtr) | |||
{ | |||
return MEM_readLE16(memPtr) + (((const BYTE*)memPtr)[2] << 16); | |||
return (U32)MEM_readLE16(memPtr) + ((U32)(((const BYTE*)memPtr)[2]) << 16); | |||
} | |||
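/* Note: the added U32 casts keep the shift and addition in unsigned
 * 32-bit arithmetic rather than relying on implicit promotion of the
 * BYTE operand to (signed) int. */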
MEM_STATIC void MEM_writeLE24(void* memPtr, U32 val) | |||
@@ -445,6 +424,9 @@ MEM_STATIC void MEM_writeBEST(void* memPtr, size_t val) | |||
MEM_writeBE64(memPtr, (U64)val); | |||
} | |||
/* code only tested on 32 and 64 bits systems */ | |||
MEM_STATIC void MEM_check(void) { DEBUG_STATIC_ASSERT((sizeof(size_t)==4) || (sizeof(size_t)==8)); } | |||
#if defined (__cplusplus) | |||
} |
@@ -1,5 +1,5 @@ | |||
/* | |||
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. | |||
* Copyright (c) Meta Platforms, Inc. and affiliates. | |||
* All rights reserved. | |||
* | |||
* This source code is licensed under both the BSD-style license (found in the | |||
@@ -10,9 +10,9 @@ | |||
/* ====== Dependencies ======= */ | |||
#include <stddef.h> /* size_t */ | |||
#include "zstd_deps.h" /* size_t */ | |||
#include "debug.h" /* assert */ | |||
#include "zstd_internal.h" /* ZSTD_malloc, ZSTD_free */ | |||
#include "zstd_internal.h" /* ZSTD_customCalloc, ZSTD_customFree */ | |||
#include "pool.h" | |||
/* ====== Compiler specifics ====== */ | |||
@@ -86,7 +86,7 @@ static void* POOL_thread(void* opaque) { | |||
{ POOL_job const job = ctx->queue[ctx->queueHead]; | |||
ctx->queueHead = (ctx->queueHead + 1) % ctx->queueSize; | |||
ctx->numThreadsBusy++; | |||
ctx->queueEmpty = ctx->queueHead == ctx->queueTail; | |||
ctx->queueEmpty = (ctx->queueHead == ctx->queueTail); | |||
/* Unlock the mutex, signal a pusher, and run the job */ | |||
ZSTD_pthread_cond_signal(&ctx->queuePushCond); | |||
ZSTD_pthread_mutex_unlock(&ctx->queueMutex); | |||
@@ -96,33 +96,37 @@ static void* POOL_thread(void* opaque) { | |||
/* If the intended queue size was 0, signal after finishing job */ | |||
ZSTD_pthread_mutex_lock(&ctx->queueMutex); | |||
ctx->numThreadsBusy--; | |||
if (ctx->queueSize == 1) { | |||
ZSTD_pthread_cond_signal(&ctx->queuePushCond); | |||
} | |||
ZSTD_pthread_cond_signal(&ctx->queuePushCond); | |||
ZSTD_pthread_mutex_unlock(&ctx->queueMutex); | |||
} | |||
} /* for (;;) */ | |||
assert(0); /* Unreachable */ | |||
} | |||
/* ZSTD_createThreadPool() : public access point */ | |||
POOL_ctx* ZSTD_createThreadPool(size_t numThreads) { | |||
return POOL_create (numThreads, 0); | |||
} | |||
POOL_ctx* POOL_create(size_t numThreads, size_t queueSize) { | |||
return POOL_create_advanced(numThreads, queueSize, ZSTD_defaultCMem); | |||
} | |||
POOL_ctx* POOL_create_advanced(size_t numThreads, size_t queueSize, | |||
ZSTD_customMem customMem) { | |||
ZSTD_customMem customMem) | |||
{ | |||
POOL_ctx* ctx; | |||
/* Check parameters */ | |||
if (!numThreads) { return NULL; } | |||
/* Allocate the context and zero initialize */ | |||
ctx = (POOL_ctx*)ZSTD_calloc(sizeof(POOL_ctx), customMem); | |||
ctx = (POOL_ctx*)ZSTD_customCalloc(sizeof(POOL_ctx), customMem); | |||
if (!ctx) { return NULL; } | |||
/* Initialize the job queue. | |||
* It needs one extra space since one space is wasted to differentiate | |||
* empty and full queues. | |||
*/ | |||
ctx->queueSize = queueSize + 1; | |||
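/* Spelling out the resulting invariant: the queue is empty when
 * queueHead == queueTail, and full when
 * (queueTail + 1) % queueSize == queueHead, so the two states never alias. */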
ctx->queue = (POOL_job*)ZSTD_malloc(ctx->queueSize * sizeof(POOL_job), customMem); | |||
ctx->queue = (POOL_job*)ZSTD_customCalloc(ctx->queueSize * sizeof(POOL_job), customMem); | |||
ctx->queueHead = 0; | |||
ctx->queueTail = 0; | |||
ctx->numThreadsBusy = 0; | |||
@@ -136,7 +140,7 @@ POOL_ctx* POOL_create_advanced(size_t numThreads, size_t queueSize, | |||
} | |||
ctx->shutdown = 0; | |||
/* Allocate space for the thread handles */ | |||
ctx->threads = (ZSTD_pthread_t*)ZSTD_malloc(numThreads * sizeof(ZSTD_pthread_t), customMem); | |||
ctx->threads = (ZSTD_pthread_t*)ZSTD_customCalloc(numThreads * sizeof(ZSTD_pthread_t), customMem); | |||
ctx->threadCapacity = 0; | |||
ctx->customMem = customMem; | |||
/* Check for errors */ | |||
@@ -169,7 +173,7 @@ static void POOL_join(POOL_ctx* ctx) { | |||
/* Join all of the threads */ | |||
{ size_t i; | |||
for (i = 0; i < ctx->threadCapacity; ++i) { | |||
ZSTD_pthread_join(ctx->threads[i], NULL); /* note : could fail */ | |||
ZSTD_pthread_join(ctx->threads[i]); /* note : could fail */ | |||
} } | |||
} | |||
@@ -179,14 +183,27 @@ void POOL_free(POOL_ctx *ctx) { | |||
ZSTD_pthread_mutex_destroy(&ctx->queueMutex); | |||
ZSTD_pthread_cond_destroy(&ctx->queuePushCond); | |||
ZSTD_pthread_cond_destroy(&ctx->queuePopCond); | |||
ZSTD_free(ctx->queue, ctx->customMem); | |||
ZSTD_free(ctx->threads, ctx->customMem); | |||
ZSTD_free(ctx, ctx->customMem); | |||
ZSTD_customFree(ctx->queue, ctx->customMem); | |||
ZSTD_customFree(ctx->threads, ctx->customMem); | |||
ZSTD_customFree(ctx, ctx->customMem); | |||
} | |||
/*! POOL_joinJobs() : | |||
* Waits for all queued jobs to finish executing. | |||
*/ | |||
void POOL_joinJobs(POOL_ctx* ctx) { | |||
ZSTD_pthread_mutex_lock(&ctx->queueMutex); | |||
while(!ctx->queueEmpty || ctx->numThreadsBusy > 0) { | |||
ZSTD_pthread_cond_wait(&ctx->queuePushCond, &ctx->queueMutex); | |||
} | |||
ZSTD_pthread_mutex_unlock(&ctx->queueMutex); | |||
} | |||
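A hedged usage sketch of the new wait primitive alongside the existing pool API (job body and sizes are illustrative; POOL_function is assumed to be void (*)(void*)):

static void exampleJob(void* opaque) { (void)opaque; /* work item */ }

static void examplePoolUse(void)
{
    POOL_ctx* const pool = POOL_create(4 /* threads */, 8 /* queue slots */);
    if (pool == NULL) return;
    POOL_add(pool, exampleJob, NULL);
    POOL_joinJobs(pool);  /* returns once the queue drains and workers go idle */
    POOL_free(pool);
}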
void ZSTD_freeThreadPool (ZSTD_threadPool* pool) { | |||
POOL_free (pool); | |||
} | |||
size_t POOL_sizeof(POOL_ctx *ctx) { | |||
size_t POOL_sizeof(const POOL_ctx* ctx) { | |||
if (ctx==NULL) return 0; /* supports sizeof NULL */ | |||
return sizeof(*ctx) | |||
+ ctx->queueSize * sizeof(POOL_job) | |||
@@ -203,11 +220,11 @@ static int POOL_resize_internal(POOL_ctx* ctx, size_t numThreads) | |||
return 0; | |||
} | |||
/* numThreads > threadCapacity */ | |||
{ ZSTD_pthread_t* const threadPool = (ZSTD_pthread_t*)ZSTD_malloc(numThreads * sizeof(ZSTD_pthread_t), ctx->customMem); | |||
{ ZSTD_pthread_t* const threadPool = (ZSTD_pthread_t*)ZSTD_customCalloc(numThreads * sizeof(ZSTD_pthread_t), ctx->customMem); | |||
if (!threadPool) return 1; | |||
/* replace existing thread pool */ | |||
memcpy(threadPool, ctx->threads, ctx->threadCapacity * sizeof(*threadPool)); | |||
ZSTD_free(ctx->threads, ctx->customMem); | |||
ZSTD_memcpy(threadPool, ctx->threads, ctx->threadCapacity * sizeof(*threadPool)); | |||
ZSTD_customFree(ctx->threads, ctx->customMem); | |||
ctx->threads = threadPool; | |||
/* Initialize additional threads */ | |||
{ size_t threadId; | |||
@@ -251,9 +268,12 @@ static int isQueueFull(POOL_ctx const* ctx) { | |||
} | |||
static void POOL_add_internal(POOL_ctx* ctx, POOL_function function, void *opaque) | |||
static void | |||
POOL_add_internal(POOL_ctx* ctx, POOL_function function, void *opaque) | |||
{ | |||
POOL_job const job = {function, opaque}; | |||
POOL_job job; | |||
job.function = function; | |||
job.opaque = opaque; | |||
assert(ctx != NULL); | |||
if (ctx->shutdown) return; | |||
@@ -301,21 +321,28 @@ int POOL_tryAdd(POOL_ctx* ctx, POOL_function function, void* opaque) | |||
struct POOL_ctx_s { | |||
int dummy; | |||
}; | |||
static POOL_ctx g_ctx; | |||
static POOL_ctx g_poolCtx; | |||
POOL_ctx* POOL_create(size_t numThreads, size_t queueSize) { | |||
return POOL_create_advanced(numThreads, queueSize, ZSTD_defaultCMem); | |||
} | |||
POOL_ctx* POOL_create_advanced(size_t numThreads, size_t queueSize, ZSTD_customMem customMem) { | |||
POOL_ctx* | |||
POOL_create_advanced(size_t numThreads, size_t queueSize, ZSTD_customMem customMem) | |||
{ | |||
(void)numThreads; | |||
(void)queueSize; | |||
(void)customMem; | |||
return &g_ctx; | |||
return &g_poolCtx; | |||
} | |||
void POOL_free(POOL_ctx* ctx) { | |||
assert(!ctx || ctx == &g_ctx); | |||
assert(!ctx || ctx == &g_poolCtx); | |||
(void)ctx; | |||
} | |||
void POOL_joinJobs(POOL_ctx* ctx){ | |||
assert(!ctx || ctx == &g_poolCtx); | |||
(void)ctx; | |||
} | |||
@@ -335,9 +362,9 @@ int POOL_tryAdd(POOL_ctx* ctx, POOL_function function, void* opaque) { | |||
return 1; | |||
} | |||
size_t POOL_sizeof(POOL_ctx* ctx) { | |||
size_t POOL_sizeof(const POOL_ctx* ctx) { | |||
if (ctx==NULL) return 0; /* supports sizeof NULL */ | |||
assert(ctx == &g_ctx); | |||
assert(ctx == &g_poolCtx); | |||
return sizeof(*ctx); | |||
} | |||
@@ -1,5 +1,5 @@ | |||
/* | |||
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. | |||
* Copyright (c) Meta Platforms, Inc. and affiliates. | |||
* All rights reserved. | |||
* | |||
* This source code is licensed under both the BSD-style license (found in the | |||
@@ -16,11 +16,8 @@ extern "C" { | |||
#endif | |||
#include <stddef.h> /* size_t */ | |||
#ifndef ZSTD_STATIC_LINKING_ONLY | |||
#include "zstd_deps.h" | |||
#define ZSTD_STATIC_LINKING_ONLY /* ZSTD_customMem */ | |||
#endif | |||
#include "zstd.h" | |||
typedef struct POOL_ctx_s POOL_ctx; | |||
@@ -41,6 +38,12 @@ POOL_ctx* POOL_create_advanced(size_t numThreads, size_t queueSize, | |||
*/ | |||
void POOL_free(POOL_ctx* ctx); | |||
/*! POOL_joinJobs() : | |||
* Waits for all queued jobs to finish executing. | |||
*/ | |||
void POOL_joinJobs(POOL_ctx* ctx); | |||
/*! POOL_resize() : | |||
* Expands or shrinks pool's number of threads. | |||
* This is more efficient than releasing + creating a new context, | |||
@@ -56,7 +59,7 @@ int POOL_resize(POOL_ctx* ctx, size_t numThreads); | |||
* @return threadpool memory usage | |||
* note : compatible with NULL (returns 0 in this case) | |||
*/ | |||
size_t POOL_sizeof(POOL_ctx* ctx); | |||
size_t POOL_sizeof(const POOL_ctx* ctx); | |||
/*! POOL_function : | |||
* The function type that can be added to a thread pool. | |||
@@ -73,7 +76,7 @@ void POOL_add(POOL_ctx* ctx, POOL_function function, void* opaque); | |||
/*! POOL_tryAdd() : | |||
* Add the job `function(opaque)` to thread pool _if_ a worker is available. | |||
* Add the job `function(opaque)` to thread pool _if_ a queue slot is available. | |||
* Returns immediately even if not (does not block). | |||
* @return : 1 if successful, 0 if not. | |||
*/ |
@@ -0,0 +1,156 @@ | |||
/* | |||
* Copyright (c) Meta Platforms, Inc. and affiliates. | |||
* All rights reserved. | |||
* | |||
* This source code is licensed under both the BSD-style license (found in the | |||
* LICENSE file in the root directory of this source tree) and the GPLv2 (found | |||
* in the COPYING file in the root directory of this source tree). | |||
* You may select, at your option, one of the above-listed licenses. | |||
*/ | |||
#ifndef ZSTD_PORTABILITY_MACROS_H | |||
#define ZSTD_PORTABILITY_MACROS_H | |||
/** | |||
* This header file contains macro definitions to support portability. | |||
* This header is shared between C and ASM code, so it MUST only | |||
* contain macro definitions. It MUST not contain any C code. | |||
* | |||
* This header ONLY defines macros to detect platforms/feature support. | |||
* | |||
*/ | |||
/* compat. with non-clang compilers */ | |||
#ifndef __has_attribute | |||
#define __has_attribute(x) 0 | |||
#endif | |||
/* compat. with non-clang compilers */ | |||
#ifndef __has_builtin | |||
# define __has_builtin(x) 0 | |||
#endif | |||
/* compat. with non-clang compilers */ | |||
#ifndef __has_feature | |||
# define __has_feature(x) 0 | |||
#endif | |||
/* detects whether we are being compiled under msan */ | |||
#ifndef ZSTD_MEMORY_SANITIZER | |||
# if __has_feature(memory_sanitizer) | |||
# define ZSTD_MEMORY_SANITIZER 1 | |||
# else | |||
# define ZSTD_MEMORY_SANITIZER 0 | |||
# endif | |||
#endif | |||
/* detects whether we are being compiled under asan */ | |||
#ifndef ZSTD_ADDRESS_SANITIZER | |||
# if __has_feature(address_sanitizer) | |||
# define ZSTD_ADDRESS_SANITIZER 1 | |||
# elif defined(__SANITIZE_ADDRESS__) | |||
# define ZSTD_ADDRESS_SANITIZER 1 | |||
# else | |||
# define ZSTD_ADDRESS_SANITIZER 0 | |||
# endif | |||
#endif | |||
/* detects whether we are being compiled under dfsan */ | |||
#ifndef ZSTD_DATAFLOW_SANITIZER | |||
# if __has_feature(dataflow_sanitizer) | |||
# define ZSTD_DATAFLOW_SANITIZER 1 | |||
# else | |||
# define ZSTD_DATAFLOW_SANITIZER 0 | |||
# endif | |||
#endif | |||
/* Mark the internal assembly functions as hidden */ | |||
#ifdef __ELF__ | |||
# define ZSTD_HIDE_ASM_FUNCTION(func) .hidden func | |||
#else | |||
# define ZSTD_HIDE_ASM_FUNCTION(func) | |||
#endif | |||
/* Enable runtime BMI2 dispatch based on the CPU. | |||
* Enabled for clang & gcc >=4.8 on x86 when BMI2 isn't enabled by default. | |||
*/ | |||
#ifndef DYNAMIC_BMI2 | |||
#if ((defined(__clang__) && __has_attribute(__target__)) \ | |||
|| (defined(__GNUC__) \ | |||
&& (__GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)))) \ | |||
&& (defined(__x86_64__) || defined(_M_X64)) \ | |||
&& !defined(__BMI2__) | |||
# define DYNAMIC_BMI2 1 | |||
#else | |||
# define DYNAMIC_BMI2 0 | |||
#endif | |||
#endif | |||
/** | |||
* Only enable assembly for GNUC compatible compilers, | |||
* because other platforms may not support GAS assembly syntax. | |||
* | |||
* Only enable assembly for Linux / MacOS; other platforms may | |||
* work, but they haven't been tested. This could likely be | |||
* extended to BSD systems. | |||
* | |||
* Disable assembly when MSAN is enabled, because MSAN requires | |||
* 100% of code to be instrumented to work. | |||
*/ | |||
#if defined(__GNUC__) | |||
# if defined(__linux__) || defined(__linux) || defined(__APPLE__) | |||
# if ZSTD_MEMORY_SANITIZER | |||
# define ZSTD_ASM_SUPPORTED 0 | |||
# elif ZSTD_DATAFLOW_SANITIZER | |||
# define ZSTD_ASM_SUPPORTED 0 | |||
# else | |||
# define ZSTD_ASM_SUPPORTED 1 | |||
# endif | |||
# else | |||
# define ZSTD_ASM_SUPPORTED 0 | |||
# endif | |||
#else | |||
# define ZSTD_ASM_SUPPORTED 0 | |||
#endif | |||
/** | |||
* Determines whether we should enable assembly for x86-64 | |||
* with BMI2. | |||
* | |||
* Enable if all of the following conditions hold: | |||
* - ASM hasn't been explicitly disabled by defining ZSTD_DISABLE_ASM | |||
* - Assembly is supported | |||
* - We are compiling for x86-64 and either: | |||
* - DYNAMIC_BMI2 is enabled | |||
* - BMI2 is supported at compile time | |||
*/ | |||
#if !defined(ZSTD_DISABLE_ASM) && \ | |||
ZSTD_ASM_SUPPORTED && \ | |||
defined(__x86_64__) && \ | |||
(DYNAMIC_BMI2 || defined(__BMI2__)) | |||
# define ZSTD_ENABLE_ASM_X86_64_BMI2 1 | |||
#else | |||
# define ZSTD_ENABLE_ASM_X86_64_BMI2 0 | |||
#endif | |||
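Consuming translation units are expected to branch on the derived macro rather than re-deriving its inputs; a sketch using a hypothetical knob:

#if ZSTD_ENABLE_ASM_X86_64_BMI2
#  define EXAMPLE_USE_ASM 1   /* hypothetical: select the BMI2 asm path */
#else
#  define EXAMPLE_USE_ASM 0   /* portable C fallback */
#endif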
/* | |||
* For x86 ELF targets, add .note.gnu.property section for Intel CET in | |||
* assembly sources when CET is enabled. | |||
* | |||
* Additionally, any function that may be called indirectly must begin | |||
* with ZSTD_CET_ENDBRANCH. | |||
*/ | |||
#if defined(__ELF__) && (defined(__x86_64__) || defined(__i386__)) \ | |||
&& defined(__has_include) | |||
# if __has_include(<cet.h>) | |||
# include <cet.h> | |||
# define ZSTD_CET_ENDBRANCH _CET_ENDBR | |||
# endif | |||
#endif | |||
#ifndef ZSTD_CET_ENDBRANCH | |||
# define ZSTD_CET_ENDBRANCH | |||
#endif | |||
#endif /* ZSTD_PORTABILITY_MACROS_H */ |
@@ -1,121 +0,0 @@ | |||
/** | |||
* Copyright (c) 2016 Tino Reichardt | |||
* All rights reserved. | |||
* | |||
* You can contact the author at: | |||
* - zstdmt source repository: https://github.com/mcmilk/zstdmt | |||
* | |||
* This source code is licensed under both the BSD-style license (found in the | |||
* LICENSE file in the root directory of this source tree) and the GPLv2 (found | |||
* in the COPYING file in the root directory of this source tree). | |||
* You may select, at your option, one of the above-listed licenses. | |||
*/ | |||
/** | |||
* This file will hold wrapper for systems, which do not support pthreads | |||
*/ | |||
#include "threading.h" | |||
/* create fake symbol to avoid empty translation unit warning */ | |||
int g_ZSTD_threading_useless_symbol; | |||
#if defined(ZSTD_MULTITHREAD) && defined(_WIN32) | |||
/** | |||
* Windows minimalist Pthread Wrapper, based on : | |||
* http://www.cse.wustl.edu/~schmidt/win32-cv-1.html | |||
*/ | |||
/* === Dependencies === */ | |||
#include <process.h> | |||
#include <errno.h> | |||
/* === Implementation === */ | |||
static unsigned __stdcall worker(void *arg) | |||
{ | |||
ZSTD_pthread_t* const thread = (ZSTD_pthread_t*) arg; | |||
thread->arg = thread->start_routine(thread->arg); | |||
return 0; | |||
} | |||
int ZSTD_pthread_create(ZSTD_pthread_t* thread, const void* unused, | |||
void* (*start_routine) (void*), void* arg) | |||
{ | |||
(void)unused; | |||
thread->arg = arg; | |||
thread->start_routine = start_routine; | |||
thread->handle = (HANDLE) _beginthreadex(NULL, 0, worker, thread, 0, NULL); | |||
if (!thread->handle) | |||
return errno; | |||
else | |||
return 0; | |||
} | |||
int ZSTD_pthread_join(ZSTD_pthread_t thread, void **value_ptr) | |||
{ | |||
DWORD result; | |||
if (!thread.handle) return 0; | |||
result = WaitForSingleObject(thread.handle, INFINITE); | |||
switch (result) { | |||
case WAIT_OBJECT_0: | |||
if (value_ptr) *value_ptr = thread.arg; | |||
return 0; | |||
case WAIT_ABANDONED: | |||
return EINVAL; | |||
default: | |||
return GetLastError(); | |||
} | |||
} | |||
#endif /* ZSTD_MULTITHREAD */ | |||
#if defined(ZSTD_MULTITHREAD) && DEBUGLEVEL >= 1 && !defined(_WIN32) | |||
#include <stdlib.h> | |||
int ZSTD_pthread_mutex_init(ZSTD_pthread_mutex_t* mutex, pthread_mutexattr_t const* attr) | |||
{ | |||
*mutex = (pthread_mutex_t*)malloc(sizeof(pthread_mutex_t)); | |||
if (!*mutex) | |||
return 1; | |||
return pthread_mutex_init(*mutex, attr); | |||
} | |||
int ZSTD_pthread_mutex_destroy(ZSTD_pthread_mutex_t* mutex) | |||
{ | |||
if (!*mutex) | |||
return 0; | |||
{ | |||
int const ret = pthread_mutex_destroy(*mutex); | |||
free(*mutex); | |||
return ret; | |||
} | |||
} | |||
int ZSTD_pthread_cond_init(ZSTD_pthread_cond_t* cond, pthread_condattr_t const* attr) | |||
{ | |||
*cond = (pthread_cond_t*)malloc(sizeof(pthread_cond_t)); | |||
if (!*cond) | |||
return 1; | |||
return pthread_cond_init(*cond, attr); | |||
} | |||
int ZSTD_pthread_cond_destroy(ZSTD_pthread_cond_t* cond) | |||
{ | |||
if (!*cond) | |||
return 0; | |||
{ | |||
int const ret = pthread_cond_destroy(*cond); | |||
free(*cond); | |||
return ret; | |||
} | |||
} | |||
#endif |
@@ -1,155 +0,0 @@ | |||
/** | |||
* Copyright (c) 2016 Tino Reichardt | |||
* All rights reserved. | |||
* | |||
* You can contact the author at: | |||
* - zstdmt source repository: https://github.com/mcmilk/zstdmt | |||
* | |||
* This source code is licensed under both the BSD-style license (found in the | |||
* LICENSE file in the root directory of this source tree) and the GPLv2 (found | |||
* in the COPYING file in the root directory of this source tree). | |||
* You may select, at your option, one of the above-listed licenses. | |||
*/ | |||
#ifndef THREADING_H_938743 | |||
#define THREADING_H_938743 | |||
#include "debug.h" | |||
#if defined (__cplusplus) | |||
extern "C" { | |||
#endif | |||
#if defined(ZSTD_MULTITHREAD) && defined(_WIN32) | |||
/** | |||
* Windows minimalist Pthread Wrapper, based on : | |||
* http://www.cse.wustl.edu/~schmidt/win32-cv-1.html | |||
*/ | |||
#ifdef WINVER | |||
# undef WINVER | |||
#endif | |||
#define WINVER 0x0600 | |||
#ifdef _WIN32_WINNT | |||
# undef _WIN32_WINNT | |||
#endif | |||
#define _WIN32_WINNT 0x0600 | |||
#ifndef WIN32_LEAN_AND_MEAN | |||
# define WIN32_LEAN_AND_MEAN | |||
#endif | |||
#undef ERROR /* reported already defined on VS 2015 (Rich Geldreich) */ | |||
#include <windows.h> | |||
#undef ERROR | |||
#define ERROR(name) ZSTD_ERROR(name) | |||
/* mutex */ | |||
#define ZSTD_pthread_mutex_t CRITICAL_SECTION | |||
#define ZSTD_pthread_mutex_init(a, b) ((void)(b), InitializeCriticalSection((a)), 0) | |||
#define ZSTD_pthread_mutex_destroy(a) DeleteCriticalSection((a)) | |||
#define ZSTD_pthread_mutex_lock(a) EnterCriticalSection((a)) | |||
#define ZSTD_pthread_mutex_unlock(a) LeaveCriticalSection((a)) | |||
/* condition variable */ | |||
#define ZSTD_pthread_cond_t CONDITION_VARIABLE | |||
#define ZSTD_pthread_cond_init(a, b) ((void)(b), InitializeConditionVariable((a)), 0) | |||
#define ZSTD_pthread_cond_destroy(a) ((void)(a)) | |||
#define ZSTD_pthread_cond_wait(a, b) SleepConditionVariableCS((a), (b), INFINITE) | |||
#define ZSTD_pthread_cond_signal(a) WakeConditionVariable((a)) | |||
#define ZSTD_pthread_cond_broadcast(a) WakeAllConditionVariable((a)) | |||
/* ZSTD_pthread_create() and ZSTD_pthread_join() */ | |||
typedef struct { | |||
HANDLE handle; | |||
void* (*start_routine)(void*); | |||
void* arg; | |||
} ZSTD_pthread_t; | |||
int ZSTD_pthread_create(ZSTD_pthread_t* thread, const void* unused, | |||
void* (*start_routine) (void*), void* arg); | |||
int ZSTD_pthread_join(ZSTD_pthread_t thread, void** value_ptr); | |||
/** | |||
* add here more wrappers as required | |||
*/ | |||
#elif defined(ZSTD_MULTITHREAD) /* posix assumed ; need a better detection method */ | |||
/* === POSIX Systems === */ | |||
# include <pthread.h> | |||
#if DEBUGLEVEL < 1 | |||
#define ZSTD_pthread_mutex_t pthread_mutex_t | |||
#define ZSTD_pthread_mutex_init(a, b) pthread_mutex_init((a), (b)) | |||
#define ZSTD_pthread_mutex_destroy(a) pthread_mutex_destroy((a)) | |||
#define ZSTD_pthread_mutex_lock(a) pthread_mutex_lock((a)) | |||
#define ZSTD_pthread_mutex_unlock(a) pthread_mutex_unlock((a)) | |||
#define ZSTD_pthread_cond_t pthread_cond_t | |||
#define ZSTD_pthread_cond_init(a, b) pthread_cond_init((a), (b)) | |||
#define ZSTD_pthread_cond_destroy(a) pthread_cond_destroy((a)) | |||
#define ZSTD_pthread_cond_wait(a, b) pthread_cond_wait((a), (b)) | |||
#define ZSTD_pthread_cond_signal(a) pthread_cond_signal((a)) | |||
#define ZSTD_pthread_cond_broadcast(a) pthread_cond_broadcast((a)) | |||
#define ZSTD_pthread_t pthread_t | |||
#define ZSTD_pthread_create(a, b, c, d) pthread_create((a), (b), (c), (d)) | |||
#define ZSTD_pthread_join(a, b) pthread_join((a),(b)) | |||
#else /* DEBUGLEVEL >= 1 */ | |||
/* Debug implementation of threading. | |||
* In this implementation we use pointers for mutexes and condition variables. | |||
* This way, if we forget to init/destroy them the program will crash or ASAN | |||
* will report leaks. | |||
*/ | |||
#define ZSTD_pthread_mutex_t pthread_mutex_t* | |||
int ZSTD_pthread_mutex_init(ZSTD_pthread_mutex_t* mutex, pthread_mutexattr_t const* attr); | |||
int ZSTD_pthread_mutex_destroy(ZSTD_pthread_mutex_t* mutex); | |||
#define ZSTD_pthread_mutex_lock(a) pthread_mutex_lock(*(a)) | |||
#define ZSTD_pthread_mutex_unlock(a) pthread_mutex_unlock(*(a)) | |||
#define ZSTD_pthread_cond_t pthread_cond_t* | |||
int ZSTD_pthread_cond_init(ZSTD_pthread_cond_t* cond, pthread_condattr_t const* attr); | |||
int ZSTD_pthread_cond_destroy(ZSTD_pthread_cond_t* cond); | |||
#define ZSTD_pthread_cond_wait(a, b) pthread_cond_wait(*(a), *(b)) | |||
#define ZSTD_pthread_cond_signal(a) pthread_cond_signal(*(a)) | |||
#define ZSTD_pthread_cond_broadcast(a) pthread_cond_broadcast(*(a)) | |||
#define ZSTD_pthread_t pthread_t | |||
#define ZSTD_pthread_create(a, b, c, d) pthread_create((a), (b), (c), (d)) | |||
#define ZSTD_pthread_join(a, b) pthread_join((a),(b)) | |||
#endif | |||
#else /* ZSTD_MULTITHREAD not defined */ | |||
/* No multithreading support */ | |||
typedef int ZSTD_pthread_mutex_t; | |||
#define ZSTD_pthread_mutex_init(a, b) ((void)(a), (void)(b), 0) | |||
#define ZSTD_pthread_mutex_destroy(a) ((void)(a)) | |||
#define ZSTD_pthread_mutex_lock(a) ((void)(a)) | |||
#define ZSTD_pthread_mutex_unlock(a) ((void)(a)) | |||
typedef int ZSTD_pthread_cond_t; | |||
#define ZSTD_pthread_cond_init(a, b) ((void)(a), (void)(b), 0) | |||
#define ZSTD_pthread_cond_destroy(a) ((void)(a)) | |||
#define ZSTD_pthread_cond_wait(a, b) ((void)(a), (void)(b)) | |||
#define ZSTD_pthread_cond_signal(a) ((void)(a)) | |||
#define ZSTD_pthread_cond_broadcast(a) ((void)(a)) | |||
/* do not use ZSTD_pthread_t */ | |||
#endif /* ZSTD_MULTITHREAD */ | |||
#if defined (__cplusplus) | |||
} | |||
#endif | |||
#endif /* THREADING_H_938743 */ |
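The value of the removed wrapper is that call sites stay identical across the Win32, POSIX, and single-threaded branches; with ZSTD_MULTITHREAD undefined (as in rspamd's build, which also drops threading.c), every operation collapses to a no-op. A minimal sketch, assuming the single-threaded macro definitions above are in scope; the demo_pool_* names are mine:

/* Illustrative consumer of the ZSTD_pthread_* macros. With
 * ZSTD_MULTITHREAD undefined, ZSTD_pthread_mutex_t is a plain int and
 * every call below expands to ((void)(...)): the "locking" compiles
 * away entirely in a single-threaded build. */
typedef struct {
    ZSTD_pthread_mutex_t lock;
    size_t jobsDone;
} demo_pool_t;

static int demo_pool_init(demo_pool_t* p)
{
    p->jobsDone = 0;
    return ZSTD_pthread_mutex_init(&p->lock, NULL);  /* 0 on success */
}

static void demo_pool_bump(demo_pool_t* p)
{
    ZSTD_pthread_mutex_lock(&p->lock);
    p->jobsDone++;   /* critical section in MT builds, plain store otherwise */
    ZSTD_pthread_mutex_unlock(&p->lock);
}

static void demo_pool_destroy(demo_pool_t* p)
{
    ZSTD_pthread_mutex_destroy(&p->lock);
}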
@@ -0,0 +1,474 @@ | |||
/* | |||
* Copyright (c) Meta Platforms, Inc. and affiliates. | |||
* All rights reserved. | |||
* | |||
* This source code is licensed under both the BSD-style license (found in the | |||
* LICENSE file in the root directory of this source tree) and the GPLv2 (found | |||
* in the COPYING file in the root directory of this source tree). | |||
* You may select, at your option, one of the above-listed licenses. | |||
*/ | |||
#if defined (__cplusplus) | |||
extern "C" { | |||
#endif | |||
#ifndef ZSTD_ZDICT_H | |||
#define ZSTD_ZDICT_H | |||
/*====== Dependencies ======*/ | |||
#include <stddef.h> /* size_t */ | |||
/* ===== ZDICTLIB_API : control library symbols visibility ===== */ | |||
#ifndef ZDICTLIB_VISIBLE | |||
/* Backwards compatibility with old macro name */ | |||
# ifdef ZDICTLIB_VISIBILITY | |||
# define ZDICTLIB_VISIBLE ZDICTLIB_VISIBILITY | |||
# elif defined(__GNUC__) && (__GNUC__ >= 4) && !defined(__MINGW32__) | |||
# define ZDICTLIB_VISIBLE __attribute__ ((visibility ("default"))) | |||
# else | |||
# define ZDICTLIB_VISIBLE | |||
# endif | |||
#endif | |||
#ifndef ZDICTLIB_HIDDEN | |||
# if defined(__GNUC__) && (__GNUC__ >= 4) && !defined(__MINGW32__) | |||
# define ZDICTLIB_HIDDEN __attribute__ ((visibility ("hidden"))) | |||
# else | |||
# define ZDICTLIB_HIDDEN | |||
# endif | |||
#endif | |||
#if defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1) | |||
# define ZDICTLIB_API __declspec(dllexport) ZDICTLIB_VISIBLE | |||
#elif defined(ZSTD_DLL_IMPORT) && (ZSTD_DLL_IMPORT==1) | |||
#  define ZDICTLIB_API __declspec(dllimport) ZDICTLIB_VISIBLE /* It isn't required, but it allows the compiler to generate better code, saving a function pointer load from the IAT and an indirect jump. */ | |||
#else | |||
# define ZDICTLIB_API ZDICTLIB_VISIBLE | |||
#endif | |||
/******************************************************************************* | |||
* Zstd dictionary builder | |||
* | |||
* FAQ | |||
* === | |||
* Why should I use a dictionary? | |||
* ------------------------------ | |||
* | |||
* Zstd can use dictionaries to improve compression ratio of small data. | |||
* Traditionally small files don't compress well because there is very little | |||
* repetition in a single sample, since it is small. But, if you are compressing | |||
* many similar files, like a bunch of JSON records that share the same | |||
 * structure, you can train a dictionary ahead of time on some samples of | |||
* these files. Then, zstd can use the dictionary to find repetitions that are | |||
* present across samples. This can vastly improve compression ratio. | |||
* | |||
* When is a dictionary useful? | |||
* ---------------------------- | |||
* | |||
* Dictionaries are useful when compressing many small files that are similar. | |||
* The larger a file is, the less benefit a dictionary will have. Generally, | |||
* we don't expect dictionary compression to be effective past 100KB. And the | |||
* smaller a file is, the more we would expect the dictionary to help. | |||
* | |||
* How do I use a dictionary? | |||
* -------------------------- | |||
* | |||
* Simply pass the dictionary to the zstd compressor with | |||
* `ZSTD_CCtx_loadDictionary()`. The same dictionary must then be passed to | |||
* the decompressor, using `ZSTD_DCtx_loadDictionary()`. There are other | |||
* more advanced functions that allow selecting some options, see zstd.h for | |||
* complete documentation. | |||
* | |||
* What is a zstd dictionary? | |||
* -------------------------- | |||
* | |||
* A zstd dictionary has two pieces: Its header, and its content. The header | |||
* contains a magic number, the dictionary ID, and entropy tables. These | |||
* entropy tables allow zstd to save on header costs in the compressed file, | |||
* which really matters for small data. The content is just bytes, which are | |||
* repeated content that is common across many samples. | |||
* | |||
* What is a raw content dictionary? | |||
* --------------------------------- | |||
* | |||
* A raw content dictionary is just bytes. It doesn't have a zstd dictionary | |||
* header, a dictionary ID, or entropy tables. Any buffer is a valid raw | |||
* content dictionary. | |||
* | |||
* How do I train a dictionary? | |||
* ---------------------------- | |||
* | |||
* Gather samples from your use case. These samples should be similar to each | |||
* other. If you have several use cases, you could try to train one dictionary | |||
* per use case. | |||
* | |||
* Pass those samples to `ZDICT_trainFromBuffer()` and that will train your | |||
* dictionary. There are a few advanced versions of this function, but this | |||
* is a great starting point. If you want to further tune your dictionary | |||
* you could try `ZDICT_optimizeTrainFromBuffer_cover()`. If that is too slow | |||
* you can try `ZDICT_optimizeTrainFromBuffer_fastCover()`. | |||
* | |||
* If the dictionary training function fails, that is likely because you | |||
* either passed too few samples, or a dictionary would not be effective | |||
 * for your data. Look at the messages that the dictionary trainer printed; | |||
 * if it doesn't say "too few samples", then a dictionary would not be effective. | |||
* | |||
* How large should my dictionary be? | |||
* ---------------------------------- | |||
* | |||
* A reasonable dictionary size, the `dictBufferCapacity`, is about 100KB. | |||
* The zstd CLI defaults to a 110KB dictionary. You likely don't need a | |||
* dictionary larger than that. But, most use cases can get away with a | |||
* smaller dictionary. The advanced dictionary builders can automatically | |||
* shrink the dictionary for you, and select the smallest size that doesn't | |||
* hurt compression ratio too much. See the `shrinkDict` parameter. | |||
* A smaller dictionary can save memory, and potentially speed up | |||
* compression. | |||
* | |||
* How many samples should I provide to the dictionary builder? | |||
* ------------------------------------------------------------ | |||
* | |||
* We generally recommend passing ~100x the size of the dictionary | |||
* in samples. A few thousand should suffice. Having too few samples | |||
 * can hurt the dictionary's effectiveness. Having more samples will | |||
 * only improve the dictionary's effectiveness. But having too many | |||
* samples can slow down the dictionary builder. | |||
* | |||
* How do I determine if a dictionary will be effective? | |||
* ----------------------------------------------------- | |||
* | |||
* Simply train a dictionary and try it out. You can use zstd's built in | |||
* benchmarking tool to test the dictionary effectiveness. | |||
* | |||
* # Benchmark levels 1-3 without a dictionary | |||
* zstd -b1e3 -r /path/to/my/files | |||
* # Benchmark levels 1-3 with a dictionary | |||
* zstd -b1e3 -r /path/to/my/files -D /path/to/my/dictionary | |||
* | |||
* When should I retrain a dictionary? | |||
* ----------------------------------- | |||
* | |||
* You should retrain a dictionary when its effectiveness drops. Dictionary | |||
* effectiveness drops as the data you are compressing changes. Generally, we do | |||
* expect dictionaries to "decay" over time, as your data changes, but the rate | |||
* at which they decay depends on your use case. Internally, we regularly | |||
* retrain dictionaries, and if the new dictionary performs significantly | |||
* better than the old dictionary, we will ship the new dictionary. | |||
* | |||
* I have a raw content dictionary, how do I turn it into a zstd dictionary? | |||
* ------------------------------------------------------------------------- | |||
* | |||
* If you have a raw content dictionary, e.g. by manually constructing it, or | |||
* using a third-party dictionary builder, you can turn it into a zstd | |||
* dictionary by using `ZDICT_finalizeDictionary()`. You'll also have to | |||
* provide some samples of the data. It will add the zstd header to the | |||
* raw content, which contains a dictionary ID and entropy tables, which | |||
* will improve compression ratio, and allow zstd to write the dictionary ID | |||
* into the frame, if you so choose. | |||
* | |||
* Do I have to use zstd's dictionary builder? | |||
* ------------------------------------------- | |||
* | |||
* No! You can construct dictionary content however you please, it is just | |||
* bytes. It will always be valid as a raw content dictionary. If you want | |||
* a zstd dictionary, which can improve compression ratio, use | |||
* `ZDICT_finalizeDictionary()`. | |||
* | |||
* What is the attack surface of a zstd dictionary? | |||
* ------------------------------------------------ | |||
* | |||
* Zstd is heavily fuzz tested, including loading fuzzed dictionaries, so | |||
* zstd should never crash, or access out-of-bounds memory no matter what | |||
* the dictionary is. However, if an attacker can control the dictionary | |||
* during decompression, they can cause zstd to generate arbitrary bytes, | |||
* just like if they controlled the compressed data. | |||
* | |||
******************************************************************************/ | |||
/*! ZDICT_trainFromBuffer(): | |||
* Train a dictionary from an array of samples. | |||
 *  Redirects to ZDICT_optimizeTrainFromBuffer_fastCover(), single-threaded, with d=8, steps=4, | |||
* f=20, and accel=1. | |||
* Samples must be stored concatenated in a single flat buffer `samplesBuffer`, | |||
* supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order. | |||
* The resulting dictionary will be saved into `dictBuffer`. | |||
* @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`) | |||
* or an error code, which can be tested with ZDICT_isError(). | |||
* Note: Dictionary training will fail if there are not enough samples to construct a | |||
* dictionary, or if most of the samples are too small (< 8 bytes being the lower limit). | |||
* If dictionary training fails, you should use zstd without a dictionary, as the dictionary | |||
 *        would've been ineffective anyway. If you believe your samples would benefit from a dictionary, | |||
* please open an issue with details, and we can look into it. | |||
* Note: ZDICT_trainFromBuffer()'s memory usage is about 6 MB. | |||
* Tips: In general, a reasonable dictionary has a size of ~ 100 KB. | |||
 *        It's possible to select a smaller or larger size, just by specifying `dictBufferCapacity`. | |||
 *        In general, it's recommended to provide a few thousand samples, though this can vary a lot. | |||
 *        It's recommended that the total size of all samples be about 100x the target size of the dictionary. | |||
*/ | |||
ZDICTLIB_API size_t ZDICT_trainFromBuffer(void* dictBuffer, size_t dictBufferCapacity, | |||
const void* samplesBuffer, | |||
const size_t* samplesSizes, unsigned nbSamples); | |||
typedef struct { | |||
int compressionLevel; /**< optimize for a specific zstd compression level; 0 means default */ | |||
unsigned notificationLevel; /**< Write log to stderr; 0 = none (default); 1 = errors; 2 = progression; 3 = details; 4 = debug; */ | |||
    unsigned dictID;            /**< force dictID value; 0 means auto mode (32-bit random value) | |||
* NOTE: The zstd format reserves some dictionary IDs for future use. | |||
* You may use them in private settings, but be warned that they | |||
* may be used by zstd in a public dictionary registry in the future. | |||
* These dictionary IDs are: | |||
* - low range : <= 32767 | |||
* - high range : >= (2^31) | |||
*/ | |||
} ZDICT_params_t; | |||
/*! ZDICT_finalizeDictionary(): | |||
* Given a custom content as a basis for dictionary, and a set of samples, | |||
* finalize dictionary by adding headers and statistics according to the zstd | |||
* dictionary format. | |||
* | |||
* Samples must be stored concatenated in a flat buffer `samplesBuffer`, | |||
* supplied with an array of sizes `samplesSizes`, providing the size of each | |||
* sample in order. The samples are used to construct the statistics, so they | |||
* should be representative of what you will compress with this dictionary. | |||
* | |||
* The compression level can be set in `parameters`. You should pass the | |||
* compression level you expect to use in production. The statistics for each | |||
* compression level differ, so tuning the dictionary for the compression level | |||
* can help quite a bit. | |||
* | |||
* You can set an explicit dictionary ID in `parameters`, or allow us to pick | |||
* a random dictionary ID for you, but we can't guarantee no collisions. | |||
* | |||
* The dstDictBuffer and the dictContent may overlap, and the content will be | |||
 * appended to the end of the header. If the header + the content doesn't fit in | |||
 * maxDictSize, the beginning of the content is truncated to make room, on the | |||
 * presumption that the most profitable content is at the end of the dictionary, | |||
 * since that is the cheapest to reference. | |||
* | |||
 * `maxDictSize` must be >= max(dictContentSize, ZDICT_DICTSIZE_MIN). | |||
* | |||
* @return: size of dictionary stored into `dstDictBuffer` (<= `maxDictSize`), | |||
* or an error code, which can be tested by ZDICT_isError(). | |||
* Note: ZDICT_finalizeDictionary() will push notifications into stderr if | |||
* instructed to, using notificationLevel>0. | |||
* NOTE: This function currently may fail in several edge cases including: | |||
* * Not enough samples | |||
* * Samples are uncompressible | |||
* * Samples are all exactly the same | |||
*/ | |||
ZDICTLIB_API size_t ZDICT_finalizeDictionary(void* dstDictBuffer, size_t maxDictSize, | |||
const void* dictContent, size_t dictContentSize, | |||
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples, | |||
ZDICT_params_t parameters); | |||
/*====== Helper functions ======*/ | |||
ZDICTLIB_API unsigned ZDICT_getDictID(const void* dictBuffer, size_t dictSize); /**< extracts dictID; @return zero if error (not a valid dictionary) */ | |||
ZDICTLIB_API size_t ZDICT_getDictHeaderSize(const void* dictBuffer, size_t dictSize); /* returns dict header size; returns a ZSTD error code on failure */ | |||
ZDICTLIB_API unsigned ZDICT_isError(size_t errorCode); | |||
ZDICTLIB_API const char* ZDICT_getErrorName(size_t errorCode); | |||
#endif /* ZSTD_ZDICT_H */ | |||
#if defined(ZDICT_STATIC_LINKING_ONLY) && !defined(ZSTD_ZDICT_H_STATIC) | |||
#define ZSTD_ZDICT_H_STATIC | |||
/* This can be overridden externally to hide static symbols. */ | |||
#ifndef ZDICTLIB_STATIC_API | |||
# if defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1) | |||
# define ZDICTLIB_STATIC_API __declspec(dllexport) ZDICTLIB_VISIBLE | |||
# elif defined(ZSTD_DLL_IMPORT) && (ZSTD_DLL_IMPORT==1) | |||
# define ZDICTLIB_STATIC_API __declspec(dllimport) ZDICTLIB_VISIBLE | |||
# else | |||
# define ZDICTLIB_STATIC_API ZDICTLIB_VISIBLE | |||
# endif | |||
#endif | |||
/* ==================================================================================== | |||
* The definitions in this section are considered experimental. | |||
* They should never be used with a dynamic library, as they may change in the future. | |||
* They are provided for advanced usages. | |||
* Use them only in association with static linking. | |||
* ==================================================================================== */ | |||
#define ZDICT_DICTSIZE_MIN 256 | |||
/* Deprecated: Remove in v1.6.0 */ | |||
#define ZDICT_CONTENTSIZE_MIN 128 | |||
/*! ZDICT_cover_params_t: | |||
* k and d are the only required parameters. | |||
* For others, value 0 means default. | |||
*/ | |||
typedef struct { | |||
unsigned k; /* Segment size : constraint: 0 < k : Reasonable range [16, 2048+] */ | |||
unsigned d; /* dmer size : constraint: 0 < d <= k : Reasonable range [6, 16] */ | |||
unsigned steps; /* Number of steps : Only used for optimization : 0 means default (40) : Higher means more parameters checked */ | |||
unsigned nbThreads; /* Number of threads : constraint: 0 < nbThreads : 1 means single-threaded : Only used for optimization : Ignored if ZSTD_MULTITHREAD is not defined */ | |||
    double splitPoint;       /* Percentage of samples used for training: Only used for optimization : the first nbSamples * splitPoint samples will be used for training, the last nbSamples * (1 - splitPoint) samples will be used for testing, 0 means default (1.0), 1.0 when all samples are used for both training and testing */ | |||
    unsigned shrinkDict;     /* Train dictionaries to shrink in size starting from the minimum size and select the smallest dictionary that is shrinkDictMaxRegression% worse than the largest dictionary. 0 means no shrinking and 1 means shrinking */ | |||
    unsigned shrinkDictMaxRegression; /* Sets shrinkDictMaxRegression so that a smaller dictionary can be at worst shrinkDictMaxRegression% worse than the max dict size dictionary. */ | |||
ZDICT_params_t zParams; | |||
} ZDICT_cover_params_t; | |||
typedef struct { | |||
unsigned k; /* Segment size : constraint: 0 < k : Reasonable range [16, 2048+] */ | |||
unsigned d; /* dmer size : constraint: 0 < d <= k : Reasonable range [6, 16] */ | |||
    unsigned f;     /* log of size of frequency array : constraint: 0 < f <= 31 : 0 means default (20) */ | |||
unsigned steps; /* Number of steps : Only used for optimization : 0 means default (40) : Higher means more parameters checked */ | |||
unsigned nbThreads; /* Number of threads : constraint: 0 < nbThreads : 1 means single-threaded : Only used for optimization : Ignored if ZSTD_MULTITHREAD is not defined */ | |||
    double splitPoint;        /* Percentage of samples used for training: Only used for optimization : the first nbSamples * splitPoint samples will be used for training, the last nbSamples * (1 - splitPoint) samples will be used for testing, 0 means default (0.75), 1.0 when all samples are used for both training and testing */ | |||
    unsigned accel;           /* Acceleration level: constraint: 0 < accel <= 10, higher means faster and less accurate, 0 means default (1) */ | |||
    unsigned shrinkDict;      /* Train dictionaries to shrink in size starting from the minimum size and select the smallest dictionary that is shrinkDictMaxRegression% worse than the largest dictionary. 0 means no shrinking and 1 means shrinking */ | |||
    unsigned shrinkDictMaxRegression; /* Sets shrinkDictMaxRegression so that a smaller dictionary can be at worst shrinkDictMaxRegression% worse than the max dict size dictionary. */ | |||
ZDICT_params_t zParams; | |||
} ZDICT_fastCover_params_t; | |||
/*! ZDICT_trainFromBuffer_cover(): | |||
* Train a dictionary from an array of samples using the COVER algorithm. | |||
* Samples must be stored concatenated in a single flat buffer `samplesBuffer`, | |||
* supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order. | |||
* The resulting dictionary will be saved into `dictBuffer`. | |||
* @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`) | |||
* or an error code, which can be tested with ZDICT_isError(). | |||
* See ZDICT_trainFromBuffer() for details on failure modes. | |||
* Note: ZDICT_trainFromBuffer_cover() requires about 9 bytes of memory for each input byte. | |||
* Tips: In general, a reasonable dictionary has a size of ~ 100 KB. | |||
 *        It's possible to select a smaller or larger size, just by specifying `dictBufferCapacity`. | |||
 *        In general, it's recommended to provide a few thousand samples, though this can vary a lot. | |||
 *        It's recommended that the total size of all samples be about 100x the target size of the dictionary. | |||
*/ | |||
ZDICTLIB_STATIC_API size_t ZDICT_trainFromBuffer_cover( | |||
void *dictBuffer, size_t dictBufferCapacity, | |||
const void *samplesBuffer, const size_t *samplesSizes, unsigned nbSamples, | |||
ZDICT_cover_params_t parameters); | |||
/*! ZDICT_optimizeTrainFromBuffer_cover(): | |||
* The same requirements as above hold for all the parameters except `parameters`. | |||
* This function tries many parameter combinations and picks the best parameters. | |||
* `*parameters` is filled with the best parameters found, | |||
 * and the dictionary constructed with those parameters is stored in `dictBuffer`. | |||
* | |||
 * All of the parameters d, k, steps are optional. | |||
 * If d is non-zero then we don't check multiple values of d, otherwise we check d = {6, 8}. | |||
 * If steps is zero, it defaults to 40. | |||
 * If k is non-zero then we don't check multiple values of k, otherwise we check `steps` values of k in [50, 2000]. | |||
* | |||
* @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`) | |||
* or an error code, which can be tested with ZDICT_isError(). | |||
* On success `*parameters` contains the parameters selected. | |||
* See ZDICT_trainFromBuffer() for details on failure modes. | |||
 * Note: ZDICT_optimizeTrainFromBuffer_cover() requires about 8 bytes of memory for each input byte, plus an additional 5 bytes per input byte for each thread. | |||
*/ | |||
ZDICTLIB_STATIC_API size_t ZDICT_optimizeTrainFromBuffer_cover( | |||
void* dictBuffer, size_t dictBufferCapacity, | |||
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples, | |||
ZDICT_cover_params_t* parameters); | |||
/*! ZDICT_trainFromBuffer_fastCover(): | |||
* Train a dictionary from an array of samples using a modified version of COVER algorithm. | |||
* Samples must be stored concatenated in a single flat buffer `samplesBuffer`, | |||
* supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order. | |||
* d and k are required. | |||
 * All other parameters are optional and will use default values if not provided. | |||
* The resulting dictionary will be saved into `dictBuffer`. | |||
* @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`) | |||
* or an error code, which can be tested with ZDICT_isError(). | |||
* See ZDICT_trainFromBuffer() for details on failure modes. | |||
* Note: ZDICT_trainFromBuffer_fastCover() requires 6 * 2^f bytes of memory. | |||
* Tips: In general, a reasonable dictionary has a size of ~ 100 KB. | |||
 *        It's possible to select a smaller or larger size, just by specifying `dictBufferCapacity`. | |||
 *        In general, it's recommended to provide a few thousand samples, though this can vary a lot. | |||
 *        It's recommended that the total size of all samples be about 100x the target size of the dictionary. | |||
*/ | |||
ZDICTLIB_STATIC_API size_t ZDICT_trainFromBuffer_fastCover(void *dictBuffer, | |||
size_t dictBufferCapacity, const void *samplesBuffer, | |||
const size_t *samplesSizes, unsigned nbSamples, | |||
ZDICT_fastCover_params_t parameters); | |||
/*! ZDICT_optimizeTrainFromBuffer_fastCover(): | |||
* The same requirements as above hold for all the parameters except `parameters`. | |||
* This function tries many parameter combinations (specifically, k and d combinations) | |||
* and picks the best parameters. `*parameters` is filled with the best parameters found, | |||
 * and the dictionary constructed with those parameters is stored in `dictBuffer`. | |||
* All of the parameters d, k, steps, f, and accel are optional. | |||
* If d is non-zero then we don't check multiple values of d, otherwise we check d = {6, 8}. | |||
 * If steps is zero, it defaults to 40. | |||
 * If k is non-zero then we don't check multiple values of k, otherwise we check `steps` values of k in [50, 2000]. | |||
* If f is zero, default value of 20 is used. | |||
* If accel is zero, default value of 1 is used. | |||
* | |||
* @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`) | |||
* or an error code, which can be tested with ZDICT_isError(). | |||
* On success `*parameters` contains the parameters selected. | |||
* See ZDICT_trainFromBuffer() for details on failure modes. | |||
* Note: ZDICT_optimizeTrainFromBuffer_fastCover() requires about 6 * 2^f bytes of memory for each thread. | |||
*/ | |||
ZDICTLIB_STATIC_API size_t ZDICT_optimizeTrainFromBuffer_fastCover(void* dictBuffer, | |||
size_t dictBufferCapacity, const void* samplesBuffer, | |||
const size_t* samplesSizes, unsigned nbSamples, | |||
ZDICT_fastCover_params_t* parameters); | |||
typedef struct { | |||
unsigned selectivityLevel; /* 0 means default; larger => select more => larger dictionary */ | |||
ZDICT_params_t zParams; | |||
} ZDICT_legacy_params_t; | |||
/*! ZDICT_trainFromBuffer_legacy(): | |||
* Train a dictionary from an array of samples. | |||
* Samples must be stored concatenated in a single flat buffer `samplesBuffer`, | |||
* supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order. | |||
* The resulting dictionary will be saved into `dictBuffer`. | |||
* `parameters` is optional and can be provided with values set to 0 to mean "default". | |||
* @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`) | |||
* or an error code, which can be tested with ZDICT_isError(). | |||
* See ZDICT_trainFromBuffer() for details on failure modes. | |||
* Tips: In general, a reasonable dictionary has a size of ~ 100 KB. | |||
 *        It's possible to select a smaller or larger size, just by specifying `dictBufferCapacity`. | |||
 *        In general, it's recommended to provide a few thousand samples, though this can vary a lot. | |||
 *        It's recommended that the total size of all samples be about 100x the target size of the dictionary. | |||
* Note: ZDICT_trainFromBuffer_legacy() will send notifications into stderr if instructed to, using notificationLevel>0. | |||
*/ | |||
ZDICTLIB_STATIC_API size_t ZDICT_trainFromBuffer_legacy( | |||
void* dictBuffer, size_t dictBufferCapacity, | |||
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples, | |||
ZDICT_legacy_params_t parameters); | |||
/* Deprecation warnings */ | |||
/* It is generally possible to disable deprecation warnings from the compiler, | |||
   for example with -Wno-deprecated-declarations for gcc | |||
   or _CRT_SECURE_NO_WARNINGS in Visual Studio. | |||
Otherwise, it's also possible to manually define ZDICT_DISABLE_DEPRECATE_WARNINGS */ | |||
#ifdef ZDICT_DISABLE_DEPRECATE_WARNINGS | |||
# define ZDICT_DEPRECATED(message) /* disable deprecation warnings */ | |||
#else | |||
# define ZDICT_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) | |||
# if defined (__cplusplus) && (__cplusplus >= 201402) /* C++14 or greater */ | |||
# define ZDICT_DEPRECATED(message) [[deprecated(message)]] | |||
# elif defined(__clang__) || (ZDICT_GCC_VERSION >= 405) | |||
# define ZDICT_DEPRECATED(message) __attribute__((deprecated(message))) | |||
# elif (ZDICT_GCC_VERSION >= 301) | |||
# define ZDICT_DEPRECATED(message) __attribute__((deprecated)) | |||
# elif defined(_MSC_VER) | |||
# define ZDICT_DEPRECATED(message) __declspec(deprecated(message)) | |||
# else | |||
# pragma message("WARNING: You need to implement ZDICT_DEPRECATED for this compiler") | |||
# define ZDICT_DEPRECATED(message) | |||
# endif | |||
#endif /* ZDICT_DISABLE_DEPRECATE_WARNINGS */ | |||
ZDICT_DEPRECATED("use ZDICT_finalizeDictionary() instead") | |||
ZDICTLIB_STATIC_API | |||
size_t ZDICT_addEntropyTablesFromBuffer(void* dictBuffer, size_t dictContentSize, size_t dictBufferCapacity, | |||
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples); | |||
#endif /* ZSTD_ZDICT_H_STATIC */ | |||
#if defined (__cplusplus) | |||
} | |||
#endif |
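The FAQ above describes the train-then-load workflow; here is a hedged end-to-end sketch, assuming the bundled zstd.h and zdict.h are on the include path. The synthetic JSON-like samples are illustrative, and training can legitimately fail on such toy inputs, per the failure modes documented above:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "zstd.h"
#include "zdict.h"

#define NB_SAMPLES 1000

int main(void)
{
    /* Build a concatenated buffer of similar, JSON-like records, as the
     * ZDICT contract requires (the sizes array describes consecutive samples). */
    size_t sampleSizes[NB_SAMPLES];
    char* const samples = malloc((size_t)NB_SAMPLES * 64);
    size_t const dictCapacity = 110 * 1024;   /* zstd CLI default, per the FAQ */
    void* const dict = malloc(dictCapacity);
    size_t offset = 0;
    unsigned i;
    for (i = 0; i < NB_SAMPLES; i++) {
        int const n = snprintf(samples + offset, 64,
                               "{\"id\":%u,\"status\":\"ok\",\"retries\":0}", i);
        sampleSizes[i] = (size_t)n;
        offset += (size_t)n;
    }

    {   size_t const dictSize = ZDICT_trainFromBuffer(dict, dictCapacity,
                                    samples, sampleSizes, NB_SAMPLES);
        if (ZDICT_isError(dictSize)) {
            fprintf(stderr, "training failed: %s\n", ZDICT_getErrorName(dictSize));
            return 1;
        }
        /* Compress one record with the trained dictionary */
        {   ZSTD_CCtx* const cctx = ZSTD_createCCtx();
            const char* const rec = "{\"id\":42,\"status\":\"ok\",\"retries\":0}";
            size_t const bound = ZSTD_compressBound(strlen(rec));
            void* const dst = malloc(bound);
            size_t const cSize = ZSTD_compress_usingDict(cctx, dst, bound,
                                     rec, strlen(rec), dict, dictSize, 3);
            if (!ZSTD_isError(cSize))
                printf("%zu -> %zu bytes with a %zu-byte dictionary\n",
                       strlen(rec), cSize, dictSize);
            ZSTD_freeCCtx(cctx);
            free(dst);
        }
    }
    free(samples);
    free(dict);
    return 0;
}

The same dictionary bytes must then be loaded on the decompression side, e.g. via ZSTD_decompress_usingDict(), as the FAQ notes.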
@@ -1,5 +1,5 @@ | |||
/* | |||
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. | |||
* Copyright (c) Meta Platforms, Inc. and affiliates. | |||
* All rights reserved. | |||
* | |||
* This source code is licensed under both the BSD-style license (found in the | |||
@@ -13,8 +13,8 @@ | |||
/*-************************************* | |||
* Dependencies | |||
***************************************/ | |||
#include <stdlib.h> /* malloc, calloc, free */ | |||
#include <string.h> /* memset */ | |||
#define ZSTD_DEPS_NEED_MALLOC | |||
#include "zstd_deps.h" /* ZSTD_malloc, ZSTD_calloc, ZSTD_free, ZSTD_memset */ | |||
#include "error_private.h" | |||
#include "zstd_internal.h" | |||
@@ -53,31 +53,31 @@ const char* ZSTD_getErrorString(ZSTD_ErrorCode code) { return ERR_getErrorString | |||
/*=************************************************************** | |||
* Custom allocator | |||
****************************************************************/ | |||
void* ZSTD_malloc(size_t size, ZSTD_customMem customMem) | |||
void* ZSTD_customMalloc(size_t size, ZSTD_customMem customMem) | |||
{ | |||
if (customMem.customAlloc) | |||
return customMem.customAlloc(customMem.opaque, size); | |||
return malloc(size); | |||
return ZSTD_malloc(size); | |||
} | |||
void* ZSTD_calloc(size_t size, ZSTD_customMem customMem) | |||
void* ZSTD_customCalloc(size_t size, ZSTD_customMem customMem) | |||
{ | |||
if (customMem.customAlloc) { | |||
/* calloc implemented as malloc+memset; | |||
* not as efficient as calloc, but next best guess for custom malloc */ | |||
void* const ptr = customMem.customAlloc(customMem.opaque, size); | |||
memset(ptr, 0, size); | |||
ZSTD_memset(ptr, 0, size); | |||
return ptr; | |||
} | |||
return calloc(1, size); | |||
return ZSTD_calloc(1, size); | |||
} | |||
void ZSTD_free(void* ptr, ZSTD_customMem customMem) | |||
void ZSTD_customFree(void* ptr, ZSTD_customMem customMem) | |||
{ | |||
if (ptr!=NULL) { | |||
if (customMem.customFree) | |||
customMem.customFree(customMem.opaque, ptr); | |||
else | |||
free(ptr); | |||
ZSTD_free(ptr); | |||
} | |||
} |
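The renamed ZSTD_customMalloc/ZSTD_customCalloc/ZSTD_customFree above dispatch to a user-supplied ZSTD_customMem. A small sketch routing zstd's allocations through counting wrappers via ZSTD_createCCtx_advanced(), which is part of the ZSTD_STATIC_LINKING_ONLY API that rspamd's build already enables; the counting harness is illustrative:

#define ZSTD_STATIC_LINKING_ONLY  /* ZSTD_createCCtx_advanced lives in the static-only API */
#include <stdio.h>
#include <stdlib.h>
#include "zstd.h"

typedef struct { size_t live; } alloc_stats_t;

/* These callbacks are what ZSTD_customMalloc/ZSTD_customFree end up calling. */
static void* counting_alloc(void* opaque, size_t size)
{
    alloc_stats_t* const st = (alloc_stats_t*)opaque;
    st->live++;
    return malloc(size);
}

static void counting_free(void* opaque, void* address)
{
    alloc_stats_t* const st = (alloc_stats_t*)opaque;
    if (address) st->live--;
    free(address);
}

int main(void)
{
    alloc_stats_t stats = { 0 };
    ZSTD_customMem const cmem = { counting_alloc, counting_free, &stats };
    ZSTD_CCtx* const cctx = ZSTD_createCCtx_advanced(cmem);
    printf("allocations outstanding after create: %zu\n", stats.live);
    ZSTD_freeCCtx(cctx);
    printf("allocations outstanding after free: %zu\n", stats.live);
    return 0;
}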
@@ -1,5 +1,5 @@ | |||
/* | |||
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. | |||
* Copyright (c) Meta Platforms, Inc. and affiliates. | |||
* All rights reserved. | |||
* | |||
* This source code is licensed under both the BSD-style license (found in the | |||
@@ -13,11 +13,36 @@ | |||
***************************************/ | |||
#include "zstd_compress_literals.h" | |||
/* ************************************************************** | |||
* Debug Traces | |||
****************************************************************/ | |||
#if DEBUGLEVEL >= 2 | |||
static size_t showHexa(const void* src, size_t srcSize) | |||
{ | |||
const BYTE* const ip = (const BYTE*)src; | |||
size_t u; | |||
for (u=0; u<srcSize; u++) { | |||
RAWLOG(5, " %02X", ip[u]); (void)ip; | |||
} | |||
RAWLOG(5, " \n"); | |||
return srcSize; | |||
} | |||
#endif | |||
/* ************************************************************** | |||
* Literals compression - special cases | |||
****************************************************************/ | |||
size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize) | |||
{ | |||
BYTE* const ostart = (BYTE* const)dst; | |||
BYTE* const ostart = (BYTE*)dst; | |||
U32 const flSize = 1 + (srcSize>31) + (srcSize>4095); | |||
DEBUGLOG(5, "ZSTD_noCompressLiterals: srcSize=%zu, dstCapacity=%zu", srcSize, dstCapacity); | |||
RETURN_ERROR_IF(srcSize + flSize > dstCapacity, dstSize_tooSmall, ""); | |||
switch(flSize) | |||
@@ -35,17 +60,31 @@ size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, | |||
assert(0); | |||
} | |||
memcpy(ostart + flSize, src, srcSize); | |||
DEBUGLOG(5, "Raw literals: %u -> %u", (U32)srcSize, (U32)(srcSize + flSize)); | |||
ZSTD_memcpy(ostart + flSize, src, srcSize); | |||
DEBUGLOG(5, "Raw (uncompressed) literals: %u -> %u", (U32)srcSize, (U32)(srcSize + flSize)); | |||
return srcSize + flSize; | |||
} | |||
static int allBytesIdentical(const void* src, size_t srcSize) | |||
{ | |||
assert(srcSize >= 1); | |||
assert(src != NULL); | |||
{ const BYTE b = ((const BYTE*)src)[0]; | |||
size_t p; | |||
for (p=1; p<srcSize; p++) { | |||
if (((const BYTE*)src)[p] != b) return 0; | |||
} | |||
return 1; | |||
} | |||
} | |||
size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize) | |||
{ | |||
BYTE* const ostart = (BYTE* const)dst; | |||
BYTE* const ostart = (BYTE*)dst; | |||
U32 const flSize = 1 + (srcSize>31) + (srcSize>4095); | |||
(void)dstCapacity; /* dstCapacity already guaranteed to be >=4, hence large enough */ | |||
assert(dstCapacity >= 4); (void)dstCapacity; | |||
assert(allBytesIdentical(src, srcSize)); | |||
switch(flSize) | |||
{ | |||
@@ -63,68 +102,103 @@ size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* | |||
} | |||
ostart[flSize] = *(const BYTE*)src; | |||
DEBUGLOG(5, "RLE literals: %u -> %u", (U32)srcSize, (U32)flSize + 1); | |||
DEBUGLOG(5, "RLE : Repeated Literal (%02X: %u times) -> %u bytes encoded", ((const BYTE*)src)[0], (U32)srcSize, (U32)flSize + 1); | |||
return flSize+1; | |||
} | |||
size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf, | |||
ZSTD_hufCTables_t* nextHuf, | |||
ZSTD_strategy strategy, int disableLiteralCompression, | |||
void* dst, size_t dstCapacity, | |||
const void* src, size_t srcSize, | |||
void* entropyWorkspace, size_t entropyWorkspaceSize, | |||
const int bmi2) | |||
/* ZSTD_minLiteralsToCompress() : | |||
 * returns the minimal number of literals | |||
* for literal compression to even be attempted. | |||
* Minimum is made tighter as compression strategy increases. | |||
*/ | |||
static size_t | |||
ZSTD_minLiteralsToCompress(ZSTD_strategy strategy, HUF_repeat huf_repeat) | |||
{ | |||
assert((int)strategy >= 0); | |||
assert((int)strategy <= 9); | |||
/* btultra2 : min 8 bytes; | |||
* then 2x larger for each successive compression strategy | |||
* max threshold 64 bytes */ | |||
{ int const shift = MIN(9-(int)strategy, 3); | |||
size_t const mintc = (huf_repeat == HUF_repeat_valid) ? 6 : (size_t)8 << shift; | |||
DEBUGLOG(7, "minLiteralsToCompress = %zu", mintc); | |||
return mintc; | |||
} | |||
} | |||
size_t ZSTD_compressLiterals ( | |||
void* dst, size_t dstCapacity, | |||
const void* src, size_t srcSize, | |||
void* entropyWorkspace, size_t entropyWorkspaceSize, | |||
const ZSTD_hufCTables_t* prevHuf, | |||
ZSTD_hufCTables_t* nextHuf, | |||
ZSTD_strategy strategy, | |||
int disableLiteralCompression, | |||
int suspectUncompressible, | |||
int bmi2) | |||
{ | |||
size_t const minGain = ZSTD_minGain(srcSize, strategy); | |||
size_t const lhSize = 3 + (srcSize >= 1 KB) + (srcSize >= 16 KB); | |||
BYTE* const ostart = (BYTE*)dst; | |||
U32 singleStream = srcSize < 256; | |||
symbolEncodingType_e hType = set_compressed; | |||
size_t cLitSize; | |||
DEBUGLOG(5,"ZSTD_compressLiterals (disableLiteralCompression=%i srcSize=%u)", | |||
disableLiteralCompression, (U32)srcSize); | |||
DEBUGLOG(5,"ZSTD_compressLiterals (disableLiteralCompression=%i, srcSize=%u, dstCapacity=%zu)", | |||
disableLiteralCompression, (U32)srcSize, dstCapacity); | |||
DEBUGLOG(6, "Completed literals listing (%zu bytes)", showHexa(src, srcSize)); | |||
/* Prepare nextEntropy assuming reusing the existing table */ | |||
memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); | |||
ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); | |||
if (disableLiteralCompression) | |||
return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize); | |||
/* small ? don't even attempt compression (speed opt) */ | |||
# define COMPRESS_LITERALS_SIZE_MIN 63 | |||
{ size_t const minLitSize = (prevHuf->repeatMode == HUF_repeat_valid) ? 6 : COMPRESS_LITERALS_SIZE_MIN; | |||
if (srcSize <= minLitSize) return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize); | |||
} | |||
/* if too small, don't even attempt compression (speed opt) */ | |||
if (srcSize < ZSTD_minLiteralsToCompress(strategy, prevHuf->repeatMode)) | |||
return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize); | |||
RETURN_ERROR_IF(dstCapacity < lhSize+1, dstSize_tooSmall, "not enough space for compression"); | |||
{ HUF_repeat repeat = prevHuf->repeatMode; | |||
int const preferRepeat = strategy < ZSTD_lazy ? srcSize <= 1024 : 0; | |||
int const flags = 0 | |||
| (bmi2 ? HUF_flags_bmi2 : 0) | |||
| (strategy < ZSTD_lazy && srcSize <= 1024 ? HUF_flags_preferRepeat : 0) | |||
| (strategy >= HUF_OPTIMAL_DEPTH_THRESHOLD ? HUF_flags_optimalDepth : 0) | |||
| (suspectUncompressible ? HUF_flags_suspectUncompressible : 0); | |||
typedef size_t (*huf_compress_f)(void*, size_t, const void*, size_t, unsigned, unsigned, void*, size_t, HUF_CElt*, HUF_repeat*, int); | |||
huf_compress_f huf_compress; | |||
if (repeat == HUF_repeat_valid && lhSize == 3) singleStream = 1; | |||
cLitSize = singleStream ? | |||
HUF_compress1X_repeat( | |||
ostart+lhSize, dstCapacity-lhSize, src, srcSize, | |||
HUF_SYMBOLVALUE_MAX, HUF_TABLELOG_DEFAULT, entropyWorkspace, entropyWorkspaceSize, | |||
(HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2) : | |||
HUF_compress4X_repeat( | |||
ostart+lhSize, dstCapacity-lhSize, src, srcSize, | |||
HUF_SYMBOLVALUE_MAX, HUF_TABLELOG_DEFAULT, entropyWorkspace, entropyWorkspaceSize, | |||
(HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2); | |||
huf_compress = singleStream ? HUF_compress1X_repeat : HUF_compress4X_repeat; | |||
cLitSize = huf_compress(ostart+lhSize, dstCapacity-lhSize, | |||
src, srcSize, | |||
HUF_SYMBOLVALUE_MAX, LitHufLog, | |||
entropyWorkspace, entropyWorkspaceSize, | |||
(HUF_CElt*)nextHuf->CTable, | |||
&repeat, flags); | |||
DEBUGLOG(5, "%zu literals compressed into %zu bytes (before header)", srcSize, cLitSize); | |||
if (repeat != HUF_repeat_none) { | |||
/* reused the existing table */ | |||
DEBUGLOG(5, "Reusing previous huffman table"); | |||
DEBUGLOG(5, "reusing statistics from previous huffman block"); | |||
hType = set_repeat; | |||
} | |||
} | |||
if ((cLitSize==0) | (cLitSize >= srcSize - minGain) | ERR_isError(cLitSize)) { | |||
memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); | |||
return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize); | |||
} | |||
{ size_t const minGain = ZSTD_minGain(srcSize, strategy); | |||
if ((cLitSize==0) || (cLitSize >= srcSize - minGain) || ERR_isError(cLitSize)) { | |||
ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); | |||
return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize); | |||
} } | |||
if (cLitSize==1) { | |||
memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); | |||
return ZSTD_compressRleLiteralsBlock(dst, dstCapacity, src, srcSize); | |||
} | |||
/* A return value of 1 signals that the alphabet consists of a single symbol. | |||
* However, in some rare circumstances, it could be the compressed size (a single byte). | |||
* For that outcome to have a chance to happen, it's necessary that `srcSize < 8`. | |||
* (it's also necessary to not generate statistics). | |||
* Therefore, in such a case, actively check that all bytes are identical. */ | |||
if ((srcSize >= 8) || allBytesIdentical(src, srcSize)) { | |||
ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); | |||
return ZSTD_compressRleLiteralsBlock(dst, dstCapacity, src, srcSize); | |||
} } | |||
if (hType == set_compressed) { | |||
/* using a newly constructed table */ | |||
@@ -135,16 +209,19 @@ size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf, | |||
switch(lhSize) | |||
{ | |||
case 3: /* 2 - 2 - 10 - 10 */ | |||
{ U32 const lhc = hType + ((!singleStream) << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<14); | |||
if (!singleStream) assert(srcSize >= MIN_LITERALS_FOR_4_STREAMS); | |||
{ U32 const lhc = hType + ((U32)(!singleStream) << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<14); | |||
MEM_writeLE24(ostart, lhc); | |||
break; | |||
} | |||
case 4: /* 2 - 2 - 14 - 14 */ | |||
assert(srcSize >= MIN_LITERALS_FOR_4_STREAMS); | |||
{ U32 const lhc = hType + (2 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<18); | |||
MEM_writeLE32(ostart, lhc); | |||
break; | |||
} | |||
case 5: /* 2 - 2 - 18 - 18 */ | |||
assert(srcSize >= MIN_LITERALS_FOR_4_STREAMS); | |||
{ U32 const lhc = hType + (3 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<22); | |||
MEM_writeLE32(ostart, lhc); | |||
ostart[4] = (BYTE)(cLitSize >> 10); |
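The switch above packs the literals-section header in one of three widths; a standalone sketch of the 3-byte (2-2-10-10) case with a matching unpacker, to make the field boundaries explicit. The helper names are mine, not zstd's:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* 3-byte literals header, little-endian: bits [0,2) block type,
 * bits [2,4) size format, bits [4,14) regenerated size (max 1023),
 * bits [14,24) compressed size (max 1023). Mirrors the lhSize==3 case. */
static void write_lh3(uint8_t* dst, uint32_t hType, uint32_t fourStreams,
                      uint32_t srcSize, uint32_t cLitSize)
{
    uint32_t const lhc = hType + (fourStreams << 2)
                       + (srcSize << 4) + (cLitSize << 14);
    assert(hType < 4 && fourStreams < 2 && srcSize < 1024 && cLitSize < 1024);
    dst[0] = (uint8_t)lhc;
    dst[1] = (uint8_t)(lhc >> 8);
    dst[2] = (uint8_t)(lhc >> 16);   /* MEM_writeLE24 equivalent */
}

static void read_lh3(const uint8_t* src, uint32_t* srcSize, uint32_t* cLitSize)
{
    uint32_t const lhc = (uint32_t)src[0] | ((uint32_t)src[1] << 8)
                       | ((uint32_t)src[2] << 16);
    *srcSize  = (lhc >> 4) & 0x3FF;
    *cLitSize = (lhc >> 14) & 0x3FF;
}

int main(void)
{
    uint8_t hdr[3];
    uint32_t rs, cs;
    write_lh3(hdr, 2 /* set_compressed */, 0 /* single stream */, 700, 300);
    read_lh3(hdr, &rs, &cs);
    assert(rs == 700 && cs == 300);
    printf("regenerated=%u compressed=%u\n", rs, cs);
    return 0;
}

The 4- and 5-byte cases follow the same pattern with 14- and 18-bit size fields, which is why they assert MIN_LITERALS_FOR_4_STREAMS in the patched code.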
@@ -1,5 +1,5 @@ | |||
/* | |||
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. | |||
* Copyright (c) Meta Platforms, Inc. and affiliates. | |||
* All rights reserved. | |||
* | |||
* This source code is licensed under both the BSD-style license (found in the | |||
@@ -16,14 +16,24 @@ | |||
size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize); | |||
/* ZSTD_compressRleLiteralsBlock() : | |||
* Conditions : | |||
* - All bytes in @src are identical | |||
* - dstCapacity >= 4 */ | |||
size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize); | |||
size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf, | |||
ZSTD_hufCTables_t* nextHuf, | |||
ZSTD_strategy strategy, int disableLiteralCompression, | |||
void* dst, size_t dstCapacity, | |||
/* ZSTD_compressLiterals(): | |||
* @entropyWorkspace: must be aligned on 4-bytes boundaries | |||
* @entropyWorkspaceSize : must be >= HUF_WORKSPACE_SIZE | |||
* @suspectUncompressible: sampling checks, to potentially skip huffman coding | |||
*/ | |||
size_t ZSTD_compressLiterals (void* dst, size_t dstCapacity, | |||
const void* src, size_t srcSize, | |||
void* entropyWorkspace, size_t entropyWorkspaceSize, | |||
const int bmi2); | |||
const ZSTD_hufCTables_t* prevHuf, | |||
ZSTD_hufCTables_t* nextHuf, | |||
ZSTD_strategy strategy, int disableLiteralCompression, | |||
int suspectUncompressible, | |||
int bmi2); | |||
#endif /* ZSTD_COMPRESS_LITERALS_H */ |
@@ -1,5 +1,5 @@ | |||
/* | |||
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. | |||
* Copyright (c) Meta Platforms, Inc. and affiliates. | |||
* All rights reserved. | |||
* | |||
* This source code is licensed under both the BSD-style license (found in the | |||
@@ -50,6 +50,19 @@ static unsigned ZSTD_getFSEMaxSymbolValue(FSE_CTable const* ctable) { | |||
return maxSymbolValue; | |||
} | |||
/** | |||
 * Returns true if we should use ncount=-1 for low probability symbols, | |||
 * else we should use ncount=1 instead. | |||
*/ | |||
static unsigned ZSTD_useLowProbCount(size_t const nbSeq) | |||
{ | |||
/* Heuristic: This should cover most blocks <= 16K and | |||
* start to fade out after 16K to about 32K depending on | |||
* compressibility. | |||
*/ | |||
return nbSeq >= 2048; | |||
} | |||
/** | |||
* Returns the cost in bytes of encoding the normalized count header. | |||
* Returns an error if any of the helper functions return an error. | |||
@@ -60,7 +73,7 @@ static size_t ZSTD_NCountCost(unsigned const* count, unsigned const max, | |||
BYTE wksp[FSE_NCOUNTBOUND]; | |||
S16 norm[MaxSeq + 1]; | |||
const U32 tableLog = FSE_optimalTableLog(FSELog, nbSeq, max); | |||
FORWARD_IF_ERROR(FSE_normalizeCount(norm, tableLog, count, nbSeq, max), ""); | |||
FORWARD_IF_ERROR(FSE_normalizeCount(norm, tableLog, count, nbSeq, max, ZSTD_useLowProbCount(nbSeq)), ""); | |||
return FSE_writeNCount(wksp, sizeof(wksp), norm, max, tableLog); | |||
} | |||
@@ -72,6 +85,8 @@ static size_t ZSTD_entropyCost(unsigned const* count, unsigned const max, size_t | |||
{ | |||
unsigned cost = 0; | |||
unsigned s; | |||
assert(total > 0); | |||
for (s = 0; s <= max; ++s) { | |||
unsigned norm = (unsigned)((256 * count[s]) / total); | |||
if (count[s] != 0 && norm == 0) | |||
@@ -151,7 +166,7 @@ ZSTD_selectEncodingType( | |||
if (mostFrequent == nbSeq) { | |||
*repeatMode = FSE_repeat_none; | |||
if (isDefaultAllowed && nbSeq <= 2) { | |||
/* Prefer set_basic over set_rle when there are 2 or less symbols, | |||
/* Prefer set_basic over set_rle when there are 2 or fewer symbols, | |||
* since RLE uses 1 byte, but set_basic uses 5-6 bits per symbol. | |||
* If basic encoding isn't possible, always choose RLE. | |||
*/ | |||
@@ -219,6 +234,11 @@ ZSTD_selectEncodingType( | |||
return set_compressed; | |||
} | |||
typedef struct { | |||
S16 norm[MaxSeq + 1]; | |||
U32 wksp[FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(MaxSeq, MaxFSELog)]; | |||
} ZSTD_BuildCTableWksp; | |||
size_t | |||
ZSTD_buildCTable(void* dst, size_t dstCapacity, | |||
FSE_CTable* nextCTable, U32 FSELog, symbolEncodingType_e type, | |||
@@ -239,13 +259,13 @@ ZSTD_buildCTable(void* dst, size_t dstCapacity, | |||
*op = codeTable[0]; | |||
return 1; | |||
case set_repeat: | |||
memcpy(nextCTable, prevCTable, prevCTableSize); | |||
ZSTD_memcpy(nextCTable, prevCTable, prevCTableSize); | |||
return 0; | |||
case set_basic: | |||
FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, defaultNorm, defaultMax, defaultNormLog, entropyWorkspace, entropyWorkspaceSize), ""); /* note : could be pre-calculated */ | |||
return 0; | |||
case set_compressed: { | |||
S16 norm[MaxSeq + 1]; | |||
ZSTD_BuildCTableWksp* wksp = (ZSTD_BuildCTableWksp*)entropyWorkspace; | |||
size_t nbSeq_1 = nbSeq; | |||
const U32 tableLog = FSE_optimalTableLog(FSELog, nbSeq, max); | |||
if (count[codeTable[nbSeq-1]] > 1) { | |||
@@ -253,10 +273,13 @@ ZSTD_buildCTable(void* dst, size_t dstCapacity, | |||
nbSeq_1--; | |||
} | |||
assert(nbSeq_1 > 1); | |||
FORWARD_IF_ERROR(FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max), ""); | |||
{ size_t const NCountSize = FSE_writeNCount(op, oend - op, norm, max, tableLog); /* overflow protected */ | |||
assert(entropyWorkspaceSize >= sizeof(ZSTD_BuildCTableWksp)); | |||
(void)entropyWorkspaceSize; | |||
FORWARD_IF_ERROR(FSE_normalizeCount(wksp->norm, tableLog, count, nbSeq_1, max, ZSTD_useLowProbCount(nbSeq_1)), "FSE_normalizeCount failed"); | |||
assert(oend >= op); | |||
{ size_t const NCountSize = FSE_writeNCount(op, (size_t)(oend - op), wksp->norm, max, tableLog); /* overflow protected */ | |||
FORWARD_IF_ERROR(NCountSize, "FSE_writeNCount failed"); | |||
FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, norm, max, tableLog, entropyWorkspace, entropyWorkspaceSize), ""); | |||
FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, wksp->norm, max, tableLog, wksp->wksp, sizeof(wksp->wksp)), "FSE_buildCTable_wksp failed"); | |||
return NCountSize; | |||
} | |||
} | |||
@@ -290,19 +313,19 @@ ZSTD_encodeSequences_body( | |||
FSE_initCState2(&stateLitLength, CTable_LitLength, llCodeTable[nbSeq-1]); | |||
BIT_addBits(&blockStream, sequences[nbSeq-1].litLength, LL_bits[llCodeTable[nbSeq-1]]); | |||
if (MEM_32bits()) BIT_flushBits(&blockStream); | |||
BIT_addBits(&blockStream, sequences[nbSeq-1].matchLength, ML_bits[mlCodeTable[nbSeq-1]]); | |||
BIT_addBits(&blockStream, sequences[nbSeq-1].mlBase, ML_bits[mlCodeTable[nbSeq-1]]); | |||
if (MEM_32bits()) BIT_flushBits(&blockStream); | |||
if (longOffsets) { | |||
U32 const ofBits = ofCodeTable[nbSeq-1]; | |||
unsigned const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1); | |||
if (extraBits) { | |||
BIT_addBits(&blockStream, sequences[nbSeq-1].offset, extraBits); | |||
BIT_addBits(&blockStream, sequences[nbSeq-1].offBase, extraBits); | |||
BIT_flushBits(&blockStream); | |||
} | |||
BIT_addBits(&blockStream, sequences[nbSeq-1].offset >> extraBits, | |||
BIT_addBits(&blockStream, sequences[nbSeq-1].offBase >> extraBits, | |||
ofBits - extraBits); | |||
} else { | |||
BIT_addBits(&blockStream, sequences[nbSeq-1].offset, ofCodeTable[nbSeq-1]); | |||
BIT_addBits(&blockStream, sequences[nbSeq-1].offBase, ofCodeTable[nbSeq-1]); | |||
} | |||
BIT_flushBits(&blockStream); | |||
@@ -316,8 +339,8 @@ ZSTD_encodeSequences_body( | |||
U32 const mlBits = ML_bits[mlCode]; | |||
DEBUGLOG(6, "encoding: litlen:%2u - matchlen:%2u - offCode:%7u", | |||
(unsigned)sequences[n].litLength, | |||
(unsigned)sequences[n].matchLength + MINMATCH, | |||
(unsigned)sequences[n].offset); | |||
(unsigned)sequences[n].mlBase + MINMATCH, | |||
(unsigned)sequences[n].offBase); | |||
/* 32b*/ /* 64b*/ | |||
/* (7)*/ /* (7)*/ | |||
FSE_encodeSymbol(&blockStream, &stateOffsetBits, ofCode); /* 15 */ /* 15 */ | |||
@@ -328,18 +351,18 @@ ZSTD_encodeSequences_body( | |||
BIT_flushBits(&blockStream); /* (7)*/ | |||
BIT_addBits(&blockStream, sequences[n].litLength, llBits); | |||
if (MEM_32bits() && ((llBits+mlBits)>24)) BIT_flushBits(&blockStream); | |||
BIT_addBits(&blockStream, sequences[n].matchLength, mlBits); | |||
BIT_addBits(&blockStream, sequences[n].mlBase, mlBits); | |||
if (MEM_32bits() || (ofBits+mlBits+llBits > 56)) BIT_flushBits(&blockStream); | |||
if (longOffsets) { | |||
unsigned const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1); | |||
if (extraBits) { | |||
BIT_addBits(&blockStream, sequences[n].offset, extraBits); | |||
BIT_addBits(&blockStream, sequences[n].offBase, extraBits); | |||
BIT_flushBits(&blockStream); /* (7)*/ | |||
} | |||
BIT_addBits(&blockStream, sequences[n].offset >> extraBits, | |||
BIT_addBits(&blockStream, sequences[n].offBase >> extraBits, | |||
ofBits - extraBits); /* 31 */ | |||
} else { | |||
BIT_addBits(&blockStream, sequences[n].offset, ofBits); /* 31 */ | |||
BIT_addBits(&blockStream, sequences[n].offBase, ofBits); /* 31 */ | |||
} | |||
BIT_flushBits(&blockStream); /* (7)*/ | |||
DEBUGLOG(7, "remaining space : %i", (int)(blockStream.endPtr - blockStream.ptr)); | |||
@@ -376,7 +399,7 @@ ZSTD_encodeSequences_default( | |||
#if DYNAMIC_BMI2 | |||
static TARGET_ATTRIBUTE("bmi2") size_t | |||
static BMI2_TARGET_ATTRIBUTE size_t | |||
ZSTD_encodeSequences_bmi2( | |||
void* dst, size_t dstCapacity, | |||
FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable, |
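ZSTD_useLowProbCount() above gates whether FSE normalization may assign the special count -1 to rare-but-present symbols (kept representable at one table slot) rather than rounding them up to 1. A toy normalizer, not zstd's actual FSE_normalizeCount(), to illustrate what the flag changes:

#include <stdio.h>

/* Toy normalizer (NOT zstd's FSE_normalizeCount): spread a power-of-two
 * weight (1 << tableLog) over symbols. Rare-but-present symbols either get
 * the marker -1 (lowProb mode, chosen upstream when nbSeq >= 2048) or are
 * clamped up to 1. Both occupy one table slot here; in real FSE the -1
 * marker additionally tells the table builder to give such symbols the
 * cheapest possible placement. */
static void toy_normalize(short* norm, unsigned tableLog,
                          const unsigned* count, unsigned total,
                          unsigned maxSymbol, int useLowProbCount)
{
    unsigned const scale = 1u << tableLog;
    short const lowProb = useLowProbCount ? -1 : 1;
    unsigned s, largest = 0;
    int remaining = (int)scale;
    for (s = 0; s <= maxSymbol; s++) {
        if (count[s] == 0) { norm[s] = 0; continue; }
        {   unsigned long long const proj =
                (unsigned long long)count[s] * scale / total;
            norm[s] = (proj == 0) ? lowProb : (short)proj;
            remaining -= (norm[s] == -1) ? 1 : norm[s]; /* -1 costs one slot */
            if (count[s] > count[largest]) largest = s;
        }
    }
    norm[largest] = (short)(norm[largest] + remaining); /* rounding slack */
}

int main(void)
{
    unsigned const count[4] = { 5000, 3, 0, 2 };  /* symbols 1 and 3 are rare */
    short normLow[4], normHi[4];
    unsigned s;
    toy_normalize(normLow, 6, count, 5005, 3, 1 /* low-prob counts */);
    toy_normalize(normHi,  6, count, 5005, 3, 0 /* round up to 1 */);
    for (s = 0; s < 4; s++)
        printf("symbol %u: count=%u lowProb=%d roundedUp=%d\n",
               s, count[s], normLow[s], normHi[s]);
    return 0;
}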
@@ -1,5 +1,5 @@ | |||
/* | |||
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. | |||
* Copyright (c) Meta Platforms, Inc. and affiliates. | |||
* All rights reserved. | |||
* | |||
* This source code is licensed under both the BSD-style license (found in the |
@@ -1,5 +1,5 @@ | |||
/* | |||
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. | |||
* Copyright (c) Meta Platforms, Inc. and affiliates. | |||
* All rights reserved. | |||
* | |||
* This source code is licensed under both the BSD-style license (found in the | |||
@@ -15,288 +15,10 @@ | |||
#include "zstd_internal.h" /* ZSTD_getSequenceLength */ | |||
#include "hist.h" /* HIST_countFast_wksp */ | |||
#include "zstd_compress_internal.h" | |||
#include "zstd_compress_internal.h" /* ZSTD_[huf|fse|entropy]CTablesMetadata_t */ | |||
#include "zstd_compress_sequences.h" | |||
#include "zstd_compress_literals.h" | |||
/*-************************************* | |||
* Superblock entropy buffer structs | |||
***************************************/ | |||
/** ZSTD_hufCTablesMetadata_t : | |||
* Stores Literals Block Type for a super-block in hType, and | |||
* huffman tree description in hufDesBuffer. | |||
* hufDesSize refers to the size of huffman tree description in bytes. | |||
* This metadata is populated in ZSTD_buildSuperBlockEntropy_literal() */ | |||
typedef struct { | |||
symbolEncodingType_e hType; | |||
BYTE hufDesBuffer[500]; /* TODO give name to this value */ | |||
size_t hufDesSize; | |||
} ZSTD_hufCTablesMetadata_t; | |||
/** ZSTD_fseCTablesMetadata_t : | |||
* Stores symbol compression modes for a super-block in {ll, ol, ml}Type, and | |||
* fse tables in fseTablesBuffer. | |||
* fseTablesSize refers to the size of fse tables in bytes. | |||
* This metadata is populated in ZSTD_buildSuperBlockEntropy_sequences() */ | |||
typedef struct { | |||
symbolEncodingType_e llType; | |||
symbolEncodingType_e ofType; | |||
symbolEncodingType_e mlType; | |||
BYTE fseTablesBuffer[500]; /* TODO give name to this value */ | |||
size_t fseTablesSize; | |||
size_t lastCountSize; /* This is to account for bug in 1.3.4. More detail in ZSTD_compressSubBlock_sequences() */ | |||
} ZSTD_fseCTablesMetadata_t; | |||
typedef struct { | |||
ZSTD_hufCTablesMetadata_t hufMetadata; | |||
ZSTD_fseCTablesMetadata_t fseMetadata; | |||
} ZSTD_entropyCTablesMetadata_t; | |||
/** ZSTD_buildSuperBlockEntropy_literal() : | |||
* Builds entropy for the super-block literals. | |||
* Stores literals block type (raw, rle, compressed, repeat) and | |||
* huffman description table to hufMetadata. | |||
* @return : size of huffman description table or error code */ | |||
static size_t ZSTD_buildSuperBlockEntropy_literal(void* const src, size_t srcSize, | |||
const ZSTD_hufCTables_t* prevHuf, | |||
ZSTD_hufCTables_t* nextHuf, | |||
ZSTD_hufCTablesMetadata_t* hufMetadata, | |||
const int disableLiteralsCompression, | |||
void* workspace, size_t wkspSize) | |||
{ | |||
BYTE* const wkspStart = (BYTE*)workspace; | |||
BYTE* const wkspEnd = wkspStart + wkspSize; | |||
BYTE* const countWkspStart = wkspStart; | |||
unsigned* const countWksp = (unsigned*)workspace; | |||
const size_t countWkspSize = (HUF_SYMBOLVALUE_MAX + 1) * sizeof(unsigned); | |||
BYTE* const nodeWksp = countWkspStart + countWkspSize; | |||
const size_t nodeWkspSize = wkspEnd-nodeWksp; | |||
unsigned maxSymbolValue = 255; | |||
unsigned huffLog = HUF_TABLELOG_DEFAULT; | |||
HUF_repeat repeat = prevHuf->repeatMode; | |||
DEBUGLOG(5, "ZSTD_buildSuperBlockEntropy_literal (srcSize=%zu)", srcSize); | |||
/* Prepare nextEntropy assuming reusing the existing table */ | |||
memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); | |||
if (disableLiteralsCompression) { | |||
DEBUGLOG(5, "set_basic - disabled"); | |||
hufMetadata->hType = set_basic; | |||
return 0; | |||
} | |||
/* small ? don't even attempt compression (speed opt) */ | |||
# define COMPRESS_LITERALS_SIZE_MIN 63 | |||
{ size_t const minLitSize = (prevHuf->repeatMode == HUF_repeat_valid) ? 6 : COMPRESS_LITERALS_SIZE_MIN; | |||
if (srcSize <= minLitSize) { | |||
DEBUGLOG(5, "set_basic - too small"); | |||
hufMetadata->hType = set_basic; | |||
return 0; | |||
} | |||
} | |||
/* Scan input and build symbol stats */ | |||
{ size_t const largest = HIST_count_wksp (countWksp, &maxSymbolValue, (const BYTE*)src, srcSize, workspace, wkspSize); | |||
FORWARD_IF_ERROR(largest, "HIST_count_wksp failed"); | |||
if (largest == srcSize) { | |||
DEBUGLOG(5, "set_rle"); | |||
hufMetadata->hType = set_rle; | |||
return 0; | |||
} | |||
if (largest <= (srcSize >> 7)+4) { | |||
DEBUGLOG(5, "set_basic - no gain"); | |||
hufMetadata->hType = set_basic; | |||
return 0; | |||
} | |||
} | |||
/* Validate the previous Huffman table */ | |||
if (repeat == HUF_repeat_check && !HUF_validateCTable((HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue)) { | |||
repeat = HUF_repeat_none; | |||
} | |||
/* Build Huffman Tree */ | |||
memset(nextHuf->CTable, 0, sizeof(nextHuf->CTable)); | |||
huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue); | |||
{ size_t const maxBits = HUF_buildCTable_wksp((HUF_CElt*)nextHuf->CTable, countWksp, | |||
maxSymbolValue, huffLog, | |||
nodeWksp, nodeWkspSize); | |||
FORWARD_IF_ERROR(maxBits, "HUF_buildCTable_wksp"); | |||
huffLog = (U32)maxBits; | |||
{ /* Build and write the CTable */ | |||
size_t const newCSize = HUF_estimateCompressedSize( | |||
(HUF_CElt*)nextHuf->CTable, countWksp, maxSymbolValue); | |||
size_t const hSize = HUF_writeCTable( | |||
hufMetadata->hufDesBuffer, sizeof(hufMetadata->hufDesBuffer), | |||
(HUF_CElt*)nextHuf->CTable, maxSymbolValue, huffLog); | |||
/* Check against repeating the previous CTable */ | |||
if (repeat != HUF_repeat_none) { | |||
size_t const oldCSize = HUF_estimateCompressedSize( | |||
(HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue); | |||
if (oldCSize < srcSize && (oldCSize <= hSize + newCSize || hSize + 12 >= srcSize)) { | |||
DEBUGLOG(5, "set_repeat - smaller"); | |||
memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); | |||
hufMetadata->hType = set_repeat; | |||
return 0; | |||
} | |||
} | |||
if (newCSize + hSize >= srcSize) { | |||
DEBUGLOG(5, "set_basic - no gains"); | |||
memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); | |||
hufMetadata->hType = set_basic; | |||
return 0; | |||
} | |||
DEBUGLOG(5, "set_compressed (hSize=%u)", (U32)hSize); | |||
hufMetadata->hType = set_compressed; | |||
nextHuf->repeatMode = HUF_repeat_check; | |||
return hSize; | |||
} | |||
} | |||
} | |||
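/* Editor's note: a minimal standalone sketch (not part of the upstream diff)
 * of the block-type cascade implemented above. `largest` is the count of the
 * most frequent byte as returned by HIST_count_wksp(); the return values
 * mirror symbolEncodingType_e. The helper name is hypothetical. */
static int sketch_pickLiteralsBlockType(size_t largest, size_t srcSize)
{
    if (largest == srcSize) return 1;             /* set_rle : one repeated byte */
    if (largest <= (srcSize >> 7) + 4) return 0;  /* set_basic : distribution too flat, Huffman won't pay off */
    return 2;                                     /* set_compressed / set_repeat : worth building or reusing a table */
}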
/** ZSTD_buildSuperBlockEntropy_sequences() : | |||
* Builds entropy for the super-block sequences. | |||
* Stores symbol compression modes and fse table to fseMetadata. | |||
* @return : size of fse tables or error code */ | |||
static size_t ZSTD_buildSuperBlockEntropy_sequences(seqStore_t* seqStorePtr, | |||
const ZSTD_fseCTables_t* prevEntropy, | |||
ZSTD_fseCTables_t* nextEntropy, | |||
const ZSTD_CCtx_params* cctxParams, | |||
ZSTD_fseCTablesMetadata_t* fseMetadata, | |||
void* workspace, size_t wkspSize) | |||
{ | |||
BYTE* const wkspStart = (BYTE*)workspace; | |||
BYTE* const wkspEnd = wkspStart + wkspSize; | |||
BYTE* const countWkspStart = wkspStart; | |||
unsigned* const countWksp = (unsigned*)workspace; | |||
const size_t countWkspSize = (MaxSeq + 1) * sizeof(unsigned); | |||
BYTE* const cTableWksp = countWkspStart + countWkspSize; | |||
const size_t cTableWkspSize = wkspEnd-cTableWksp; | |||
ZSTD_strategy const strategy = cctxParams->cParams.strategy; | |||
FSE_CTable* CTable_LitLength = nextEntropy->litlengthCTable; | |||
FSE_CTable* CTable_OffsetBits = nextEntropy->offcodeCTable; | |||
FSE_CTable* CTable_MatchLength = nextEntropy->matchlengthCTable; | |||
const BYTE* const ofCodeTable = seqStorePtr->ofCode; | |||
const BYTE* const llCodeTable = seqStorePtr->llCode; | |||
const BYTE* const mlCodeTable = seqStorePtr->mlCode; | |||
size_t const nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart; | |||
BYTE* const ostart = fseMetadata->fseTablesBuffer; | |||
BYTE* const oend = ostart + sizeof(fseMetadata->fseTablesBuffer); | |||
BYTE* op = ostart; | |||
assert(cTableWkspSize >= (1 << MaxFSELog) * sizeof(FSE_FUNCTION_TYPE)); | |||
DEBUGLOG(5, "ZSTD_buildSuperBlockEntropy_sequences (nbSeq=%zu)", nbSeq); | |||
memset(workspace, 0, wkspSize); | |||
fseMetadata->lastCountSize = 0; | |||
/* convert length/distances into codes */ | |||
ZSTD_seqToCodes(seqStorePtr); | |||
/* build CTable for Literal Lengths */ | |||
{ U32 LLtype; | |||
unsigned max = MaxLL; | |||
size_t const mostFrequent = HIST_countFast_wksp(countWksp, &max, llCodeTable, nbSeq, workspace, wkspSize); /* can't fail */ | |||
DEBUGLOG(5, "Building LL table"); | |||
nextEntropy->litlength_repeatMode = prevEntropy->litlength_repeatMode; | |||
LLtype = ZSTD_selectEncodingType(&nextEntropy->litlength_repeatMode, | |||
countWksp, max, mostFrequent, nbSeq, | |||
LLFSELog, prevEntropy->litlengthCTable, | |||
LL_defaultNorm, LL_defaultNormLog, | |||
ZSTD_defaultAllowed, strategy); | |||
assert(set_basic < set_compressed && set_rle < set_compressed); | |||
assert(!(LLtype < set_compressed && nextEntropy->litlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */ | |||
{ size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_LitLength, LLFSELog, (symbolEncodingType_e)LLtype, | |||
countWksp, max, llCodeTable, nbSeq, LL_defaultNorm, LL_defaultNormLog, MaxLL, | |||
prevEntropy->litlengthCTable, sizeof(prevEntropy->litlengthCTable), | |||
cTableWksp, cTableWkspSize); | |||
FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for LitLens failed"); | |||
if (LLtype == set_compressed) | |||
fseMetadata->lastCountSize = countSize; | |||
op += countSize; | |||
fseMetadata->llType = (symbolEncodingType_e) LLtype; | |||
} } | |||
/* build CTable for Offsets */ | |||
{ U32 Offtype; | |||
unsigned max = MaxOff; | |||
size_t const mostFrequent = HIST_countFast_wksp(countWksp, &max, ofCodeTable, nbSeq, workspace, wkspSize); /* can't fail */ | |||
/* We can only use the basic table if max <= DefaultMaxOff, otherwise the offsets are too large */ | |||
ZSTD_defaultPolicy_e const defaultPolicy = (max <= DefaultMaxOff) ? ZSTD_defaultAllowed : ZSTD_defaultDisallowed; | |||
DEBUGLOG(5, "Building OF table"); | |||
nextEntropy->offcode_repeatMode = prevEntropy->offcode_repeatMode; | |||
Offtype = ZSTD_selectEncodingType(&nextEntropy->offcode_repeatMode, | |||
countWksp, max, mostFrequent, nbSeq, | |||
OffFSELog, prevEntropy->offcodeCTable, | |||
OF_defaultNorm, OF_defaultNormLog, | |||
defaultPolicy, strategy); | |||
assert(!(Offtype < set_compressed && nextEntropy->offcode_repeatMode != FSE_repeat_none)); /* We don't copy tables */ | |||
{ size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)Offtype, | |||
countWksp, max, ofCodeTable, nbSeq, OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff, | |||
prevEntropy->offcodeCTable, sizeof(prevEntropy->offcodeCTable), | |||
cTableWksp, cTableWkspSize); | |||
FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for Offsets failed"); | |||
if (Offtype == set_compressed) | |||
fseMetadata->lastCountSize = countSize; | |||
op += countSize; | |||
fseMetadata->ofType = (symbolEncodingType_e) Offtype; | |||
} } | |||
/* build CTable for MatchLengths */ | |||
{ U32 MLtype; | |||
unsigned max = MaxML; | |||
size_t const mostFrequent = HIST_countFast_wksp(countWksp, &max, mlCodeTable, nbSeq, workspace, wkspSize); /* can't fail */ | |||
DEBUGLOG(5, "Building ML table (remaining space : %i)", (int)(oend-op)); | |||
nextEntropy->matchlength_repeatMode = prevEntropy->matchlength_repeatMode; | |||
MLtype = ZSTD_selectEncodingType(&nextEntropy->matchlength_repeatMode, | |||
countWksp, max, mostFrequent, nbSeq, | |||
MLFSELog, prevEntropy->matchlengthCTable, | |||
ML_defaultNorm, ML_defaultNormLog, | |||
ZSTD_defaultAllowed, strategy); | |||
assert(!(MLtype < set_compressed && nextEntropy->matchlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */ | |||
{ size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_MatchLength, MLFSELog, (symbolEncodingType_e)MLtype, | |||
countWksp, max, mlCodeTable, nbSeq, ML_defaultNorm, ML_defaultNormLog, MaxML, | |||
prevEntropy->matchlengthCTable, sizeof(prevEntropy->matchlengthCTable), | |||
cTableWksp, cTableWkspSize); | |||
FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for MatchLengths failed"); | |||
if (MLtype == set_compressed) | |||
fseMetadata->lastCountSize = countSize; | |||
op += countSize; | |||
fseMetadata->mlType = (symbolEncodingType_e) MLtype; | |||
} } | |||
assert((size_t) (op-ostart) <= sizeof(fseMetadata->fseTablesBuffer)); | |||
return op-ostart; | |||
} | |||
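/* Editor's note: an illustrative sketch, not part of the upstream diff. The
 * three symbolEncodingType_e values stored in fseMetadata above are later
 * packed into the first byte of the Sequences section, two bits per field
 * with the low two bits reserved (see ZSTD_compressSubBlock_sequences below).
 * The helper name is hypothetical. */
static unsigned char sketch_packSeqHead(unsigned llType, unsigned ofType, unsigned mlType)
{
    return (unsigned char)((llType << 6) + (ofType << 4) + (mlType << 2));
}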
/** ZSTD_buildSuperBlockEntropy() : | |||
* Builds entropy for the super-block. | |||
* @return : 0 on success or error code */ | |||
static size_t | |||
ZSTD_buildSuperBlockEntropy(seqStore_t* seqStorePtr, | |||
const ZSTD_entropyCTables_t* prevEntropy, | |||
ZSTD_entropyCTables_t* nextEntropy, | |||
const ZSTD_CCtx_params* cctxParams, | |||
ZSTD_entropyCTablesMetadata_t* entropyMetadata, | |||
void* workspace, size_t wkspSize) | |||
{ | |||
size_t const litSize = seqStorePtr->lit - seqStorePtr->litStart; | |||
DEBUGLOG(5, "ZSTD_buildSuperBlockEntropy"); | |||
entropyMetadata->hufMetadata.hufDesSize = | |||
ZSTD_buildSuperBlockEntropy_literal(seqStorePtr->litStart, litSize, | |||
&prevEntropy->huf, &nextEntropy->huf, | |||
&entropyMetadata->hufMetadata, | |||
ZSTD_disableLiteralsCompression(cctxParams), | |||
workspace, wkspSize); | |||
FORWARD_IF_ERROR(entropyMetadata->hufMetadata.hufDesSize, "ZSTD_buildSuperBlockEntropy_literal failed"); | |||
entropyMetadata->fseMetadata.fseTablesSize = | |||
ZSTD_buildSuperBlockEntropy_sequences(seqStorePtr, | |||
&prevEntropy->fse, &nextEntropy->fse, | |||
cctxParams, | |||
&entropyMetadata->fseMetadata, | |||
workspace, wkspSize); | |||
FORWARD_IF_ERROR(entropyMetadata->fseMetadata.fseTablesSize, "ZSTD_buildSuperBlockEntropy_sequences failed"); | |||
return 0; | |||
} | |||
/** ZSTD_compressSubBlock_literal() : | |||
* Compresses literals section for a sub-block. | |||
* When we have to write the Huffman table we will sometimes choose a header | |||
@@ -304,7 +26,7 @@ ZSTD_buildSuperBlockEntropy(seqStore_t* seqStorePtr, | |||
* before we know the table size + compressed size, so we have a bound on the | |||
* table size. If we guessed incorrectly, we fall back to uncompressed literals. | |||
* | |||
* We write the header when writeEntropy=1 and set entropyWrriten=1 when we succeeded | |||
* We write the header when writeEntropy=1 and set entropyWritten=1 when we succeeded | |||
* in writing the header, otherwise it is set to 0. | |||
* | |||
* hufMetadata->hType has literals block type info. | |||
@@ -314,13 +36,14 @@ ZSTD_buildSuperBlockEntropy(seqStore_t* seqStorePtr, | |||
* If it is set_compressed, first sub-block's literals section will be Treeless_Literals_Block | |||
* and the following sub-blocks' literals sections will be Treeless_Literals_Block. | |||
* @return : compressed size of literals section of a sub-block | |||
* Or 0 if it unable to compress. | |||
* Or 0 if unable to compress. | |||
* Or error code */ | |||
static size_t ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable, | |||
const ZSTD_hufCTablesMetadata_t* hufMetadata, | |||
const BYTE* literals, size_t litSize, | |||
void* dst, size_t dstSize, | |||
const int bmi2, int writeEntropy, int* entropyWritten) | |||
static size_t | |||
ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable, | |||
const ZSTD_hufCTablesMetadata_t* hufMetadata, | |||
const BYTE* literals, size_t litSize, | |||
void* dst, size_t dstSize, | |||
const int bmi2, int writeEntropy, int* entropyWritten) | |||
{ | |||
size_t const header = writeEntropy ? 200 : 0; | |||
size_t const lhSize = 3 + (litSize >= (1 KB - header)) + (litSize >= (16 KB - header)); | |||
@@ -331,8 +54,6 @@ static size_t ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable, | |||
symbolEncodingType_e hType = writeEntropy ? hufMetadata->hType : set_repeat; | |||
size_t cLitSize = 0; | |||
(void)bmi2; /* TODO bmi2... */ | |||
DEBUGLOG(5, "ZSTD_compressSubBlock_literal (litSize=%zu, lhSize=%zu, writeEntropy=%d)", litSize, lhSize, writeEntropy); | |||
*entropyWritten = 0; | |||
@@ -348,15 +69,15 @@ static size_t ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable, | |||
assert(hufMetadata->hType == set_compressed || hufMetadata->hType == set_repeat); | |||
if (writeEntropy && hufMetadata->hType == set_compressed) { | |||
memcpy(op, hufMetadata->hufDesBuffer, hufMetadata->hufDesSize); | |||
ZSTD_memcpy(op, hufMetadata->hufDesBuffer, hufMetadata->hufDesSize); | |||
op += hufMetadata->hufDesSize; | |||
cLitSize += hufMetadata->hufDesSize; | |||
DEBUGLOG(5, "ZSTD_compressSubBlock_literal (hSize=%zu)", hufMetadata->hufDesSize); | |||
} | |||
/* TODO bmi2 */ | |||
{ const size_t cSize = singleStream ? HUF_compress1X_usingCTable(op, oend-op, literals, litSize, hufTable) | |||
: HUF_compress4X_usingCTable(op, oend-op, literals, litSize, hufTable); | |||
{ int const flags = bmi2 ? HUF_flags_bmi2 : 0; | |||
const size_t cSize = singleStream ? HUF_compress1X_usingCTable(op, oend-op, literals, litSize, hufTable, flags) | |||
: HUF_compress4X_usingCTable(op, oend-op, literals, litSize, hufTable, flags); | |||
op += cSize; | |||
cLitSize += cSize; | |||
if (cSize == 0 || ERR_isError(cSize)) { | |||
@@ -404,12 +125,17 @@ static size_t ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable, | |||
return op-ostart; | |||
} | |||
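/* Editor's note: an illustrative sketch, not part of the upstream diff. It
 * restates the literals-header sizing used above: the header is 3, 4 or 5
 * bytes depending on litSize, and when the Huffman description must also be
 * written its size is not yet known, so a 200-byte bound is assumed (falling
 * back to raw literals if the guess proves wrong). The 1 KB / 16 KB macros
 * expand to 1024 / 16384. The helper name is hypothetical. */
static size_t sketch_literalsHeaderSize(size_t litSize, int writeEntropy)
{
    size_t const headerBound = writeEntropy ? 200 : 0;
    return 3 + (litSize >= (1024 - headerBound))
             + (litSize >= (16384 - headerBound));
}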
static size_t ZSTD_seqDecompressedSize(seqStore_t const* seqStore, const seqDef* sequences, size_t nbSeq, size_t litSize, int lastSequence) { | |||
static size_t | |||
ZSTD_seqDecompressedSize(seqStore_t const* seqStore, | |||
const seqDef* sequences, size_t nbSeq, | |||
size_t litSize, int lastSequence) | |||
{ | |||
const seqDef* const sstart = sequences; | |||
const seqDef* const send = sequences + nbSeq; | |||
const seqDef* sp = sstart; | |||
size_t matchLengthSum = 0; | |||
size_t litLengthSum = 0; | |||
(void)(litLengthSum); /* suppress unused variable warning on some environments */ | |||
while (send-sp > 0) { | |||
ZSTD_sequenceLength const seqLen = ZSTD_getSequenceLength(seqStore, sp); | |||
litLengthSum += seqLen.litLength; | |||
@@ -433,13 +159,14 @@ static size_t ZSTD_seqDecompressedSize(seqStore_t const* seqStore, const seqDef* | |||
* @return : compressed size of sequences section of a sub-block | |||
* Or 0 if it is unable to compress | |||
* Or error code. */ | |||
static size_t ZSTD_compressSubBlock_sequences(const ZSTD_fseCTables_t* fseTables, | |||
const ZSTD_fseCTablesMetadata_t* fseMetadata, | |||
const seqDef* sequences, size_t nbSeq, | |||
const BYTE* llCode, const BYTE* mlCode, const BYTE* ofCode, | |||
const ZSTD_CCtx_params* cctxParams, | |||
void* dst, size_t dstCapacity, | |||
const int bmi2, int writeEntropy, int* entropyWritten) | |||
static size_t | |||
ZSTD_compressSubBlock_sequences(const ZSTD_fseCTables_t* fseTables, | |||
const ZSTD_fseCTablesMetadata_t* fseMetadata, | |||
const seqDef* sequences, size_t nbSeq, | |||
const BYTE* llCode, const BYTE* mlCode, const BYTE* ofCode, | |||
const ZSTD_CCtx_params* cctxParams, | |||
void* dst, size_t dstCapacity, | |||
const int bmi2, int writeEntropy, int* entropyWritten) | |||
{ | |||
const int longOffsets = cctxParams->cParams.windowLog > STREAM_ACCUMULATOR_MIN; | |||
BYTE* const ostart = (BYTE*)dst; | |||
@@ -474,7 +201,7 @@ static size_t ZSTD_compressSubBlock_sequences(const ZSTD_fseCTables_t* fseTables | |||
const U32 MLtype = fseMetadata->mlType; | |||
DEBUGLOG(5, "ZSTD_compressSubBlock_sequences (fseTablesSize=%zu)", fseMetadata->fseTablesSize); | |||
*seqHead = (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2)); | |||
memcpy(op, fseMetadata->fseTablesBuffer, fseMetadata->fseTablesSize); | |||
ZSTD_memcpy(op, fseMetadata->fseTablesBuffer, fseMetadata->fseTablesSize); | |||
op += fseMetadata->fseTablesSize; | |||
} else { | |||
const U32 repeat = set_repeat; | |||
@@ -602,8 +329,8 @@ static size_t ZSTD_estimateSubBlockSize_literal(const BYTE* literals, size_t lit | |||
static size_t ZSTD_estimateSubBlockSize_symbolType(symbolEncodingType_e type, | |||
const BYTE* codeTable, unsigned maxCode, | |||
size_t nbSeq, const FSE_CTable* fseCTable, | |||
const U32* additionalBits, | |||
short const* defaultNorm, U32 defaultNormLog, | |||
const U8* additionalBits, | |||
short const* defaultNorm, U32 defaultNormLog, U32 defaultMax, | |||
void* workspace, size_t wkspSize) | |||
{ | |||
unsigned* const countWksp = (unsigned*)workspace; | |||
@@ -615,7 +342,11 @@ static size_t ZSTD_estimateSubBlockSize_symbolType(symbolEncodingType_e type, | |||
HIST_countFast_wksp(countWksp, &max, codeTable, nbSeq, workspace, wkspSize); /* can't fail */ | |||
if (type == set_basic) { | |||
cSymbolTypeSizeEstimateInBits = ZSTD_crossEntropyCost(defaultNorm, defaultNormLog, countWksp, max); | |||
/* We selected this encoding type, so it must be valid. */ | |||
assert(max <= defaultMax); | |||
cSymbolTypeSizeEstimateInBits = max <= defaultMax | |||
? ZSTD_crossEntropyCost(defaultNorm, defaultNormLog, countWksp, max) | |||
: ERROR(GENERIC); | |||
} else if (type == set_rle) { | |||
cSymbolTypeSizeEstimateInBits = 0; | |||
} else if (type == set_compressed || type == set_repeat) { | |||
@@ -639,19 +370,20 @@ static size_t ZSTD_estimateSubBlockSize_sequences(const BYTE* ofCodeTable, | |||
void* workspace, size_t wkspSize, | |||
int writeEntropy) | |||
{ | |||
size_t sequencesSectionHeaderSize = 3; /* Use hard coded size of 3 bytes */ | |||
size_t const sequencesSectionHeaderSize = 3; /* Use hard coded size of 3 bytes */ | |||
size_t cSeqSizeEstimate = 0; | |||
if (nbSeq == 0) return sequencesSectionHeaderSize; | |||
cSeqSizeEstimate += ZSTD_estimateSubBlockSize_symbolType(fseMetadata->ofType, ofCodeTable, MaxOff, | |||
nbSeq, fseTables->offcodeCTable, NULL, | |||
OF_defaultNorm, OF_defaultNormLog, | |||
OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff, | |||
workspace, wkspSize); | |||
cSeqSizeEstimate += ZSTD_estimateSubBlockSize_symbolType(fseMetadata->llType, llCodeTable, MaxLL, | |||
nbSeq, fseTables->litlengthCTable, LL_bits, | |||
LL_defaultNorm, LL_defaultNormLog, | |||
LL_defaultNorm, LL_defaultNormLog, MaxLL, | |||
workspace, wkspSize); | |||
cSeqSizeEstimate += ZSTD_estimateSubBlockSize_symbolType(fseMetadata->mlType, mlCodeTable, MaxML, | |||
nbSeq, fseTables->matchlengthCTable, ML_bits, | |||
ML_defaultNorm, ML_defaultNormLog, | |||
ML_defaultNorm, ML_defaultNormLog, MaxML, | |||
workspace, wkspSize); | |||
if (writeEntropy) cSeqSizeEstimate += fseMetadata->fseTablesSize; | |||
return cSeqSizeEstimate + sequencesSectionHeaderSize; | |||
@@ -747,7 +479,7 @@ static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr, | |||
/* I think there is an optimization opportunity here. | |||
* Calling ZSTD_estimateSubBlockSize for every sequence can be wasteful | |||
* since it recalculates estimate from scratch. | |||
* For example, it would recount literal distribution and symbol codes everytime. | |||
* For example, it would recount literal distribution and symbol codes every time. | |||
*/ | |||
cBlockSizeEstimate = ZSTD_estimateSubBlockSize(lp, litSize, ofCodePtr, llCodePtr, mlCodePtr, seqCount, | |||
&nextCBlock->entropy, entropyMetadata, | |||
@@ -790,7 +522,7 @@ static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr, | |||
} while (!lastSequence); | |||
if (writeLitEntropy) { | |||
DEBUGLOG(5, "ZSTD_compressSubBlock_multi has literal entropy tables unwritten"); | |||
memcpy(&nextCBlock->entropy.huf, &prevCBlock->entropy.huf, sizeof(prevCBlock->entropy.huf)); | |||
ZSTD_memcpy(&nextCBlock->entropy.huf, &prevCBlock->entropy.huf, sizeof(prevCBlock->entropy.huf)); | |||
} | |||
if (writeSeqEntropy && ZSTD_needSequenceEntropyTables(&entropyMetadata->fseMetadata)) { | |||
/* If we haven't written our entropy tables, then we've violated our contract and | |||
@@ -809,11 +541,11 @@ static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr, | |||
if (sp < send) { | |||
seqDef const* seq; | |||
repcodes_t rep; | |||
memcpy(&rep, prevCBlock->rep, sizeof(rep)); | |||
ZSTD_memcpy(&rep, prevCBlock->rep, sizeof(rep)); | |||
for (seq = sstart; seq < sp; ++seq) { | |||
rep = ZSTD_updateRep(rep.rep, seq->offset - 1, ZSTD_getSequenceLength(seqStorePtr, seq).litLength == 0); | |||
ZSTD_updateRep(rep.rep, seq->offBase, ZSTD_getSequenceLength(seqStorePtr, seq).litLength == 0); | |||
} | |||
memcpy(nextCBlock->rep, &rep, sizeof(rep)); | |||
ZSTD_memcpy(nextCBlock->rep, &rep, sizeof(rep)); | |||
} | |||
} | |||
DEBUGLOG(5, "ZSTD_compressSubBlock_multi compressed"); | |||
@@ -826,12 +558,12 @@ size_t ZSTD_compressSuperBlock(ZSTD_CCtx* zc, | |||
unsigned lastBlock) { | |||
ZSTD_entropyCTablesMetadata_t entropyMetadata; | |||
FORWARD_IF_ERROR(ZSTD_buildSuperBlockEntropy(&zc->seqStore, | |||
FORWARD_IF_ERROR(ZSTD_buildBlockEntropyStats(&zc->seqStore, | |||
&zc->blockState.prevCBlock->entropy, | |||
&zc->blockState.nextCBlock->entropy, | |||
&zc->appliedParams, | |||
&entropyMetadata, | |||
zc->entropyWorkspace, HUF_WORKSPACE_SIZE /* statically allocated in resetCCtx */), ""); | |||
zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */), ""); | |||
return ZSTD_compressSubBlock_multi(&zc->seqStore, | |||
zc->blockState.prevCBlock, | |||
@@ -841,5 +573,5 @@ size_t ZSTD_compressSuperBlock(ZSTD_CCtx* zc, | |||
dst, dstCapacity, | |||
src, srcSize, | |||
zc->bmi2, lastBlock, | |||
zc->entropyWorkspace, HUF_WORKSPACE_SIZE /* statically allocated in resetCCtx */); | |||
zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */); | |||
} |
@@ -1,5 +1,5 @@ | |||
/* | |||
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. | |||
* Copyright (c) Meta Platforms, Inc. and affiliates. | |||
* All rights reserved. | |||
* | |||
* This source code is licensed under both the BSD-style license (found in the |
@@ -1,5 +1,5 @@ | |||
/* | |||
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. | |||
* Copyright (c) Meta Platforms, Inc. and affiliates. | |||
* All rights reserved. | |||
* | |||
* This source code is licensed under both the BSD-style license (found in the | |||
@@ -35,6 +35,10 @@ extern "C" { | |||
#define ZSTD_CWKSP_ASAN_REDZONE_SIZE 128 | |||
#endif | |||
/* Set our tables and aligneds to align by 64 bytes */ | |||
#define ZSTD_CWKSP_ALIGNMENT_BYTES 64 | |||
/*-************************************* | |||
* Structures | |||
***************************************/ | |||
@@ -44,6 +48,16 @@ typedef enum { | |||
ZSTD_cwksp_alloc_aligned | |||
} ZSTD_cwksp_alloc_phase_e; | |||
/** | |||
* Used to describe whether the workspace is statically allocated (and will not | |||
* necessarily ever be freed), or if it's dynamically allocated and we can | |||
* expect a well-formed caller to free this. | |||
*/ | |||
typedef enum { | |||
ZSTD_cwksp_dynamic_alloc, | |||
ZSTD_cwksp_static_alloc | |||
} ZSTD_cwksp_static_alloc_e; | |||
/** | |||
* Zstd fits all its internal datastructures into a single continuous buffer, | |||
* so that it only needs to perform a single OS allocation (or so that a buffer | |||
@@ -92,7 +106,7 @@ typedef enum { | |||
* | |||
* - Static objects: this is optionally the enclosing ZSTD_CCtx or ZSTD_CDict, | |||
* so that literally everything fits in a single buffer. Note: if present, | |||
* this must be the first object in the workspace, since ZSTD_free{CCtx, | |||
* this must be the first object in the workspace, since ZSTD_customFree{CCtx, | |||
* CDict}() rely on a pointer comparison to see whether one or two frees are | |||
* required. | |||
* | |||
@@ -107,10 +121,11 @@ typedef enum { | |||
* - Tables: these are any of several different datastructures (hash tables, | |||
* chain tables, binary trees) that all respect a common format: they are | |||
* uint32_t arrays, all of whose values are between 0 and (nextSrc - base). | |||
* Their sizes depend on the cparams. | |||
* Their sizes depend on the cparams. These tables are 64-byte aligned. | |||
* | |||
* - Aligned: these buffers are used for various purposes that require 4 byte | |||
* alignment, but don't require any initialization before they're used. | |||
* alignment, but don't require any initialization before they're used. These | |||
* buffers are each aligned to 64 bytes. | |||
* | |||
* - Buffers: these buffers are used for various purposes that don't require | |||
* any alignment or initialization before they're used. This means they can | |||
@@ -123,8 +138,7 @@ typedef enum { | |||
* | |||
* 1. Objects | |||
* 2. Buffers | |||
* 3. Aligned | |||
* 4. Tables | |||
* 3. Aligned/Tables | |||
* | |||
* Attempts to reserve objects of different types out of order will fail. | |||
*/ | |||
@@ -137,9 +151,10 @@ typedef struct { | |||
void* tableValidEnd; | |||
void* allocStart; | |||
int allocFailed; | |||
BYTE allocFailed; | |||
int workspaceOversizedDuration; | |||
ZSTD_cwksp_alloc_phase_e phase; | |||
ZSTD_cwksp_static_alloc_e isStatic; | |||
} ZSTD_cwksp; | |||
/*-************************************* | |||
@@ -176,82 +191,168 @@ MEM_STATIC size_t ZSTD_cwksp_align(size_t size, size_t const align) { | |||
* Since tables aren't currently redzoned, you don't need to call through this | |||
* to figure out how much space you need for the matchState tables. Everything | |||
* else is though. | |||
* | |||
* Do not use for sizing aligned buffers. Instead, use ZSTD_cwksp_aligned_alloc_size(). | |||
*/ | |||
MEM_STATIC size_t ZSTD_cwksp_alloc_size(size_t size) { | |||
#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE) | |||
if (size == 0) | |||
return 0; | |||
#if ZSTD_ADDRESS_SANITIZER && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE) | |||
return size + 2 * ZSTD_CWKSP_ASAN_REDZONE_SIZE; | |||
#else | |||
return size; | |||
#endif | |||
} | |||
MEM_STATIC void ZSTD_cwksp_internal_advance_phase( | |||
ZSTD_cwksp* ws, ZSTD_cwksp_alloc_phase_e phase) { | |||
/** | |||
* Returns an adjusted alloc size that is the nearest larger multiple of 64 bytes. | |||
* Used to determine the number of bytes required for a given "aligned". | |||
*/ | |||
MEM_STATIC size_t ZSTD_cwksp_aligned_alloc_size(size_t size) { | |||
return ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(size, ZSTD_CWKSP_ALIGNMENT_BYTES)); | |||
} | |||
/** | |||
* Returns the amount of additional space the cwksp must allocate | |||
* for internal purposes (currently only alignment). | |||
*/ | |||
MEM_STATIC size_t ZSTD_cwksp_slack_space_required(void) { | |||
/* For alignment, the wksp will always allocate an additional n_1=[1, 64] bytes | |||
* to align the beginning of tables section, as well as another n_2=[0, 63] bytes | |||
* to align the beginning of the aligned section. | |||
* | |||
* n_1 + n_2 == 64 bytes if the cwksp is freshly allocated, due to tables and | |||
* aligneds being sized in multiples of 64 bytes. | |||
*/ | |||
size_t const slackSpace = ZSTD_CWKSP_ALIGNMENT_BYTES; | |||
return slackSpace; | |||
} | |||
/** | |||
* Return the number of additional bytes required to align a pointer to the given number of bytes. | |||
* alignBytes must be a power of two. | |||
*/ | |||
MEM_STATIC size_t ZSTD_cwksp_bytes_to_align_ptr(void* ptr, const size_t alignBytes) { | |||
size_t const alignBytesMask = alignBytes - 1; | |||
size_t const bytes = (alignBytes - ((size_t)ptr & (alignBytesMask))) & alignBytesMask; | |||
assert((alignBytes & alignBytesMask) == 0); | |||
assert(bytes != ZSTD_CWKSP_ALIGNMENT_BYTES); | |||
return bytes; | |||
} | |||
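/* Editor's note: worked examples, not part of the upstream diff, assuming
 * ZSTD_CWKSP_ALIGNMENT_BYTES == 64:
 *   ZSTD_cwksp_aligned_alloc_size(100) -> 128 (rounded up to a multiple of
 *     64; 2*ZSTD_CWKSP_ASAN_REDZONE_SIZE more when ASAN poisoning is active);
 *   ZSTD_cwksp_bytes_to_align_ptr((void*)0x1008, 64) -> 56 (0x1008 + 56 ==
 *     0x1040); an already-aligned pointer needs 0 bytes, never 64, which is
 *     exactly what the assert above guarantees. */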
/** | |||
* Internal function. Do not use directly. | |||
* Reserves the given number of bytes within the aligned/buffer segment of the wksp, | |||
* which counts from the end of the wksp (as opposed to the object/table segment). | |||
* | |||
* Returns a pointer to the beginning of that space. | |||
*/ | |||
MEM_STATIC void* | |||
ZSTD_cwksp_reserve_internal_buffer_space(ZSTD_cwksp* ws, size_t const bytes) | |||
{ | |||
void* const alloc = (BYTE*)ws->allocStart - bytes; | |||
void* const bottom = ws->tableEnd; | |||
DEBUGLOG(5, "cwksp: reserving %p %zd bytes, %zd bytes remaining", | |||
alloc, bytes, ZSTD_cwksp_available_space(ws) - bytes); | |||
ZSTD_cwksp_assert_internal_consistency(ws); | |||
assert(alloc >= bottom); | |||
if (alloc < bottom) { | |||
DEBUGLOG(4, "cwksp: alloc failed!"); | |||
ws->allocFailed = 1; | |||
return NULL; | |||
} | |||
/* the area is reserved from the end of wksp. | |||
* If it overlaps with tableValidEnd, it voids guarantees on values' range */ | |||
if (alloc < ws->tableValidEnd) { | |||
ws->tableValidEnd = alloc; | |||
} | |||
ws->allocStart = alloc; | |||
return alloc; | |||
} | |||
/** | |||
* Moves the cwksp to the next phase, and does any necessary allocations. | |||
* cwksp initialization must necessarily go through each phase in order. | |||
 * Returns 0 on success, or a zstd error code.
*/ | |||
MEM_STATIC size_t | |||
ZSTD_cwksp_internal_advance_phase(ZSTD_cwksp* ws, ZSTD_cwksp_alloc_phase_e phase) | |||
{ | |||
assert(phase >= ws->phase); | |||
if (phase > ws->phase) { | |||
/* Going from allocating objects to allocating buffers */ | |||
if (ws->phase < ZSTD_cwksp_alloc_buffers && | |||
phase >= ZSTD_cwksp_alloc_buffers) { | |||
ws->tableValidEnd = ws->objectEnd; | |||
} | |||
/* Going from allocating buffers to allocating aligneds/tables */ | |||
if (ws->phase < ZSTD_cwksp_alloc_aligned && | |||
phase >= ZSTD_cwksp_alloc_aligned) { | |||
/* If unaligned allocations down from a too-large top have left us | |||
* unaligned, we need to realign our alloc ptr. Technically, this | |||
* can consume space that is unaccounted for in the neededSpace | |||
* calculation. However, I believe this can only happen when the | |||
* workspace is too large, and specifically when it is too large | |||
* by a larger margin than the space that will be consumed. */ | |||
/* TODO: cleaner, compiler warning friendly way to do this??? */ | |||
ws->allocStart = (BYTE*)ws->allocStart - ((size_t)ws->allocStart & (sizeof(U32)-1)); | |||
if (ws->allocStart < ws->tableValidEnd) { | |||
ws->tableValidEnd = ws->allocStart; | |||
{ /* Align the start of the "aligned" to 64 bytes. Use [1, 64] bytes. */ | |||
size_t const bytesToAlign = | |||
ZSTD_CWKSP_ALIGNMENT_BYTES - ZSTD_cwksp_bytes_to_align_ptr(ws->allocStart, ZSTD_CWKSP_ALIGNMENT_BYTES); | |||
DEBUGLOG(5, "reserving aligned alignment addtl space: %zu", bytesToAlign); | |||
ZSTD_STATIC_ASSERT((ZSTD_CWKSP_ALIGNMENT_BYTES & (ZSTD_CWKSP_ALIGNMENT_BYTES - 1)) == 0); /* power of 2 */ | |||
RETURN_ERROR_IF(!ZSTD_cwksp_reserve_internal_buffer_space(ws, bytesToAlign), | |||
memory_allocation, "aligned phase - alignment initial allocation failed!"); | |||
} | |||
} | |||
{ /* Align the start of the tables to 64 bytes. Use [0, 63] bytes */ | |||
void* const alloc = ws->objectEnd; | |||
size_t const bytesToAlign = ZSTD_cwksp_bytes_to_align_ptr(alloc, ZSTD_CWKSP_ALIGNMENT_BYTES); | |||
void* const objectEnd = (BYTE*)alloc + bytesToAlign; | |||
DEBUGLOG(5, "reserving table alignment addtl space: %zu", bytesToAlign); | |||
RETURN_ERROR_IF(objectEnd > ws->workspaceEnd, memory_allocation, | |||
"table phase - alignment initial allocation failed!"); | |||
ws->objectEnd = objectEnd; | |||
ws->tableEnd = objectEnd; /* table area starts being empty */ | |||
if (ws->tableValidEnd < ws->tableEnd) { | |||
ws->tableValidEnd = ws->tableEnd; | |||
} } } | |||
ws->phase = phase; | |||
ZSTD_cwksp_assert_internal_consistency(ws); | |||
} | |||
return 0; | |||
} | |||
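/* Editor's note: a worked example, not part of the upstream diff. Entering
 * the aligned/tables phase with allocStart sitting 40 bytes past a 64-byte
 * boundary: ZSTD_cwksp_bytes_to_align_ptr() returns (64 - 40) & 63 == 24, so
 * bytesToAlign == 64 - 24 == 40, and reserving 40 bytes downward lands
 * allocStart exactly on the boundary. An already-aligned allocStart still
 * consumes the full 64 bytes (64 - 0), which is why this step always uses
 * [1, 64] bytes and is budgeted by ZSTD_cwksp_slack_space_required(). */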
/** | |||
* Returns whether this object/buffer/etc was allocated in this workspace. | |||
*/ | |||
MEM_STATIC int ZSTD_cwksp_owns_buffer(const ZSTD_cwksp* ws, const void* ptr) { | |||
MEM_STATIC int ZSTD_cwksp_owns_buffer(const ZSTD_cwksp* ws, const void* ptr) | |||
{ | |||
return (ptr != NULL) && (ws->workspace <= ptr) && (ptr <= ws->workspaceEnd); | |||
} | |||
/** | |||
* Internal function. Do not use directly. | |||
*/ | |||
MEM_STATIC void* ZSTD_cwksp_reserve_internal( | |||
ZSTD_cwksp* ws, size_t bytes, ZSTD_cwksp_alloc_phase_e phase) { | |||
MEM_STATIC void* | |||
ZSTD_cwksp_reserve_internal(ZSTD_cwksp* ws, size_t bytes, ZSTD_cwksp_alloc_phase_e phase) | |||
{ | |||
void* alloc; | |||
void* bottom = ws->tableEnd; | |||
ZSTD_cwksp_internal_advance_phase(ws, phase); | |||
alloc = (BYTE *)ws->allocStart - bytes; | |||
if (ZSTD_isError(ZSTD_cwksp_internal_advance_phase(ws, phase)) || bytes == 0) { | |||
return NULL; | |||
} | |||
#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE) | |||
#if ZSTD_ADDRESS_SANITIZER && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE) | |||
/* over-reserve space */ | |||
alloc = (BYTE *)alloc - 2 * ZSTD_CWKSP_ASAN_REDZONE_SIZE; | |||
bytes += 2 * ZSTD_CWKSP_ASAN_REDZONE_SIZE; | |||
#endif | |||
DEBUGLOG(5, "cwksp: reserving %p %zd bytes, %zd bytes remaining", | |||
alloc, bytes, ZSTD_cwksp_available_space(ws) - bytes); | |||
ZSTD_cwksp_assert_internal_consistency(ws); | |||
assert(alloc >= bottom); | |||
if (alloc < bottom) { | |||
DEBUGLOG(4, "cwksp: alloc failed!"); | |||
ws->allocFailed = 1; | |||
return NULL; | |||
} | |||
if (alloc < ws->tableValidEnd) { | |||
ws->tableValidEnd = alloc; | |||
} | |||
ws->allocStart = alloc; | |||
alloc = ZSTD_cwksp_reserve_internal_buffer_space(ws, bytes); | |||
#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE) | |||
#if ZSTD_ADDRESS_SANITIZER && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE) | |||
/* Move alloc so there's ZSTD_CWKSP_ASAN_REDZONE_SIZE unused space on | |||
     * either side. */
alloc = (BYTE *)alloc + ZSTD_CWKSP_ASAN_REDZONE_SIZE; | |||
__asan_unpoison_memory_region(alloc, bytes); | |||
if (alloc) { | |||
alloc = (BYTE *)alloc + ZSTD_CWKSP_ASAN_REDZONE_SIZE; | |||
if (ws->isStatic == ZSTD_cwksp_dynamic_alloc) { | |||
/* We need to keep the redzone poisoned while unpoisoning the bytes that | |||
* are actually allocated. */ | |||
__asan_unpoison_memory_region(alloc, bytes - 2 * ZSTD_CWKSP_ASAN_REDZONE_SIZE); | |||
} | |||
} | |||
#endif | |||
return alloc; | |||
@@ -260,33 +361,44 @@ MEM_STATIC void* ZSTD_cwksp_reserve_internal( | |||
/** | |||
* Reserves and returns unaligned memory. | |||
*/ | |||
MEM_STATIC BYTE* ZSTD_cwksp_reserve_buffer(ZSTD_cwksp* ws, size_t bytes) { | |||
MEM_STATIC BYTE* ZSTD_cwksp_reserve_buffer(ZSTD_cwksp* ws, size_t bytes) | |||
{ | |||
return (BYTE*)ZSTD_cwksp_reserve_internal(ws, bytes, ZSTD_cwksp_alloc_buffers); | |||
} | |||
/** | |||
* Reserves and returns memory sized on and aligned on sizeof(unsigned). | |||
* Reserves and returns memory sized on and aligned on ZSTD_CWKSP_ALIGNMENT_BYTES (64 bytes). | |||
*/ | |||
MEM_STATIC void* ZSTD_cwksp_reserve_aligned(ZSTD_cwksp* ws, size_t bytes) { | |||
assert((bytes & (sizeof(U32)-1)) == 0); | |||
return ZSTD_cwksp_reserve_internal(ws, ZSTD_cwksp_align(bytes, sizeof(U32)), ZSTD_cwksp_alloc_aligned); | |||
MEM_STATIC void* ZSTD_cwksp_reserve_aligned(ZSTD_cwksp* ws, size_t bytes) | |||
{ | |||
void* ptr = ZSTD_cwksp_reserve_internal(ws, ZSTD_cwksp_align(bytes, ZSTD_CWKSP_ALIGNMENT_BYTES), | |||
ZSTD_cwksp_alloc_aligned); | |||
assert(((size_t)ptr & (ZSTD_CWKSP_ALIGNMENT_BYTES-1))== 0); | |||
return ptr; | |||
} | |||
/** | |||
* Aligned on sizeof(unsigned). These buffers have the special property that | |||
* Aligned on 64 bytes. These buffers have the special property that | |||
* their values remain constrained, allowing us to re-use them without | |||
* memset()-ing them. | |||
*/ | |||
MEM_STATIC void* ZSTD_cwksp_reserve_table(ZSTD_cwksp* ws, size_t bytes) { | |||
MEM_STATIC void* ZSTD_cwksp_reserve_table(ZSTD_cwksp* ws, size_t bytes) | |||
{ | |||
const ZSTD_cwksp_alloc_phase_e phase = ZSTD_cwksp_alloc_aligned; | |||
void* alloc = ws->tableEnd; | |||
void* end = (BYTE *)alloc + bytes; | |||
void* top = ws->allocStart; | |||
void* alloc; | |||
void* end; | |||
void* top; | |||
if (ZSTD_isError(ZSTD_cwksp_internal_advance_phase(ws, phase))) { | |||
return NULL; | |||
} | |||
alloc = ws->tableEnd; | |||
end = (BYTE *)alloc + bytes; | |||
top = ws->allocStart; | |||
DEBUGLOG(5, "cwksp: reserving %p table %zd bytes, %zd bytes remaining", | |||
alloc, bytes, ZSTD_cwksp_available_space(ws) - bytes); | |||
assert((bytes & (sizeof(U32)-1)) == 0); | |||
ZSTD_cwksp_internal_advance_phase(ws, phase); | |||
ZSTD_cwksp_assert_internal_consistency(ws); | |||
assert(end <= top); | |||
if (end > top) { | |||
@@ -296,35 +408,41 @@ MEM_STATIC void* ZSTD_cwksp_reserve_table(ZSTD_cwksp* ws, size_t bytes) { | |||
} | |||
ws->tableEnd = end; | |||
#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE) | |||
__asan_unpoison_memory_region(alloc, bytes); | |||
#if ZSTD_ADDRESS_SANITIZER && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE) | |||
if (ws->isStatic == ZSTD_cwksp_dynamic_alloc) { | |||
__asan_unpoison_memory_region(alloc, bytes); | |||
} | |||
#endif | |||
assert((bytes & (ZSTD_CWKSP_ALIGNMENT_BYTES-1)) == 0); | |||
assert(((size_t)alloc & (ZSTD_CWKSP_ALIGNMENT_BYTES-1))== 0); | |||
return alloc; | |||
} | |||
/** | |||
* Aligned on sizeof(void*). | |||
* Note : should happen only once, at workspace first initialization | |||
*/ | |||
MEM_STATIC void* ZSTD_cwksp_reserve_object(ZSTD_cwksp* ws, size_t bytes) { | |||
size_t roundedBytes = ZSTD_cwksp_align(bytes, sizeof(void*)); | |||
MEM_STATIC void* ZSTD_cwksp_reserve_object(ZSTD_cwksp* ws, size_t bytes) | |||
{ | |||
size_t const roundedBytes = ZSTD_cwksp_align(bytes, sizeof(void*)); | |||
void* alloc = ws->objectEnd; | |||
void* end = (BYTE*)alloc + roundedBytes; | |||
#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE) | |||
#if ZSTD_ADDRESS_SANITIZER && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE) | |||
/* over-reserve space */ | |||
end = (BYTE *)end + 2 * ZSTD_CWKSP_ASAN_REDZONE_SIZE; | |||
#endif | |||
DEBUGLOG(5, | |||
DEBUGLOG(4, | |||
"cwksp: reserving %p object %zd bytes (rounded to %zd), %zd bytes remaining", | |||
alloc, bytes, roundedBytes, ZSTD_cwksp_available_space(ws) - roundedBytes); | |||
assert(((size_t)alloc & (sizeof(void*)-1)) == 0); | |||
assert((bytes & (sizeof(void*)-1)) == 0); | |||
assert((size_t)alloc % ZSTD_ALIGNOF(void*) == 0); | |||
assert(bytes % ZSTD_ALIGNOF(void*) == 0); | |||
ZSTD_cwksp_assert_internal_consistency(ws); | |||
/* we must be in the first phase, no advance is possible */ | |||
if (ws->phase != ZSTD_cwksp_alloc_objects || end > ws->workspaceEnd) { | |||
DEBUGLOG(4, "cwksp: object alloc failed!"); | |||
DEBUGLOG(3, "cwksp: object alloc failed!"); | |||
ws->allocFailed = 1; | |||
return NULL; | |||
} | |||
@@ -332,20 +450,23 @@ MEM_STATIC void* ZSTD_cwksp_reserve_object(ZSTD_cwksp* ws, size_t bytes) { | |||
ws->tableEnd = end; | |||
ws->tableValidEnd = end; | |||
#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE) | |||
#if ZSTD_ADDRESS_SANITIZER && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE) | |||
/* Move alloc so there's ZSTD_CWKSP_ASAN_REDZONE_SIZE unused space on | |||
     * either side. */
alloc = (BYTE *)alloc + ZSTD_CWKSP_ASAN_REDZONE_SIZE; | |||
__asan_unpoison_memory_region(alloc, bytes); | |||
alloc = (BYTE*)alloc + ZSTD_CWKSP_ASAN_REDZONE_SIZE; | |||
if (ws->isStatic == ZSTD_cwksp_dynamic_alloc) { | |||
__asan_unpoison_memory_region(alloc, bytes); | |||
} | |||
#endif | |||
return alloc; | |||
} | |||
MEM_STATIC void ZSTD_cwksp_mark_tables_dirty(ZSTD_cwksp* ws) { | |||
MEM_STATIC void ZSTD_cwksp_mark_tables_dirty(ZSTD_cwksp* ws) | |||
{ | |||
DEBUGLOG(4, "cwksp: ZSTD_cwksp_mark_tables_dirty"); | |||
#if defined (MEMORY_SANITIZER) && !defined (ZSTD_MSAN_DONT_POISON_WORKSPACE) | |||
#if ZSTD_MEMORY_SANITIZER && !defined (ZSTD_MSAN_DONT_POISON_WORKSPACE) | |||
/* To validate that the table re-use logic is sound, and that we don't | |||
* access table space that we haven't cleaned, we re-"poison" the table | |||
* space every time we mark it dirty. */ | |||
@@ -380,7 +501,7 @@ MEM_STATIC void ZSTD_cwksp_clean_tables(ZSTD_cwksp* ws) { | |||
assert(ws->tableValidEnd >= ws->objectEnd); | |||
assert(ws->tableValidEnd <= ws->allocStart); | |||
if (ws->tableValidEnd < ws->tableEnd) { | |||
memset(ws->tableValidEnd, 0, (BYTE*)ws->tableEnd - (BYTE*)ws->tableValidEnd); | |||
ZSTD_memset(ws->tableValidEnd, 0, (size_t)((BYTE*)ws->tableEnd - (BYTE*)ws->tableValidEnd)); | |||
} | |||
ZSTD_cwksp_mark_tables_clean(ws); | |||
} | |||
@@ -392,8 +513,12 @@ MEM_STATIC void ZSTD_cwksp_clean_tables(ZSTD_cwksp* ws) { | |||
MEM_STATIC void ZSTD_cwksp_clear_tables(ZSTD_cwksp* ws) { | |||
DEBUGLOG(4, "cwksp: clearing tables!"); | |||
#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE) | |||
{ | |||
#if ZSTD_ADDRESS_SANITIZER && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE) | |||
/* We don't do this when the workspace is statically allocated, because | |||
* when that is the case, we have no capability to hook into the end of the | |||
* workspace's lifecycle to unpoison the memory. | |||
*/ | |||
if (ws->isStatic == ZSTD_cwksp_dynamic_alloc) { | |||
size_t size = (BYTE*)ws->tableValidEnd - (BYTE*)ws->objectEnd; | |||
__asan_poison_memory_region(ws->objectEnd, size); | |||
} | |||
@@ -410,7 +535,7 @@ MEM_STATIC void ZSTD_cwksp_clear_tables(ZSTD_cwksp* ws) { | |||
MEM_STATIC void ZSTD_cwksp_clear(ZSTD_cwksp* ws) { | |||
DEBUGLOG(4, "cwksp: clearing!"); | |||
#if defined (MEMORY_SANITIZER) && !defined (ZSTD_MSAN_DONT_POISON_WORKSPACE) | |||
#if ZSTD_MEMORY_SANITIZER && !defined (ZSTD_MSAN_DONT_POISON_WORKSPACE) | |||
/* To validate that the context re-use logic is sound, and that we don't | |||
* access stuff that this compression hasn't initialized, we re-"poison" | |||
* the workspace (or at least the non-static, non-table parts of it) | |||
@@ -421,8 +546,12 @@ MEM_STATIC void ZSTD_cwksp_clear(ZSTD_cwksp* ws) { | |||
} | |||
#endif | |||
#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE) | |||
{ | |||
#if ZSTD_ADDRESS_SANITIZER && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE) | |||
/* We don't do this when the workspace is statically allocated, because | |||
* when that is the case, we have no capability to hook into the end of the | |||
* workspace's lifecycle to unpoison the memory. | |||
*/ | |||
if (ws->isStatic == ZSTD_cwksp_dynamic_alloc) { | |||
size_t size = (BYTE*)ws->workspaceEnd - (BYTE*)ws->objectEnd; | |||
__asan_poison_memory_region(ws->objectEnd, size); | |||
} | |||
@@ -442,7 +571,7 @@ MEM_STATIC void ZSTD_cwksp_clear(ZSTD_cwksp* ws) { | |||
* Any existing values in the workspace are ignored (the previously managed | |||
* buffer, if present, must be separately freed). | |||
*/ | |||
MEM_STATIC void ZSTD_cwksp_init(ZSTD_cwksp* ws, void* start, size_t size) { | |||
MEM_STATIC void ZSTD_cwksp_init(ZSTD_cwksp* ws, void* start, size_t size, ZSTD_cwksp_static_alloc_e isStatic) { | |||
DEBUGLOG(4, "cwksp: init'ing workspace with %zd bytes", size); | |||
assert(((size_t)start & (sizeof(void*)-1)) == 0); /* ensure correct alignment */ | |||
ws->workspace = start; | |||
@@ -450,39 +579,45 @@ MEM_STATIC void ZSTD_cwksp_init(ZSTD_cwksp* ws, void* start, size_t size) { | |||
ws->objectEnd = ws->workspace; | |||
ws->tableValidEnd = ws->objectEnd; | |||
ws->phase = ZSTD_cwksp_alloc_objects; | |||
ws->isStatic = isStatic; | |||
ZSTD_cwksp_clear(ws); | |||
ws->workspaceOversizedDuration = 0; | |||
ZSTD_cwksp_assert_internal_consistency(ws); | |||
} | |||
MEM_STATIC size_t ZSTD_cwksp_create(ZSTD_cwksp* ws, size_t size, ZSTD_customMem customMem) { | |||
void* workspace = ZSTD_malloc(size, customMem); | |||
void* workspace = ZSTD_customMalloc(size, customMem); | |||
DEBUGLOG(4, "cwksp: creating new workspace with %zd bytes", size); | |||
RETURN_ERROR_IF(workspace == NULL, memory_allocation, "NULL pointer!"); | |||
ZSTD_cwksp_init(ws, workspace, size); | |||
ZSTD_cwksp_init(ws, workspace, size, ZSTD_cwksp_dynamic_alloc); | |||
return 0; | |||
} | |||
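/* Editor's note: an illustrative counterpart, not part of the upstream diff.
 * A caller-provided workspace (e.g. one backing ZSTD_initStaticCCtx()) is
 * initialized with ZSTD_cwksp_static_alloc instead, which keeps the ASAN
 * poisoning paths above disabled because nothing could ever unpoison the
 * caller's memory:
 *
 *   ZSTD_cwksp_init(&ws, userBuffer, userBufferSize, ZSTD_cwksp_static_alloc);
 */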
MEM_STATIC void ZSTD_cwksp_free(ZSTD_cwksp* ws, ZSTD_customMem customMem) { | |||
void *ptr = ws->workspace; | |||
DEBUGLOG(4, "cwksp: freeing workspace"); | |||
memset(ws, 0, sizeof(ZSTD_cwksp)); | |||
ZSTD_free(ptr, customMem); | |||
ZSTD_memset(ws, 0, sizeof(ZSTD_cwksp)); | |||
ZSTD_customFree(ptr, customMem); | |||
} | |||
/** | |||
* Moves the management of a workspace from one cwksp to another. The src cwksp | |||
* is left in an invalid state (src must be re-init()'ed before its used again). | |||
* is left in an invalid state (src must be re-init()'ed before it's used again). | |||
*/ | |||
MEM_STATIC void ZSTD_cwksp_move(ZSTD_cwksp* dst, ZSTD_cwksp* src) { | |||
*dst = *src; | |||
memset(src, 0, sizeof(ZSTD_cwksp)); | |||
ZSTD_memset(src, 0, sizeof(ZSTD_cwksp)); | |||
} | |||
MEM_STATIC size_t ZSTD_cwksp_sizeof(const ZSTD_cwksp* ws) { | |||
return (size_t)((BYTE*)ws->workspaceEnd - (BYTE*)ws->workspace); | |||
} | |||
MEM_STATIC size_t ZSTD_cwksp_used(const ZSTD_cwksp* ws) { | |||
return (size_t)((BYTE*)ws->tableEnd - (BYTE*)ws->workspace) | |||
+ (size_t)((BYTE*)ws->workspaceEnd - (BYTE*)ws->allocStart); | |||
} | |||
MEM_STATIC int ZSTD_cwksp_reserve_failed(const ZSTD_cwksp* ws) { | |||
return ws->allocFailed; | |||
} | |||
@@ -491,6 +626,24 @@ MEM_STATIC int ZSTD_cwksp_reserve_failed(const ZSTD_cwksp* ws) { | |||
* Functions Checking Free Space | |||
***************************************/ | |||
/* ZSTD_alignmentSpaceWithinBounds() : | |||
* Returns if the estimated space needed for a wksp is within an acceptable limit of the | |||
* actual amount of space used. | |||
*/ | |||
MEM_STATIC int ZSTD_cwksp_estimated_space_within_bounds(const ZSTD_cwksp* const ws, | |||
size_t const estimatedSpace, int resizedWorkspace) { | |||
if (resizedWorkspace) { | |||
/* Resized/newly allocated wksp should have exact bounds */ | |||
return ZSTD_cwksp_used(ws) == estimatedSpace; | |||
} else { | |||
        /* Due to alignment, when reusing a workspace, we can actually consume up to 63 bytes
         * fewer or more than estimatedSpace. See the comments in zstd_cwksp.h for details.
*/ | |||
return (ZSTD_cwksp_used(ws) >= estimatedSpace - 63) && (ZSTD_cwksp_used(ws) <= estimatedSpace + 63); | |||
} | |||
} | |||
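/* Editor's note: illustrative use, not part of the upstream diff. This check
 * is a debug-time sanity bound, suitable for an assert after a context reset;
 * `resized` would be 1 only when the workspace was freshly (re)allocated:
 *
 *   assert(ZSTD_cwksp_estimated_space_within_bounds(ws, estimatedSpace, resized));
 */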
MEM_STATIC size_t ZSTD_cwksp_available_space(ZSTD_cwksp* ws) { | |||
return (size_t)((BYTE*)ws->allocStart - (BYTE*)ws->tableEnd); | |||
} |
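/* Editor's note: a minimal sketch, not part of the upstream diff, of the
 * phase ordering documented at the top of this file: objects, then buffers,
 * then aligneds/tables. Reversing any two of these reservations would fail.
 * The function name and sizes are hypothetical. */
MEM_STATIC void sketch_cwksp_reservation_order(ZSTD_cwksp* ws)
{
    void* const obj = ZSTD_cwksp_reserve_object(ws, 64);    /* phase 1: objects (multiple of sizeof(void*)) */
    BYTE* const buf = ZSTD_cwksp_reserve_buffer(ws, 1000);  /* phase 2: buffers (no alignment needed) */
    void* const tbl = ZSTD_cwksp_reserve_table(ws, 4096);   /* phase 3: tables (multiple of 64 bytes) */
    void* const alg = ZSTD_cwksp_reserve_aligned(ws, 192);  /* same phase: 64-byte aligned */
    (void)obj; (void)buf; (void)tbl; (void)alg;
}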
@@ -1,5 +1,5 @@ | |||
/* | |||
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. | |||
* Copyright (c) Meta Platforms, Inc. and affiliates. | |||
* All rights reserved. | |||
* | |||
* This source code is licensed under both the BSD-style license (found in the | |||
@@ -14,12 +14,11 @@ | |||
/*-******************************************************* | |||
* Dependencies | |||
*********************************************************/ | |||
#include <string.h> /* memcpy, memmove, memset */ | |||
#include "zstd_deps.h" /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */ | |||
#include "cpu.h" /* bmi2 */ | |||
#include "mem.h" /* low level memory routines */ | |||
#define FSE_STATIC_LINKING_ONLY | |||
#include "fse.h" | |||
#define HUF_STATIC_LINKING_ONLY | |||
#include "huf.h" | |||
#include "zstd_decompress_internal.h" | |||
#include "zstd_ddict.h" | |||
@@ -127,14 +126,14 @@ static size_t ZSTD_initDDict_internal(ZSTD_DDict* ddict, | |||
ddict->dictContent = dict; | |||
if (!dict) dictSize = 0; | |||
} else { | |||
void* const internalBuffer = ZSTD_malloc(dictSize, ddict->cMem); | |||
void* const internalBuffer = ZSTD_customMalloc(dictSize, ddict->cMem); | |||
ddict->dictBuffer = internalBuffer; | |||
ddict->dictContent = internalBuffer; | |||
if (!internalBuffer) return ERROR(memory_allocation); | |||
memcpy(internalBuffer, dict, dictSize); | |||
ZSTD_memcpy(internalBuffer, dict, dictSize); | |||
} | |||
ddict->dictSize = dictSize; | |||
ddict->entropy.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001); /* cover both little and big endian */ | |||
ddict->entropy.hufTable[0] = (HUF_DTable)((ZSTD_HUFFDTABLE_CAPACITY_LOG)*0x1000001); /* cover both little and big endian */ | |||
/* parse dictionary content */ | |||
FORWARD_IF_ERROR( ZSTD_loadEntropy_intoDDict(ddict, dictContentType) , ""); | |||
@@ -147,9 +146,9 @@ ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize, | |||
ZSTD_dictContentType_e dictContentType, | |||
ZSTD_customMem customMem) | |||
{ | |||
if (!customMem.customAlloc ^ !customMem.customFree) return NULL; | |||
if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL; | |||
{ ZSTD_DDict* const ddict = (ZSTD_DDict*) ZSTD_malloc(sizeof(ZSTD_DDict), customMem); | |||
{ ZSTD_DDict* const ddict = (ZSTD_DDict*) ZSTD_customMalloc(sizeof(ZSTD_DDict), customMem); | |||
if (ddict == NULL) return NULL; | |||
ddict->cMem = customMem; | |||
{ size_t const initResult = ZSTD_initDDict_internal(ddict, | |||
@@ -198,7 +197,7 @@ const ZSTD_DDict* ZSTD_initStaticDDict( | |||
if ((size_t)sBuffer & 7) return NULL; /* 8-aligned */ | |||
if (sBufferSize < neededSpace) return NULL; | |||
if (dictLoadMethod == ZSTD_dlm_byCopy) { | |||
memcpy(ddict+1, dict, dictSize); /* local copy */ | |||
ZSTD_memcpy(ddict+1, dict, dictSize); /* local copy */ | |||
dict = ddict+1; | |||
} | |||
if (ZSTD_isError( ZSTD_initDDict_internal(ddict, | |||
@@ -213,8 +212,8 @@ size_t ZSTD_freeDDict(ZSTD_DDict* ddict) | |||
{ | |||
if (ddict==NULL) return 0; /* support free on NULL */ | |||
{ ZSTD_customMem const cMem = ddict->cMem; | |||
ZSTD_free(ddict->dictBuffer, cMem); | |||
ZSTD_free(ddict, cMem); | |||
ZSTD_customFree(ddict->dictBuffer, cMem); | |||
ZSTD_customFree(ddict, cMem); | |||
return 0; | |||
} | |||
} | |||
@@ -240,5 +239,5 @@ size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict) | |||
unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict) | |||
{ | |||
if (ddict==NULL) return 0; | |||
return ZSTD_getDictID_fromDict(ddict->dictContent, ddict->dictSize); | |||
return ddict->dictID; | |||
} |
@@ -1,5 +1,5 @@ | |||
/* | |||
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. | |||
* Copyright (c) Meta Platforms, Inc. and affiliates. | |||
* All rights reserved. | |||
* | |||
* This source code is licensed under both the BSD-style license (found in the | |||
@@ -15,7 +15,7 @@ | |||
/*-******************************************************* | |||
* Dependencies | |||
*********************************************************/ | |||
#include <stddef.h> /* size_t */ | |||
#include "zstd_deps.h" /* size_t */ | |||
#include "zstd.h" /* ZSTD_DDict, and several public functions */ | |||
@@ -1,5 +1,5 @@ | |||
/* | |||
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. | |||
* Copyright (c) Meta Platforms, Inc. and affiliates. | |||
* All rights reserved. | |||
* | |||
* This source code is licensed under both the BSD-style license (found in the | |||
@@ -15,8 +15,8 @@ | |||
/*-******************************************************* | |||
* Dependencies | |||
*********************************************************/ | |||
#include <stddef.h> /* size_t */ | |||
#include "zstd.h" /* DCtx, and some public functions */ | |||
#include "zstd_deps.h" /* size_t */ | |||
#include "../zstd.h" /* DCtx, and some public functions */ | |||
#include "zstd_internal.h" /* blockProperties_t, and some public functions */ | |||
#include "zstd_decompress_internal.h" /* ZSTD_seqSymbol */ | |||
@@ -33,6 +33,12 @@ | |||
*/ | |||
/* Streaming state is used to inform allocation of the literal buffer */ | |||
typedef enum { | |||
not_streaming = 0, | |||
is_streaming = 1 | |||
} streaming_operation; | |||
/* ZSTD_decompressBlock_internal() : | |||
* decompress block, starting at `src`, | |||
* into destination buffer `dst`. | |||
@@ -41,19 +47,22 @@ | |||
*/ | |||
size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx, | |||
void* dst, size_t dstCapacity, | |||
const void* src, size_t srcSize, const int frame); | |||
const void* src, size_t srcSize, const int frame, const streaming_operation streaming); | |||
/* ZSTD_buildFSETable() : | |||
* generate FSE decoding table for one symbol (ll, ml or off) | |||
* this function must be called with valid parameters only | |||
* (dt is large enough, normalizedCounter distribution total is a power of 2, max is within range, etc.) | |||
* in which case it cannot fail. | |||
* The workspace must be 4-byte aligned and at least ZSTD_BUILD_FSE_TABLE_WKSP_SIZE bytes, which is | |||
* defined in zstd_decompress_internal.h. | |||
* Internal use only. | |||
*/ | |||
void ZSTD_buildFSETable(ZSTD_seqSymbol* dt, | |||
const short* normalizedCounter, unsigned maxSymbolValue, | |||
const U32* baseValue, const U32* nbAdditionalBits, | |||
unsigned tableLog); | |||
const U32* baseValue, const U8* nbAdditionalBits, | |||
unsigned tableLog, void* wksp, size_t wkspSize, | |||
int bmi2); | |||
#endif /* ZSTD_DEC_BLOCK_H */ |
@@ -1,5 +1,5 @@ | |||
/* | |||
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. | |||
* Copyright (c) Meta Platforms, Inc. and affiliates. | |||
* All rights reserved. | |||
* | |||
* This source code is licensed under both the BSD-style license (found in the | |||
@@ -20,33 +20,33 @@ | |||
* Dependencies | |||
*********************************************************/ | |||
#include "mem.h" /* BYTE, U16, U32 */ | |||
#include "zstd_internal.h" /* ZSTD_seqSymbol */ | |||
#include "zstd_internal.h" /* constants : MaxLL, MaxML, MaxOff, LLFSELog, etc. */ | |||
/*-******************************************************* | |||
* Constants | |||
*********************************************************/ | |||
static const U32 LL_base[MaxLL+1] = { | |||
static UNUSED_ATTR const U32 LL_base[MaxLL+1] = { | |||
0, 1, 2, 3, 4, 5, 6, 7, | |||
8, 9, 10, 11, 12, 13, 14, 15, | |||
16, 18, 20, 22, 24, 28, 32, 40, | |||
48, 64, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000, | |||
0x2000, 0x4000, 0x8000, 0x10000 }; | |||
static const U32 OF_base[MaxOff+1] = { | |||
static UNUSED_ATTR const U32 OF_base[MaxOff+1] = { | |||
0, 1, 1, 5, 0xD, 0x1D, 0x3D, 0x7D, | |||
0xFD, 0x1FD, 0x3FD, 0x7FD, 0xFFD, 0x1FFD, 0x3FFD, 0x7FFD, | |||
0xFFFD, 0x1FFFD, 0x3FFFD, 0x7FFFD, 0xFFFFD, 0x1FFFFD, 0x3FFFFD, 0x7FFFFD, | |||
0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD, 0x1FFFFFFD, 0x3FFFFFFD, 0x7FFFFFFD }; | |||
static const U32 OF_bits[MaxOff+1] = { | |||
static UNUSED_ATTR const U8 OF_bits[MaxOff+1] = { | |||
0, 1, 2, 3, 4, 5, 6, 7, | |||
8, 9, 10, 11, 12, 13, 14, 15, | |||
16, 17, 18, 19, 20, 21, 22, 23, | |||
24, 25, 26, 27, 28, 29, 30, 31 }; | |||
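/* Editor's note: an illustrative sketch, not part of the upstream diff.
 * Each table above maps an FSE-decoded code to a baseline plus a number of
 * extra raw bits to read from the stream; e.g. literal-length code 17 has
 * LL_base[17] == 18 and one extra bit, giving a length in [18, 19]. The
 * helper name is hypothetical; `extraBits` must hold exactly the number of
 * freshly-read bits the corresponding *_bits table prescribes. */
static U32 sketch_decodeBaselineValue(const U32* baseTable, U32 code, U32 extraBits)
{
    return baseTable[code] + extraBits;
}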
static const U32 ML_base[MaxML+1] = { | |||
static UNUSED_ATTR const U32 ML_base[MaxML+1] = { | |||
3, 4, 5, 6, 7, 8, 9, 10, | |||
11, 12, 13, 14, 15, 16, 17, 18, | |||
19, 20, 21, 22, 23, 24, 25, 26, | |||
@@ -73,12 +73,17 @@ static const U32 ML_base[MaxML+1] = { | |||
#define SEQSYMBOL_TABLE_SIZE(log) (1 + (1 << (log))) | |||
#define ZSTD_BUILD_FSE_TABLE_WKSP_SIZE (sizeof(S16) * (MaxSeq + 1) + (1u << MaxFSELog) + sizeof(U64)) | |||
#define ZSTD_BUILD_FSE_TABLE_WKSP_SIZE_U32 ((ZSTD_BUILD_FSE_TABLE_WKSP_SIZE + sizeof(U32) - 1) / sizeof(U32)) | |||
#define ZSTD_HUFFDTABLE_CAPACITY_LOG 12 | |||
typedef struct { | |||
ZSTD_seqSymbol LLTable[SEQSYMBOL_TABLE_SIZE(LLFSELog)]; /* Note : Space reserved for FSE Tables */ | |||
ZSTD_seqSymbol OFTable[SEQSYMBOL_TABLE_SIZE(OffFSELog)]; /* is also used as temporary workspace while building hufTable during DDict creation */ | |||
ZSTD_seqSymbol MLTable[SEQSYMBOL_TABLE_SIZE(MLFSELog)]; /* and therefore must be at least HUF_DECOMPRESS_WORKSPACE_SIZE large */ | |||
HUF_DTable hufTable[HUF_DTABLE_SIZE(HufLog)]; /* can accommodate HUF_decompress4X */ | |||
HUF_DTable hufTable[HUF_DTABLE_SIZE(ZSTD_HUFFDTABLE_CAPACITY_LOG)]; /* can accommodate HUF_decompress4X */ | |||
U32 rep[ZSTD_REP_NUM]; | |||
U32 workspace[ZSTD_BUILD_FSE_TABLE_WKSP_SIZE_U32]; | |||
} ZSTD_entropyDTables_t; | |||
typedef enum { ZSTDds_getFrameHeaderSize, ZSTDds_decodeFrameHeader, | |||
@@ -95,10 +100,28 @@ typedef enum { | |||
ZSTD_use_once = 1 /* Use the dictionary once and set to ZSTD_dont_use */ | |||
} ZSTD_dictUses_e; | |||
/* Hashset for storing references to multiple ZSTD_DDict within ZSTD_DCtx */ | |||
typedef struct { | |||
const ZSTD_DDict** ddictPtrTable; | |||
size_t ddictPtrTableSize; | |||
size_t ddictPtrCount; | |||
} ZSTD_DDictHashSet; | |||
#ifndef ZSTD_DECODER_INTERNAL_BUFFER | |||
# define ZSTD_DECODER_INTERNAL_BUFFER (1 << 16) | |||
#endif | |||
#define ZSTD_LBMIN 64 | |||
#define ZSTD_LBMAX (128 << 10) | |||
/* extra buffer, compensates when dst is not large enough to store litBuffer */ | |||
#define ZSTD_LITBUFFEREXTRASIZE BOUNDED(ZSTD_LBMIN, ZSTD_DECODER_INTERNAL_BUFFER, ZSTD_LBMAX) | |||
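/* Editor's note: worked arithmetic, not part of the upstream diff. BOUNDED
 * clamps its middle argument into [ZSTD_LBMIN, ZSTD_LBMAX], so with the
 * defaults above ZSTD_LITBUFFEREXTRASIZE == 1 << 16 (64 KiB), which already
 * lies inside [64, 128 KiB]; overriding ZSTD_DECODER_INTERNAL_BUFFER below 64
 * or above 128 KiB would pin the result to the respective bound. */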
typedef enum {
    ZSTD_obm_buffered = 0, /* Buffer the output */
    ZSTD_obm_stable = 1    /* ZSTD_outBuffer is stable */
} ZSTD_outBufferMode_e;
typedef enum {
    ZSTD_not_in_dst = 0,  /* Stored entirely within litExtraBuffer */
    ZSTD_in_dst = 1,      /* Stored entirely within dst (in memory after current output write) */
    ZSTD_split = 2        /* Split between litExtraBuffer and dst */
} ZSTD_litLocation_e;
struct ZSTD_DCtx_s | |||
{ | |||
@@ -114,6 +137,7 @@ struct ZSTD_DCtx_s | |||
const void* dictEnd; /* end of previous segment */ | |||
size_t expected; | |||
ZSTD_frameHeader fParams; | |||
U64 processedCSize; | |||
U64 decodedSize; | |||
blockType_e bType; /* used in ZSTD_decompressContinue(), store blockType between block header decoding and block decompression stages */ | |||
ZSTD_dStage stage; | |||
@@ -122,12 +146,16 @@ struct ZSTD_DCtx_s | |||
XXH64_state_t xxhState; | |||
size_t headerSize; | |||
ZSTD_format_e format; | |||
ZSTD_forceIgnoreChecksum_e forceIgnoreChecksum; /* User specified: if == 1, will ignore checksums in compressed frame. Default == 0 */ | |||
U32 validateChecksum; /* if == 1, will validate checksum. Is == 1 if (fParams.checksumFlag == 1) and (forceIgnoreChecksum == 0). */ | |||
const BYTE* litPtr; | |||
ZSTD_customMem customMem; | |||
size_t litSize; | |||
size_t rleSize; | |||
size_t staticSize; | |||
#if DYNAMIC_BMI2 != 0 | |||
int bmi2; /* == 1 if the CPU supports BMI2 and 0 otherwise. CPU support is determined dynamically once per context lifetime. */ | |||
#endif | |||
/* dictionary */ | |||
ZSTD_DDict* ddictLocal; | |||
@@ -135,6 +163,9 @@ struct ZSTD_DCtx_s | |||
U32 dictID; | |||
int ddictIsCold; /* if == 1 : dictionary is "new" for working context, and presumed "cold" (not in cpu cache) */ | |||
ZSTD_dictUses_e dictUses; | |||
ZSTD_DDictHashSet* ddictSet; /* Hash set for multiple ddicts */ | |||
ZSTD_refMultipleDDicts_e refMultipleDDicts; /* User specified: if == 1, will allow references to multiple DDicts. Default == 0 (disabled) */ | |||
int disableHufAsm; | |||
/* streaming */ | |||
ZSTD_dStreamStage streamStage; | |||
@@ -147,16 +178,21 @@ struct ZSTD_DCtx_s | |||
size_t outStart; | |||
size_t outEnd; | |||
size_t lhSize; | |||
#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1) | |||
void* legacyContext; | |||
U32 previousLegacyVersion; | |||
U32 legacyVersion; | |||
#endif | |||
U32 hostageByte; | |||
int noForwardProgress; | |||
ZSTD_outBufferMode_e outBufferMode; | |||
ZSTD_bufferMode_e outBufferMode; | |||
ZSTD_outBuffer expectedOutBuffer; | |||
/* workspace */ | |||
BYTE litBuffer[ZSTD_BLOCKSIZE_MAX + WILDCOPY_OVERLENGTH]; | |||
BYTE* litBuffer; | |||
const BYTE* litBufferEnd; | |||
ZSTD_litLocation_e litBufferLocation; | |||
BYTE litExtraBuffer[ZSTD_LITBUFFEREXTRASIZE + WILDCOPY_OVERLENGTH]; /* literal buffer can be split between storage within dst and within this scratch buffer */ | |||
BYTE headerBuffer[ZSTD_FRAMEHEADERSIZE_MAX]; | |||
size_t oversizedDuration; | |||
@@ -165,8 +201,21 @@ struct ZSTD_DCtx_s | |||
void const* dictContentBeginForFuzzing; | |||
void const* dictContentEndForFuzzing; | |||
#endif | |||
/* Tracing */ | |||
#if ZSTD_TRACE | |||
ZSTD_TraceCtx traceCtx; | |||
#endif | |||
}; /* typedef'd to ZSTD_DCtx within "zstd.h" */ | |||
MEM_STATIC int ZSTD_DCtx_get_bmi2(const struct ZSTD_DCtx_s *dctx) { | |||
#if DYNAMIC_BMI2 != 0 | |||
return dctx->bmi2; | |||
#else | |||
(void)dctx; | |||
return 0; | |||
#endif | |||
} | |||
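/* Caller-side sketch (decode_kernel_* are hypothetical names, not zstd API):
 *
 *     if (ZSTD_DCtx_get_bmi2(dctx))
 *         return decode_kernel_bmi2(dctx, dst, src);
 *     return decode_kernel_default(dctx, dst, src);
 *
 * With DYNAMIC_BMI2 == 0 the getter folds to the constant 0, so the compiler
 * can discard the BMI2 branch entirely. */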
/*-******************************************************* | |||
* Shared internal functions | |||
@@ -183,7 +232,7 @@ size_t ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy, | |||
* If yes, do nothing (continue on current segment). | |||
* If not, classify previous segment as "external dictionary", and start a new segment. | |||
* This function cannot fail. */ | |||
void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst); | |||
void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst, size_t dstSize); | |||
#endif /* ZSTD_DECOMPRESS_INTERNAL_H */ |
@@ -0,0 +1,111 @@ | |||
/* | |||
* Copyright (c) Meta Platforms, Inc. and affiliates. | |||
* All rights reserved. | |||
* | |||
* This source code is licensed under both the BSD-style license (found in the | |||
* LICENSE file in the root directory of this source tree) and the GPLv2 (found | |||
* in the COPYING file in the root directory of this source tree). | |||
* You may select, at your option, one of the above-listed licenses. | |||
*/ | |||
/* This file provides common libc dependencies that zstd requires. | |||
* The purpose is to allow replacing this file with a custom implementation | |||
* to compile zstd without libc support. | |||
*/ | |||
/* Need: | |||
* NULL | |||
* INT_MAX | |||
* UINT_MAX | |||
* ZSTD_memcpy() | |||
* ZSTD_memset() | |||
* ZSTD_memmove() | |||
*/ | |||
#ifndef ZSTD_DEPS_COMMON | |||
#define ZSTD_DEPS_COMMON | |||
#include <limits.h> | |||
#include <stddef.h> | |||
#include <string.h> | |||
#if defined(__GNUC__) && __GNUC__ >= 4 | |||
# define ZSTD_memcpy(d,s,l) __builtin_memcpy((d),(s),(l)) | |||
# define ZSTD_memmove(d,s,l) __builtin_memmove((d),(s),(l)) | |||
# define ZSTD_memset(p,v,l) __builtin_memset((p),(v),(l)) | |||
#else | |||
# define ZSTD_memcpy(d,s,l) memcpy((d),(s),(l)) | |||
# define ZSTD_memmove(d,s,l) memmove((d),(s),(l)) | |||
# define ZSTD_memset(p,v,l) memset((p),(v),(l)) | |||
#endif | |||
#endif /* ZSTD_DEPS_COMMON */ | |||
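/* Minimal sketch of a freestanding replacement, assuming the build injects its
 * own zstd_deps.h; my_memcpy/my_memmove/my_memset are placeholders, not zstd
 * symbols. Only NULL, INT_MAX/UINT_MAX and these three primitives are needed:
 *
 *     #define ZSTD_memcpy(d,s,l)  my_memcpy((d),(s),(l))
 *     #define ZSTD_memmove(d,s,l) my_memmove((d),(s),(l))
 *     #define ZSTD_memset(p,v,l)  my_memset((p),(v),(l))
 */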
/* Need: | |||
* ZSTD_malloc() | |||
* ZSTD_free() | |||
* ZSTD_calloc() | |||
*/ | |||
#ifdef ZSTD_DEPS_NEED_MALLOC | |||
#ifndef ZSTD_DEPS_MALLOC | |||
#define ZSTD_DEPS_MALLOC | |||
#include <stdlib.h> | |||
#define ZSTD_malloc(s) malloc(s) | |||
#define ZSTD_calloc(n,s) calloc((n), (s)) | |||
#define ZSTD_free(p) free((p)) | |||
#endif /* ZSTD_DEPS_MALLOC */ | |||
#endif /* ZSTD_DEPS_NEED_MALLOC */ | |||
/* | |||
* Provides 64-bit math support. | |||
* Need: | |||
* U64 ZSTD_div64(U64 dividend, U32 divisor) | |||
*/ | |||
#ifdef ZSTD_DEPS_NEED_MATH64 | |||
#ifndef ZSTD_DEPS_MATH64 | |||
#define ZSTD_DEPS_MATH64 | |||
#define ZSTD_div64(dividend, divisor) ((dividend) / (divisor)) | |||
#endif /* ZSTD_DEPS_MATH64 */ | |||
#endif /* ZSTD_DEPS_NEED_MATH64 */ | |||
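/* Illustrative override (an assumption, not shipped with zstd): a 32-bit
 * freestanding target lacking compiler-provided 64-bit division could route
 * through its own helper, e.g.
 *
 *     #define ZSTD_div64(dividend, divisor) my_udiv64((dividend), (U32)(divisor))
 *
 * Note the contract only ever requires a U64 / U32 division. */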
/* Need: | |||
* assert() | |||
*/ | |||
#ifdef ZSTD_DEPS_NEED_ASSERT | |||
#ifndef ZSTD_DEPS_ASSERT | |||
#define ZSTD_DEPS_ASSERT | |||
#include <assert.h> | |||
#endif /* ZSTD_DEPS_ASSERT */ | |||
#endif /* ZSTD_DEPS_NEED_ASSERT */ | |||
/* Need: | |||
* ZSTD_DEBUG_PRINT() | |||
*/ | |||
#ifdef ZSTD_DEPS_NEED_IO | |||
#ifndef ZSTD_DEPS_IO | |||
#define ZSTD_DEPS_IO | |||
#include <stdio.h> | |||
#define ZSTD_DEBUG_PRINT(...) fprintf(stderr, __VA_ARGS__) | |||
#endif /* ZSTD_DEPS_IO */ | |||
#endif /* ZSTD_DEPS_NEED_IO */ | |||
/* Only requested when <stdint.h> is known to be present. | |||
* Need: | |||
* intptr_t | |||
*/ | |||
#ifdef ZSTD_DEPS_NEED_STDINT | |||
#ifndef ZSTD_DEPS_STDINT | |||
#define ZSTD_DEPS_STDINT | |||
#include <stdint.h> | |||
#endif /* ZSTD_DEPS_STDINT */ | |||
#endif /* ZSTD_DEPS_NEED_STDINT */ |
@@ -1,5 +1,5 @@ | |||
/* | |||
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. | |||
* Copyright (c) Meta Platforms, Inc. and affiliates. | |||
* All rights reserved. | |||
* | |||
* This source code is licensed under both the BSD-style license (found in the | |||
@@ -11,8 +11,43 @@ | |||
#include "zstd_compress_internal.h" | |||
#include "zstd_double_fast.h" | |||
static void ZSTD_fillDoubleHashTableForCDict(ZSTD_matchState_t* ms, | |||
void const* end, ZSTD_dictTableLoadMethod_e dtlm) | |||
{ | |||
const ZSTD_compressionParameters* const cParams = &ms->cParams; | |||
U32* const hashLarge = ms->hashTable; | |||
U32 const hBitsL = cParams->hashLog + ZSTD_SHORT_CACHE_TAG_BITS; | |||
U32 const mls = cParams->minMatch; | |||
U32* const hashSmall = ms->chainTable; | |||
U32 const hBitsS = cParams->chainLog + ZSTD_SHORT_CACHE_TAG_BITS; | |||
const BYTE* const base = ms->window.base; | |||
const BYTE* ip = base + ms->nextToUpdate; | |||
const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE; | |||
const U32 fastHashFillStep = 3; | |||
void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms, | |||
/* Always insert every fastHashFillStep position into the hash tables. | |||
* Insert the other positions into the large hash table if their entry | |||
* is empty. | |||
*/ | |||
for (; ip + fastHashFillStep - 1 <= iend; ip += fastHashFillStep) { | |||
U32 const curr = (U32)(ip - base); | |||
U32 i; | |||
for (i = 0; i < fastHashFillStep; ++i) { | |||
size_t const smHashAndTag = ZSTD_hashPtr(ip + i, hBitsS, mls); | |||
size_t const lgHashAndTag = ZSTD_hashPtr(ip + i, hBitsL, 8); | |||
if (i == 0) { | |||
ZSTD_writeTaggedIndex(hashSmall, smHashAndTag, curr + i); | |||
} | |||
if (i == 0 || hashLarge[lgHashAndTag >> ZSTD_SHORT_CACHE_TAG_BITS] == 0) { | |||
ZSTD_writeTaggedIndex(hashLarge, lgHashAndTag, curr + i); | |||
} | |||
/* Only load extra positions for ZSTD_dtlm_full */ | |||
if (dtlm == ZSTD_dtlm_fast) | |||
break; | |||
} } | |||
} | |||
static void ZSTD_fillDoubleHashTableForCCtx(ZSTD_matchState_t* ms, | |||
void const* end, ZSTD_dictTableLoadMethod_e dtlm) | |||
{ | |||
const ZSTD_compressionParameters* const cParams = &ms->cParams; | |||
@@ -31,27 +66,249 @@ void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms, | |||
* is empty. | |||
*/ | |||
for (; ip + fastHashFillStep - 1 <= iend; ip += fastHashFillStep) { | |||
U32 const current = (U32)(ip - base); | |||
U32 const curr = (U32)(ip - base); | |||
U32 i; | |||
for (i = 0; i < fastHashFillStep; ++i) { | |||
size_t const smHash = ZSTD_hashPtr(ip + i, hBitsS, mls); | |||
size_t const lgHash = ZSTD_hashPtr(ip + i, hBitsL, 8); | |||
if (i == 0) | |||
hashSmall[smHash] = current + i; | |||
hashSmall[smHash] = curr + i; | |||
if (i == 0 || hashLarge[lgHash] == 0) | |||
hashLarge[lgHash] = current + i; | |||
hashLarge[lgHash] = curr + i; | |||
/* Only load extra positions for ZSTD_dtlm_full */ | |||
if (dtlm == ZSTD_dtlm_fast) | |||
break; | |||
} } | |||
} } | |||
} | |||
void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms, | |||
const void* const end, | |||
ZSTD_dictTableLoadMethod_e dtlm, | |||
ZSTD_tableFillPurpose_e tfp) | |||
{ | |||
if (tfp == ZSTD_tfp_forCDict) { | |||
ZSTD_fillDoubleHashTableForCDict(ms, end, dtlm); | |||
} else { | |||
ZSTD_fillDoubleHashTableForCCtx(ms, end, dtlm); | |||
} | |||
} | |||
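/* Call-site sketch: dictionary loading for a CDict passes ZSTD_tfp_forCDict,
 * so indices are stored tagged (the low ZSTD_SHORT_CACHE_TAG_BITS bits hold a
 * hash tag, enabling cheap rejection before touching dictionary memory);
 * ordinary compression contexts pass ZSTD_tfp_forCCtx and store plain indices.
 *
 *     ZSTD_fillDoubleHashTable(ms, iend, ZSTD_dtlm_full, ZSTD_tfp_forCDict);
 */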
FORCE_INLINE_TEMPLATE | |||
size_t ZSTD_compressBlock_doubleFast_noDict_generic( | |||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], | |||
void const* src, size_t srcSize, U32 const mls /* template */) | |||
{ | |||
ZSTD_compressionParameters const* cParams = &ms->cParams; | |||
U32* const hashLong = ms->hashTable; | |||
const U32 hBitsL = cParams->hashLog; | |||
U32* const hashSmall = ms->chainTable; | |||
const U32 hBitsS = cParams->chainLog; | |||
const BYTE* const base = ms->window.base; | |||
const BYTE* const istart = (const BYTE*)src; | |||
const BYTE* anchor = istart; | |||
const U32 endIndex = (U32)((size_t)(istart - base) + srcSize); | |||
/* presumes that, if there is a dictionary, it must be using Attach mode */ | |||
const U32 prefixLowestIndex = ZSTD_getLowestPrefixIndex(ms, endIndex, cParams->windowLog); | |||
const BYTE* const prefixLowest = base + prefixLowestIndex; | |||
const BYTE* const iend = istart + srcSize; | |||
const BYTE* const ilimit = iend - HASH_READ_SIZE; | |||
U32 offset_1=rep[0], offset_2=rep[1]; | |||
U32 offsetSaved1 = 0, offsetSaved2 = 0; | |||
size_t mLength; | |||
U32 offset; | |||
U32 curr; | |||
/* how many positions to search before increasing step size */ | |||
const size_t kStepIncr = 1 << kSearchStrength; | |||
/* the position at which to increment the step size if no match is found */ | |||
const BYTE* nextStep; | |||
size_t step; /* the current step size */ | |||
size_t hl0; /* the long hash at ip */ | |||
size_t hl1; /* the long hash at ip1 */ | |||
U32 idxl0; /* the long match index for ip */ | |||
U32 idxl1; /* the long match index for ip1 */ | |||
const BYTE* matchl0; /* the long match for ip */ | |||
const BYTE* matchs0; /* the short match for ip */ | |||
const BYTE* matchl1; /* the long match for ip1 */ | |||
const BYTE* ip = istart; /* the current position */ | |||
const BYTE* ip1; /* the next position */ | |||
DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_noDict_generic"); | |||
/* init */ | |||
ip += ((ip - prefixLowest) == 0); | |||
{ | |||
U32 const current = (U32)(ip - base); | |||
U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, current, cParams->windowLog); | |||
U32 const maxRep = current - windowLow; | |||
if (offset_2 > maxRep) offsetSaved2 = offset_2, offset_2 = 0; | |||
if (offset_1 > maxRep) offsetSaved1 = offset_1, offset_1 = 0; | |||
} | |||
/* Outer Loop: one iteration per match found and stored */ | |||
while (1) { | |||
step = 1; | |||
nextStep = ip + kStepIncr; | |||
ip1 = ip + step; | |||
if (ip1 > ilimit) { | |||
goto _cleanup; | |||
} | |||
hl0 = ZSTD_hashPtr(ip, hBitsL, 8); | |||
idxl0 = hashLong[hl0]; | |||
matchl0 = base + idxl0; | |||
/* Inner Loop: one iteration per search / position */ | |||
do { | |||
const size_t hs0 = ZSTD_hashPtr(ip, hBitsS, mls); | |||
const U32 idxs0 = hashSmall[hs0]; | |||
curr = (U32)(ip-base); | |||
matchs0 = base + idxs0; | |||
hashLong[hl0] = hashSmall[hs0] = curr; /* update hash tables */ | |||
/* check noDict repcode */ | |||
if ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1))) { | |||
mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4; | |||
ip++; | |||
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, REPCODE1_TO_OFFBASE, mLength); | |||
goto _match_stored; | |||
} | |||
hl1 = ZSTD_hashPtr(ip1, hBitsL, 8); | |||
if (idxl0 > prefixLowestIndex) { | |||
/* check prefix long match */ | |||
if (MEM_read64(matchl0) == MEM_read64(ip)) { | |||
mLength = ZSTD_count(ip+8, matchl0+8, iend) + 8; | |||
offset = (U32)(ip-matchl0); | |||
while (((ip>anchor) & (matchl0>prefixLowest)) && (ip[-1] == matchl0[-1])) { ip--; matchl0--; mLength++; } /* catch up */ | |||
goto _match_found; | |||
} | |||
} | |||
idxl1 = hashLong[hl1]; | |||
matchl1 = base + idxl1; | |||
if (idxs0 > prefixLowestIndex) { | |||
/* check prefix short match */ | |||
if (MEM_read32(matchs0) == MEM_read32(ip)) { | |||
goto _search_next_long; | |||
} | |||
} | |||
if (ip1 >= nextStep) { | |||
PREFETCH_L1(ip1 + 64); | |||
PREFETCH_L1(ip1 + 128); | |||
step++; | |||
nextStep += kStepIncr; | |||
} | |||
ip = ip1; | |||
ip1 += step; | |||
hl0 = hl1; | |||
idxl0 = idxl1; | |||
matchl0 = matchl1; | |||
#if defined(__aarch64__) | |||
PREFETCH_L1(ip+256); | |||
#endif | |||
} while (ip1 <= ilimit); | |||
_cleanup: | |||
/* If offset_1 started invalid (offsetSaved1 != 0) and became valid (offset_1 != 0), | |||
* rotate saved offsets. See comment in ZSTD_compressBlock_fast_noDict for more context. */ | |||
offsetSaved2 = ((offsetSaved1 != 0) && (offset_1 != 0)) ? offsetSaved1 : offsetSaved2; | |||
/* save reps for next block */ | |||
rep[0] = offset_1 ? offset_1 : offsetSaved1; | |||
rep[1] = offset_2 ? offset_2 : offsetSaved2; | |||
/* Return the last literals size */ | |||
return (size_t)(iend - anchor); | |||
_search_next_long: | |||
/* check prefix long +1 match */ | |||
if (idxl1 > prefixLowestIndex) { | |||
if (MEM_read64(matchl1) == MEM_read64(ip1)) { | |||
ip = ip1; | |||
mLength = ZSTD_count(ip+8, matchl1+8, iend) + 8; | |||
offset = (U32)(ip-matchl1); | |||
while (((ip>anchor) & (matchl1>prefixLowest)) && (ip[-1] == matchl1[-1])) { ip--; matchl1--; mLength++; } /* catch up */ | |||
goto _match_found; | |||
} | |||
} | |||
/* if no long +1 match, explore the short match we found */ | |||
mLength = ZSTD_count(ip+4, matchs0+4, iend) + 4; | |||
offset = (U32)(ip - matchs0); | |||
while (((ip>anchor) & (matchs0>prefixLowest)) && (ip[-1] == matchs0[-1])) { ip--; matchs0--; mLength++; } /* catch up */ | |||
/* fall-through */ | |||
_match_found: /* requires ip, offset, mLength */ | |||
offset_2 = offset_1; | |||
offset_1 = offset; | |||
if (step < 4) { | |||
/* It is unsafe to write this value back to the hashtable when ip1 is
 * greater than or equal to the new ip we will have after we're done
 * processing this match. Rather than perform that test directly
 * (ip1 >= ip + mLength), which costs speed in practice, we use a simpler,
 * more predictable test: the minimum match length is 4 bytes even for a
 * short match, so as long as step (the initial distance between ip and
 * ip1) is less than 4, we know ip1 < the new ip. */
hashLong[hl1] = (U32)(ip1 - base); | |||
} | |||
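/* Worked check of the bound above: at this point ip1 - ip <= step <= 3, while
 * the stored match guarantees mLength >= 4, hence ip1 < ip + mLength (the
 * next ip). Catch-up moves ip back and grows mLength in lockstep, leaving
 * ip + mLength, and therefore the bound, unchanged. */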
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength); | |||
_match_stored: | |||
/* match found */ | |||
ip += mLength; | |||
anchor = ip; | |||
if (ip <= ilimit) { | |||
/* Complementary insertion */ | |||
/* done after iLimit test, as candidates could be > iend-8 */ | |||
{ U32 const indexToInsert = curr+2; | |||
hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert; | |||
hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base); | |||
hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert; | |||
hashSmall[ZSTD_hashPtr(ip-1, hBitsS, mls)] = (U32)(ip-1-base); | |||
} | |||
/* check immediate repcode */ | |||
while ( (ip <= ilimit) | |||
&& ( (offset_2>0) | |||
& (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) { | |||
/* store sequence */ | |||
size_t const rLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4; | |||
U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; /* swap offset_2 <=> offset_1 */ | |||
hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip-base); | |||
hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip-base); | |||
ZSTD_storeSeq(seqStore, 0, anchor, iend, REPCODE1_TO_OFFBASE, rLength); | |||
ip += rLength; | |||
anchor = ip; | |||
continue; /* faster when present ... (?) */ | |||
} | |||
} | |||
} | |||
} | |||
FORCE_INLINE_TEMPLATE | |||
size_t ZSTD_compressBlock_doubleFast_generic( | |||
size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic( | |||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], | |||
void const* src, size_t srcSize, | |||
U32 const mls /* template */, ZSTD_dictMode_e const dictMode) | |||
U32 const mls /* template */) | |||
{ | |||
ZSTD_compressionParameters const* cParams = &ms->cParams; | |||
U32* const hashLong = ms->hashTable; | |||
@@ -69,57 +326,39 @@ size_t ZSTD_compressBlock_doubleFast_generic( | |||
const BYTE* const iend = istart + srcSize; | |||
const BYTE* const ilimit = iend - HASH_READ_SIZE; | |||
U32 offset_1=rep[0], offset_2=rep[1]; | |||
U32 offsetSaved = 0; | |||
const ZSTD_matchState_t* const dms = ms->dictMatchState; | |||
const ZSTD_compressionParameters* const dictCParams = | |||
dictMode == ZSTD_dictMatchState ? | |||
&dms->cParams : NULL; | |||
const U32* const dictHashLong = dictMode == ZSTD_dictMatchState ? | |||
dms->hashTable : NULL; | |||
const U32* const dictHashSmall = dictMode == ZSTD_dictMatchState ? | |||
dms->chainTable : NULL; | |||
const U32 dictStartIndex = dictMode == ZSTD_dictMatchState ? | |||
dms->window.dictLimit : 0; | |||
const BYTE* const dictBase = dictMode == ZSTD_dictMatchState ? | |||
dms->window.base : NULL; | |||
const BYTE* const dictStart = dictMode == ZSTD_dictMatchState ? | |||
dictBase + dictStartIndex : NULL; | |||
const BYTE* const dictEnd = dictMode == ZSTD_dictMatchState ? | |||
dms->window.nextSrc : NULL; | |||
const U32 dictIndexDelta = dictMode == ZSTD_dictMatchState ? | |||
prefixLowestIndex - (U32)(dictEnd - dictBase) : | |||
0; | |||
const U32 dictHBitsL = dictMode == ZSTD_dictMatchState ? | |||
dictCParams->hashLog : hBitsL; | |||
const U32 dictHBitsS = dictMode == ZSTD_dictMatchState ? | |||
dictCParams->chainLog : hBitsS; | |||
const ZSTD_compressionParameters* const dictCParams = &dms->cParams; | |||
const U32* const dictHashLong = dms->hashTable; | |||
const U32* const dictHashSmall = dms->chainTable; | |||
const U32 dictStartIndex = dms->window.dictLimit; | |||
const BYTE* const dictBase = dms->window.base; | |||
const BYTE* const dictStart = dictBase + dictStartIndex; | |||
const BYTE* const dictEnd = dms->window.nextSrc; | |||
const U32 dictIndexDelta = prefixLowestIndex - (U32)(dictEnd - dictBase); | |||
const U32 dictHBitsL = dictCParams->hashLog + ZSTD_SHORT_CACHE_TAG_BITS; | |||
const U32 dictHBitsS = dictCParams->chainLog + ZSTD_SHORT_CACHE_TAG_BITS; | |||
const U32 dictAndPrefixLength = (U32)((ip - prefixLowest) + (dictEnd - dictStart)); | |||
DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_generic"); | |||
assert(dictMode == ZSTD_noDict || dictMode == ZSTD_dictMatchState); | |||
DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_dictMatchState_generic"); | |||
/* if a dictionary is attached, it must be within window range */ | |||
if (dictMode == ZSTD_dictMatchState) { | |||
assert(ms->window.dictLimit + (1U << cParams->windowLog) >= endIndex); | |||
assert(ms->window.dictLimit + (1U << cParams->windowLog) >= endIndex); | |||
if (ms->prefetchCDictTables) { | |||
size_t const hashTableBytes = (((size_t)1) << dictCParams->hashLog) * sizeof(U32); | |||
size_t const chainTableBytes = (((size_t)1) << dictCParams->chainLog) * sizeof(U32); | |||
PREFETCH_AREA(dictHashLong, hashTableBytes) | |||
PREFETCH_AREA(dictHashSmall, chainTableBytes) | |||
} | |||
/* init */ | |||
ip += (dictAndPrefixLength == 0); | |||
if (dictMode == ZSTD_noDict) { | |||
U32 const current = (U32)(ip - base); | |||
U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, current, cParams->windowLog); | |||
U32 const maxRep = current - windowLow; | |||
if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0; | |||
if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0; | |||
} | |||
if (dictMode == ZSTD_dictMatchState) { | |||
/* dictMatchState repCode checks don't currently handle repCode == 0 | |||
* disabling. */ | |||
assert(offset_1 <= dictAndPrefixLength); | |||
assert(offset_2 <= dictAndPrefixLength); | |||
} | |||
/* dictMatchState repCode checks don't currently handle repCode == 0 | |||
* disabling. */ | |||
assert(offset_1 <= dictAndPrefixLength); | |||
assert(offset_2 <= dictAndPrefixLength); | |||
/* Main Search Loop */ | |||
while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */ | |||
@@ -127,37 +366,30 @@ size_t ZSTD_compressBlock_doubleFast_generic( | |||
U32 offset; | |||
size_t const h2 = ZSTD_hashPtr(ip, hBitsL, 8); | |||
size_t const h = ZSTD_hashPtr(ip, hBitsS, mls); | |||
size_t const dictHL = ZSTD_hashPtr(ip, dictHBitsL, 8); | |||
size_t const dictHS = ZSTD_hashPtr(ip, dictHBitsS, mls); | |||
U32 const current = (U32)(ip-base); | |||
size_t const dictHashAndTagL = ZSTD_hashPtr(ip, dictHBitsL, 8); | |||
size_t const dictHashAndTagS = ZSTD_hashPtr(ip, dictHBitsS, mls); | |||
U32 const dictMatchIndexAndTagL = dictHashLong[dictHashAndTagL >> ZSTD_SHORT_CACHE_TAG_BITS]; | |||
U32 const dictMatchIndexAndTagS = dictHashSmall[dictHashAndTagS >> ZSTD_SHORT_CACHE_TAG_BITS]; | |||
int const dictTagsMatchL = ZSTD_comparePackedTags(dictMatchIndexAndTagL, dictHashAndTagL); | |||
int const dictTagsMatchS = ZSTD_comparePackedTags(dictMatchIndexAndTagS, dictHashAndTagS); | |||
U32 const curr = (U32)(ip-base); | |||
U32 const matchIndexL = hashLong[h2]; | |||
U32 matchIndexS = hashSmall[h]; | |||
const BYTE* matchLong = base + matchIndexL; | |||
const BYTE* match = base + matchIndexS; | |||
const U32 repIndex = current + 1 - offset_1; | |||
const BYTE* repMatch = (dictMode == ZSTD_dictMatchState | |||
&& repIndex < prefixLowestIndex) ? | |||
const U32 repIndex = curr + 1 - offset_1; | |||
const BYTE* repMatch = (repIndex < prefixLowestIndex) ? | |||
dictBase + (repIndex - dictIndexDelta) : | |||
base + repIndex; | |||
hashLong[h2] = hashSmall[h] = current; /* update hash tables */ | |||
hashLong[h2] = hashSmall[h] = curr; /* update hash tables */ | |||
/* check dictMatchState repcode */ | |||
if (dictMode == ZSTD_dictMatchState | |||
&& ((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */) | |||
/* check repcode */ | |||
if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */) | |||
&& (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { | |||
const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend; | |||
mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4; | |||
ip++; | |||
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH); | |||
goto _match_stored; | |||
} | |||
/* check noDict repcode */ | |||
if ( dictMode == ZSTD_noDict | |||
&& ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1)))) { | |||
mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4; | |||
ip++; | |||
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH); | |||
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, REPCODE1_TO_OFFBASE, mLength); | |||
goto _match_stored; | |||
} | |||
@@ -169,15 +401,15 @@ size_t ZSTD_compressBlock_doubleFast_generic( | |||
while (((ip>anchor) & (matchLong>prefixLowest)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */ | |||
goto _match_found; | |||
} | |||
} else if (dictMode == ZSTD_dictMatchState) { | |||
} else if (dictTagsMatchL) { | |||
/* check dictMatchState long match */ | |||
U32 const dictMatchIndexL = dictHashLong[dictHL]; | |||
U32 const dictMatchIndexL = dictMatchIndexAndTagL >> ZSTD_SHORT_CACHE_TAG_BITS; | |||
const BYTE* dictMatchL = dictBase + dictMatchIndexL; | |||
assert(dictMatchL < dictEnd); | |||
if (dictMatchL > dictStart && MEM_read64(dictMatchL) == MEM_read64(ip)) { | |||
mLength = ZSTD_count_2segments(ip+8, dictMatchL+8, iend, dictEnd, prefixLowest) + 8; | |||
offset = (U32)(current - dictMatchIndexL - dictIndexDelta); | |||
offset = (U32)(curr - dictMatchIndexL - dictIndexDelta); | |||
while (((ip>anchor) & (dictMatchL>dictStart)) && (ip[-1] == dictMatchL[-1])) { ip--; dictMatchL--; mLength++; } /* catch up */ | |||
goto _match_found; | |||
} } | |||
@@ -187,9 +419,9 @@ size_t ZSTD_compressBlock_doubleFast_generic( | |||
if (MEM_read32(match) == MEM_read32(ip)) { | |||
goto _search_next_long; | |||
} | |||
} else if (dictMode == ZSTD_dictMatchState) { | |||
} else if (dictTagsMatchS) { | |||
/* check dictMatchState short match */ | |||
U32 const dictMatchIndexS = dictHashSmall[dictHS]; | |||
U32 const dictMatchIndexS = dictMatchIndexAndTagS >> ZSTD_SHORT_CACHE_TAG_BITS; | |||
match = dictBase + dictMatchIndexS; | |||
matchIndexS = dictMatchIndexS + dictIndexDelta; | |||
@@ -204,12 +436,13 @@ size_t ZSTD_compressBlock_doubleFast_generic( | |||
continue; | |||
_search_next_long: | |||
{ size_t const hl3 = ZSTD_hashPtr(ip+1, hBitsL, 8); | |||
size_t const dictHLNext = ZSTD_hashPtr(ip+1, dictHBitsL, 8); | |||
size_t const dictHashAndTagL3 = ZSTD_hashPtr(ip+1, dictHBitsL, 8); | |||
U32 const matchIndexL3 = hashLong[hl3]; | |||
U32 const dictMatchIndexAndTagL3 = dictHashLong[dictHashAndTagL3 >> ZSTD_SHORT_CACHE_TAG_BITS]; | |||
int const dictTagsMatchL3 = ZSTD_comparePackedTags(dictMatchIndexAndTagL3, dictHashAndTagL3); | |||
const BYTE* matchL3 = base + matchIndexL3; | |||
hashLong[hl3] = current + 1; | |||
hashLong[hl3] = curr + 1; | |||
/* check prefix long +1 match */ | |||
if (matchIndexL3 > prefixLowestIndex) { | |||
@@ -220,23 +453,23 @@ _search_next_long: | |||
while (((ip>anchor) & (matchL3>prefixLowest)) && (ip[-1] == matchL3[-1])) { ip--; matchL3--; mLength++; } /* catch up */ | |||
goto _match_found; | |||
} | |||
} else if (dictMode == ZSTD_dictMatchState) { | |||
} else if (dictTagsMatchL3) { | |||
/* check dict long +1 match */ | |||
U32 const dictMatchIndexL3 = dictHashLong[dictHLNext]; | |||
U32 const dictMatchIndexL3 = dictMatchIndexAndTagL3 >> ZSTD_SHORT_CACHE_TAG_BITS; | |||
const BYTE* dictMatchL3 = dictBase + dictMatchIndexL3; | |||
assert(dictMatchL3 < dictEnd); | |||
if (dictMatchL3 > dictStart && MEM_read64(dictMatchL3) == MEM_read64(ip+1)) { | |||
mLength = ZSTD_count_2segments(ip+1+8, dictMatchL3+8, iend, dictEnd, prefixLowest) + 8; | |||
ip++; | |||
offset = (U32)(current + 1 - dictMatchIndexL3 - dictIndexDelta); | |||
offset = (U32)(curr + 1 - dictMatchIndexL3 - dictIndexDelta); | |||
while (((ip>anchor) & (dictMatchL3>dictStart)) && (ip[-1] == dictMatchL3[-1])) { ip--; dictMatchL3--; mLength++; } /* catch up */ | |||
goto _match_found; | |||
} } } | |||
/* if no long +1 match, explore the short match we found */ | |||
if (dictMode == ZSTD_dictMatchState && matchIndexS < prefixLowestIndex) { | |||
if (matchIndexS < prefixLowestIndex) { | |||
mLength = ZSTD_count_2segments(ip+4, match+4, iend, dictEnd, prefixLowest) + 4; | |||
offset = (U32)(current - matchIndexS); | |||
offset = (U32)(curr - matchIndexS); | |||
while (((ip>anchor) & (match>dictStart)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ | |||
} else { | |||
mLength = ZSTD_count(ip+4, match+4, iend) + 4; | |||
@@ -244,13 +477,11 @@ _search_next_long: | |||
while (((ip>anchor) & (match>prefixLowest)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ | |||
} | |||
/* fall-through */ | |||
_match_found: | |||
offset_2 = offset_1; | |||
offset_1 = offset; | |||
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH); | |||
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength); | |||
_match_stored: | |||
/* match found */ | |||
@@ -260,7 +491,7 @@ _match_stored: | |||
if (ip <= ilimit) { | |||
/* Complementary insertion */ | |||
/* done after iLimit test, as candidates could be > iend-8 */ | |||
{ U32 const indexToInsert = current+2; | |||
{ U32 const indexToInsert = curr+2; | |||
hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert; | |||
hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base); | |||
hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert; | |||
@@ -268,53 +499,55 @@ _match_stored: | |||
} | |||
/* check immediate repcode */ | |||
if (dictMode == ZSTD_dictMatchState) { | |||
while (ip <= ilimit) { | |||
U32 const current2 = (U32)(ip-base); | |||
U32 const repIndex2 = current2 - offset_2; | |||
const BYTE* repMatch2 = dictMode == ZSTD_dictMatchState | |||
&& repIndex2 < prefixLowestIndex ? | |||
dictBase + repIndex2 - dictIndexDelta : | |||
base + repIndex2; | |||
if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */) | |||
&& (MEM_read32(repMatch2) == MEM_read32(ip)) ) { | |||
const BYTE* const repEnd2 = repIndex2 < prefixLowestIndex ? dictEnd : iend; | |||
size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixLowest) + 4; | |||
U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */ | |||
ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2-MINMATCH); | |||
hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2; | |||
hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2; | |||
ip += repLength2; | |||
anchor = ip; | |||
continue; | |||
} | |||
break; | |||
} } | |||
if (dictMode == ZSTD_noDict) { | |||
while ( (ip <= ilimit) | |||
&& ( (offset_2>0) | |||
& (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) { | |||
/* store sequence */ | |||
size_t const rLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4; | |||
U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; /* swap offset_2 <=> offset_1 */ | |||
hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip-base); | |||
hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip-base); | |||
ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, rLength-MINMATCH); | |||
ip += rLength; | |||
while (ip <= ilimit) { | |||
U32 const current2 = (U32)(ip-base); | |||
U32 const repIndex2 = current2 - offset_2; | |||
const BYTE* repMatch2 = repIndex2 < prefixLowestIndex ? | |||
dictBase + repIndex2 - dictIndexDelta : | |||
base + repIndex2; | |||
if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */) | |||
&& (MEM_read32(repMatch2) == MEM_read32(ip)) ) { | |||
const BYTE* const repEnd2 = repIndex2 < prefixLowestIndex ? dictEnd : iend; | |||
size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixLowest) + 4; | |||
U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */ | |||
ZSTD_storeSeq(seqStore, 0, anchor, iend, REPCODE1_TO_OFFBASE, repLength2); | |||
hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2; | |||
hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2; | |||
ip += repLength2; | |||
anchor = ip; | |||
continue; /* faster when present ... (?) */ | |||
} } } | |||
continue; | |||
} | |||
break; | |||
} | |||
} | |||
} /* while (ip < ilimit) */ | |||
/* save reps for next block */ | |||
rep[0] = offset_1 ? offset_1 : offsetSaved; | |||
rep[1] = offset_2 ? offset_2 : offsetSaved; | |||
rep[0] = offset_1; | |||
rep[1] = offset_2; | |||
/* Return the last literals size */ | |||
return (size_t)(iend - anchor); | |||
} | |||
#define ZSTD_GEN_DFAST_FN(dictMode, mls) \ | |||
static size_t ZSTD_compressBlock_doubleFast_##dictMode##_##mls( \ | |||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], \ | |||
void const* src, size_t srcSize) \ | |||
{ \ | |||
return ZSTD_compressBlock_doubleFast_##dictMode##_generic(ms, seqStore, rep, src, srcSize, mls); \ | |||
} | |||
ZSTD_GEN_DFAST_FN(noDict, 4) | |||
ZSTD_GEN_DFAST_FN(noDict, 5) | |||
ZSTD_GEN_DFAST_FN(noDict, 6) | |||
ZSTD_GEN_DFAST_FN(noDict, 7) | |||
ZSTD_GEN_DFAST_FN(dictMatchState, 4) | |||
ZSTD_GEN_DFAST_FN(dictMatchState, 5) | |||
ZSTD_GEN_DFAST_FN(dictMatchState, 6) | |||
ZSTD_GEN_DFAST_FN(dictMatchState, 7) | |||
size_t ZSTD_compressBlock_doubleFast( | |||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], | |||
@@ -325,13 +558,13 @@ size_t ZSTD_compressBlock_doubleFast( | |||
{ | |||
default: /* includes case 3 */ | |||
case 4 : | |||
return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 4, ZSTD_noDict); | |||
return ZSTD_compressBlock_doubleFast_noDict_4(ms, seqStore, rep, src, srcSize); | |||
case 5 : | |||
return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 5, ZSTD_noDict); | |||
return ZSTD_compressBlock_doubleFast_noDict_5(ms, seqStore, rep, src, srcSize); | |||
case 6 : | |||
return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 6, ZSTD_noDict); | |||
return ZSTD_compressBlock_doubleFast_noDict_6(ms, seqStore, rep, src, srcSize); | |||
case 7 : | |||
return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 7, ZSTD_noDict); | |||
return ZSTD_compressBlock_doubleFast_noDict_7(ms, seqStore, rep, src, srcSize); | |||
} | |||
} | |||
@@ -345,13 +578,13 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState( | |||
{ | |||
default: /* includes case 3 */ | |||
case 4 : | |||
return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 4, ZSTD_dictMatchState); | |||
return ZSTD_compressBlock_doubleFast_dictMatchState_4(ms, seqStore, rep, src, srcSize); | |||
case 5 : | |||
return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 5, ZSTD_dictMatchState); | |||
return ZSTD_compressBlock_doubleFast_dictMatchState_5(ms, seqStore, rep, src, srcSize); | |||
case 6 : | |||
return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 6, ZSTD_dictMatchState); | |||
return ZSTD_compressBlock_doubleFast_dictMatchState_6(ms, seqStore, rep, src, srcSize); | |||
case 7 : | |||
return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 7, ZSTD_dictMatchState); | |||
return ZSTD_compressBlock_doubleFast_dictMatchState_7(ms, seqStore, rep, src, srcSize); | |||
} | |||
} | |||
@@ -387,7 +620,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic( | |||
/* if extDict is invalidated due to maxDistance, switch to "regular" variant */ | |||
if (prefixStartIndex == dictStartIndex) | |||
return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, mls, ZSTD_noDict); | |||
return ZSTD_compressBlock_doubleFast(ms, seqStore, rep, src, srcSize); | |||
/* Search Loop */ | |||
while (ip < ilimit) { /* < instead of <=, because (ip+1) */ | |||
@@ -401,31 +634,31 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic( | |||
const BYTE* const matchLongBase = matchLongIndex < prefixStartIndex ? dictBase : base; | |||
const BYTE* matchLong = matchLongBase + matchLongIndex; | |||
const U32 current = (U32)(ip-base); | |||
const U32 repIndex = current + 1 - offset_1; /* offset_1 expected <= current +1 */ | |||
const U32 curr = (U32)(ip-base); | |||
const U32 repIndex = curr + 1 - offset_1; /* offset_1 expected <= curr +1 */ | |||
const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base; | |||
const BYTE* const repMatch = repBase + repIndex; | |||
size_t mLength; | |||
hashSmall[hSmall] = hashLong[hLong] = current; /* update hash table */ | |||
hashSmall[hSmall] = hashLong[hLong] = curr; /* update hash table */ | |||
if ((((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex doesn't overlap dict + prefix */ | |||
& (repIndex > dictStartIndex)) | |||
& (offset_1 <= curr+1 - dictStartIndex)) /* note: we are searching at curr+1 */ | |||
&& (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { | |||
const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend; | |||
mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4; | |||
ip++; | |||
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH); | |||
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, REPCODE1_TO_OFFBASE, mLength); | |||
} else { | |||
if ((matchLongIndex > dictStartIndex) && (MEM_read64(matchLong) == MEM_read64(ip))) { | |||
const BYTE* const matchEnd = matchLongIndex < prefixStartIndex ? dictEnd : iend; | |||
const BYTE* const lowMatchPtr = matchLongIndex < prefixStartIndex ? dictStart : prefixStart; | |||
U32 offset; | |||
mLength = ZSTD_count_2segments(ip+8, matchLong+8, iend, matchEnd, prefixStart) + 8; | |||
offset = current - matchLongIndex; | |||
offset = curr - matchLongIndex; | |||
while (((ip>anchor) & (matchLong>lowMatchPtr)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */ | |||
offset_2 = offset_1; | |||
offset_1 = offset; | |||
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH); | |||
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength); | |||
} else if ((matchIndex > dictStartIndex) && (MEM_read32(match) == MEM_read32(ip))) { | |||
size_t const h3 = ZSTD_hashPtr(ip+1, hBitsL, 8); | |||
@@ -433,24 +666,24 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic( | |||
const BYTE* const match3Base = matchIndex3 < prefixStartIndex ? dictBase : base; | |||
const BYTE* match3 = match3Base + matchIndex3; | |||
U32 offset; | |||
hashLong[h3] = current + 1; | |||
hashLong[h3] = curr + 1; | |||
if ( (matchIndex3 > dictStartIndex) && (MEM_read64(match3) == MEM_read64(ip+1)) ) { | |||
const BYTE* const matchEnd = matchIndex3 < prefixStartIndex ? dictEnd : iend; | |||
const BYTE* const lowMatchPtr = matchIndex3 < prefixStartIndex ? dictStart : prefixStart; | |||
mLength = ZSTD_count_2segments(ip+9, match3+8, iend, matchEnd, prefixStart) + 8; | |||
ip++; | |||
offset = current+1 - matchIndex3; | |||
offset = curr+1 - matchIndex3; | |||
while (((ip>anchor) & (match3>lowMatchPtr)) && (ip[-1] == match3[-1])) { ip--; match3--; mLength++; } /* catch up */ | |||
} else { | |||
const BYTE* const matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend; | |||
const BYTE* const lowMatchPtr = matchIndex < prefixStartIndex ? dictStart : prefixStart; | |||
mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4; | |||
offset = current - matchIndex; | |||
offset = curr - matchIndex; | |||
while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ | |||
} | |||
offset_2 = offset_1; | |||
offset_1 = offset; | |||
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH); | |||
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength); | |||
} else { | |||
ip += ((ip-anchor) >> kSearchStrength) + 1; | |||
@@ -464,7 +697,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic( | |||
if (ip <= ilimit) { | |||
/* Complementary insertion */ | |||
/* done after iLimit test, as candidates could be > iend-8 */ | |||
{ U32 const indexToInsert = current+2; | |||
{ U32 const indexToInsert = curr+2; | |||
hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert; | |||
hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base); | |||
hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert; | |||
@@ -477,12 +710,12 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic( | |||
U32 const repIndex2 = current2 - offset_2; | |||
const BYTE* repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2; | |||
if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) /* intentional overflow : ensure repIndex2 doesn't overlap dict + prefix */ | |||
& (repIndex2 > dictStartIndex)) | |||
& (offset_2 <= current2 - dictStartIndex)) | |||
&& (MEM_read32(repMatch2) == MEM_read32(ip)) ) { | |||
const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend; | |||
size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4; | |||
U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */ | |||
ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2-MINMATCH); | |||
ZSTD_storeSeq(seqStore, 0, anchor, iend, REPCODE1_TO_OFFBASE, repLength2); | |||
hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2; | |||
hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2; | |||
ip += repLength2; | |||
@@ -500,6 +733,10 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic( | |||
return (size_t)(iend - anchor); | |||
} | |||
ZSTD_GEN_DFAST_FN(extDict, 4) | |||
ZSTD_GEN_DFAST_FN(extDict, 5) | |||
ZSTD_GEN_DFAST_FN(extDict, 6) | |||
ZSTD_GEN_DFAST_FN(extDict, 7) | |||
size_t ZSTD_compressBlock_doubleFast_extDict( | |||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], | |||
@@ -510,12 +747,12 @@ size_t ZSTD_compressBlock_doubleFast_extDict( | |||
{ | |||
default: /* includes case 3 */ | |||
case 4 : | |||
return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 4); | |||
return ZSTD_compressBlock_doubleFast_extDict_4(ms, seqStore, rep, src, srcSize); | |||
case 5 : | |||
return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 5); | |||
return ZSTD_compressBlock_doubleFast_extDict_5(ms, seqStore, rep, src, srcSize); | |||
case 6 : | |||
return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 6); | |||
return ZSTD_compressBlock_doubleFast_extDict_6(ms, seqStore, rep, src, srcSize); | |||
case 7 : | |||
return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 7); | |||
return ZSTD_compressBlock_doubleFast_extDict_7(ms, seqStore, rep, src, srcSize); | |||
} | |||
} |
@@ -1,5 +1,5 @@ | |||
/* | |||
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. | |||
* Copyright (c) Meta Platforms, Inc. and affiliates. | |||
* All rights reserved. | |||
* | |||
* This source code is licensed under both the BSD-style license (found in the | |||
@@ -19,7 +19,8 @@ extern "C" { | |||
#include "zstd_compress_internal.h" /* ZSTD_CCtx, size_t */ | |||
void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms, | |||
void const* end, ZSTD_dictTableLoadMethod_e dtlm); | |||
void const* end, ZSTD_dictTableLoadMethod_e dtlm, | |||
ZSTD_tableFillPurpose_e tfp); | |||
size_t ZSTD_compressBlock_doubleFast( | |||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], | |||
void const* src, size_t srcSize); |
@@ -1,5 +1,5 @@ | |||
/* | |||
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. | |||
* Copyright (c) Meta Platforms, Inc. and affiliates. | |||
* All rights reserved. | |||
* | |||
* This source code is licensed under both the BSD-style license (found in the | |||
@@ -20,19 +20,31 @@ extern "C" { | |||
/* ===== ZSTDERRORLIB_API : control library symbols visibility ===== */ | |||
#ifndef ZSTDERRORLIB_VISIBILITY | |||
# if defined(__GNUC__) && (__GNUC__ >= 4) | |||
# define ZSTDERRORLIB_VISIBILITY __attribute__ ((visibility ("default"))) | |||
#ifndef ZSTDERRORLIB_VISIBLE | |||
/* Backwards compatibility with old macro name */ | |||
# ifdef ZSTDERRORLIB_VISIBILITY | |||
# define ZSTDERRORLIB_VISIBLE ZSTDERRORLIB_VISIBILITY | |||
# elif defined(__GNUC__) && (__GNUC__ >= 4) && !defined(__MINGW32__) | |||
# define ZSTDERRORLIB_VISIBLE __attribute__ ((visibility ("default"))) | |||
# else | |||
# define ZSTDERRORLIB_VISIBILITY | |||
# define ZSTDERRORLIB_VISIBLE | |||
# endif | |||
#endif | |||
#ifndef ZSTDERRORLIB_HIDDEN | |||
# if defined(__GNUC__) && (__GNUC__ >= 4) && !defined(__MINGW32__) | |||
# define ZSTDERRORLIB_HIDDEN __attribute__ ((visibility ("hidden"))) | |||
# else | |||
# define ZSTDERRORLIB_HIDDEN | |||
# endif | |||
#endif | |||
#if defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1) | |||
# define ZSTDERRORLIB_API __declspec(dllexport) ZSTDERRORLIB_VISIBILITY | |||
# define ZSTDERRORLIB_API __declspec(dllexport) ZSTDERRORLIB_VISIBLE | |||
#elif defined(ZSTD_DLL_IMPORT) && (ZSTD_DLL_IMPORT==1) | |||
# define ZSTDERRORLIB_API __declspec(dllimport) ZSTDERRORLIB_VISIBILITY /* Not required, but it lets the compiler generate better code, saving a function pointer load from the IAT and an indirect jump. */
# define ZSTDERRORLIB_API __declspec(dllimport) ZSTDERRORLIB_VISIBLE /* Not required, but it lets the compiler generate better code, saving a function pointer load from the IAT and an indirect jump. */
#else | |||
# define ZSTDERRORLIB_API ZSTDERRORLIB_VISIBILITY | |||
# define ZSTDERRORLIB_API ZSTDERRORLIB_VISIBLE | |||
#endif | |||
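/* Expansion sketch on common toolchains (for reference):
 *   MSVC,  ZSTD_DLL_EXPORT==1 : ZSTDERRORLIB_API -> __declspec(dllexport)
 *   MSVC,  ZSTD_DLL_IMPORT==1 : ZSTDERRORLIB_API -> __declspec(dllimport)
 *   gcc/clang (non-MinGW)     : ZSTDERRORLIB_API -> __attribute__ ((visibility ("default")))
 *   anything else             : ZSTDERRORLIB_API -> (empty)
 */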
/*-********************************************* | |||
@@ -58,14 +70,17 @@ typedef enum { | |||
ZSTD_error_frameParameter_windowTooLarge = 16, | |||
ZSTD_error_corruption_detected = 20, | |||
ZSTD_error_checksum_wrong = 22, | |||
ZSTD_error_literals_headerWrong = 24, | |||
ZSTD_error_dictionary_corrupted = 30, | |||
ZSTD_error_dictionary_wrong = 32, | |||
ZSTD_error_dictionaryCreation_failed = 34, | |||
ZSTD_error_parameter_unsupported = 40, | |||
ZSTD_error_parameter_combination_unsupported = 41, | |||
ZSTD_error_parameter_outOfBound = 42, | |||
ZSTD_error_tableLog_tooLarge = 44, | |||
ZSTD_error_maxSymbolValue_tooLarge = 46, | |||
ZSTD_error_maxSymbolValue_tooSmall = 48, | |||
ZSTD_error_stabilityCondition_notRespected = 50, | |||
ZSTD_error_stage_wrong = 60, | |||
ZSTD_error_init_missing = 62, | |||
ZSTD_error_memory_allocation = 64, | |||
@@ -73,10 +88,15 @@ typedef enum { | |||
ZSTD_error_dstSize_tooSmall = 70, | |||
ZSTD_error_srcSize_wrong = 72, | |||
ZSTD_error_dstBuffer_null = 74, | |||
ZSTD_error_noForwardProgress_destFull = 80, | |||
ZSTD_error_noForwardProgress_inputEmpty = 82, | |||
/* following error codes are __NOT STABLE__, they can be removed or changed in future versions */ | |||
ZSTD_error_frameIndex_tooLarge = 100, | |||
ZSTD_error_seekableIO = 102, | |||
ZSTD_error_dstBuffer_wrong = 104, | |||
ZSTD_error_srcBuffer_wrong = 105, | |||
ZSTD_error_sequenceProducer_failed = 106, | |||
ZSTD_error_externalSequences_invalid = 107, | |||
ZSTD_error_maxCode = 120 /* never EVER use this value directly, it can change in future versions! Use ZSTD_isError() instead */ | |||
} ZSTD_ErrorCode; | |||
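/* Usage sketch: callers should never compare a size_t result against these
 * values directly; the supported pattern is
 *
 *     size_t const r = ZSTD_decompress(dst, dstCapacity, src, srcSize);
 *     if (ZSTD_isError(r)) {
 *         ZSTD_ErrorCode const e = ZSTD_getErrorCode(r);
 *         handle(e);   // e is one of the stable codes above
 *     }
 */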
@@ -1,5 +1,5 @@ | |||
/* | |||
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. | |||
* Copyright (c) Meta Platforms, Inc. and affiliates. | |||
* All rights reserved. | |||
* | |||
* This source code is licensed under both the BSD-style license (found in the | |||
@@ -19,7 +19,8 @@ extern "C" { | |||
#include "zstd_compress_internal.h" | |||
void ZSTD_fillHashTable(ZSTD_matchState_t* ms, | |||
void const* end, ZSTD_dictTableLoadMethod_e dtlm); | |||
void const* end, ZSTD_dictTableLoadMethod_e dtlm, | |||
ZSTD_tableFillPurpose_e tfp); | |||
size_t ZSTD_compressBlock_fast( | |||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], | |||
void const* src, size_t srcSize); |
@@ -1,5 +1,5 @@ | |||
/* | |||
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. | |||
* Copyright (c) Meta Platforms, Inc. and affiliates. | |||
* All rights reserved. | |||
* | |||
* This source code is licensed under both the BSD-style license (found in the | |||
@@ -19,25 +19,25 @@ | |||
/*-************************************* | |||
* Dependencies | |||
***************************************/ | |||
#ifdef __aarch64__ | |||
#include <arm_neon.h> | |||
#endif | |||
#include "compiler.h" | |||
#include "cpu.h" | |||
#include "mem.h" | |||
#include "debug.h" /* assert, DEBUGLOG, RAWLOG, g_debuglevel */ | |||
#include "error_private.h" | |||
#ifndef ZSTD_STATIC_LINKING_ONLY | |||
#define ZSTD_STATIC_LINKING_ONLY | |||
#endif | |||
#include "zstd.h" | |||
#define FSE_STATIC_LINKING_ONLY | |||
#include "fse.h" | |||
#define HUF_STATIC_LINKING_ONLY | |||
#include "huf.h" | |||
#ifndef XXH_STATIC_LINKING_ONLY | |||
# define XXH_STATIC_LINKING_ONLY /* XXH64_state_t */ | |||
#endif | |||
#include "xxhash.h" /* XXH_reset, update, digest */ | |||
#ifndef ZSTD_NO_TRACE | |||
# include "zstd_trace.h" | |||
#else | |||
# define ZSTD_TRACE 0 | |||
#endif | |||
#if defined (__cplusplus) | |||
extern "C" { | |||
@@ -57,81 +57,7 @@ extern "C" { | |||
#undef MAX | |||
#define MIN(a,b) ((a)<(b) ? (a) : (b)) | |||
#define MAX(a,b) ((a)>(b) ? (a) : (b)) | |||
/** | |||
* Ignore: this is an internal helper. | |||
* | |||
* This is a helper function to help force C99-correctness during compilation. | |||
* Under strict compilation modes, variadic macro arguments can't be empty. | |||
* However, variadic function arguments can be. Using a function therefore lets | |||
* us statically check that at least one (string) argument was passed, | |||
* independent of the compilation flags. | |||
*/ | |||
static INLINE_KEYWORD UNUSED_ATTR | |||
void _force_has_format_string(const char *format, ...) { | |||
(void)format; | |||
} | |||
/** | |||
* Ignore: this is an internal helper. | |||
* | |||
* We want to force this function invocation to be syntactically correct, but | |||
* we don't want to force runtime evaluation of its arguments. | |||
*/ | |||
#define _FORCE_HAS_FORMAT_STRING(...) \ | |||
if (0) { \ | |||
_force_has_format_string(__VA_ARGS__); \ | |||
} | |||
/** | |||
* Return the specified error if the condition evaluates to true. | |||
* | |||
* In debug modes, prints additional information. | |||
* In order to do that (particularly, printing the conditional that failed), | |||
* this can't just wrap RETURN_ERROR(). | |||
*/ | |||
#define RETURN_ERROR_IF(cond, err, ...) \ | |||
if (cond) { \ | |||
RAWLOG(3, "%s:%d: ERROR!: check %s failed, returning %s", \ | |||
__FILE__, __LINE__, ZSTD_QUOTE(cond), ZSTD_QUOTE(ERROR(err))); \ | |||
_FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \ | |||
RAWLOG(3, ": " __VA_ARGS__); \ | |||
RAWLOG(3, "\n"); \ | |||
return ERROR(err); \ | |||
} | |||
/** | |||
* Unconditionally return the specified error. | |||
* | |||
* In debug modes, prints additional information. | |||
*/ | |||
#define RETURN_ERROR(err, ...) \ | |||
do { \ | |||
RAWLOG(3, "%s:%d: ERROR!: unconditional check failed, returning %s", \ | |||
__FILE__, __LINE__, ZSTD_QUOTE(ERROR(err))); \ | |||
_FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \ | |||
RAWLOG(3, ": " __VA_ARGS__); \ | |||
RAWLOG(3, "\n"); \ | |||
return ERROR(err); \ | |||
} while(0); | |||
/** | |||
* If the provided expression evaluates to an error code, returns that error code. | |||
* | |||
* In debug modes, prints additional information. | |||
*/ | |||
#define FORWARD_IF_ERROR(err, ...) \ | |||
do { \ | |||
size_t const err_code = (err); \ | |||
if (ERR_isError(err_code)) { \ | |||
RAWLOG(3, "%s:%d: ERROR!: forwarding error in %s: %s", \ | |||
__FILE__, __LINE__, ZSTD_QUOTE(err), ERR_getErrorName(err_code)); \ | |||
_FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \ | |||
RAWLOG(3, ": " __VA_ARGS__); \ | |||
RAWLOG(3, "\n"); \ | |||
return err_code; \ | |||
} \ | |||
} while(0); | |||
#define BOUNDED(min,val,max) (MAX(min,MIN(val,max))) | |||
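/* e.g. BOUNDED(1, 9, 5) == MAX(1, MIN(9, 5)) == 5 : clamps val into [min, max]. */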
/*-************************************* | |||
@@ -140,8 +66,7 @@ void _force_has_format_string(const char *format, ...) { | |||
#define ZSTD_OPT_NUM (1<<12) | |||
#define ZSTD_REP_NUM 3 /* number of repcodes */ | |||
#define ZSTD_REP_MOVE (ZSTD_REP_NUM-1) | |||
static const U32 repStartValue[ZSTD_REP_NUM] = { 1, 4, 8 }; | |||
static UNUSED_ATTR const U32 repStartValue[ZSTD_REP_NUM] = { 1, 4, 8 }; | |||
#define KB *(1 <<10) | |||
#define MB *(1 <<20) | |||
@@ -155,21 +80,21 @@ static const U32 repStartValue[ZSTD_REP_NUM] = { 1, 4, 8 }; | |||
#define BIT0 1 | |||
#define ZSTD_WINDOWLOG_ABSOLUTEMIN 10 | |||
static const size_t ZSTD_fcs_fieldSize[4] = { 0, 2, 4, 8 }; | |||
static const size_t ZSTD_did_fieldSize[4] = { 0, 1, 2, 4 }; | |||
static UNUSED_ATTR const size_t ZSTD_fcs_fieldSize[4] = { 0, 2, 4, 8 }; | |||
static UNUSED_ATTR const size_t ZSTD_did_fieldSize[4] = { 0, 1, 2, 4 }; | |||
#define ZSTD_FRAMEIDSIZE 4 /* magic number size */ | |||
#define ZSTD_BLOCKHEADERSIZE 3 /* because the C standard doesn't allow a `static const` variable to be initialized from another `static const` variable */
static const size_t ZSTD_blockHeaderSize = ZSTD_BLOCKHEADERSIZE; | |||
static UNUSED_ATTR const size_t ZSTD_blockHeaderSize = ZSTD_BLOCKHEADERSIZE; | |||
typedef enum { bt_raw, bt_rle, bt_compressed, bt_reserved } blockType_e; | |||
#define ZSTD_FRAMECHECKSUMSIZE 4 | |||
#define MIN_SEQUENCES_SIZE 1 /* nbSeq==0 */ | |||
#define MIN_CBLOCK_SIZE (1 /*litCSize*/ + 1 /* RLE or RAW */ + MIN_SEQUENCES_SIZE /* nbSeq==0 */) /* for a non-null block */ | |||
#define MIN_CBLOCK_SIZE (1 /*litCSize*/ + 1 /* RLE or RAW */) /* for a non-null block */ | |||
#define MIN_LITERALS_FOR_4_STREAMS 6 | |||
#define HufLog 12 | |||
typedef enum { set_basic, set_rle, set_compressed, set_repeat } symbolEncodingType_e; | |||
#define LONGNBSEQ 0x7F00 | |||
@@ -177,6 +102,7 @@ typedef enum { set_basic, set_rle, set_compressed, set_repeat } symbolEncodingTy | |||
#define MINMATCH 3 | |||
#define Litbits 8 | |||
#define LitHufLog 11 | |||
#define MaxLit ((1<<Litbits) - 1) | |||
#define MaxML 52 | |||
#define MaxLL 35 | |||
@@ -187,62 +113,89 @@ typedef enum { set_basic, set_rle, set_compressed, set_repeat } symbolEncodingTy | |||
#define LLFSELog 9 | |||
#define OffFSELog 8 | |||
#define MaxFSELog MAX(MAX(MLFSELog, LLFSELog), OffFSELog) | |||
static const U32 LL_bits[MaxLL+1] = { 0, 0, 0, 0, 0, 0, 0, 0, | |||
0, 0, 0, 0, 0, 0, 0, 0, | |||
1, 1, 1, 1, 2, 2, 3, 3, | |||
4, 6, 7, 8, 9,10,11,12, | |||
13,14,15,16 }; | |||
static const S16 LL_defaultNorm[MaxLL+1] = { 4, 3, 2, 2, 2, 2, 2, 2, | |||
2, 2, 2, 2, 2, 1, 1, 1, | |||
2, 2, 2, 2, 2, 2, 2, 2, | |||
2, 3, 2, 1, 1, 1, 1, 1, | |||
-1,-1,-1,-1 }; | |||
#define MaxMLBits 16 | |||
#define MaxLLBits 16 | |||
#define ZSTD_MAX_HUF_HEADER_SIZE 128 /* header + <= 127 byte tree description */ | |||
/* Each table cannot take more than #symbols * FSELog bits */ | |||
#define ZSTD_MAX_FSE_HEADERS_SIZE (((MaxML + 1) * MLFSELog + (MaxLL + 1) * LLFSELog + (MaxOff + 1) * OffFSELog + 7) / 8) | |||
static UNUSED_ATTR const U8 LL_bits[MaxLL+1] = { | |||
0, 0, 0, 0, 0, 0, 0, 0, | |||
0, 0, 0, 0, 0, 0, 0, 0, | |||
1, 1, 1, 1, 2, 2, 3, 3, | |||
4, 6, 7, 8, 9,10,11,12, | |||
13,14,15,16 | |||
}; | |||
static UNUSED_ATTR const S16 LL_defaultNorm[MaxLL+1] = { | |||
4, 3, 2, 2, 2, 2, 2, 2, | |||
2, 2, 2, 2, 2, 1, 1, 1, | |||
2, 2, 2, 2, 2, 2, 2, 2, | |||
2, 3, 2, 1, 1, 1, 1, 1, | |||
-1,-1,-1,-1 | |||
}; | |||
#define LL_DEFAULTNORMLOG 6 /* for static allocation */ | |||
static const U32 LL_defaultNormLog = LL_DEFAULTNORMLOG; | |||
static const U32 ML_bits[MaxML+1] = { 0, 0, 0, 0, 0, 0, 0, 0, | |||
0, 0, 0, 0, 0, 0, 0, 0, | |||
0, 0, 0, 0, 0, 0, 0, 0, | |||
0, 0, 0, 0, 0, 0, 0, 0, | |||
1, 1, 1, 1, 2, 2, 3, 3, | |||
4, 4, 5, 7, 8, 9,10,11, | |||
12,13,14,15,16 }; | |||
static const S16 ML_defaultNorm[MaxML+1] = { 1, 4, 3, 2, 2, 2, 2, 2, | |||
2, 1, 1, 1, 1, 1, 1, 1, | |||
1, 1, 1, 1, 1, 1, 1, 1, | |||
1, 1, 1, 1, 1, 1, 1, 1, | |||
1, 1, 1, 1, 1, 1, 1, 1, | |||
1, 1, 1, 1, 1, 1,-1,-1, | |||
-1,-1,-1,-1,-1 }; | |||
static UNUSED_ATTR const U32 LL_defaultNormLog = LL_DEFAULTNORMLOG; | |||
static UNUSED_ATTR const U8 ML_bits[MaxML+1] = { | |||
0, 0, 0, 0, 0, 0, 0, 0, | |||
0, 0, 0, 0, 0, 0, 0, 0, | |||
0, 0, 0, 0, 0, 0, 0, 0, | |||
0, 0, 0, 0, 0, 0, 0, 0, | |||
1, 1, 1, 1, 2, 2, 3, 3, | |||
4, 4, 5, 7, 8, 9,10,11, | |||
12,13,14,15,16 | |||
}; | |||
static UNUSED_ATTR const S16 ML_defaultNorm[MaxML+1] = { | |||
1, 4, 3, 2, 2, 2, 2, 2, | |||
2, 1, 1, 1, 1, 1, 1, 1, | |||
1, 1, 1, 1, 1, 1, 1, 1, | |||
1, 1, 1, 1, 1, 1, 1, 1, | |||
1, 1, 1, 1, 1, 1, 1, 1, | |||
1, 1, 1, 1, 1, 1,-1,-1, | |||
-1,-1,-1,-1,-1 | |||
}; | |||
#define ML_DEFAULTNORMLOG 6 /* for static allocation */ | |||
static const U32 ML_defaultNormLog = ML_DEFAULTNORMLOG; | |||
static const S16 OF_defaultNorm[DefaultMaxOff+1] = { 1, 1, 1, 1, 1, 1, 2, 2, | |||
2, 1, 1, 1, 1, 1, 1, 1, | |||
1, 1, 1, 1, 1, 1, 1, 1, | |||
-1,-1,-1,-1,-1 }; | |||
static UNUSED_ATTR const U32 ML_defaultNormLog = ML_DEFAULTNORMLOG; | |||
static UNUSED_ATTR const S16 OF_defaultNorm[DefaultMaxOff+1] = { | |||
1, 1, 1, 1, 1, 1, 2, 2, | |||
2, 1, 1, 1, 1, 1, 1, 1, | |||
1, 1, 1, 1, 1, 1, 1, 1, | |||
-1,-1,-1,-1,-1 | |||
}; | |||
#define OF_DEFAULTNORMLOG 5 /* for static allocation */ | |||
static const U32 OF_defaultNormLog = OF_DEFAULTNORMLOG; | |||
static UNUSED_ATTR const U32 OF_defaultNormLog = OF_DEFAULTNORMLOG; | |||
/*-******************************************* | |||
* Shared functions to include for inlining | |||
*********************************************/ | |||
static void ZSTD_copy8(void* dst, const void* src) { | |||
#ifdef __aarch64__ | |||
#if defined(ZSTD_ARCH_ARM_NEON) | |||
vst1_u8((uint8_t*)dst, vld1_u8((const uint8_t*)src)); | |||
#else | |||
memcpy(dst, src, 8); | |||
ZSTD_memcpy(dst, src, 8); | |||
#endif | |||
} | |||
#define COPY8(d,s) { ZSTD_copy8(d,s); d+=8; s+=8; } | |||
/* We need memmove here, since the literal buffer may now be located within
   the dst buffer. When op "catches up" to where the literal buffer sits,
   the final copy can partially overlap if the literals are being shifted
   by fewer than 16 bytes. */
static void ZSTD_copy16(void* dst, const void* src) { | |||
#ifdef __aarch64__ | |||
#if defined(ZSTD_ARCH_ARM_NEON) | |||
vst1q_u8((uint8_t*)dst, vld1q_u8((const uint8_t*)src)); | |||
#elif defined(ZSTD_ARCH_X86_SSE2) | |||
_mm_storeu_si128((__m128i*)dst, _mm_loadu_si128((const __m128i*)src)); | |||
#elif defined(__clang__) | |||
ZSTD_memmove(dst, src, 16); | |||
#else | |||
memcpy(dst, src, 16); | |||
/* ZSTD_memmove is not inlined properly by gcc */ | |||
BYTE copy16_buf[16]; | |||
ZSTD_memcpy(copy16_buf, src, 16); | |||
ZSTD_memcpy(dst, copy16_buf, 16); | |||
#endif | |||
} | |||
#define COPY16(d,s) { ZSTD_copy16(d,s); d+=16; s+=16; } | |||
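To make the partial-overlap hazard above concrete, here is a standalone sketch (hypothetical buffers, not zstd code): when dst is fewer than 16 bytes past src, a plain 16-byte memcpy is undefined behavior, while staging through a local buffer, as the gcc fallback above does, behaves like memmove.

#include <stdio.h>
#include <string.h>

int main(void) {
    char buf[32] = "abcdefghijklmnopqrstuvwxyz";
    char tmp[16];
    char *src = buf;     /* reads [0, 16)  */
    char *dst = buf + 8; /* writes [8, 24): overlaps src by 8 bytes */
    /* memcpy(dst, src, 16) would be undefined behavior here; staging
     * through a separate buffer gives memmove semantics: */
    memcpy(tmp, src, 16);
    memcpy(dst, tmp, 16);
    printf("%.24s\n", buf); /* prints abcdefghabcdefghijklmnop */
    return 0;
}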
@@ -257,7 +210,7 @@ typedef enum { | |||
} ZSTD_overlap_e; | |||
/*! ZSTD_wildcopy() : | |||
* Custom version of memcpy(), can over read/write up to WILDCOPY_OVERLENGTH bytes (if length==0) | |||
* Custom version of ZSTD_memcpy(), can over read/write up to WILDCOPY_OVERLENGTH bytes (if length==0) | |||
* @param ovtype controls the overlap detection | |||
* - ZSTD_no_overlap: The source and destination are guaranteed to be at least WILDCOPY_VECLEN bytes apart. | |||
* - ZSTD_overlap_src_before_dst: The src and dst may overlap, but they MUST be at least 8 bytes apart. | |||
@@ -271,8 +224,6 @@ void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e | |||
BYTE* op = (BYTE*)dst; | |||
BYTE* const oend = op + length; | |||
assert(diff >= 8 || (ovtype == ZSTD_no_overlap && diff <= -WILDCOPY_VECLEN)); | |||
if (ovtype == ZSTD_overlap_src_before_dst && diff < WILDCOPY_VECLEN) { | |||
/* Handle short offset copies. */ | |||
do { | |||
@@ -286,20 +237,15 @@ void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e | |||
* one COPY16() in the first call. Then, do two calls per loop since | |||
* at that point it is more likely to have a high trip count. | |||
*/ | |||
#ifndef __aarch64__ | |||
do { | |||
COPY16(op, ip); | |||
} | |||
while (op < oend); | |||
#else | |||
COPY16(op, ip); | |||
if (op >= oend) return; | |||
ZSTD_copy16(op, ip); | |||
if (16 >= length) return; | |||
op += 16; | |||
ip += 16; | |||
do { | |||
COPY16(op, ip); | |||
COPY16(op, ip); | |||
} | |||
while (op < oend); | |||
#endif | |||
} | |||
} | |||
@@ -307,7 +253,7 @@ MEM_STATIC size_t ZSTD_limitCopy(void* dst, size_t dstCapacity, const void* src, | |||
{ | |||
size_t const length = MIN(dstCapacity, srcSize); | |||
if (length > 0) { | |||
memcpy(dst, src, length); | |||
ZSTD_memcpy(dst, src, length); | |||
} | |||
return length; | |||
} | |||
@@ -322,28 +268,46 @@ MEM_STATIC size_t ZSTD_limitCopy(void* dst, size_t dstCapacity, const void* src, | |||
* In which case, resize it down to free some memory */ | |||
#define ZSTD_WORKSPACETOOLARGE_MAXDURATION 128 | |||
/* Controls whether the input/output buffer is buffered or stable. */ | |||
typedef enum { | |||
ZSTD_bm_buffered = 0, /* Buffer the input/output */ | |||
ZSTD_bm_stable = 1 /* ZSTD_inBuffer/ZSTD_outBuffer is stable */ | |||
} ZSTD_bufferMode_e; | |||
/*-******************************************* | |||
* Private declarations | |||
*********************************************/ | |||
typedef struct seqDef_s { | |||
U32 offset; | |||
U32 offBase; /* offBase == Offset + ZSTD_REP_NUM, or repcode 1,2,3 */ | |||
U16 litLength; | |||
U16 matchLength; | |||
U16 mlBase; /* mlBase == matchLength - MINMATCH */ | |||
} seqDef; | |||
/* Controls whether seqStore has a single "long" litLength or matchLength. See seqStore_t. */ | |||
typedef enum { | |||
ZSTD_llt_none = 0, /* no longLengthType */ | |||
ZSTD_llt_literalLength = 1, /* represents a long literal */ | |||
ZSTD_llt_matchLength = 2 /* represents a long match */ | |||
} ZSTD_longLengthType_e; | |||
typedef struct { | |||
seqDef* sequencesStart; | |||
seqDef* sequences; | |||
BYTE* litStart; | |||
BYTE* lit; | |||
BYTE* llCode; | |||
BYTE* mlCode; | |||
BYTE* ofCode; | |||
seqDef* sequences; /* ptr to end of sequences */ | |||
BYTE* litStart; | |||
BYTE* lit; /* ptr to end of literals */ | |||
BYTE* llCode; | |||
BYTE* mlCode; | |||
BYTE* ofCode; | |||
size_t maxNbSeq; | |||
size_t maxNbLit; | |||
U32 longLengthID; /* 0 == no longLength; 1 == Lit.longLength; 2 == Match.longLength; */ | |||
U32 longLengthPos; | |||
/* longLengthPos and longLengthType to allow us to represent either a single litLength or matchLength | |||
* in the seqStore that has a value larger than U16 (if it exists). To do so, we increment | |||
* the existing value of the litLength or matchLength by 0x10000. | |||
*/ | |||
ZSTD_longLengthType_e longLengthType; | |||
U32 longLengthPos; /* Index of the sequence to apply long length modification to */ | |||
} seqStore_t; | |||
typedef struct { | |||
@@ -353,19 +317,19 @@ typedef struct { | |||
/** | |||
* Returns the ZSTD_sequenceLength for the given sequences. It handles the decoding of long sequences | |||
* indicated by longLengthPos and longLengthID, and adds MINMATCH back to matchLength. | |||
* indicated by longLengthPos and longLengthType, and adds MINMATCH back to matchLength. | |||
*/ | |||
MEM_STATIC ZSTD_sequenceLength ZSTD_getSequenceLength(seqStore_t const* seqStore, seqDef const* seq) | |||
{ | |||
ZSTD_sequenceLength seqLen; | |||
seqLen.litLength = seq->litLength; | |||
seqLen.matchLength = seq->matchLength + MINMATCH; | |||
seqLen.matchLength = seq->mlBase + MINMATCH; | |||
if (seqStore->longLengthPos == (U32)(seq - seqStore->sequencesStart)) { | |||
if (seqStore->longLengthID == 1) { | |||
seqLen.litLength += 0xFFFF; | |||
if (seqStore->longLengthType == ZSTD_llt_literalLength) { | |||
seqLen.litLength += 0x10000; | |||
} | |||
if (seqStore->longLengthID == 2) { | |||
seqLen.matchLength += 0xFFFF; | |||
if (seqStore->longLengthType == ZSTD_llt_matchLength) { | |||
seqLen.matchLength += 0x10000; | |||
} | |||
} | |||
return seqLen; | |||
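A worked example of this convention, as a standalone sketch with made-up numbers: a 70,000-byte literal run does not fit in the U16 litLength field, so the writer stores 70000 - 0x10000 = 4464 and tags the sequence through longLengthType and longLengthPos; the reader, as above, adds 0x10000 back.

#include <assert.h>
#include <stdint.h>

int main(void) {
    uint32_t const actual  = 70000;                        /* too large for U16 */
    uint16_t const stored  = (uint16_t)(actual - 0x10000); /* 4464, fits in U16 */
    uint32_t const decoded = (uint32_t)stored + 0x10000;   /* reader side */
    assert(decoded == actual);
    return 0;
}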
@@ -378,42 +342,18 @@ MEM_STATIC ZSTD_sequenceLength ZSTD_getSequenceLength(seqStore_t const* seqStore | |||
* `decompressedBound != ZSTD_CONTENTSIZE_ERROR` | |||
*/ | |||
typedef struct { | |||
size_t nbBlocks; | |||
size_t compressedSize; | |||
unsigned long long decompressedBound; | |||
} ZSTD_frameSizeInfo; /* decompress & legacy */ | |||
const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx); /* compress & dictBuilder */ | |||
void ZSTD_seqToCodes(const seqStore_t* seqStorePtr); /* compress, dictBuilder, decodeCorpus (shouldn't get its definition from here) */ | |||
int ZSTD_seqToCodes(const seqStore_t* seqStorePtr); /* compress, dictBuilder, decodeCorpus (shouldn't get its definition from here) */ | |||
/* custom memory allocation functions */ | |||
void* ZSTD_malloc(size_t size, ZSTD_customMem customMem); | |||
void* ZSTD_calloc(size_t size, ZSTD_customMem customMem); | |||
void ZSTD_free(void* ptr, ZSTD_customMem customMem); | |||
MEM_STATIC U32 ZSTD_highbit32(U32 val) /* compress, dictBuilder, decodeCorpus */ | |||
{ | |||
assert(val != 0); | |||
{ | |||
# if defined(_MSC_VER) /* Visual */ | |||
unsigned long r=0; | |||
return _BitScanReverse(&r, val) ? (unsigned)r : 0; | |||
# elif defined(__GNUC__) && (__GNUC__ >= 3) /* GCC Intrinsic */ | |||
return __builtin_clz (val) ^ 31; | |||
# elif defined(__ICCARM__) /* IAR Intrinsic */ | |||
return 31 - __CLZ(val); | |||
# else /* Software version */ | |||
static const U32 DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 }; | |||
U32 v = val; | |||
v |= v >> 1; | |||
v |= v >> 2; | |||
v |= v >> 4; | |||
v |= v >> 8; | |||
v |= v >> 16; | |||
return DeBruijnClz[(v * 0x07C4ACDDU) >> 27]; | |||
# endif | |||
} | |||
} | |||
void* ZSTD_customMalloc(size_t size, ZSTD_customMem customMem); | |||
void* ZSTD_customCalloc(size_t size, ZSTD_customMem customMem); | |||
void ZSTD_customFree(void* ptr, ZSTD_customMem customMem); | |||
/* ZSTD_invalidateRepCodes() : | |||
@@ -441,6 +381,14 @@ size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, | |||
size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr, | |||
const void* src, size_t srcSize); | |||
/** | |||
* @returns true iff the CPU supports dynamic BMI2 dispatch. | |||
*/ | |||
MEM_STATIC int ZSTD_cpuSupportsBmi2(void) | |||
{ | |||
ZSTD_cpuid_t cpuid = ZSTD_cpuid(); | |||
return ZSTD_cpuid_bmi1(cpuid) && ZSTD_cpuid_bmi2(cpuid); | |||
} | |||
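Note that both CPUID flags are required; a plausible reading is that the BMI2-specialized code paths may also rely on BMI1 instructions, so BMI2 support alone is not treated as sufficient.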
#if defined (__cplusplus) | |||
} |
@@ -1,5 +1,5 @@ | |||
/* | |||
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. | |||
* Copyright (c) Meta Platforms, Inc. and affiliates. | |||
* All rights reserved. | |||
* | |||
* This source code is licensed under both the BSD-style license (found in the | |||
@@ -17,7 +17,20 @@ extern "C" { | |||
#include "zstd_compress_internal.h" | |||
/** | |||
* Dedicated Dictionary Search Structure bucket log. In the | |||
* ZSTD_dedicatedDictSearch mode, the hashTable has | |||
* 2 ** ZSTD_LAZY_DDSS_BUCKET_LOG entries in each bucket, rather than just | |||
* one. | |||
*/ | |||
#define ZSTD_LAZY_DDSS_BUCKET_LOG 2 | |||
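With ZSTD_LAZY_DDSS_BUCKET_LOG == 2, each bucket therefore holds 2^2 = 4 candidate entries.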
#define ZSTD_ROW_HASH_TAG_BITS 8 /* nb bits to use for the tag */ | |||
U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip); | |||
void ZSTD_row_update(ZSTD_matchState_t* const ms, const BYTE* ip); | |||
void ZSTD_dedicatedDictSearch_lazy_loadDictionary(ZSTD_matchState_t* ms, const BYTE* const ip); | |||
void ZSTD_preserveUnsortedMark (U32* const table, U32 const size, U32 const reducerValue); /*! used in ZSTD_reduceIndex(). preemptively increase value of ZSTD_DUBT_UNSORTED_MARK */ | |||
@@ -33,6 +46,15 @@ size_t ZSTD_compressBlock_lazy( | |||
size_t ZSTD_compressBlock_greedy( | |||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], | |||
void const* src, size_t srcSize); | |||
size_t ZSTD_compressBlock_lazy2_row( | |||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], | |||
void const* src, size_t srcSize); | |||
size_t ZSTD_compressBlock_lazy_row( | |||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], | |||
void const* src, size_t srcSize); | |||
size_t ZSTD_compressBlock_greedy_row( | |||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], | |||
void const* src, size_t srcSize); | |||
size_t ZSTD_compressBlock_btlazy2_dictMatchState( | |||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], | |||
@@ -46,6 +68,34 @@ size_t ZSTD_compressBlock_lazy_dictMatchState( | |||
size_t ZSTD_compressBlock_greedy_dictMatchState( | |||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], | |||
void const* src, size_t srcSize); | |||
size_t ZSTD_compressBlock_lazy2_dictMatchState_row( | |||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], | |||
void const* src, size_t srcSize); | |||
size_t ZSTD_compressBlock_lazy_dictMatchState_row( | |||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], | |||
void const* src, size_t srcSize); | |||
size_t ZSTD_compressBlock_greedy_dictMatchState_row( | |||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], | |||
void const* src, size_t srcSize); | |||
size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch( | |||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], | |||
void const* src, size_t srcSize); | |||
size_t ZSTD_compressBlock_lazy_dedicatedDictSearch( | |||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], | |||
void const* src, size_t srcSize); | |||
size_t ZSTD_compressBlock_greedy_dedicatedDictSearch( | |||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], | |||
void const* src, size_t srcSize); | |||
size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch_row( | |||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], | |||
void const* src, size_t srcSize); | |||
size_t ZSTD_compressBlock_lazy_dedicatedDictSearch_row( | |||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], | |||
void const* src, size_t srcSize); | |||
size_t ZSTD_compressBlock_greedy_dedicatedDictSearch_row( | |||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], | |||
void const* src, size_t srcSize); | |||
size_t ZSTD_compressBlock_greedy_extDict( | |||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], | |||
@@ -56,10 +106,20 @@ size_t ZSTD_compressBlock_lazy_extDict( | |||
size_t ZSTD_compressBlock_lazy2_extDict( | |||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], | |||
void const* src, size_t srcSize); | |||
size_t ZSTD_compressBlock_greedy_extDict_row( | |||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], | |||
void const* src, size_t srcSize); | |||
size_t ZSTD_compressBlock_lazy_extDict_row( | |||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], | |||
void const* src, size_t srcSize); | |||
size_t ZSTD_compressBlock_lazy2_extDict_row( | |||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], | |||
void const* src, size_t srcSize); | |||
size_t ZSTD_compressBlock_btlazy2_extDict( | |||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], | |||
void const* src, size_t srcSize); | |||
#if defined (__cplusplus) | |||
} | |||
#endif |
@@ -1,5 +1,5 @@ | |||
/* | |||
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. | |||
* Copyright (c) Meta Platforms, Inc. and affiliates. | |||
* All rights reserved. | |||
* | |||
* This source code is licensed under both the BSD-style license (found in the | |||
@@ -11,13 +11,126 @@ | |||
#include "zstd_ldm.h" | |||
#include "debug.h" | |||
#include "xxhash.h" | |||
#include "zstd_fast.h" /* ZSTD_fillHashTable() */ | |||
#include "zstd_double_fast.h" /* ZSTD_fillDoubleHashTable() */ | |||
#include "zstd_ldm_geartab.h" | |||
#define LDM_BUCKET_SIZE_LOG 3 | |||
#define LDM_MIN_MATCH_LENGTH 64 | |||
#define LDM_HASH_RLOG 7 | |||
#define LDM_HASH_CHAR_OFFSET 10 | |||
typedef struct { | |||
U64 rolling; | |||
U64 stopMask; | |||
} ldmRollingHashState_t; | |||
/** ZSTD_ldm_gear_init(): | |||
* | |||
* Initializes the rolling hash state such that it will honor the | |||
* settings in params. */ | |||
static void ZSTD_ldm_gear_init(ldmRollingHashState_t* state, ldmParams_t const* params) | |||
{ | |||
unsigned maxBitsInMask = MIN(params->minMatchLength, 64); | |||
unsigned hashRateLog = params->hashRateLog; | |||
state->rolling = ~(U32)0; | |||
/* The choice of the splitting criterion is subject to two conditions: | |||
* 1. it has to trigger on average every 2^(hashRateLog) bytes; | |||
* 2. ideally, it has to depend on a window of minMatchLength bytes. | |||
* | |||
* In the gear hash algorithm, bit n depends on the last n bytes; | |||
* so in order to obtain a good quality splitting criterion it is | |||
* preferable to use bits with high weight. | |||
* | |||
* To match condition 1 we use a mask with hashRateLog bits set | |||
* and, because of the previous remark, we make sure these bits | |||
* have the highest possible weight while still respecting | |||
* condition 2. | |||
*/ | |||
if (hashRateLog > 0 && hashRateLog <= maxBitsInMask) { | |||
state->stopMask = (((U64)1 << hashRateLog) - 1) << (maxBitsInMask - hashRateLog); | |||
} else { | |||
/* In this degenerate case we simply honor the hash rate. */ | |||
state->stopMask = ((U64)1 << hashRateLog) - 1; | |||
} | |||
} | |||
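A quick standalone sketch of the non-degenerate case, with assumed values hashRateLog = 7 and minMatchLength >= 64: the mask is seven set bits pushed to the top of the 64-bit state, so a split fires when the seven highest-weight hash bits are all zero, i.e. on average once every 2^7 = 128 bytes.

#include <stdint.h>
#include <stdio.h>

int main(void) {
    unsigned const hashRateLog   = 7;
    unsigned const maxBitsInMask = 64; /* MIN(minMatchLength, 64) when minMatchLength >= 64 */
    uint64_t const stopMask =
        (((uint64_t)1 << hashRateLog) - 1) << (maxBitsInMask - hashRateLog);
    printf("stopMask = 0x%016llx\n", (unsigned long long)stopMask);
    /* prints stopMask = 0xfe00000000000000 */
    return 0;
}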
/** ZSTD_ldm_gear_reset() | |||
* Feeds [data, data + minMatchLength) into the hash without registering any | |||
* splits. This effectively resets the hash state. This is used when skipping | |||
 * over data, either at the beginning of a block or when skipping sections. | |||
*/ | |||
static void ZSTD_ldm_gear_reset(ldmRollingHashState_t* state, | |||
BYTE const* data, size_t minMatchLength) | |||
{ | |||
U64 hash = state->rolling; | |||
size_t n = 0; | |||
#define GEAR_ITER_ONCE() do { \ | |||
hash = (hash << 1) + ZSTD_ldm_gearTab[data[n] & 0xff]; \ | |||
n += 1; \ | |||
} while (0) | |||
while (n + 3 < minMatchLength) { | |||
GEAR_ITER_ONCE(); | |||
GEAR_ITER_ONCE(); | |||
GEAR_ITER_ONCE(); | |||
GEAR_ITER_ONCE(); | |||
} | |||
while (n < minMatchLength) { | |||
GEAR_ITER_ONCE(); | |||
} | |||
#undef GEAR_ITER_ONCE | |||
} | |||
/** ZSTD_ldm_gear_feed(): | |||
* | |||
* Registers in the splits array all the split points found in the first | |||
* size bytes following the data pointer. This function terminates when | |||
* either all the data has been processed or LDM_BATCH_SIZE splits are | |||
* present in the splits array. | |||
* | |||
* Precondition: The splits array must not be full. | |||
* Returns: The number of bytes processed. */ | |||
static size_t ZSTD_ldm_gear_feed(ldmRollingHashState_t* state, | |||
BYTE const* data, size_t size, | |||
size_t* splits, unsigned* numSplits) | |||
{ | |||
size_t n; | |||
U64 hash, mask; | |||
hash = state->rolling; | |||
mask = state->stopMask; | |||
n = 0; | |||
#define GEAR_ITER_ONCE() do { \ | |||
hash = (hash << 1) + ZSTD_ldm_gearTab[data[n] & 0xff]; \ | |||
n += 1; \ | |||
if (UNLIKELY((hash & mask) == 0)) { \ | |||
splits[*numSplits] = n; \ | |||
*numSplits += 1; \ | |||
if (*numSplits == LDM_BATCH_SIZE) \ | |||
goto done; \ | |||
} \ | |||
} while (0) | |||
while (n + 3 < size) { | |||
GEAR_ITER_ONCE(); | |||
GEAR_ITER_ONCE(); | |||
GEAR_ITER_ONCE(); | |||
GEAR_ITER_ONCE(); | |||
} | |||
while (n < size) { | |||
GEAR_ITER_ONCE(); | |||
} | |||
#undef GEAR_ITER_ONCE | |||
done: | |||
state->rolling = hash; | |||
return n; | |||
} | |||
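As a sketch of why the comment in ZSTD_ldm_gear_init() holds (illustration only, not library code): each GEAR_ITER_ONCE() step shifts the previous state left by one before adding the table value, so the contribution of a byte seen k steps ago sits at tab[byte] << k and has fully left the 64-bit state after 64 steps; that is exactly the "bit n depends on the last n bytes" property.

#include <stdint.h>

/* one gear-hash step, mirroring GEAR_ITER_ONCE() above */
static uint64_t gear_step(uint64_t hash, uint8_t byte, const uint64_t tab[256])
{
    return (hash << 1) + tab[byte];
}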
void ZSTD_ldm_adjustParameters(ldmParams_t* params, | |||
ZSTD_compressionParameters const* cParams) | |||
@@ -27,13 +140,6 @@ void ZSTD_ldm_adjustParameters(ldmParams_t* params, | |||
DEBUGLOG(4, "ZSTD_ldm_adjustParameters"); | |||
if (!params->bucketSizeLog) params->bucketSizeLog = LDM_BUCKET_SIZE_LOG; | |||
if (!params->minMatchLength) params->minMatchLength = LDM_MIN_MATCH_LENGTH; | |||
if (cParams->strategy >= ZSTD_btopt) { | |||
/* Get out of the way of the optimal parser */ | |||
U32 const minMatch = MAX(cParams->targetLength, params->minMatchLength); | |||
assert(minMatch >= ZSTD_LDM_MINMATCH_MIN); | |||
assert(minMatch <= ZSTD_LDM_MINMATCH_MAX); | |||
params->minMatchLength = minMatch; | |||
} | |||
if (params->hashLog == 0) { | |||
params->hashLog = MAX(ZSTD_HASHLOG_MIN, params->windowLog - LDM_HASH_RLOG); | |||
assert(params->hashLog <= ZSTD_HASHLOG_MAX); | |||
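For example, with windowLog = 27 and LDM_HASH_RLOG = 7 as defined above, an unset hashLog defaults to 27 - 7 = 20, i.e. a 2^20-slot hash space before bucketing.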
@@ -53,47 +159,12 @@ size_t ZSTD_ldm_getTableSize(ldmParams_t params) | |||
size_t const ldmBucketSize = ((size_t)1) << (params.hashLog - ldmBucketSizeLog); | |||
size_t const totalSize = ZSTD_cwksp_alloc_size(ldmBucketSize) | |||
+ ZSTD_cwksp_alloc_size(ldmHSize * sizeof(ldmEntry_t)); | |||
return params.enableLdm ? totalSize : 0; | |||
return params.enableLdm == ZSTD_ps_enable ? totalSize : 0; | |||
} | |||
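To put rough numbers on this, assuming ldmEntry_t is two U32 fields (8 bytes): hashLog = 20 and bucketSizeLog = 3 give a 2^20-entry hash table of about 8 MiB plus 2^17 = 128 KiB of one-byte bucket offsets, before ZSTD_cwksp_alloc_size() rounding.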
size_t ZSTD_ldm_getMaxNbSeq(ldmParams_t params, size_t maxChunkSize) | |||
{ | |||
return params.enableLdm ? (maxChunkSize / params.minMatchLength) : 0; | |||
} | |||
/** ZSTD_ldm_getSmallHash() : | |||
* numBits should be <= 32 | |||
* If numBits==0, returns 0. | |||
* @return : the most significant numBits of value. */ | |||
static U32 ZSTD_ldm_getSmallHash(U64 value, U32 numBits) | |||
{ | |||
assert(numBits <= 32); | |||
return numBits == 0 ? 0 : (U32)(value >> (64 - numBits)); | |||
} | |||
/** ZSTD_ldm_getChecksum() : | |||
* numBitsToDiscard should be <= 32 | |||
* @return : the next most significant 32 bits after numBitsToDiscard */ | |||
static U32 ZSTD_ldm_getChecksum(U64 hash, U32 numBitsToDiscard) | |||
{ | |||
assert(numBitsToDiscard <= 32); | |||
return (hash >> (64 - 32 - numBitsToDiscard)) & 0xFFFFFFFF; | |||
} | |||
/** ZSTD_ldm_getTag() : | |||
* Given the hash, returns the most significant numTagBits bits | |||
* after (32 + hbits) bits. | |||
* | |||
* If there are not enough bits remaining, return the last | |||
* numTagBits bits. */ | |||
static U32 ZSTD_ldm_getTag(U64 hash, U32 hbits, U32 numTagBits) | |||
{ | |||
assert(numTagBits < 32 && hbits <= 32); | |||
if (32 - hbits < numTagBits) { | |||
return hash & (((U32)1 << numTagBits) - 1); | |||
} else { | |||
return (hash >> (32 - hbits - numTagBits)) & (((U32)1 << numTagBits) - 1); | |||
} | |||
return params.enableLdm == ZSTD_ps_enable ? (maxChunkSize / params.minMatchLength) : 0; | |||
} | |||
/** ZSTD_ldm_getBucket() : | |||
@@ -110,38 +181,12 @@ static void ZSTD_ldm_insertEntry(ldmState_t* ldmState, | |||
size_t const hash, const ldmEntry_t entry, | |||
ldmParams_t const ldmParams) | |||
{ | |||
BYTE* const bucketOffsets = ldmState->bucketOffsets; | |||
*(ZSTD_ldm_getBucket(ldmState, hash, ldmParams) + bucketOffsets[hash]) = entry; | |||
bucketOffsets[hash]++; | |||
bucketOffsets[hash] &= ((U32)1 << ldmParams.bucketSizeLog) - 1; | |||
} | |||
BYTE* const pOffset = ldmState->bucketOffsets + hash; | |||
unsigned const offset = *pOffset; | |||
*(ZSTD_ldm_getBucket(ldmState, hash, ldmParams) + offset) = entry; | |||
*pOffset = (BYTE)((offset + 1) & ((1u << ldmParams.bucketSizeLog) - 1)); | |||
/** ZSTD_ldm_makeEntryAndInsertByTag() : | |||
* | |||
* Gets the small hash, checksum, and tag from the rollingHash. | |||
* | |||
* If the tag matches (1 << ldmParams.hashRateLog)-1, then | |||
* creates an ldmEntry from the offset, and inserts it into the hash table. | |||
* | |||
* hBits is the length of the small hash, which is the most significant hBits | |||
* of rollingHash. The checksum is the next 32 most significant bits, followed | |||
* by ldmParams.hashRateLog bits that make up the tag. */ | |||
static void ZSTD_ldm_makeEntryAndInsertByTag(ldmState_t* ldmState, | |||
U64 const rollingHash, | |||
U32 const hBits, | |||
U32 const offset, | |||
ldmParams_t const ldmParams) | |||
{ | |||
U32 const tag = ZSTD_ldm_getTag(rollingHash, hBits, ldmParams.hashRateLog); | |||
U32 const tagMask = ((U32)1 << ldmParams.hashRateLog) - 1; | |||
if (tag == tagMask) { | |||
U32 const hash = ZSTD_ldm_getSmallHash(rollingHash, hBits); | |||
U32 const checksum = ZSTD_ldm_getChecksum(rollingHash, hBits); | |||
ldmEntry_t entry; | |||
entry.offset = offset; | |||
entry.checksum = checksum; | |||
ZSTD_ldm_insertEntry(ldmState, hash, entry, ldmParams); | |||
} | |||
} | |||
/** ZSTD_ldm_countBackwardsMatch() : | |||
@@ -150,10 +195,10 @@ static void ZSTD_ldm_makeEntryAndInsertByTag(ldmState_t* ldmState, | |||
* We count only bytes where pMatch >= pBase and pIn >= pAnchor. */ | |||
static size_t ZSTD_ldm_countBackwardsMatch( | |||
const BYTE* pIn, const BYTE* pAnchor, | |||
const BYTE* pMatch, const BYTE* pBase) | |||
const BYTE* pMatch, const BYTE* pMatchBase) | |||
{ | |||
size_t matchLength = 0; | |||
while (pIn > pAnchor && pMatch > pBase && pIn[-1] == pMatch[-1]) { | |||
while (pIn > pAnchor && pMatch > pMatchBase && pIn[-1] == pMatch[-1]) { | |||
pIn--; | |||
pMatch--; | |||
matchLength++; | |||
@@ -161,6 +206,27 @@ static size_t ZSTD_ldm_countBackwardsMatch( | |||
return matchLength; | |||
} | |||
/** ZSTD_ldm_countBackwardsMatch_2segments() : | |||
* Returns the number of bytes that match backwards from pMatch, | |||
* even with the backwards match spanning 2 different segments. | |||
* | |||
 * On reaching `pMatchBase`, continue counting backwards from `pExtDictEnd` */ | |||
static size_t ZSTD_ldm_countBackwardsMatch_2segments( | |||
const BYTE* pIn, const BYTE* pAnchor, | |||
const BYTE* pMatch, const BYTE* pMatchBase, | |||
const BYTE* pExtDictStart, const BYTE* pExtDictEnd) | |||
{ | |||
size_t matchLength = ZSTD_ldm_countBackwardsMatch(pIn, pAnchor, pMatch, pMatchBase); | |||
if (pMatch - matchLength != pMatchBase || pMatchBase == pExtDictStart) { | |||
/* If backwards match is entirely in the extDict or prefix, immediately return */ | |||
return matchLength; | |||
} | |||
DEBUGLOG(7, "ZSTD_ldm_countBackwardsMatch_2segments: found 2-parts backwards match (length in prefix==%zu)", matchLength); | |||
matchLength += ZSTD_ldm_countBackwardsMatch(pIn - matchLength, pAnchor, pExtDictEnd, pExtDictStart); | |||
DEBUGLOG(7, "final backwards match length = %zu", matchLength); | |||
return matchLength; | |||
} | |||
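For instance, if the backwards scan from pMatch reaches pMatchBase after 12 matching bytes and pMatchBase is not pExtDictStart, the count resumes from pExtDictEnd into the extDict segment and the two partial lengths are summed, as the DEBUGLOG lines above trace.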
/** ZSTD_ldm_fillFastTables() : | |||
* | |||
* Fills the relevant tables for the ZSTD_fast and ZSTD_dfast strategies. | |||
@@ -176,11 +242,11 @@ static size_t ZSTD_ldm_fillFastTables(ZSTD_matchState_t* ms, | |||
switch(ms->cParams.strategy) | |||
{ | |||
case ZSTD_fast: | |||
ZSTD_fillHashTable(ms, iend, ZSTD_dtlm_fast); | |||
ZSTD_fillHashTable(ms, iend, ZSTD_dtlm_fast, ZSTD_tfp_forCCtx); | |||
break; | |||
case ZSTD_dfast: | |||
ZSTD_fillDoubleHashTable(ms, iend, ZSTD_dtlm_fast); | |||
ZSTD_fillDoubleHashTable(ms, iend, ZSTD_dtlm_fast, ZSTD_tfp_forCCtx); | |||
break; | |||
case ZSTD_greedy: | |||
@@ -198,43 +264,42 @@ static size_t ZSTD_ldm_fillFastTables(ZSTD_matchState_t* ms, | |||
return 0; | |||
} | |||
/** ZSTD_ldm_fillLdmHashTable() : | |||
* | |||
* Fills hashTable from (lastHashed + 1) to iend (non-inclusive). | |||
* lastHash is the rolling hash that corresponds to lastHashed. | |||
* | |||
* Returns the rolling hash corresponding to position iend-1. */ | |||
static U64 ZSTD_ldm_fillLdmHashTable(ldmState_t* state, | |||
U64 lastHash, const BYTE* lastHashed, | |||
const BYTE* iend, const BYTE* base, | |||
U32 hBits, ldmParams_t const ldmParams) | |||
{ | |||
U64 rollingHash = lastHash; | |||
const BYTE* cur = lastHashed + 1; | |||
while (cur < iend) { | |||
rollingHash = ZSTD_rollingHash_rotate(rollingHash, cur[-1], | |||
cur[ldmParams.minMatchLength-1], | |||
state->hashPower); | |||
ZSTD_ldm_makeEntryAndInsertByTag(state, | |||
rollingHash, hBits, | |||
(U32)(cur - base), ldmParams); | |||
++cur; | |||
} | |||
return rollingHash; | |||
} | |||
void ZSTD_ldm_fillHashTable( | |||
ldmState_t* state, const BYTE* ip, | |||
ldmState_t* ldmState, const BYTE* ip, | |||
const BYTE* iend, ldmParams_t const* params) | |||
{ | |||
U32 const minMatchLength = params->minMatchLength; | |||
U32 const hBits = params->hashLog - params->bucketSizeLog; | |||
BYTE const* const base = ldmState->window.base; | |||
BYTE const* const istart = ip; | |||
ldmRollingHashState_t hashState; | |||
size_t* const splits = ldmState->splitIndices; | |||
unsigned numSplits; | |||
DEBUGLOG(5, "ZSTD_ldm_fillHashTable"); | |||
if ((size_t)(iend - ip) >= params->minMatchLength) { | |||
U64 startingHash = ZSTD_rollingHash_compute(ip, params->minMatchLength); | |||
ZSTD_ldm_fillLdmHashTable( | |||
state, startingHash, ip, iend - params->minMatchLength, state->window.base, | |||
params->hashLog - params->bucketSizeLog, | |||
*params); | |||
ZSTD_ldm_gear_init(&hashState, params); | |||
while (ip < iend) { | |||
size_t hashed; | |||
unsigned n; | |||
numSplits = 0; | |||
hashed = ZSTD_ldm_gear_feed(&hashState, ip, iend - ip, splits, &numSplits); | |||
for (n = 0; n < numSplits; n++) { | |||
if (ip + splits[n] >= istart + minMatchLength) { | |||
BYTE const* const split = ip + splits[n] - minMatchLength; | |||
U64 const xxhash = XXH64(split, minMatchLength, 0); | |||
U32 const hash = (U32)(xxhash & (((U32)1 << hBits) - 1)); | |||
ldmEntry_t entry; | |||
entry.offset = (U32)(split - base); | |||
entry.checksum = (U32)(xxhash >> 32); | |||
ZSTD_ldm_insertEntry(ldmState, hash, entry, *params); | |||
} | |||
} | |||
ip += hashed; | |||
} | |||
} | |||
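The digest split can be sketched standalone (assumed hBits = 20 and a made-up XXH64 value): the low hBits of the 64-bit digest select the bucket, and the top 32 bits are kept as a checksum so that non-matching candidates can be rejected cheaply.

#include <stdint.h>

int main(void) {
    uint64_t const xxhash = 0x0123456789abcdefULL; /* hypothetical XXH64 digest */
    unsigned const hBits  = 20;
    uint32_t const hash     = (uint32_t)(xxhash & (((uint32_t)1 << hBits) - 1));
    uint32_t const checksum = (uint32_t)(xxhash >> 32);
    /* hash == 0xbcdef, checksum == 0x01234567 */
    (void)hash; (void)checksum;
    return 0;
}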
@@ -246,10 +311,10 @@ void ZSTD_ldm_fillHashTable( | |||
* (after a long match, only update tables a limited amount). */ | |||
static void ZSTD_ldm_limitTableUpdate(ZSTD_matchState_t* ms, const BYTE* anchor) | |||
{ | |||
U32 const current = (U32)(anchor - ms->window.base); | |||
if (current > ms->nextToUpdate + 1024) { | |||
U32 const curr = (U32)(anchor - ms->window.base); | |||
if (curr > ms->nextToUpdate + 1024) { | |||
ms->nextToUpdate = | |||
current - MIN(512, current - ms->nextToUpdate - 1024); | |||
curr - MIN(512, curr - ms->nextToUpdate - 1024); | |||
} | |||
} | |||
@@ -260,11 +325,8 @@ static size_t ZSTD_ldm_generateSequences_internal( | |||
/* LDM parameters */ | |||
int const extDict = ZSTD_window_hasExtDict(ldmState->window); | |||
U32 const minMatchLength = params->minMatchLength; | |||
U64 const hashPower = ldmState->hashPower; | |||
U32 const entsPerBucket = 1U << params->bucketSizeLog; | |||
U32 const hBits = params->hashLog - params->bucketSizeLog; | |||
U32 const ldmBucketSize = 1U << params->bucketSizeLog; | |||
U32 const hashRateLog = params->hashRateLog; | |||
U32 const ldmTagMask = (1U << params->hashRateLog) - 1; | |||
/* Prefix and extDict parameters */ | |||
U32 const dictLimit = ldmState->window.dictLimit; | |||
U32 const lowestIndex = extDict ? ldmState->window.lowLimit : dictLimit; | |||
@@ -276,45 +338,69 @@ static size_t ZSTD_ldm_generateSequences_internal( | |||
/* Input bounds */ | |||
BYTE const* const istart = (BYTE const*)src; | |||
BYTE const* const iend = istart + srcSize; | |||
BYTE const* const ilimit = iend - MAX(minMatchLength, HASH_READ_SIZE); | |||
BYTE const* const ilimit = iend - HASH_READ_SIZE; | |||
/* Input positions */ | |||
BYTE const* anchor = istart; | |||
BYTE const* ip = istart; | |||
/* Rolling hash */ | |||
BYTE const* lastHashed = NULL; | |||
U64 rollingHash = 0; | |||
while (ip <= ilimit) { | |||
size_t mLength; | |||
U32 const current = (U32)(ip - base); | |||
size_t forwardMatchLength = 0, backwardMatchLength = 0; | |||
ldmEntry_t* bestEntry = NULL; | |||
if (ip != istart) { | |||
rollingHash = ZSTD_rollingHash_rotate(rollingHash, lastHashed[0], | |||
lastHashed[minMatchLength], | |||
hashPower); | |||
} else { | |||
rollingHash = ZSTD_rollingHash_compute(ip, minMatchLength); | |||
/* Rolling hash state */ | |||
ldmRollingHashState_t hashState; | |||
/* Arrays for staged-processing */ | |||
size_t* const splits = ldmState->splitIndices; | |||
ldmMatchCandidate_t* const candidates = ldmState->matchCandidates; | |||
unsigned numSplits; | |||
if (srcSize < minMatchLength) | |||
return iend - anchor; | |||
/* Initialize the rolling hash state with the first minMatchLength bytes */ | |||
ZSTD_ldm_gear_init(&hashState, params); | |||
ZSTD_ldm_gear_reset(&hashState, ip, minMatchLength); | |||
ip += minMatchLength; | |||
while (ip < ilimit) { | |||
size_t hashed; | |||
unsigned n; | |||
numSplits = 0; | |||
hashed = ZSTD_ldm_gear_feed(&hashState, ip, ilimit - ip, | |||
splits, &numSplits); | |||
for (n = 0; n < numSplits; n++) { | |||
BYTE const* const split = ip + splits[n] - minMatchLength; | |||
U64 const xxhash = XXH64(split, minMatchLength, 0); | |||
U32 const hash = (U32)(xxhash & (((U32)1 << hBits) - 1)); | |||
candidates[n].split = split; | |||
candidates[n].hash = hash; | |||
candidates[n].checksum = (U32)(xxhash >> 32); | |||
candidates[n].bucket = ZSTD_ldm_getBucket(ldmState, hash, *params); | |||
PREFETCH_L1(candidates[n].bucket); | |||
} | |||
lastHashed = ip; | |||
/* Do not insert and do not look for a match */ | |||
if (ZSTD_ldm_getTag(rollingHash, hBits, hashRateLog) != ldmTagMask) { | |||
ip++; | |||
continue; | |||
} | |||
for (n = 0; n < numSplits; n++) { | |||
size_t forwardMatchLength = 0, backwardMatchLength = 0, | |||
bestMatchLength = 0, mLength; | |||
U32 offset; | |||
BYTE const* const split = candidates[n].split; | |||
U32 const checksum = candidates[n].checksum; | |||
U32 const hash = candidates[n].hash; | |||
ldmEntry_t* const bucket = candidates[n].bucket; | |||
ldmEntry_t const* cur; | |||
ldmEntry_t const* bestEntry = NULL; | |||
ldmEntry_t newEntry; | |||
newEntry.offset = (U32)(split - base); | |||
newEntry.checksum = checksum; | |||
/* If a split point would generate a sequence overlapping with | |||
* the previous one, we merely register it in the hash table and | |||
* move on */ | |||
if (split < anchor) { | |||
ZSTD_ldm_insertEntry(ldmState, hash, newEntry, *params); | |||
continue; | |||
} | |||
/* Get the best entry and compute the match lengths */ | |||
{ | |||
ldmEntry_t* const bucket = | |||
ZSTD_ldm_getBucket(ldmState, | |||
ZSTD_ldm_getSmallHash(rollingHash, hBits), | |||
*params); | |||
ldmEntry_t* cur; | |||
size_t bestMatchLength = 0; | |||
U32 const checksum = ZSTD_ldm_getChecksum(rollingHash, hBits); | |||
for (cur = bucket; cur < bucket + ldmBucketSize; ++cur) { | |||
for (cur = bucket; cur < bucket + entsPerBucket; cur++) { | |||
size_t curForwardMatchLength, curBackwardMatchLength, | |||
curTotalMatchLength; | |||
if (cur->checksum != checksum || cur->offset <= lowestIndex) { | |||
@@ -328,30 +414,23 @@ static size_t ZSTD_ldm_generateSequences_internal( | |||
cur->offset < dictLimit ? dictEnd : iend; | |||
BYTE const* const lowMatchPtr = | |||
cur->offset < dictLimit ? dictStart : lowPrefixPtr; | |||
curForwardMatchLength = ZSTD_count_2segments( | |||
ip, pMatch, iend, | |||
matchEnd, lowPrefixPtr); | |||
curForwardMatchLength = | |||
ZSTD_count_2segments(split, pMatch, iend, matchEnd, lowPrefixPtr); | |||
if (curForwardMatchLength < minMatchLength) { | |||
continue; | |||
} | |||
curBackwardMatchLength = | |||
ZSTD_ldm_countBackwardsMatch(ip, anchor, pMatch, | |||
lowMatchPtr); | |||
curTotalMatchLength = curForwardMatchLength + | |||
curBackwardMatchLength; | |||
curBackwardMatchLength = ZSTD_ldm_countBackwardsMatch_2segments( | |||
split, anchor, pMatch, lowMatchPtr, dictStart, dictEnd); | |||
} else { /* !extDict */ | |||
BYTE const* const pMatch = base + cur->offset; | |||
curForwardMatchLength = ZSTD_count(ip, pMatch, iend); | |||
curForwardMatchLength = ZSTD_count(split, pMatch, iend); | |||
if (curForwardMatchLength < minMatchLength) { | |||
continue; | |||
} | |||
curBackwardMatchLength = | |||
ZSTD_ldm_countBackwardsMatch(ip, anchor, pMatch, | |||
lowPrefixPtr); | |||
curTotalMatchLength = curForwardMatchLength + | |||
curBackwardMatchLength; | |||
ZSTD_ldm_countBackwardsMatch(split, anchor, pMatch, lowPrefixPtr); | |||
} | |||
curTotalMatchLength = curForwardMatchLength + curBackwardMatchLength; | |||
if (curTotalMatchLength > bestMatchLength) { | |||
bestMatchLength = curTotalMatchLength; | |||
@@ -360,57 +439,54 @@ static size_t ZSTD_ldm_generateSequences_internal( | |||
bestEntry = cur; | |||
} | |||
} | |||
} | |||
/* No match found -- continue searching */ | |||
if (bestEntry == NULL) { | |||
ZSTD_ldm_makeEntryAndInsertByTag(ldmState, rollingHash, | |||
hBits, current, | |||
*params); | |||
ip++; | |||
continue; | |||
} | |||
/* Match found */ | |||
mLength = forwardMatchLength + backwardMatchLength; | |||
ip -= backwardMatchLength; | |||
/* No match found -- insert an entry into the hash table | |||
* and process the next candidate match */ | |||
if (bestEntry == NULL) { | |||
ZSTD_ldm_insertEntry(ldmState, hash, newEntry, *params); | |||
continue; | |||
} | |||
{ | |||
/* Store the sequence: | |||
* ip = current - backwardMatchLength | |||
* The match is at (bestEntry->offset - backwardMatchLength) | |||
*/ | |||
U32 const matchIndex = bestEntry->offset; | |||
U32 const offset = current - matchIndex; | |||
rawSeq* const seq = rawSeqStore->seq + rawSeqStore->size; | |||
/* Out of sequence storage */ | |||
if (rawSeqStore->size == rawSeqStore->capacity) | |||
return ERROR(dstSize_tooSmall); | |||
seq->litLength = (U32)(ip - anchor); | |||
seq->matchLength = (U32)mLength; | |||
seq->offset = offset; | |||
rawSeqStore->size++; | |||
} | |||
/* Match found */ | |||
offset = (U32)(split - base) - bestEntry->offset; | |||
mLength = forwardMatchLength + backwardMatchLength; | |||
{ | |||
rawSeq* const seq = rawSeqStore->seq + rawSeqStore->size; | |||
/* Out of sequence storage */ | |||
if (rawSeqStore->size == rawSeqStore->capacity) | |||
return ERROR(dstSize_tooSmall); | |||
seq->litLength = (U32)(split - backwardMatchLength - anchor); | |||
seq->matchLength = (U32)mLength; | |||
seq->offset = offset; | |||
rawSeqStore->size++; | |||
} | |||
/* Insert the current entry into the hash table */ | |||
ZSTD_ldm_makeEntryAndInsertByTag(ldmState, rollingHash, hBits, | |||
(U32)(lastHashed - base), | |||
*params); | |||
/* Insert the current entry into the hash table --- it must be | |||
* done after the previous block to avoid clobbering bestEntry */ | |||
ZSTD_ldm_insertEntry(ldmState, hash, newEntry, *params); | |||
assert(ip + backwardMatchLength == lastHashed); | |||
anchor = split + forwardMatchLength; | |||
/* Fill the hash table from lastHashed+1 to ip+mLength*/ | |||
/* Heuristic: don't need to fill the entire table at end of block */ | |||
if (ip + mLength <= ilimit) { | |||
rollingHash = ZSTD_ldm_fillLdmHashTable( | |||
ldmState, rollingHash, lastHashed, | |||
ip + mLength, base, hBits, *params); | |||
lastHashed = ip + mLength - 1; | |||
/* If we find a match that ends after the data that we've hashed | |||
* then we have a repeating, overlapping, pattern. E.g. all zeros. | |||
* If one repetition of the pattern matches our `stopMask` then all | |||
 * repetitions will. We don't need to insert them all into our table, | |||
* only the first one. So skip over overlapping matches. | |||
* This is a major speed boost (20x) for compressing a single byte | |||
* repeated, when that byte ends up in the table. | |||
*/ | |||
if (anchor > ip + hashed) { | |||
ZSTD_ldm_gear_reset(&hashState, anchor - minMatchLength, minMatchLength); | |||
/* Continue the outer loop at anchor (ip + hashed == anchor). */ | |||
ip = anchor - hashed; | |||
break; | |||
} | |||
} | |||
ip += mLength; | |||
anchor = ip; | |||
ip += hashed; | |||
} | |||
return iend - anchor; | |||
} | |||
@@ -459,7 +535,7 @@ size_t ZSTD_ldm_generateSequences( | |||
assert(chunkStart < iend); | |||
/* 1. Perform overflow correction if necessary. */ | |||
if (ZSTD_window_needOverflowCorrection(ldmState->window, chunkEnd)) { | |||
if (ZSTD_window_needOverflowCorrection(ldmState->window, 0, maxDist, ldmState->loadedDictEnd, chunkStart, chunkEnd)) { | |||
U32 const ldmHSize = 1U << params->hashLog; | |||
U32 const correction = ZSTD_window_correctOverflow( | |||
&ldmState->window, /* cycleLog */ 0, maxDist, chunkStart); | |||
@@ -473,7 +549,7 @@ size_t ZSTD_ldm_generateSequences( | |||
* the window through early invalidation. | |||
* TODO: * Test the chunk size. | |||
* * Try invalidation after the sequence generation and test the | |||
* the offset against maxDist directly. | |||
* offset against maxDist directly. | |||
* | |||
* NOTE: Because of dictionaries + sequence splitting we MUST make sure | |||
* that any offset used is valid at the END of the sequence, since it may | |||
@@ -503,7 +579,9 @@ size_t ZSTD_ldm_generateSequences( | |||
return 0; | |||
} | |||
void ZSTD_ldm_skipSequences(rawSeqStore_t* rawSeqStore, size_t srcSize, U32 const minMatch) { | |||
void | |||
ZSTD_ldm_skipSequences(rawSeqStore_t* rawSeqStore, size_t srcSize, U32 const minMatch) | |||
{ | |||
while (srcSize > 0 && rawSeqStore->pos < rawSeqStore->size) { | |||
rawSeq* seq = rawSeqStore->seq + rawSeqStore->pos; | |||
if (srcSize <= seq->litLength) { | |||
@@ -562,14 +640,32 @@ static rawSeq maybeSplitSequence(rawSeqStore_t* rawSeqStore, | |||
return sequence; | |||
} | |||
void ZSTD_ldm_skipRawSeqStoreBytes(rawSeqStore_t* rawSeqStore, size_t nbBytes) { | |||
U32 currPos = (U32)(rawSeqStore->posInSequence + nbBytes); | |||
while (currPos && rawSeqStore->pos < rawSeqStore->size) { | |||
rawSeq currSeq = rawSeqStore->seq[rawSeqStore->pos]; | |||
if (currPos >= currSeq.litLength + currSeq.matchLength) { | |||
currPos -= currSeq.litLength + currSeq.matchLength; | |||
rawSeqStore->pos++; | |||
} else { | |||
rawSeqStore->posInSequence = currPos; | |||
break; | |||
} | |||
} | |||
if (currPos == 0 || rawSeqStore->pos == rawSeqStore->size) { | |||
rawSeqStore->posInSequence = 0; | |||
} | |||
} | |||
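Worked example on a hypothetical store: with sequences {litLength 5, matchLength 10} and {litLength 3, matchLength 20}, pos = 0 and posInSequence = 0, skipping 18 bytes consumes the first sequence entirely (18 >= 15, leaving currPos = 3 and pos = 1) and then stops inside the second one with posInSequence = 3.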
size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore, | |||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], | |||
ZSTD_paramSwitch_e useRowMatchFinder, | |||
void const* src, size_t srcSize) | |||
{ | |||
const ZSTD_compressionParameters* const cParams = &ms->cParams; | |||
unsigned const minMatch = cParams->minMatch; | |||
ZSTD_blockCompressor const blockCompressor = | |||
ZSTD_selectBlockCompressor(cParams->strategy, ZSTD_matchState_dictMode(ms)); | |||
ZSTD_selectBlockCompressor(cParams->strategy, useRowMatchFinder, ZSTD_matchState_dictMode(ms)); | |||
/* Input bounds */ | |||
BYTE const* const istart = (BYTE const*)src; | |||
BYTE const* const iend = istart + srcSize; | |||
@@ -577,9 +673,18 @@ size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore, | |||
BYTE const* ip = istart; | |||
DEBUGLOG(5, "ZSTD_ldm_blockCompress: srcSize=%zu", srcSize); | |||
/* If using opt parser, use LDMs only as candidates rather than always accepting them */ | |||
if (cParams->strategy >= ZSTD_btopt) { | |||
size_t lastLLSize; | |||
ms->ldmSeqStore = rawSeqStore; | |||
lastLLSize = blockCompressor(ms, seqStore, rep, src, srcSize); | |||
ZSTD_ldm_skipRawSeqStoreBytes(rawSeqStore, srcSize); | |||
return lastLLSize; | |||
} | |||
assert(rawSeqStore->pos <= rawSeqStore->size); | |||
assert(rawSeqStore->size <= rawSeqStore->capacity); | |||
/* Loop through each sequence and apply the block compressor to the lits */ | |||
/* Loop through each sequence and apply the block compressor to the literals */ | |||
while (rawSeqStore->pos < rawSeqStore->size && ip < iend) { | |||
/* maybeSplitSequence updates rawSeqStore->pos */ | |||
rawSeq const sequence = maybeSplitSequence(rawSeqStore, | |||
@@ -606,8 +711,8 @@ size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore, | |||
rep[0] = sequence.offset; | |||
/* Store the sequence */ | |||
ZSTD_storeSeq(seqStore, newLitLength, ip - newLitLength, iend, | |||
sequence.offset + ZSTD_REP_MOVE, | |||
sequence.matchLength - MINMATCH); | |||
OFFSET_TO_OFFBASE(sequence.offset), | |||
sequence.matchLength); | |||
ip += sequence.matchLength; | |||
} | |||
} |
@@ -1,5 +1,5 @@ | |||
/* | |||
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. | |||
* Copyright (c) Meta Platforms, Inc. and affiliates. | |||
* All rights reserved. | |||
* | |||
* This source code is licensed under both the BSD-style license (found in the | |||
@@ -66,6 +66,7 @@ size_t ZSTD_ldm_generateSequences( | |||
*/ | |||
size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore, | |||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], | |||
ZSTD_paramSwitch_e useRowMatchFinder, | |||
void const* src, size_t srcSize); | |||
/** | |||
@@ -73,11 +74,17 @@ size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore, | |||
* | |||
* Skip past `srcSize` bytes worth of sequences in `rawSeqStore`. | |||
* Avoids emitting matches less than `minMatch` bytes. | |||
* Must be called for data with is not passed to ZSTD_ldm_blockCompress(). | |||
* Must be called for data that is not passed to ZSTD_ldm_blockCompress(). | |||
*/ | |||
void ZSTD_ldm_skipSequences(rawSeqStore_t* rawSeqStore, size_t srcSize, | |||
U32 const minMatch); | |||
/* ZSTD_ldm_skipRawSeqStoreBytes(): | |||
* Moves forward in rawSeqStore by nbBytes, updating fields 'pos' and 'posInSequence'. | |||
* Not to be used in conjunction with ZSTD_ldm_skipSequences(). | |||
 * Must be called for data that is not passed to ZSTD_ldm_blockCompress(). | |||
*/ | |||
void ZSTD_ldm_skipRawSeqStoreBytes(rawSeqStore_t* rawSeqStore, size_t nbBytes); | |||
/** ZSTD_ldm_getTableSize() : | |||
* Estimate the space needed for long distance matching tables or 0 if LDM is |
@@ -0,0 +1,106 @@ | |||
/* | |||
* Copyright (c) Meta Platforms, Inc. and affiliates. | |||
* All rights reserved. | |||
* | |||
* This source code is licensed under both the BSD-style license (found in the | |||
* LICENSE file in the root directory of this source tree) and the GPLv2 (found | |||
* in the COPYING file in the root directory of this source tree). | |||
* You may select, at your option, one of the above-listed licenses. | |||
*/ | |||
#ifndef ZSTD_LDM_GEARTAB_H | |||
#define ZSTD_LDM_GEARTAB_H | |||
#include "compiler.h" /* UNUSED_ATTR */ | |||
#include "mem.h" /* U64 */ | |||
static UNUSED_ATTR const U64 ZSTD_ldm_gearTab[256] = { | |||
0xf5b8f72c5f77775c, 0x84935f266b7ac412, 0xb647ada9ca730ccc, | |||
0xb065bb4b114fb1de, 0x34584e7e8c3a9fd0, 0x4e97e17c6ae26b05, | |||
0x3a03d743bc99a604, 0xcecd042422c4044f, 0x76de76c58524259e, | |||
0x9c8528f65badeaca, 0x86563706e2097529, 0x2902475fa375d889, | |||
0xafb32a9739a5ebe6, 0xce2714da3883e639, 0x21eaf821722e69e, | |||
0x37b628620b628, 0x49a8d455d88caf5, 0x8556d711e6958140, | |||
0x4f7ae74fc605c1f, 0x829f0c3468bd3a20, 0x4ffdc885c625179e, | |||
0x8473de048a3daf1b, 0x51008822b05646b2, 0x69d75d12b2d1cc5f, | |||
0x8c9d4a19159154bc, 0xc3cc10f4abbd4003, 0xd06ddc1cecb97391, | |||
0xbe48e6e7ed80302e, 0x3481db31cee03547, 0xacc3f67cdaa1d210, | |||
0x65cb771d8c7f96cc, 0x8eb27177055723dd, 0xc789950d44cd94be, | |||
0x934feadc3700b12b, 0x5e485f11edbdf182, 0x1e2e2a46fd64767a, | |||
0x2969ca71d82efa7c, 0x9d46e9935ebbba2e, 0xe056b67e05e6822b, | |||
0x94d73f55739d03a0, 0xcd7010bdb69b5a03, 0x455ef9fcd79b82f4, | |||
0x869cb54a8749c161, 0x38d1a4fa6185d225, 0xb475166f94bbe9bb, | |||
0xa4143548720959f1, 0x7aed4780ba6b26ba, 0xd0ce264439e02312, | |||
0x84366d746078d508, 0xa8ce973c72ed17be, 0x21c323a29a430b01, | |||
0x9962d617e3af80ee, 0xab0ce91d9c8cf75b, 0x530e8ee6d19a4dbc, | |||
0x2ef68c0cf53f5d72, 0xc03a681640a85506, 0x496e4e9f9c310967, | |||
0x78580472b59b14a0, 0x273824c23b388577, 0x66bf923ad45cb553, | |||
0x47ae1a5a2492ba86, 0x35e304569e229659, 0x4765182a46870b6f, | |||
0x6cbab625e9099412, 0xddac9a2e598522c1, 0x7172086e666624f2, | |||
0xdf5003ca503b7837, 0x88c0c1db78563d09, 0x58d51865acfc289d, | |||
0x177671aec65224f1, 0xfb79d8a241e967d7, 0x2be1e101cad9a49a, | |||
0x6625682f6e29186b, 0x399553457ac06e50, 0x35dffb4c23abb74, | |||
0x429db2591f54aade, 0xc52802a8037d1009, 0x6acb27381f0b25f3, | |||
0xf45e2551ee4f823b, 0x8b0ea2d99580c2f7, 0x3bed519cbcb4e1e1, | |||
0xff452823dbb010a, 0x9d42ed614f3dd267, 0x5b9313c06257c57b, | |||
0xa114b8008b5e1442, 0xc1fe311c11c13d4b, 0x66e8763ea34c5568, | |||
0x8b982af1c262f05d, 0xee8876faaa75fbb7, 0x8a62a4d0d172bb2a, | |||
0xc13d94a3b7449a97, 0x6dbbba9dc15d037c, 0xc786101f1d92e0f1, | |||
0xd78681a907a0b79b, 0xf61aaf2962c9abb9, 0x2cfd16fcd3cb7ad9, | |||
0x868c5b6744624d21, 0x25e650899c74ddd7, 0xba042af4a7c37463, | |||
0x4eb1a539465a3eca, 0xbe09dbf03b05d5ca, 0x774e5a362b5472ba, | |||
0x47a1221229d183cd, 0x504b0ca18ef5a2df, 0xdffbdfbde2456eb9, | |||
0x46cd2b2fbee34634, 0xf2aef8fe819d98c3, 0x357f5276d4599d61, | |||
0x24a5483879c453e3, 0x88026889192b4b9, 0x28da96671782dbec, | |||
0x4ef37c40588e9aaa, 0x8837b90651bc9fb3, 0xc164f741d3f0e5d6, | |||
0xbc135a0a704b70ba, 0x69cd868f7622ada, 0xbc37ba89e0b9c0ab, | |||
0x47c14a01323552f6, 0x4f00794bacee98bb, 0x7107de7d637a69d5, | |||
0x88af793bb6f2255e, 0xf3c6466b8799b598, 0xc288c616aa7f3b59, | |||
0x81ca63cf42fca3fd, 0x88d85ace36a2674b, 0xd056bd3792389e7, | |||
0xe55c396c4e9dd32d, 0xbefb504571e6c0a6, 0x96ab32115e91e8cc, | |||
0xbf8acb18de8f38d1, 0x66dae58801672606, 0x833b6017872317fb, | |||
0xb87c16f2d1c92864, 0xdb766a74e58b669c, 0x89659f85c61417be, | |||
0xc8daad856011ea0c, 0x76a4b565b6fe7eae, 0xa469d085f6237312, | |||
0xaaf0365683a3e96c, 0x4dbb746f8424f7b8, 0x638755af4e4acc1, | |||
0x3d7807f5bde64486, 0x17be6d8f5bbb7639, 0x903f0cd44dc35dc, | |||
0x67b672eafdf1196c, 0xa676ff93ed4c82f1, 0x521d1004c5053d9d, | |||
0x37ba9ad09ccc9202, 0x84e54d297aacfb51, 0xa0b4b776a143445, | |||
0x820d471e20b348e, 0x1874383cb83d46dc, 0x97edeec7a1efe11c, | |||
0xb330e50b1bdc42aa, 0x1dd91955ce70e032, 0xa514cdb88f2939d5, | |||
0x2791233fd90db9d3, 0x7b670a4cc50f7a9b, 0x77c07d2a05c6dfa5, | |||
0xe3778b6646d0a6fa, 0xb39c8eda47b56749, 0x933ed448addbef28, | |||
0xaf846af6ab7d0bf4, 0xe5af208eb666e49, 0x5e6622f73534cd6a, | |||
0x297daeca42ef5b6e, 0x862daef3d35539a6, 0xe68722498f8e1ea9, | |||
0x981c53093dc0d572, 0xfa09b0bfbf86fbf5, 0x30b1e96166219f15, | |||
0x70e7d466bdc4fb83, 0x5a66736e35f2a8e9, 0xcddb59d2b7c1baef, | |||
0xd6c7d247d26d8996, 0xea4e39eac8de1ba3, 0x539c8bb19fa3aff2, | |||
0x9f90e4c5fd508d8, 0xa34e5956fbaf3385, 0x2e2f8e151d3ef375, | |||
0x173691e9b83faec1, 0xb85a8d56bf016379, 0x8382381267408ae3, | |||
0xb90f901bbdc0096d, 0x7c6ad32933bcec65, 0x76bb5e2f2c8ad595, | |||
0x390f851a6cf46d28, 0xc3e6064da1c2da72, 0xc52a0c101cfa5389, | |||
0xd78eaf84a3fbc530, 0x3781b9e2288b997e, 0x73c2f6dea83d05c4, | |||
0x4228e364c5b5ed7, 0x9d7a3edf0da43911, 0x8edcfeda24686756, | |||
0x5e7667a7b7a9b3a1, 0x4c4f389fa143791d, 0xb08bc1023da7cddc, | |||
0x7ab4be3ae529b1cc, 0x754e6132dbe74ff9, 0x71635442a839df45, | |||
0x2f6fb1643fbe52de, 0x961e0a42cf7a8177, 0xf3b45d83d89ef2ea, | |||
0xee3de4cf4a6e3e9b, 0xcd6848542c3295e7, 0xe4cee1664c78662f, | |||
0x9947548b474c68c4, 0x25d73777a5ed8b0b, 0xc915b1d636b7fc, | |||
0x21c2ba75d9b0d2da, 0x5f6b5dcf608a64a1, 0xdcf333255ff9570c, | |||
0x633b922418ced4ee, 0xc136dde0b004b34a, 0x58cc83b05d4b2f5a, | |||
0x5eb424dda28e42d2, 0x62df47369739cd98, 0xb4e0b42485e4ce17, | |||
0x16e1f0c1f9a8d1e7, 0x8ec3916707560ebf, 0x62ba6e2df2cc9db3, | |||
0xcbf9f4ff77d83a16, 0x78d9d7d07d2bbcc4, 0xef554ce1e02c41f4, | |||
0x8d7581127eccf94d, 0xa9b53336cb3c8a05, 0x38c42c0bf45c4f91, | |||
0x640893cdf4488863, 0x80ec34bc575ea568, 0x39f324f5b48eaa40, | |||
0xe9d9ed1f8eff527f, 0x9224fc058cc5a214, 0xbaba00b04cfe7741, | |||
0x309a9f120fcf52af, 0xa558f3ec65626212, 0x424bec8b7adabe2f, | |||
0x41622513a6aea433, 0xb88da2d5324ca798, 0xd287733b245528a4, | |||
0x9a44697e6d68aec3, 0x7b1093be2f49bb28, 0x50bbec632e3d8aad, | |||
0x6cd90723e1ea8283, 0x897b9e7431b02bf3, 0x219efdcb338a7047, | |||
0x3b0311f0a27c0656, 0xdb17bf91c0db96e7, 0x8cd4fd6b4e85a5b2, | |||
0xfab071054ba6409d, 0x40d6fe831fa9dfd9, 0xaf358debad7d791e, | |||
0xeb8d0e25a65e3e58, 0xbbcbd3df14e08580, 0xcf751f27ecdab2b, | |||
0x2b4da14f2613d8f4 | |||
}; | |||
#endif /* ZSTD_LDM_GEARTAB_H */ |
@@ -1,5 +1,5 @@ | |||
/* | |||
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. | |||
* Copyright (c) Meta Platforms, Inc. and affiliates. | |||
* All rights reserved. | |||
* | |||
* This source code is licensed under both the BSD-style license (found in the |
@@ -0,0 +1,163 @@ | |||
/* | |||
* Copyright (c) Meta Platforms, Inc. and affiliates. | |||
* All rights reserved. | |||
* | |||
* This source code is licensed under both the BSD-style license (found in the | |||
* LICENSE file in the root directory of this source tree) and the GPLv2 (found | |||
* in the COPYING file in the root directory of this source tree). | |||
* You may select, at your option, one of the above-listed licenses. | |||
*/ | |||
#ifndef ZSTD_TRACE_H | |||
#define ZSTD_TRACE_H | |||
#if defined (__cplusplus) | |||
extern "C" { | |||
#endif | |||
#include <stddef.h> | |||
/* weak symbol support | |||
* For now, enable conservatively: | |||
* - Only GNUC | |||
* - Only ELF | |||
* - Only x86-64, i386 and aarch64 | |||
* Also, explicitly disable on platforms known not to work so they aren't | |||
* forgotten in the future. | |||
*/ | |||
#if !defined(ZSTD_HAVE_WEAK_SYMBOLS) && \ | |||
defined(__GNUC__) && defined(__ELF__) && \ | |||
(defined(__x86_64__) || defined(_M_X64) || defined(__i386__) || defined(_M_IX86) || defined(__aarch64__)) && \ | |||
!defined(__APPLE__) && !defined(_WIN32) && !defined(__MINGW32__) && \ | |||
!defined(__CYGWIN__) && !defined(_AIX) | |||
# define ZSTD_HAVE_WEAK_SYMBOLS 1 | |||
#else | |||
# define ZSTD_HAVE_WEAK_SYMBOLS 0 | |||
#endif | |||
#if ZSTD_HAVE_WEAK_SYMBOLS | |||
# define ZSTD_WEAK_ATTR __attribute__((__weak__)) | |||
#else | |||
# define ZSTD_WEAK_ATTR | |||
#endif | |||
/* Only enable tracing when weak symbols are available. */ | |||
#ifndef ZSTD_TRACE | |||
# define ZSTD_TRACE ZSTD_HAVE_WEAK_SYMBOLS | |||
#endif | |||
#if ZSTD_TRACE | |||
struct ZSTD_CCtx_s; | |||
struct ZSTD_DCtx_s; | |||
struct ZSTD_CCtx_params_s; | |||
typedef struct { | |||
/** | |||
* ZSTD_VERSION_NUMBER | |||
* | |||
* This is guaranteed to be the first member of ZSTD_trace. | |||
* Otherwise, this struct is not stable between versions. If | |||
* the version number does not match your expectation, you | |||
* should not interpret the rest of the struct. | |||
*/ | |||
unsigned version; | |||
/** | |||
* Non-zero if streaming (de)compression is used. | |||
*/ | |||
unsigned streaming; | |||
/** | |||
* The dictionary ID. | |||
*/ | |||
unsigned dictionaryID; | |||
/** | |||
* Is the dictionary cold? | |||
* Only set on decompression. | |||
*/ | |||
unsigned dictionaryIsCold; | |||
/** | |||
* The dictionary size or zero if no dictionary. | |||
*/ | |||
size_t dictionarySize; | |||
/** | |||
* The uncompressed size of the data. | |||
*/ | |||
size_t uncompressedSize; | |||
/** | |||
* The compressed size of the data. | |||
*/ | |||
size_t compressedSize; | |||
/** | |||
* The fully resolved CCtx parameters (NULL on decompression). | |||
*/ | |||
struct ZSTD_CCtx_params_s const* params; | |||
/** | |||
* The ZSTD_CCtx pointer (NULL on decompression). | |||
*/ | |||
struct ZSTD_CCtx_s const* cctx; | |||
/** | |||
* The ZSTD_DCtx pointer (NULL on compression). | |||
*/ | |||
struct ZSTD_DCtx_s const* dctx; | |||
} ZSTD_Trace; | |||
/** | |||
* A tracing context. It must be 0 when tracing is disabled. | |||
* Otherwise, any non-zero value returned by a tracing begin() | |||
* function is presented to any subsequent calls to end(). | |||
* | |||
 * Any non-zero value is treated as meaning that tracing is enabled, and is | |||
 * not interpreted by the library. | |||
* | |||
* Two possible uses are: | |||
* * A timestamp for when the begin() function was called. | |||
* * A unique key identifying the (de)compression, like the | |||
* address of the [dc]ctx pointer if you need to track | |||
* more information than just a timestamp. | |||
*/ | |||
typedef unsigned long long ZSTD_TraceCtx; | |||
/** | |||
* Trace the beginning of a compression call. | |||
 * @param cctx The cctx pointer for the compression. | |||
* It can be used as a key to map begin() to end(). | |||
* @returns Non-zero if tracing is enabled. The return value is | |||
* passed to ZSTD_trace_compress_end(). | |||
*/ | |||
ZSTD_WEAK_ATTR ZSTD_TraceCtx ZSTD_trace_compress_begin( | |||
struct ZSTD_CCtx_s const* cctx); | |||
/** | |||
* Trace the end of a compression call. | |||
* @param ctx The return value of ZSTD_trace_compress_begin(). | |||
* @param trace The zstd tracing info. | |||
*/ | |||
ZSTD_WEAK_ATTR void ZSTD_trace_compress_end( | |||
ZSTD_TraceCtx ctx, | |||
ZSTD_Trace const* trace); | |||
/** | |||
* Trace the beginning of a decompression call. | |||
* @param dctx The dctx pointer for the decompression. | |||
* It can be used as a key to map begin() to end(). | |||
* @returns Non-zero if tracing is enabled. The return value is | |||
 * passed to ZSTD_trace_decompress_end(). | |||
*/ | |||
ZSTD_WEAK_ATTR ZSTD_TraceCtx ZSTD_trace_decompress_begin( | |||
struct ZSTD_DCtx_s const* dctx); | |||
/** | |||
* Trace the end of a decompression call. | |||
* @param ctx The return value of ZSTD_trace_decompress_begin(). | |||
* @param trace The zstd tracing info. | |||
*/ | |||
ZSTD_WEAK_ATTR void ZSTD_trace_decompress_end( | |||
ZSTD_TraceCtx ctx, | |||
ZSTD_Trace const* trace); | |||
#endif /* ZSTD_TRACE */ | |||
#if defined (__cplusplus) | |||
} | |||
#endif | |||
#endif /* ZSTD_TRACE_H */ |
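Because the hooks above are declared weak, an application can observe every compression call by linking in strong definitions. A minimal application-side sketch (assumed file, not part of the library; the decompression hooks would be overridden analogously):

#include <stdio.h>
#include <zstd.h>        /* ZSTD_VERSION_NUMBER */
#include "zstd_trace.h"

ZSTD_TraceCtx ZSTD_trace_compress_begin(struct ZSTD_CCtx_s const* cctx)
{
    (void)cctx;
    return 1; /* any non-zero value enables tracing for this call */
}

void ZSTD_trace_compress_end(ZSTD_TraceCtx ctx, ZSTD_Trace const* trace)
{
    (void)ctx;
    if (trace->version != ZSTD_VERSION_NUMBER)
        return; /* struct layout is only guaranteed within one version */
    fprintf(stderr, "zstd: %zu -> %zu bytes%s\n",
            trace->uncompressedSize, trace->compressedSize,
            trace->streaming ? " (streaming)" : "");
}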