@@ -0,0 +1,555 @@ | |||
v1.4.5 | |||
fix : Compression ratio regression on huge files (> 3 GB) using high levels (--ultra) and multithreading, by @terrelln | |||
perf: Improved decompression speed: x64 : +10% (clang) / +5% (gcc); ARM : from +15% to +50%, depending on SoC, by @terrelln | |||
perf: Automatically downsizes ZSTD_DCtx when too large for too long (#2069, by @bimbashrestha) | |||
perf: Improved fast compression speed on aarch64 (#2040, ~+3%, by @caoyzh) | |||
perf: Small level 1 compression speed gains (depending on compiler) | |||
cli : New --patch-from command, create and apply patches from files, by @bimbashrestha | |||
cli : New --filelist= : Provide a list of files to operate upon from a file | |||
cli : -b -d command can now benchmark decompression on multiple files | |||
cli : New --no-content-size command | |||
cli : New --show-default-cparams information command | |||
api : ZDICT_finalizeDictionary() is promoted to stable (#2111) | |||
api : new experimental parameter ZSTD_d_stableOutBuffer (#2094) | |||
build: Generate a single-file libzstd library (#2065, by @cwoffenden) | |||
build: Relative includes no longer require -I compiler flags for zstd lib subdirs (#2103, by @felixhandte) | |||
build: zstd now compiles cleanly under -pedantic (#2099) | |||
build: zstd now compiles with make-4.3 | |||
build: Support mingw cross-compilation from Linux, by @Ericson2314 | |||
build: Meson multi-thread build fix on windows | |||
build: Some misc icc fixes backed by new ci test on travis | |||
misc: bitflip analyzer tool, by @felixhandte | |||
misc: Extend largeNbDicts benchmark to compression | |||
misc: Edit-distance match finder in contrib/ | |||
doc : Improved beginner CONTRIBUTING.md docs | |||
doc : New issue templates for zstd | |||
v1.4.4 | |||
perf: Improved decompression speed, by > 10%, by @terrelln | |||
perf: Better compression speed when re-using a context, by @felixhandte | |||
perf: Fix compression ratio when compressing large files with small dictionary, by @senhuang42 | |||
perf: zstd reference encoder can generate RLE blocks, by @bimbashrestha | |||
perf: minor generic speed optimization, by @davidbolvansky | |||
api: new ability to extract sequences from the parser for analysis, by @bimbashrestha | |||
api: fixed decoding of magic-less frames, by @terrelln | |||
api: fixed ZSTD_initCStream_advanced() performance with fast modes, reported by @QrczakMK | |||
cli: Named pipes support, by @bimbashrestha | |||
cli: short tar's extension support, by @stokito | |||
cli: command --output-dir-flat= , generates target files into requested directory, by @senhuang42 | |||
cli: commands --stream-size=# and --size-hint=#, by @nmagerko | |||
cli: command --exclude-compressed, by @shashank0791 | |||
cli: faster `-t` test mode | |||
cli: improved some error messages, by @vangyzen | |||
cli: fix command `-D dictionary` on Windows, reported by @artyompetrov | |||
cli: fix rare deadlock condition within dictionary builder, by @terrelln | |||
build: single-file decoder with emscripten compilation script, by @cwoffenden | |||
build: fixed zlibWrapper compilation on Visual Studio, reported by @bluenlive | |||
build: fixed deprecation warning for certain gcc version, reported by @jasonma163 | |||
build: fix compilation on old gcc versions, by @cemeyer | |||
build: improved installation directories for cmake script, by Dmitri Shubin | |||
pack: modified pkgconfig, for better integration into openwrt, requested by @neheb | |||
misc: Improved documentation : ZSTD_CLEVEL, DYNAMIC_BMI2, ZSTD_CDict, function deprecation, zstd format | |||
misc: fixed educational decoder : accept larger literals section, and removed UNALIGNED() macro | |||
v1.4.3 | |||
bug: Fix Dictionary Compression Ratio Regression by @cyan4973 (#1709) | |||
bug: Fix Buffer Overflow in legacy v0.3 decompression by @felixhandte (#1722) | |||
build: Add support for IAR C/C++ Compiler for Arm by @joseph0918 (#1705) | |||
v1.4.2 | |||
bug: Fix bug in zstd-0.5 decoder by @terrelln (#1696) | |||
bug: Fix seekable decompression in-memory API by @iburinoc (#1695) | |||
misc: Validate blocks are smaller than size limit by @vivekmg (#1685) | |||
misc: Restructure source files by @ephiepark (#1679) | |||
v1.4.1 | |||
bug: Fix data corruption in niche use cases by @terrelln (#1659) | |||
bug: Fuzz legacy modes, fix uncovered bugs by @terrelln (#1593, #1594, #1595) | |||
bug: Fix out of bounds read by @terrelln (#1590) | |||
perf: Improve decode speed by ~7% @mgrice (#1668) | |||
perf: Slightly improved compression ratio of level 3 and 4 (ZSTD_dfast) by @cyan4973 (#1681) | |||
perf: Slightly faster compression speed when re-using a context by @cyan4973 (#1658) | |||
perf: Improve compression ratio for small windowLog by @cyan4973 (#1624) | |||
perf: Faster compression speed in high compression mode for repetitive data by @terrelln (#1635) | |||
api: Add parameter to generate smaller dictionaries by @tyler-tran (#1656) | |||
cli: Recognize symlinks when built in C99 mode by @felixhandte (#1640) | |||
cli: Expose cpu load indicator for each file on -vv mode by @ephiepark (#1631) | |||
cli: Restrict read permissions on destination files by @chungy (#1644) | |||
cli: zstdgrep: handle -f flag by @felixhandte (#1618) | |||
cli: zstdcat: follow symlinks by @vejnar (#1604) | |||
doc: Remove extra size limit on compressed blocks by @felixhandte (#1689) | |||
doc: Fix typo by @yk-tanigawa (#1633) | |||
doc: Improve documentation on streaming buffer sizes by @cyan4973 (#1629) | |||
build: CMake: support building with LZ4 @leeyoung624 (#1626) | |||
build: CMake: install zstdless and zstdgrep by @leeyoung624 (#1647) | |||
build: CMake: respect existing uninstall target by @j301scott (#1619) | |||
build: Make: skip multithread tests when built without support by @michaelforney (#1620) | |||
build: Make: Fix examples/ test target by @sjnam (#1603) | |||
build: Meson: rename options out of deprecated namespace by @lzutao (#1665) | |||
build: Meson: fix build by @lzutao (#1602) | |||
build: Visual Studio: don't export symbols in static lib by @scharan (#1650) | |||
build: Visual Studio: fix linking by @absotively (#1639) | |||
build: Fix MinGW-W64 build by @myzhang1029 (#1600) | |||
misc: Expand decodecorpus coverage by @ephiepark (#1664) | |||
v1.4.0 | |||
perf: Improve level 1 compression speed in most scenarios by 6% by @gbtucker and @terrelln | |||
api: Move the advanced API, including all functions in the staging section, to the stable section | |||
api: Make ZSTD_e_flush and ZSTD_e_end block for maximum forward progress | |||
api: Rename ZSTD_CCtxParam_getParameter to ZSTD_CCtxParams_getParameter | |||
api: Rename ZSTD_CCtxParam_setParameter to ZSTD_CCtxParams_setParameter | |||
api: Don't export ZSTDMT functions from the shared library by default | |||
api: Require ZSTD_MULTITHREAD to be defined to use ZSTDMT | |||
api: Add ZSTD_decompressBound() to provide an upper bound on decompressed size by @shakeelrao | |||
api: Fix ZSTD_decompressDCtx() corner cases with a dictionary | |||
api: Move ZSTD_getDictID_*() functions to the stable section | |||
api: Add ZSTD_c_literalCompressionMode flag to enable or disable literal compression by @terrelln | |||
api: Allow compression parameters to be set when a dictionary is used | |||
api: Allow setting parameters before or after ZSTD_CCtx_loadDictionary() is called | |||
api: Fix ZSTD_estimateCStreamSize_usingCCtxParams() | |||
api: Setting ZSTD_d_maxWindowLog to 0 means use the default | |||
cli: Ensure that a dictionary is not used to compress itself by @shakeelrao | |||
cli: Add --[no-]compress-literals flag to enable or disable literal compression | |||
doc: Update the examples to use the advanced API | |||
doc: Explain how to transition from old streaming functions to the advanced API in the header | |||
build: Improve the Windows release packages | |||
build: Improve CMake build by @hjmjohnson | |||
build: Build fixes for FreeBSD by @lwhsu | |||
build: Remove redundant warnings by @thatsafunnyname | |||
build: Fix tests on OpenBSD by @bket | |||
build: Extend fuzzer build system to work with the new clang engine | |||
build: CMake now creates the libzstd.so.1 symlink | |||
build: Improve Meson build by @lzutao | |||
misc: Fix symbolic link detection on FreeBSD | |||
misc: Use physical core count for -T0 on FreeBSD by @cemeyer | |||
misc: Fix zstd --list on truncated files by @kostmo | |||
misc: Improve logging in debug mode by @felixhandte | |||
misc: Add CirrusCI tests by @lwhsu | |||
misc: Optimize dictionary memory usage in corner cases | |||
misc: Improve the dictionary builder on small or homogeneous data | |||
misc: Fix spelling across the repo by @jsoref | |||
v1.3.8 | |||
perf: better decompression speed on large files (+7%) and cold dictionaries (+15%) | |||
perf: slightly better compression ratio at high compression modes | |||
api : finalized advanced API, last stage before "stable" status | |||
api : new --rsyncable mode, by @terrelln | |||
api : support decompression of empty frames into NULL (used to be an error) (#1385) | |||
build: new set of macros to build a minimal size decoder, by @felixhandte | |||
build: fix compilation on MIPS32, reported by @clbr (#1441) | |||
build: fix compilation with multiple -arch flags, by @ryandesign | |||
build: highly upgraded meson build, by @lzutao | |||
build: improved buck support, by @obelisk | |||
build: fix cmake script : can create debug build, by @pitrou | |||
build: Makefile : grep works on both colored consoles and systems without color support | |||
build: fixed zstd-pgo, by @bmwiedemann | |||
cli : support ZSTD_CLEVEL environment variable, by @yijinfb (#1423) | |||
cli : --no-progress flag, preserving final summary (#1371), by @terrelln | |||
cli : ensure destination file is not source file (#1422) | |||
cli : clearer error messages, especially when input file not present | |||
doc : clarified zstd_compression_format.md, by @ulikunitz | |||
misc: fixed zstdgrep, returns 1 on failure, by @lzutao | |||
misc: NEWS renamed as CHANGELOG, in accordance with fboss | |||
v1.3.7 | |||
perf: slightly better decompression speed on clang (depending on hardware target) | |||
fix : performance of dictionary compression for small input < 4 KB at levels 9 and 10 | |||
build: no longer build backtrace by default in release mode; restrict further automatic mode | |||
build: control backtrace support through build macro BACKTRACE | |||
misc: added man pages for zstdless and zstdgrep, by @samrussell | |||
v1.3.6 | |||
perf: much faster dictionary builder, by @jenniferliu | |||
perf: faster dictionary compression on small data when using multiple contexts, by @felixhandte | |||
perf: faster dictionary decompression when using a very large number of dictionaries simultaneously | |||
cli : fix : does no longer overwrite destination when source does not exist (#1082) | |||
cli : new command --adapt, for automatic compression level adaptation | |||
api : fix : block api can be streamed with > 4 GB, reported by @catid | |||
api : reduced ZSTD_DDict size by 2 KB | |||
api : minimum negative compression level is defined, and can be queried using ZSTD_minCLevel(). | |||
build: support Haiku target, by @korli | |||
build: Read Legacy format is limited to v0.5+ by default. Can be changed at compile time with macro ZSTD_LEGACY_SUPPORT. | |||
doc : zstd_compression_format.md updated to match wording in IETF RFC 8478 | |||
misc: tests/paramgrill, a parameter optimizer, by @GeorgeLu97 | |||
v1.3.5 | |||
perf: much faster dictionary compression, by @felixhandte | |||
perf: small quality improvement for dictionary generation, by @terrelln | |||
perf: slightly improved high compression levels (notably level 19) | |||
mem : automatic memory release for long duration contexts | |||
cli : fix : overlapLog can be manually set | |||
cli : fix : decoding invalid lz4 frames | |||
api : fix : performance degradation for dictionary compression when using advanced API, by @terrelln | |||
api : change : clarify ZSTD_CCtx_reset() vs ZSTD_CCtx_resetParameters(), by @terrelln | |||
build: select custom libzstd scope through control macros, by @GeorgeLu97 | |||
build: OpenBSD patch, by @bket | |||
build: make and make all are compatible with -j | |||
doc : clarify zstd_compression_format.md, updated for IETF RFC process | |||
misc: pzstd compatible with reproducible compilation, by @lamby | |||
v1.3.4 | |||
perf: faster speed (especially decoding speed) on recent cpus (haswell+) | |||
perf: much better performance associating --long with multi-threading, by @terrelln | |||
perf: better compression at levels 13-15 | |||
cli : asynchronous compression by default, for faster experience (use --single-thread for former behavior) | |||
cli : smoother status report in multi-threading mode | |||
cli : added command --fast=#, for faster compression modes | |||
cli : fix crash when not overwriting existing files, by Pádraig Brady (@pixelb) | |||
api : `nbThreads` becomes `nbWorkers` : 1 triggers asynchronous mode | |||
api : compression levels can be negative, for even more speed | |||
api : ZSTD_getFrameProgression() : get precise progress status of ZSTDMT anytime | |||
api : ZSTDMT can accept new compression parameters during compression | |||
api : implemented all advanced dictionary decompression prototypes | |||
build: improved meson recipe, by Shawn Landden (@shawnl) | |||
build: VS2017 scripts, by @HaydnTrigg | |||
misc: all /contrib projects fixed | |||
misc: added /contrib/docker script by @gyscos | |||
v1.3.3 | |||
perf: faster zstd_opt strategy (levels 16-19) | |||
fix : bug #944 : multithreading with shared dictionary and large data, reported by @gsliepen | |||
cli : fix : content size written in header by default | |||
cli : fix : improved LZ4 format support, by @felixhandte | |||
cli : new : hidden command `-S`, to benchmark multiple files while generating one result per file | |||
api : fix : support large skippable frames, by @terrelln | |||
api : fix : streaming interface was adding a useless 3-bytes null block to small frames | |||
api : change : when setting `pledgedSrcSize`, use `ZSTD_CONTENTSIZE_UNKNOWN` macro value to mean "unknown" | |||
build: fix : compilation under rhel6 and centos6, reported by @pixelb | |||
build: added `check` target | |||
v1.3.2 | |||
new : long range mode, using --long command, by Stella Lau (@stellamplau) | |||
new : ability to generate and decode magicless frames (#591) | |||
changed : maximum nb of threads reduced to 200, to avoid address space exhaustion in 32-bits mode | |||
fix : multi-threading compression works with custom allocators | |||
fix : ZSTD_sizeof_CStream() was over-evaluating memory usage | |||
fix : a rare compression bug when compression generates very large distances and bunch of other conditions (only possible at --ultra -22) | |||
fix : 32-bits build can now decode large offsets (levels 21+) | |||
cli : added LZ4 frame support by default, by Felix Handte (@felixhandte) | |||
cli : improved --list output | |||
cli : new : can split input file for dictionary training, using command -B# | |||
cli : new : clean operation artefact on Ctrl-C interruption | |||
cli : fix : do not change /dev/null permissions when using command -t with root access, reported by @mike155 (#851) | |||
cli : fix : write file size in header in multiple-files mode | |||
api : added macro ZSTD_COMPRESSBOUND() for static allocation | |||
api : experimental : new advanced decompression API | |||
api : fix : sizeof_CCtx() used to over-estimate | |||
build: fix : no-multithread variant compiles without pool.c dependency, reported by Mitchell Blank Jr (@mitchblank) (#819) | |||
build: better compatibility with reproducible builds, by Bernhard M. Wiedemann (@bmwiedemann) (#818) | |||
example : added streaming_memory_usage | |||
license : changed /examples license to BSD + GPLv2 | |||
license : fix a few header files to reflect new license (#825) | |||
v1.3.1 | |||
New license : BSD + GPLv2 | |||
perf: substantially decreased memory usage in Multi-threading mode, thanks to reports by Tino Reichardt (@mcmilk) | |||
perf: Multi-threading supports up to 256 threads. Cap at 256 when more are requested (#760) | |||
cli : improved and fixed --list command, by @ib (#772) | |||
cli : command -vV to list supported formats, by @ib (#771) | |||
build : fixed binary variants, reported by @svenha (#788) | |||
build : fix Visual compilation for non x86/x64 targets, reported by Greg Slazinski (@GregSlazinski) (#718) | |||
API exp : breaking change : ZSTD_getframeHeader() provides more information | |||
API exp : breaking change : pinned down values of error codes | |||
doc : fixed huffman example, by Ulrich Kunitz (@ulikunitz) | |||
new : contrib/adaptive-compression, I/O driven compression strength, by Paul Cruz (@paulcruz74) | |||
new : contrib/long_distance_matching, statistics by Stella Lau (@stellamplau) | |||
updated : contrib/linux-kernel, by Nick Terrell (@terrelln) | |||
v1.3.0 | |||
cli : new : `--list` command, by Paul Cruz | |||
cli : changed : xz/lzma support enabled by default | |||
cli : changed : `-t *` continue processing list after a decompression error | |||
API : added : ZSTD_versionString() | |||
API : promoted to stable status : ZSTD_getFrameContentSize(), by Sean Purcell | |||
API exp : new advanced API : ZSTD_compress_generic(), ZSTD_CCtx_setParameter() | |||
API exp : new : API for static or external allocation : ZSTD_initStatic?Ctx() | |||
API exp : added : ZSTD_decompressBegin_usingDDict(), requested by Guy Riddle (#700) | |||
API exp : clarified memory estimation / measurement functions. | |||
API exp : changed : strongest strategy renamed ZSTD_btultra, fastest strategy ZSTD_fast set to 1 | |||
tools : decodecorpus can generate random dictionary-compressed samples, by Paul Cruz | |||
new : contrib/seekable_format, demo and API, by Sean Purcell | |||
changed : contrib/linux-kernel, updated version and license, by Nick Terrell | |||
v1.2.0 | |||
cli : changed : Multithreading enabled by default (use target zstd-nomt or HAVE_THREAD=0 to disable) | |||
cli : new : command -T0 means "detect and use nb of cores", by Sean Purcell | |||
cli : new : zstdmt symlink hardwired to `zstd -T0` | |||
cli : new : command --threads=# (#671) | |||
cli : changed : cover dictionary builder by default, for improved quality, by Nick Terrell | |||
cli : new : commands --train-cover and --train-legacy, to select dictionary algorithm and parameters | |||
cli : experimental targets `zstd4` and `xzstd4`, with support for lz4 format, by Sean Purcell | |||
cli : fix : does not output compressed data on console | |||
cli : fix : ignore symbolic links unless --force specified, | |||
API : breaking change : ZSTD_createCDict_advanced(), only use compressionParameters as argument | |||
API : added : prototypes ZSTD_*_usingCDict_advanced(), for direct control over frameParameters. | |||
API : improved: ZSTDMT_compressCCtx() reduced memory usage | |||
API : fix : ZSTDMT_compressCCtx() now provides srcSize in header (#634) | |||
API : fix : src size stored in frame header is controlled at end of frame | |||
API : fix : enforced consistent rules for pledgedSrcSize==0 (#641) | |||
API : fix : error code "GENERIC" replaced by "dstSizeTooSmall" when appropriate | |||
build: improved cmake script, by @Majlen | |||
build: enabled Multi-threading support for *BSD, by Baptiste Daroussin | |||
tools: updated Paramgrill. Command -O# provides best parameters for sample and speed target. | |||
new : contrib/linux-kernel version, by Nick Terrell | |||
v1.1.4 | |||
cli : new : can compress in *.gz format, using --format=gzip command, by Przemyslaw Skibinski | |||
cli : new : advanced benchmark command --priority=rt | |||
cli : fix : write on sparse-enabled file systems in 32-bits mode, by @ds77 | |||
cli : fix : --rm remains silent when input is stdin | |||
cli : experimental : xzstd, with support for xz/lzma decoding, by Przemyslaw Skibinski | |||
speed : improved decompression speed in streaming mode for single shot scenarios (+5%) | |||
memory: DDict (decompression dictionary) memory usage down from 150 KB to 20 KB | |||
arch: 32-bits variant able to generate and decode very long matches (>32 MB), by Sean Purcell | |||
API : new : ZSTD_findFrameCompressedSize(), ZSTD_getFrameContentSize(), ZSTD_findDecompressedSize() | |||
API : changed : dropped support of legacy versions <= v0.3 (can be changed by modifying ZSTD_LEGACY_SUPPORT value) | |||
build : new: meson build system in contrib/meson, by Dima Krasner | |||
build : improved cmake script, by @Majlen | |||
build : added -Wformat-security flag, as recommended by Padraig Brady | |||
doc : new : educational decoder, by Sean Purcell | |||
v1.1.3 | |||
cli : zstd can decompress .gz files (can be disabled with `make zstd-nogz` or `make HAVE_ZLIB=0`) | |||
cli : new : experimental target `make zstdmt`, with multi-threading support | |||
cli : new : improved dictionary builder "cover" (experimental), by Nick Terrell, based on prior work by Giuseppe Ottaviano. | |||
cli : new : advanced commands for detailed parameters, by Przemyslaw Skibinski | |||
cli : fix zstdless on Mac OS-X, by Andrew Janke | |||
cli : fix #232 "compress non-files" | |||
dictBuilder : improved dictionary generation quality, thanks to Nick Terrell | |||
API : new : lib/compress/ZSTDMT_compress.h multithreading API (experimental) | |||
API : new : ZSTD_create?Dict_byReference(), requested by Bartosz Taudul | |||
API : new : ZDICT_finalizeDictionary() | |||
API : fix : ZSTD_initCStream_usingCDict() properly writes dictID into frame header, by Gregory Szorc (#511) | |||
API : fix : all symbols properly exposed in libzstd, by Nick Terrell | |||
build : support for Solaris target, by Przemyslaw Skibinski | |||
doc : clarified specification, by Sean Purcell | |||
v1.1.2 | |||
API : streaming : decompression : changed : automatic implicit reset when chain-decoding new frames without init | |||
API : experimental : added : dictID retrieval functions, and ZSTD_initCStream_srcSize() | |||
API : zbuff : changed : prototypes now generate deprecation warnings | |||
lib : improved : faster decompression speed at ultra compression settings and 32-bits mode | |||
lib : changed : only public ZSTD_ symbols are now exposed | |||
lib : changed : reduced usage of stack memory | |||
lib : fixed : several corner case bugs, by Nick Terrell | |||
cli : new : gzstd, experimental version able to decode .gz files, by Przemyslaw Skibinski | |||
cli : new : preserve file attributes | |||
cli : new : added zstdless and zstdgrep tools | |||
cli : fixed : status displays total amount decoded, even for file consisting of multiple frames (like pzstd) | |||
cli : fixed : zstdcat | |||
zlib_wrapper : added support for gz* functions, by Przemyslaw Skibinski | |||
install : better compatibility with FreeBSD, by Dimitry Andric | |||
source tree : changed : zbuff source files moved to lib/deprecated | |||
v1.1.1 | |||
New : command -M#, --memory=, --memlimit=, --memlimit-decompress= to limit allowed memory consumption | |||
New : doc/zstd_manual.html, by Przemyslaw Skibinski | |||
Improved : slightly better compression ratio at --ultra levels (>= 20) | |||
Improved : better memory usage when using streaming compression API, thanks to @Rogier-5 report | |||
Added : API : ZSTD_initCStream_usingCDict(), ZSTD_initDStream_usingDDict() (experimental section) | |||
Added : example/multiple_streaming_compression.c | |||
Changed : zstd_errors.h is now installed within /include (and replaces errors_public.h) | |||
Updated man page | |||
Fixed : zstd-small, zstd-compress and zstd-decompress compilation targets | |||
v1.1.0 | |||
New : contrib/pzstd, parallel version of zstd, by Nick Terrell | |||
added : NetBSD install target (#338) | |||
Improved : speed for batches of small files | |||
Improved : speed of zlib wrapper, by Przemyslaw Skibinski | |||
Changed : libzstd on Windows supports legacy formats, by Christophe Chevalier | |||
Fixed : CLI -d output to stdout by default when input is stdin (#322) | |||
Fixed : CLI correctly detects console on Mac OS-X | |||
Fixed : CLI supports recursive mode `-r` on Mac OS-X | |||
Fixed : Legacy decoders use unified error codes, reported by benrg (#341), fixed by Przemyslaw Skibinski | |||
Fixed : compatibility with OpenBSD, reported by Juan Francisco Cantero Hurtado (#319) | |||
Fixed : compatibility with Hurd, by Przemyslaw Skibinski (#365) | |||
Fixed : zstd-pgo, reported by octoploid (#329) | |||
v1.0.0 | |||
Change Licensing, all project is now BSD, Copyright Facebook | |||
Small decompression speed improvement | |||
API : Streaming API supports legacy format | |||
API : ZDICT_getDictID(), ZSTD_sizeof_{CCtx, DCtx, CStream, DStream}(), ZSTD_setDStreamParameter() | |||
CLI supports legacy formats v0.4+ | |||
Fixed : compression fails on certain huge files, reported by Jesse McGrew | |||
Enhanced documentation, by Przemyslaw Skibinski | |||
v0.8.1 | |||
New streaming API | |||
Changed : --ultra now enables levels beyond 19 | |||
Changed : -i# now selects benchmark time in second | |||
Fixed : ZSTD_compress* can now compress > 4 GB in a single pass, reported by Nick Terrell | |||
Fixed : speed regression on specific patterns (#272) | |||
Fixed : support for Z_SYNC_FLUSH, by Dmitry Krot (#291) | |||
Fixed : ICC compilation, by Przemyslaw Skibinski | |||
v0.8.0 | |||
Improved : better speed on clang and gcc -O2, thanks to Eric Biggers | |||
New : Build on FreeBSD and DragonFly, thanks to JrMarino | |||
Changed : modified API : ZSTD_compressEnd() | |||
Fixed : legacy mode with ZSTD_HEAPMODE=0, by Christopher Bergqvist | |||
Fixed : premature end of frame when zero-sized raw block, reported by Eric Biggers | |||
Fixed : large dictionaries (> 384 KB), reported by Ilona Papava | |||
Fixed : checksum correctly checked in single-pass mode | |||
Fixed : combined --test and --rm, reported by Andreas M. Nilsson | |||
Modified : minor compression level adaptations | |||
Updated : compression format specification to v0.2.0 | |||
changed : zstd.h moved to /lib directory | |||
v0.7.5 | |||
Transition version, supporting decoding of v0.8.x | |||
v0.7.4 | |||
Added : homebrew for Mac, by Daniel Cade | |||
Added : more examples | |||
Fixed : segfault when using small dictionaries, reported by Felix Handte | |||
Modified : default compression level for CLI is now 3 | |||
Updated : specification, to v0.1.1 | |||
v0.7.3 | |||
New : compression format specification | |||
New : `--` separator, stating that all following arguments are file names. Suggested by Chip Turner. | |||
New : `ZSTD_getDecompressedSize()` | |||
New : OpenBSD target, by Juan Francisco Cantero Hurtado | |||
New : `examples` directory | |||
fixed : dictBuilder using HC levels, reported by Bartosz Taudul | |||
fixed : legacy support from ZSTD_decompress_usingDDict(), reported by Felix Handte | |||
fixed : multi-blocks decoding with intermediate uncompressed blocks, reported by Greg Slazinski | |||
modified : removed "mem.h" and "error_public.h" dependencies from "zstd.h" (experimental section) | |||
modified : legacy functions no longer need magic number | |||
v0.7.2 | |||
fixed : ZSTD_decompressBlock() using multiple consecutive blocks. Reported by Greg Slazinski. | |||
fixed : potential segfault on very large files (many gigabytes). Reported by Chip Turner. | |||
fixed : CLI displays system error message when destination file cannot be created (#231). Reported by Chip Turner. | |||
v0.7.1 | |||
fixed : ZBUFF_compressEnd() called multiple times with too small `dst` buffer, reported by Christophe Chevalier | |||
fixed : dictBuilder fails if first sample is too small, reported by Руслан Ковалёв | |||
fixed : corruption issue, reported by cj | |||
modified : checksum enabled by default in command line mode | |||
v0.7.0 | |||
New : Support for directory compression, using `-r`, thanks to Przemyslaw Skibinski | |||
New : Command `--rm`, to remove source file after successful de/compression | |||
New : Visual build scripts, by Christophe Chevalier | |||
New : Support for Sparse File-systems (do not use space for zero-filled sectors) | |||
New : Frame checksum support | |||
New : Support pass-through mode (when using `-df`) | |||
API : more efficient Dictionary API : `ZSTD_compress_usingCDict()`, `ZSTD_decompress_usingDDict()` | |||
API : create dictionary files from custom content, by Giuseppe Ottaviano | |||
API : support for custom malloc/free functions | |||
New : controllable Dictionary ID | |||
New : Support for skippable frames | |||
v0.6.1 | |||
New : zlib wrapper API, thanks to Przemyslaw Skibinski | |||
New : Ability to compile compressor / decompressor separately | |||
Changed : new lib directory structure | |||
Fixed : Legacy codec v0.5 compatible with dictionary decompression | |||
Fixed : Decoder corruption error (#173) | |||
Fixed : null-string roundtrip (#176) | |||
New : benchmark mode can select directory as input | |||
Experimental : midipix support, VMS support | |||
v0.6.0 | |||
Stronger high compression modes, thanks to Przemyslaw Skibinski | |||
API : ZSTD_getFrameParams() provides size of decompressed content | |||
New : highest compression modes require `--ultra` command to fully unleash their capacity | |||
Fixed : zstd cli return error code > 0 and removes dst file artifact when decompression fails, thanks to Chip Turner | |||
v0.5.1 | |||
New : Optimal parsing => Very high compression modes, thanks to Przemyslaw Skibinski | |||
Changed : Dictionary builder integrated into libzstd and zstd cli | |||
Changed (!) : zstd cli now uses "multiple input files" as default mode. See `zstd -h`. | |||
Fix : high compression modes for big-endian platforms | |||
New : zstd cli : `-t` | `--test` command | |||
v0.5.0 | |||
New : dictionary builder utility | |||
Changed : streaming & dictionary API | |||
Improved : better compression of small data | |||
v0.4.7 | |||
Improved : small compression speed improvement in HC mode | |||
Changed : `zstd_decompress.c` has ZSTD_LEGACY_SUPPORT to 0 by default | |||
fix : bt search bug | |||
v0.4.6 | |||
fix : fast compression mode on Windows | |||
New : cmake configuration file, thanks to Artyom Dymchenko | |||
Improved : high compression mode on repetitive data | |||
New : block-level API | |||
New : ZSTD_duplicateCCtx() | |||
v0.4.5 | |||
new : -m/--multiple : compress/decompress multiple files | |||
v0.4.4 | |||
Fixed : high compression modes for Windows 32 bits | |||
new : external dictionary API extended to buffered mode and accessible through command line | |||
new : windows DLL project, thanks to Christophe Chevalier | |||
v0.4.3 : | |||
new : external dictionary API | |||
new : zstd-frugal | |||
v0.4.2 : | |||
Generic minor improvements for small blocks | |||
Fixed : big-endian compatibility, by Peter Harris (#85) | |||
v0.4.1 | |||
Fixed : ZSTD_LEGACY_SUPPORT=0 build mode (reported by Luben) | |||
removed `zstd.c` | |||
v0.4.0 | |||
Command line utility compatible with high compression levels | |||
Removed zstdhc => merged into zstd | |||
Added : ZBUFF API (see zstd_buffered.h) | |||
Rolling buffer support | |||
v0.3.6 | |||
small blocks params | |||
v0.3.5 | |||
minor generic compression improvements | |||
v0.3.4 | |||
Faster fast cLevels | |||
v0.3.3 | |||
Small compression ratio improvement | |||
v0.3.2 | |||
Fixed Visual Studio | |||
v0.3.1 : | |||
Small compression ratio improvement | |||
v0.3 | |||
HC mode : compression levels 2-26 | |||
v0.2.2 | |||
Fix : Visual Studio 2013 & 2015 release compilation, by Christophe Chevalier | |||
v0.2.1 | |||
Fix : Read errors, advanced fuzzer tests, by Hanno Böck | |||
v0.2.0 | |||
**Breaking format change** | |||
Faster decompression speed | |||
Can still decode v0.1 format | |||
v0.1.3 | |||
fix uninitialization warning, reported by Evan Nemerson | |||
v0.1.2 | |||
frame concatenation support | |||
v0.1.1 | |||
fix compression bug | |||
detects write-flush errors | |||
v0.1.0 | |||
first release |
@@ -1,23 +1,27 @@ | |||
# Source files of the bundled zstd library.
# Covers the common core (entropy coders, error handling, pool/threading),
# the compressor, the decompressor, the multi-threaded compressor,
# and the dictionary builder (cover, divsufsort, zdict).
SET(ZSTDSRC
	cover.c
	debug.c
	divsufsort.c
	entropy_common.c
	error_private.c
	fse_compress.c
	fse_decompress.c
	hist.c
	huf_compress.c
	huf_decompress.c
	pool.c
	threading.c
	zdict.c
	zstd_common.c
	zstd_compress.c
	zstd_compress_literals.c
	zstd_compress_sequences.c
	zstd_compress_superblock.c
	zstd_ddict.c
	zstd_decompress.c
	zstd_decompress_block.c
	zstd_double_fast.c
	zstd_fast.c
	zstd_lazy.c
	zstd_ldm.c
	zstdmt_compress.c
	zstd_opt.c)
# Built as a static library; linked into rspamd rather than installed.
ADD_LIBRARY(rspamd-zstd STATIC ${ZSTDSRC})
@@ -1,36 +1,15 @@ | |||
/* ****************************************************************** | |||
bitstream | |||
Part of FSE library | |||
header file (to include) | |||
Copyright (C) 2013-2017, Yann Collet. | |||
BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) | |||
Redistribution and use in source and binary forms, with or without | |||
modification, are permitted provided that the following conditions are | |||
met: | |||
* Redistributions of source code must retain the above copyright | |||
notice, this list of conditions and the following disclaimer. | |||
* Redistributions in binary form must reproduce the above | |||
copyright notice, this list of conditions and the following disclaimer | |||
in the documentation and/or other materials provided with the | |||
distribution. | |||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
You can contact the author at : | |||
- Source repository : https://github.com/Cyan4973/FiniteStateEntropy | |||
* bitstream | |||
* Part of FSE library | |||
* Copyright (c) 2013-2020, Yann Collet, Facebook, Inc. | |||
* | |||
* You can contact the author at : | |||
* - Source repository : https://github.com/Cyan4973/FiniteStateEntropy | |||
* | |||
* This source code is licensed under both the BSD-style license (found in the | |||
* LICENSE file in the root directory of this source tree) and the GPLv2 (found | |||
* in the COPYING file in the root directory of this source tree). | |||
* You may select, at your option, one of the above-listed licenses. | |||
****************************************************************** */ | |||
#ifndef BITSTREAM_H_MODULE | |||
#define BITSTREAM_H_MODULE | |||
@@ -49,26 +28,18 @@ extern "C" { | |||
* Dependencies | |||
******************************************/ | |||
#include "mem.h" /* unaligned access routines */ | |||
#include "compiler.h" /* UNLIKELY() */ | |||
#include "debug.h" /* assert(), DEBUGLOG(), RAWLOG() */ | |||
#include "error_private.h" /* error codes and messages */ | |||
/*-************************************* | |||
* Debug | |||
***************************************/ | |||
#if defined(BIT_DEBUG) && (BIT_DEBUG>=1) | |||
# include <assert.h> | |||
#else | |||
# ifndef assert | |||
# define assert(condition) ((void)0) | |||
# endif | |||
#endif | |||
/*========================================= | |||
* Target specific | |||
=========================================*/ | |||
#if defined(__BMI__) && defined(__GNUC__) | |||
# include <immintrin.h> /* support for bextr (experimental) */ | |||
#elif defined(__ICCARM__) | |||
# include <intrinsics.h> | |||
#endif | |||
#define STREAM_ACCUMULATOR_MIN_32 25 | |||
@@ -83,8 +54,7 @@ extern "C" { | |||
* A critical property of these streams is that they encode and decode in **reverse** direction. | |||
* So the first bit sequence you add will be the last to be read, like a LIFO stack. | |||
*/ | |||
typedef struct | |||
{ | |||
typedef struct { | |||
size_t bitContainer; | |||
unsigned bitPos; | |||
char* startPtr; | |||
@@ -118,8 +88,7 @@ MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC); | |||
/*-******************************************** | |||
* bitStream decoding API (read backward) | |||
**********************************************/ | |||
typedef struct | |||
{ | |||
typedef struct { | |||
size_t bitContainer; | |||
unsigned bitsConsumed; | |||
const char* ptr; | |||
@@ -167,16 +136,17 @@ MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits); | |||
/*-************************************************************** | |||
* Internal functions | |||
****************************************************************/ | |||
MEM_STATIC unsigned BIT_highbit32 (register U32 val) | |||
MEM_STATIC unsigned BIT_highbit32 (U32 val) | |||
{ | |||
assert(val != 0); | |||
{ | |||
# if defined(_MSC_VER) /* Visual */ | |||
unsigned long r=0; | |||
_BitScanReverse ( &r, val ); | |||
return (unsigned) r; | |||
return _BitScanReverse ( &r, val ) ? (unsigned)r : 0; | |||
# elif defined(__GNUC__) && (__GNUC__ >= 3) /* Use GCC Intrinsic */ | |||
return 31 - __builtin_clz (val); | |||
return __builtin_clz (val) ^ 31; | |||
# elif defined(__ICCARM__) /* IAR Intrinsic */ | |||
return 31 - __CLZ(val); | |||
# else /* Software version */ | |||
static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, | |||
11, 14, 16, 18, 22, 25, 3, 30, | |||
@@ -236,7 +206,8 @@ MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC, | |||
} | |||
/*! BIT_addBitsFast() : | |||
* works only if `value` is _clean_, meaning all high bits above nbBits are 0 */ | |||
* works only if `value` is _clean_, | |||
* meaning all high bits above nbBits are 0 */ | |||
MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC, | |||
size_t value, unsigned nbBits) | |||
{ | |||
@@ -253,9 +224,9 @@ MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC) | |||
{ | |||
size_t const nbBytes = bitC->bitPos >> 3; | |||
assert(bitC->bitPos < sizeof(bitC->bitContainer) * 8); | |||
assert(bitC->ptr <= bitC->endPtr); | |||
MEM_writeLEST(bitC->ptr, bitC->bitContainer); | |||
bitC->ptr += nbBytes; | |||
assert(bitC->ptr <= bitC->endPtr); | |||
bitC->bitPos &= 7; | |||
bitC->bitContainer >>= nbBytes*8; | |||
} | |||
@@ -269,6 +240,7 @@ MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC) | |||
{ | |||
size_t const nbBytes = bitC->bitPos >> 3; | |||
assert(bitC->bitPos < sizeof(bitC->bitContainer) * 8); | |||
assert(bitC->ptr <= bitC->endPtr); | |||
MEM_writeLEST(bitC->ptr, bitC->bitContainer); | |||
bitC->ptr += nbBytes; | |||
if (bitC->ptr > bitC->endPtr) bitC->ptr = bitC->endPtr; | |||
@@ -352,17 +324,10 @@ MEM_STATIC size_t BIT_getUpperBits(size_t bitContainer, U32 const start) | |||
MEM_STATIC size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 const nbBits) | |||
{ | |||
#if defined(__BMI__) && defined(__GNUC__) && __GNUC__*1000+__GNUC_MINOR__ >= 4008 /* experimental */ | |||
# if defined(__x86_64__) | |||
if (sizeof(bitContainer)==8) | |||
return _bextr_u64(bitContainer, start, nbBits); | |||
else | |||
# endif | |||
return _bextr_u32(bitContainer, start, nbBits); | |||
#else | |||
U32 const regMask = sizeof(bitContainer)*8 - 1; | |||
/* if start > regMask, bitstream is corrupted, and result is undefined */ | |||
assert(nbBits < BIT_MASK_SIZE); | |||
return (bitContainer >> start) & BIT_mask[nbBits]; | |||
#endif | |||
return (bitContainer >> (start & regMask)) & BIT_mask[nbBits]; | |||
} | |||
MEM_STATIC size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits) | |||
@@ -379,9 +344,13 @@ MEM_STATIC size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits) | |||
* @return : value extracted */ | |||
MEM_STATIC size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits) | |||
{ | |||
#if defined(__BMI__) && defined(__GNUC__) /* experimental; fails if bitD->bitsConsumed + nbBits > sizeof(bitD->bitContainer)*8 */ | |||
/* arbitrate between double-shift and shift+mask */ | |||
#if 1 | |||
/* if bitD->bitsConsumed + nbBits > sizeof(bitD->bitContainer)*8, | |||
* bitstream is likely corrupted, and result is undefined */ | |||
return BIT_getMiddleBits(bitD->bitContainer, (sizeof(bitD->bitContainer)*8) - bitD->bitsConsumed - nbBits, nbBits); | |||
#else | |||
/* this code path is slower on my os-x laptop */ | |||
U32 const regMask = sizeof(bitD->bitContainer)*8 - 1; | |||
return ((bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> 1) >> ((regMask-nbBits) & regMask); | |||
#endif | |||
@@ -405,7 +374,7 @@ MEM_STATIC void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits) | |||
* Read (consume) next n bits from local register and update. | |||
* Pay attention to not read more than nbBits contained into local register. | |||
* @return : extracted value. */ | |||
MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, U32 nbBits) | |||
MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits) | |||
{ | |||
size_t const value = BIT_lookBits(bitD, nbBits); | |||
BIT_skipBits(bitD, nbBits); | |||
@@ -414,7 +383,7 @@ MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, U32 nbBits) | |||
/*! BIT_readBitsFast() : | |||
* unsafe version; only works only if nbBits >= 1 */ | |||
MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, U32 nbBits) | |||
MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits) | |||
{ | |||
size_t const value = BIT_lookBitsFast(bitD, nbBits); | |||
assert(nbBits >= 1); | |||
@@ -422,21 +391,35 @@ MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, U32 nbBits) | |||
return value; | |||
} | |||
/*! BIT_reloadDStreamFast() :
 *  Similar to BIT_reloadDStream(), but with two differences:
 *  1. bitsConsumed <= sizeof(bitD->bitContainer)*8 must hold!
 *  2. Returns BIT_DStream_overflow when bitD->ptr < bitD->limitPtr, at this
 *     point you must use BIT_reloadDStream() to reload.
 */
MEM_STATIC BIT_DStream_status BIT_reloadDStreamFast(BIT_DStream_t* bitD)
{
    /* Too close to the start of the buffer to rewind by a full container
     * width : caller must fall back to the safe BIT_reloadDStream(). */
    if (UNLIKELY(bitD->ptr < bitD->limitPtr))
        return BIT_DStream_overflow;
    assert(bitD->bitsConsumed <= sizeof(bitD->bitContainer)*8);
    /* Rewind the read pointer by the number of fully consumed bytes,
     * keep the 0-7 leftover bits, and refill the container in one
     * little-endian read. */
    bitD->ptr -= bitD->bitsConsumed >> 3;
    bitD->bitsConsumed &= 7;
    bitD->bitContainer = MEM_readLEST(bitD->ptr);
    return BIT_DStream_unfinished;
}
/*! BIT_reloadDStream() : | |||
* Refill `bitD` from buffer previously set in BIT_initDStream() . | |||
* This function is safe, it guarantees it will not read beyond src buffer. | |||
* @return : status of `BIT_DStream_t` internal register. | |||
* when status == BIT_DStream_unfinished, internal register is filled with at least 25 or 57 bits */ | |||
* when status == BIT_DStream_unfinished, internal register is filled with at least 25 or 57 bits */ | |||
MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD) | |||
{ | |||
if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8)) /* overflow detected, like end of stream */ | |||
return BIT_DStream_overflow; | |||
if (bitD->ptr >= bitD->limitPtr) { | |||
bitD->ptr -= bitD->bitsConsumed >> 3; | |||
bitD->bitsConsumed &= 7; | |||
bitD->bitContainer = MEM_readLEST(bitD->ptr); | |||
return BIT_DStream_unfinished; | |||
return BIT_reloadDStreamFast(bitD); | |||
} | |||
if (bitD->ptr == bitD->start) { | |||
if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return BIT_DStream_endOfBuffer; |
@@ -1,5 +1,5 @@ | |||
/* | |||
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc. | |||
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. | |||
* All rights reserved. | |||
* | |||
* This source code is licensed under both the BSD-style license (found in the | |||
@@ -15,13 +15,15 @@ | |||
* Compiler specifics | |||
*********************************************************/ | |||
/* force inlining */ | |||
#if defined (__GNUC__) || defined(__cplusplus) || defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */ | |||
#if !defined(ZSTD_NO_INLINE) | |||
#if (defined(__GNUC__) && !defined(__STRICT_ANSI__)) || defined(__cplusplus) || defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */ | |||
# define INLINE_KEYWORD inline | |||
#else | |||
# define INLINE_KEYWORD | |||
#endif | |||
#if defined(__GNUC__) | |||
#if defined(__GNUC__) || defined(__ICCARM__) | |||
# define FORCE_INLINE_ATTR __attribute__((always_inline)) | |||
#elif defined(_MSC_VER) | |||
# define FORCE_INLINE_ATTR __forceinline | |||
@@ -29,9 +31,16 @@ | |||
# define FORCE_INLINE_ATTR | |||
#endif | |||
#else | |||
#define INLINE_KEYWORD | |||
#define FORCE_INLINE_ATTR | |||
#endif | |||
/** | |||
* FORCE_INLINE_TEMPLATE is used to define C "templates", which take constant | |||
* parameters. They must be inlined for the compiler to elimininate the constant | |||
* parameters. They must be inlined for the compiler to eliminate the constant | |||
* branches. | |||
*/ | |||
#define FORCE_INLINE_TEMPLATE static INLINE_KEYWORD FORCE_INLINE_ATTR | |||
@@ -52,25 +61,105 @@ | |||
# define HINT_INLINE static INLINE_KEYWORD FORCE_INLINE_ATTR | |||
#endif | |||
/* UNUSED_ATTR tells the compiler it is okay if the function is unused. */ | |||
#if defined(__GNUC__) | |||
# define UNUSED_ATTR __attribute__((unused)) | |||
#else | |||
# define UNUSED_ATTR | |||
#endif | |||
/* force no inlining */ | |||
#ifdef _MSC_VER | |||
# define FORCE_NOINLINE static __declspec(noinline) | |||
#else | |||
# ifdef __GNUC__ | |||
# if defined(__GNUC__) || defined(__ICCARM__) | |||
# define FORCE_NOINLINE static __attribute__((__noinline__)) | |||
# else | |||
# define FORCE_NOINLINE static | |||
# endif | |||
#endif | |||
/* prefetch */ | |||
#if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86)) /* _mm_prefetch() is not defined outside of x86/x64 */ | |||
# include <mmintrin.h> /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */ | |||
# define PREFETCH(ptr) _mm_prefetch((const char*)ptr, _MM_HINT_T0) | |||
#elif defined(__GNUC__) | |||
# define PREFETCH(ptr) __builtin_prefetch(ptr, 0, 0) | |||
/* target attribute */ | |||
#ifndef __has_attribute | |||
#define __has_attribute(x) 0 /* Compatibility with non-clang compilers. */ | |||
#endif | |||
#if defined(__GNUC__) || defined(__ICCARM__) | |||
# define TARGET_ATTRIBUTE(target) __attribute__((__target__(target))) | |||
#else | |||
# define TARGET_ATTRIBUTE(target) | |||
#endif | |||
/* Enable runtime BMI2 dispatch based on the CPU. | |||
* Enabled for clang & gcc >=4.8 on x86 when BMI2 isn't enabled by default. | |||
*/ | |||
#ifndef DYNAMIC_BMI2 | |||
#if ((defined(__clang__) && __has_attribute(__target__)) \ | |||
|| (defined(__GNUC__) \ | |||
&& (__GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)))) \ | |||
&& (defined(__x86_64__) || defined(_M_X86)) \ | |||
&& !defined(__BMI2__) | |||
# define DYNAMIC_BMI2 1 | |||
#else | |||
# define DYNAMIC_BMI2 0 | |||
#endif | |||
#endif | |||
/* prefetch | |||
* can be disabled, by declaring NO_PREFETCH build macro */ | |||
#if defined(NO_PREFETCH) | |||
# define PREFETCH_L1(ptr) (void)(ptr) /* disabled */ | |||
# define PREFETCH_L2(ptr) (void)(ptr) /* disabled */ | |||
#else | |||
# if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86)) /* _mm_prefetch() is not defined outside of x86/x64 */ | |||
# include <mmintrin.h> /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */ | |||
# define PREFETCH_L1(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T0) | |||
# define PREFETCH_L2(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T1) | |||
# elif defined(__aarch64__) | |||
# define PREFETCH_L1(ptr) __asm__ __volatile__("prfm pldl1keep, %0" ::"Q"(*(ptr))) | |||
# define PREFETCH_L2(ptr) __asm__ __volatile__("prfm pldl2keep, %0" ::"Q"(*(ptr))) | |||
# elif defined(__GNUC__) && ( (__GNUC__ >= 4) || ( (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1) ) ) | |||
# define PREFETCH_L1(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 3 /* locality */) | |||
# define PREFETCH_L2(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 2 /* locality */) | |||
# else | |||
# define PREFETCH_L1(ptr) (void)(ptr) /* disabled */ | |||
# define PREFETCH_L2(ptr) (void)(ptr) /* disabled */ | |||
# endif | |||
#endif /* NO_PREFETCH */ | |||
#define CACHELINE_SIZE 64 | |||
#define PREFETCH_AREA(p, s) { \ | |||
const char* const _ptr = (const char*)(p); \ | |||
size_t const _size = (size_t)(s); \ | |||
size_t _pos; \ | |||
for (_pos=0; _pos<_size; _pos+=CACHELINE_SIZE) { \ | |||
PREFETCH_L2(_ptr + _pos); \ | |||
} \ | |||
} | |||
/* vectorization | |||
* older GCC (pre gcc-4.3 picked as the cutoff) uses a different syntax */ | |||
#if !defined(__INTEL_COMPILER) && !defined(__clang__) && defined(__GNUC__) | |||
# if (__GNUC__ == 4 && __GNUC_MINOR__ > 3) || (__GNUC__ >= 5) | |||
# define DONT_VECTORIZE __attribute__((optimize("no-tree-vectorize"))) | |||
# else | |||
# define DONT_VECTORIZE _Pragma("GCC optimize(\"no-tree-vectorize\")") | |||
# endif | |||
#else | |||
# define DONT_VECTORIZE | |||
#endif | |||
/* Tell the compiler that a branch is likely or unlikely. | |||
* Only use these macros if it causes the compiler to generate better code. | |||
* If you can remove a LIKELY/UNLIKELY annotation without speed changes in gcc | |||
* and clang, please do. | |||
*/ | |||
#if defined(__GNUC__) | |||
#define LIKELY(x) (__builtin_expect((x), 1)) | |||
#define UNLIKELY(x) (__builtin_expect((x), 0)) | |||
#else | |||
# define PREFETCH(ptr) /* disabled */ | |||
#define LIKELY(x) (x) | |||
#define UNLIKELY(x) (x) | |||
#endif | |||
/* disable warnings */ |
@@ -0,0 +1,215 @@ | |||
/* | |||
* Copyright (c) 2018-2020, Facebook, Inc. | |||
* All rights reserved. | |||
* | |||
* This source code is licensed under both the BSD-style license (found in the | |||
* LICENSE file in the root directory of this source tree) and the GPLv2 (found | |||
* in the COPYING file in the root directory of this source tree). | |||
* You may select, at your option, one of the above-listed licenses. | |||
*/ | |||
#ifndef ZSTD_COMMON_CPU_H | |||
#define ZSTD_COMMON_CPU_H | |||
/** | |||
* Implementation taken from folly/CpuId.h | |||
* https://github.com/facebook/folly/blob/master/folly/CpuId.h | |||
*/ | |||
#include <string.h> | |||
#include "mem.h" | |||
#ifdef _MSC_VER | |||
#include <intrin.h> | |||
#endif | |||
/* Raw x86 CPUID feature words, as returned by ZSTD_cpuid().
 * All fields are zero when CPUID is unavailable (e.g. non-x86 targets). */
typedef struct {
    U32 f1c;  /* CPUID leaf 1, ECX : Processor Info and Feature Bits */
    U32 f1d;  /* CPUID leaf 1, EDX : Processor Info and Feature Bits */
    U32 f7b;  /* CPUID leaf 7 subleaf 0, EBX : Extended Features */
    U32 f7c;  /* CPUID leaf 7 subleaf 0, ECX : Extended Features */
} ZSTD_cpuid_t;
/* ZSTD_cpuid() :
 * Queries the CPU for its feature flags via the CPUID instruction.
 * Leaf 0 is read first to learn the highest supported leaf; leaves 1 and 7
 * are only queried when the CPU reports them. On compilers/architectures
 * with no CPUID path below, all feature words stay 0 (no features). */
MEM_STATIC ZSTD_cpuid_t ZSTD_cpuid(void) {
    U32 f1c = 0;
    U32 f1d = 0;
    U32 f7b = 0;
    U32 f7c = 0;
#if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86))
    /* MSVC : use the __cpuid/__cpuidex intrinsics. */
    int reg[4];
    __cpuid((int*)reg, 0);
    {
        int const n = reg[0];   /* highest supported standard leaf */
        if (n >= 1) {
            __cpuid((int*)reg, 1);
            f1c = (U32)reg[2];  /* ECX */
            f1d = (U32)reg[3];  /* EDX */
        }
        if (n >= 7) {
            __cpuidex((int*)reg, 7, 0);
            f7b = (U32)reg[1];  /* EBX */
            f7c = (U32)reg[2];  /* ECX */
        }
    }
#elif defined(__i386__) && defined(__PIC__) && !defined(__clang__) && defined(__GNUC__)
    /* The following block is like the normal cpuid branch below, but gcc
     * reserves ebx for use as its PIC register, so we must specially
     * handle the save and restore to avoid clobbering the register.
     */
    U32 n;
    __asm__(
        "pushl %%ebx\n\t"
        "cpuid\n\t"
        "popl %%ebx\n\t"
        : "=a"(n)
        : "a"(0)
        : "ecx", "edx");
    if (n >= 1) {
        U32 f1a;
        __asm__(
            "pushl %%ebx\n\t"
            "cpuid\n\t"
            "popl %%ebx\n\t"
            : "=a"(f1a), "=c"(f1c), "=d"(f1d)
            : "a"(1));
    }
    if (n >= 7) {
        /* EBX holds the result but cannot be an output operand under PIC :
         * copy it to EAX before restoring EBX. */
        __asm__(
            "pushl %%ebx\n\t"
            "cpuid\n\t"
            "movl %%ebx, %%eax\n\t"
            "popl %%ebx"
            : "=a"(f7b), "=c"(f7c)
            : "a"(7), "c"(0)
            : "edx");
    }
#elif defined(__x86_64__) || defined(_M_X64) || defined(__i386__)
    /* GCC/clang, non-PIC i386 or x86-64 : plain cpuid inline asm. */
    U32 n;
    __asm__("cpuid" : "=a"(n) : "a"(0) : "ebx", "ecx", "edx");
    if (n >= 1) {
        U32 f1a;
        __asm__("cpuid" : "=a"(f1a), "=c"(f1c), "=d"(f1d) : "a"(1) : "ebx");
    }
    if (n >= 7) {
        U32 f7a;
        __asm__("cpuid"
                : "=a"(f7a), "=b"(f7b), "=c"(f7c)
                : "a"(7), "c"(0)
                : "edx");
    }
#endif
    {
        ZSTD_cpuid_t cpuid;
        cpuid.f1c = f1c;
        cpuid.f1d = f1d;
        cpuid.f7b = f7b;
        cpuid.f7c = f7c;
        return cpuid;
    }
}
/* X(name, r, bit) generates a predicate function ZSTD_cpuid_<name>()
 * that tests a single feature bit in the given CPUID word.
 * The B/C/D helpers below instantiate it for each feature word. */
#define X(name, r, bit)                                                        \
  MEM_STATIC int ZSTD_cpuid_##name(ZSTD_cpuid_t const cpuid) {                 \
    return ((cpuid.r) & (1U << bit)) != 0;                                     \
  }

/* cpuid(1): Processor Info and Feature Bits. */
#define C(name, bit) X(name, f1c, bit)
  C(sse3, 0)
  C(pclmuldq, 1)
  C(dtes64, 2)
  C(monitor, 3)
  C(dscpl, 4)
  C(vmx, 5)
  C(smx, 6)
  C(eist, 7)
  C(tm2, 8)
  C(ssse3, 9)
  C(cnxtid, 10)
  C(fma, 12)
  C(cx16, 13)
  C(xtpr, 14)
  C(pdcm, 15)
  C(pcid, 17)
  C(dca, 18)
  C(sse41, 19)
  C(sse42, 20)
  C(x2apic, 21)
  C(movbe, 22)
  C(popcnt, 23)
  C(tscdeadline, 24)
  C(aes, 25)
  C(xsave, 26)
  C(osxsave, 27)
  C(avx, 28)
  C(f16c, 29)
  C(rdrand, 30)
#undef C
#define D(name, bit) X(name, f1d, bit)
  D(fpu, 0)
  D(vme, 1)
  D(de, 2)
  D(pse, 3)
  D(tsc, 4)
  D(msr, 5)
  D(pae, 6)
  D(mce, 7)
  D(cx8, 8)
  D(apic, 9)
  D(sep, 11)
  D(mtrr, 12)
  D(pge, 13)
  D(mca, 14)
  D(cmov, 15)
  D(pat, 16)
  D(pse36, 17)
  D(psn, 18)
  D(clfsh, 19)
  D(ds, 21)
  D(acpi, 22)
  D(mmx, 23)
  D(fxsr, 24)
  D(sse, 25)
  D(sse2, 26)
  D(ss, 27)
  D(htt, 28)
  D(tm, 29)
  D(pbe, 31)
#undef D

/* cpuid(7): Extended Features. */
#define B(name, bit) X(name, f7b, bit)
  B(bmi1, 3)
  B(hle, 4)
  B(avx2, 5)
  B(smep, 7)
  B(bmi2, 8)
  B(erms, 9)
  B(invpcid, 10)
  B(rtm, 11)
  B(mpx, 14)
  B(avx512f, 16)
  B(avx512dq, 17)
  B(rdseed, 18)
  B(adx, 19)
  B(smap, 20)
  B(avx512ifma, 21)
  B(pcommit, 22)
  B(clflushopt, 23)
  B(clwb, 24)
  B(avx512pf, 26)
  B(avx512er, 27)
  B(avx512cd, 28)
  B(sha, 29)
  B(avx512bw, 30)
  B(avx512vl, 31)
#undef B
/* C is reused here for the leaf-7 ECX word (f7c), not leaf-1 ECX. */
#define C(name, bit) X(name, f7c, bit)
  C(prefetchwt1, 0)
  C(avx512vbmi, 1)
#undef C
#undef X
#endif /* ZSTD_COMMON_CPU_H */ |
@@ -0,0 +1,24 @@ | |||
/* ****************************************************************** | |||
* debug | |||
* Part of FSE library | |||
* Copyright (c) 2013-2020, Yann Collet, Facebook, Inc. | |||
* | |||
* You can contact the author at : | |||
* - Source repository : https://github.com/Cyan4973/FiniteStateEntropy | |||
* | |||
* This source code is licensed under both the BSD-style license (found in the | |||
* LICENSE file in the root directory of this source tree) and the GPLv2 (found | |||
* in the COPYING file in the root directory of this source tree). | |||
* You may select, at your option, one of the above-listed licenses. | |||
****************************************************************** */ | |||
/* | |||
* This module only hosts one global variable | |||
* which can be used to dynamically influence the verbosity of traces, | |||
* such as DEBUGLOG and RAWLOG | |||
*/ | |||
#include "debug.h" | |||
int g_debuglevel = DEBUGLEVEL; |
@@ -0,0 +1,114 @@ | |||
/* ****************************************************************** | |||
* debug | |||
* Part of FSE library | |||
* Copyright (c) 2013-2020, Yann Collet, Facebook, Inc. | |||
* | |||
* You can contact the author at : | |||
* - Source repository : https://github.com/Cyan4973/FiniteStateEntropy | |||
* | |||
* This source code is licensed under both the BSD-style license (found in the | |||
* LICENSE file in the root directory of this source tree) and the GPLv2 (found | |||
* in the COPYING file in the root directory of this source tree). | |||
* You may select, at your option, one of the above-listed licenses. | |||
****************************************************************** */ | |||
/* | |||
* The purpose of this header is to enable debug functions. | |||
* They regroup assert(), DEBUGLOG() and RAWLOG() for run-time, | |||
* and DEBUG_STATIC_ASSERT() for compile-time. | |||
* | |||
* By default, DEBUGLEVEL==0, which means run-time debug is disabled. | |||
* | |||
* Level 1 enables assert() only. | |||
* Starting level 2, traces can be generated and pushed to stderr. | |||
* The higher the level, the more verbose the traces. | |||
* | |||
* It's possible to dynamically adjust level using variable g_debug_level, | |||
* which is only declared if DEBUGLEVEL>=2, | |||
* and is a global variable, not multi-thread protected (use with care) | |||
*/ | |||
#ifndef DEBUG_H_12987983217 | |||
#define DEBUG_H_12987983217 | |||
#if defined (__cplusplus) | |||
extern "C" { | |||
#endif | |||
/* static assert is triggered at compile time, leaving no runtime artefact. | |||
* static assert only works with compile-time constants. | |||
* Also, this variant can only be used inside a function. */ | |||
#define DEBUG_STATIC_ASSERT(c) (void)sizeof(char[(c) ? 1 : -1]) | |||
/* DEBUGLEVEL is expected to be defined externally, | |||
* typically through compiler command line. | |||
* Value must be a number. */ | |||
#ifndef DEBUGLEVEL | |||
# define DEBUGLEVEL 0 | |||
#endif | |||
/* DEBUGFILE can be defined externally, | |||
* typically through compiler command line. | |||
* note : currently useless. | |||
* Value must be stderr or stdout */ | |||
#ifndef DEBUGFILE | |||
# define DEBUGFILE stderr | |||
#endif | |||
/* recommended values for DEBUGLEVEL : | |||
* 0 : release mode, no debug, all run-time checks disabled | |||
* 1 : enables assert() only, no display | |||
* 2 : reserved, for currently active debug path | |||
* 3 : events once per object lifetime (CCtx, CDict, etc.) | |||
* 4 : events once per frame | |||
* 5 : events once per block | |||
* 6 : events once per sequence (verbose) | |||
* 7+: events at every position (*very* verbose) | |||
* | |||
* It's generally inconvenient to output traces > 5. | |||
* In which case, it's possible to selectively trigger high verbosity levels | |||
* by modifying g_debug_level. | |||
*/ | |||
#if (DEBUGLEVEL>=1) | |||
# include <assert.h> | |||
#else | |||
# ifndef assert /* assert may be already defined, due to prior #include <assert.h> */ | |||
# define assert(condition) ((void)0) /* disable assert (default) */ | |||
# endif | |||
#endif | |||
#if (DEBUGLEVEL>=2)
#  include <stdio.h>
extern int g_debuglevel; /* the variable is only declared,
                            it actually lives in debug.c,
                            and is shared by the whole process.
                            It's not thread-safe.
                            It's useful when enabling very verbose levels
                            on selective conditions (such as position in src) */

/* RAWLOG() : print the given format string to stderr, with no prefix and
 * no trailing newline, when level `l` <= g_debuglevel.
 * note : `l` is not parenthesized and the macro is not do{}while(0)-wrapped;
 *        pass a simple side-effect-free expression for `l`. */
#  define RAWLOG(l, ...) {                       \
       if (l<=g_debuglevel) {                    \
           fprintf(stderr, __VA_ARGS__);         \
   }   }

/* DEBUGLOG() : like RAWLOG(), but prefixes the message with the source
 * file name and appends " \n". Same caveats about `l` apply. */
#  define DEBUGLOG(l, ...) {                             \
       if (l<=g_debuglevel) {                            \
           fprintf(stderr, __FILE__ ": " __VA_ARGS__);   \
           fprintf(stderr, " \n");                       \
   }   }
#else
#  define RAWLOG(l, ...)   {}   /* disabled */
#  define DEBUGLOG(l, ...) {}   /* disabled */
#endif
#if defined (__cplusplus) | |||
} | |||
#endif | |||
#endif /* DEBUG_H_12987983217 */ |
@@ -1,36 +1,16 @@ | |||
/* | |||
Common functions of New Generation Entropy library | |||
Copyright (C) 2016, Yann Collet. | |||
BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) | |||
Redistribution and use in source and binary forms, with or without | |||
modification, are permitted provided that the following conditions are | |||
met: | |||
* Redistributions of source code must retain the above copyright | |||
notice, this list of conditions and the following disclaimer. | |||
* Redistributions in binary form must reproduce the above | |||
copyright notice, this list of conditions and the following disclaimer | |||
in the documentation and/or other materials provided with the | |||
distribution. | |||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
You can contact the author at : | |||
- FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy | |||
- Public forum : https://groups.google.com/forum/#!forum/lz4c | |||
*************************************************************************** */ | |||
/* ****************************************************************** | |||
* Common functions of New Generation Entropy library | |||
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. | |||
* | |||
* You can contact the author at : | |||
* - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy | |||
* - Public forum : https://groups.google.com/forum/#!forum/lz4c | |||
* | |||
* This source code is licensed under both the BSD-style license (found in the | |||
* LICENSE file in the root directory of this source tree) and the GPLv2 (found | |||
* in the COPYING file in the root directory of this source tree). | |||
* You may select, at your option, one of the above-listed licenses. | |||
****************************************************************** */ | |||
/* ************************************* | |||
* Dependencies | |||
@@ -72,7 +52,21 @@ size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* t | |||
unsigned charnum = 0; | |||
int previous0 = 0; | |||
if (hbSize < 4) return ERROR(srcSize_wrong); | |||
if (hbSize < 4) { | |||
/* This function only works when hbSize >= 4 */ | |||
char buffer[4]; | |||
memset(buffer, 0, sizeof(buffer)); | |||
memcpy(buffer, headerBuffer, hbSize); | |||
{ size_t const countSize = FSE_readNCount(normalizedCounter, maxSVPtr, tableLogPtr, | |||
buffer, sizeof(buffer)); | |||
if (FSE_isError(countSize)) return countSize; | |||
if (countSize > hbSize) return ERROR(corruption_detected); | |||
return countSize; | |||
} } | |||
assert(hbSize >= 4); | |||
/* init */ | |||
memset(normalizedCounter, 0, (*maxSVPtr+1) * sizeof(normalizedCounter[0])); /* all symbols not present in NCount have a frequency of 0 */ | |||
bitStream = MEM_readLE32(ip); | |||
nbBits = (bitStream & 0xF) + FSE_MIN_TABLELOG; /* extract tableLog */ | |||
if (nbBits > FSE_TABLELOG_ABSOLUTE_MAX) return ERROR(tableLog_tooLarge); | |||
@@ -105,6 +99,7 @@ size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* t | |||
if (n0 > *maxSVPtr) return ERROR(maxSymbolValue_tooSmall); | |||
while (charnum < n0) normalizedCounter[charnum++] = 0; | |||
if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) { | |||
assert((bitCount >> 3) <= 3); /* For first condition to work */ | |||
ip += bitCount>>3; | |||
bitCount &= 7; | |||
bitStream = MEM_readLE32(ip) >> bitCount; |
@@ -1,5 +1,5 @@ | |||
/* | |||
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc. | |||
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. | |||
* All rights reserved. | |||
* | |||
* This source code is licensed under both the BSD-style license (found in the | |||
@@ -14,6 +14,10 @@ | |||
const char* ERR_getErrorString(ERR_enum code) | |||
{ | |||
#ifdef ZSTD_STRIP_ERROR_STRINGS | |||
(void)code; | |||
return "Error strings stripped"; | |||
#else | |||
static const char* const notErrorCode = "Unspecified error code"; | |||
switch( code ) | |||
{ | |||
@@ -29,18 +33,23 @@ const char* ERR_getErrorString(ERR_enum code) | |||
case PREFIX(parameter_outOfBound): return "Parameter is out of bound"; | |||
case PREFIX(init_missing): return "Context should be init first"; | |||
case PREFIX(memory_allocation): return "Allocation error : not enough memory"; | |||
case PREFIX(workSpace_tooSmall): return "workSpace buffer is not large enough"; | |||
case PREFIX(stage_wrong): return "Operation not authorized at current processing stage"; | |||
case PREFIX(dstSize_tooSmall): return "Destination buffer is too small"; | |||
case PREFIX(srcSize_wrong): return "Src size is incorrect"; | |||
case PREFIX(tableLog_tooLarge): return "tableLog requires too much memory : unsupported"; | |||
case PREFIX(maxSymbolValue_tooLarge): return "Unsupported max Symbol Value : too large"; | |||
case PREFIX(maxSymbolValue_tooSmall): return "Specified maxSymbolValue is too small"; | |||
case PREFIX(dictionary_corrupted): return "Dictionary is corrupted"; | |||
case PREFIX(dictionary_wrong): return "Dictionary mismatch"; | |||
case PREFIX(dictionaryCreation_failed): return "Cannot create Dictionary from provided samples"; | |||
case PREFIX(dstSize_tooSmall): return "Destination buffer is too small"; | |||
case PREFIX(srcSize_wrong): return "Src size is incorrect"; | |||
case PREFIX(dstBuffer_null): return "Operation on NULL destination buffer"; | |||
/* following error codes are not stable and may be removed or changed in a future version */ | |||
case PREFIX(frameIndex_tooLarge): return "Frame index is too large"; | |||
case PREFIX(seekableIO): return "An I/O error occurred when reading/seeking"; | |||
case PREFIX(dstBuffer_wrong): return "Destination buffer is wrong"; | |||
case PREFIX(maxCode): | |||
default: return notErrorCode; | |||
} | |||
#endif | |||
} |
@@ -1,5 +1,5 @@ | |||
/* | |||
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc. | |||
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. | |||
* All rights reserved. | |||
* | |||
* This source code is licensed under both the BSD-style license (found in the | |||
@@ -49,7 +49,7 @@ typedef ZSTD_ErrorCode ERR_enum; | |||
/*-**************************************** | |||
* Error codes handling | |||
******************************************/ | |||
#undef ERROR /* reported already defined on VS 2015 (Rich Geldreich) */ | |||
#undef ERROR /* already defined on Visual Studio */ | |||
#define ERROR(name) ZSTD_ERROR(name) | |||
#define ZSTD_ERROR(name) ((size_t)-PREFIX(name)) | |||
@@ -57,6 +57,10 @@ ERR_STATIC unsigned ERR_isError(size_t code) { return (code > ERROR(maxCode)); } | |||
ERR_STATIC ERR_enum ERR_getErrorCode(size_t code) { if (!ERR_isError(code)) return (ERR_enum)0; return (ERR_enum) (0-code); } | |||
/* check and forward error code */ | |||
#define CHECK_V_F(e, f) size_t const e = f; if (ERR_isError(e)) return e | |||
#define CHECK_F(f) { CHECK_V_F(_var_err__, f); } | |||
/*-**************************************** | |||
* Error Strings |
@@ -1,35 +1,15 @@ | |||
/* ****************************************************************** | |||
FSE : Finite State Entropy codec | |||
Public Prototypes declaration | |||
Copyright (C) 2013-2016, Yann Collet. | |||
BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) | |||
Redistribution and use in source and binary forms, with or without | |||
modification, are permitted provided that the following conditions are | |||
met: | |||
* Redistributions of source code must retain the above copyright | |||
notice, this list of conditions and the following disclaimer. | |||
* Redistributions in binary form must reproduce the above | |||
copyright notice, this list of conditions and the following disclaimer | |||
in the documentation and/or other materials provided with the | |||
distribution. | |||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
You can contact the author at : | |||
- Source repository : https://github.com/Cyan4973/FiniteStateEntropy | |||
* FSE : Finite State Entropy codec | |||
* Public Prototypes declaration | |||
* Copyright (c) 2013-2020, Yann Collet, Facebook, Inc. | |||
* | |||
* You can contact the author at : | |||
* - Source repository : https://github.com/Cyan4973/FiniteStateEntropy | |||
* | |||
* This source code is licensed under both the BSD-style license (found in the | |||
* LICENSE file in the root directory of this source tree) and the GPLv2 (found | |||
* in the COPYING file in the root directory of this source tree). | |||
* You may select, at your option, one of the above-listed licenses. | |||
****************************************************************** */ | |||
#if defined (__cplusplus) | |||
@@ -72,6 +52,7 @@ extern "C" { | |||
#define FSE_VERSION_NUMBER (FSE_VERSION_MAJOR *100*100 + FSE_VERSION_MINOR *100 + FSE_VERSION_RELEASE) | |||
FSE_PUBLIC_API unsigned FSE_versionNumber(void); /**< library version number; to be used when checking dll version */ | |||
/*-**************************************** | |||
* FSE simple functions | |||
******************************************/ | |||
@@ -129,7 +110,7 @@ FSE_PUBLIC_API size_t FSE_compress2 (void* dst, size_t dstSize, const void* src, | |||
******************************************/ | |||
/*! | |||
FSE_compress() does the following: | |||
1. count symbol occurrence from source[] into table count[] | |||
1. count symbol occurrence from source[] into table count[] (see hist.h) | |||
2. normalize counters so that sum(count[]) == Power_of_2 (2^tableLog) | |||
3. save normalized counters to memory buffer using writeNCount() | |||
4. build encoding table 'CTable' from normalized counters | |||
@@ -147,15 +128,6 @@ or to save and provide normalized distribution using external method. | |||
/* *** COMPRESSION *** */ | |||
/*! FSE_count(): | |||
Provides the precise count of each byte within a table 'count'. | |||
'count' is a table of unsigned int, of minimum size (*maxSymbolValuePtr+1). | |||
*maxSymbolValuePtr will be updated if detected smaller than initial value. | |||
@return : the count of the most frequent symbol (which is not identified). | |||
if return == srcSize, there is only one symbol. | |||
Can also return an error code, which can be tested with FSE_isError(). */ | |||
FSE_PUBLIC_API size_t FSE_count(unsigned* count, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize); | |||
/*! FSE_optimalTableLog(): | |||
dynamically downsize 'tableLog' when conditions are met. | |||
It saves CPU time, by using smaller tables, while preserving or even improving compression ratio. | |||
@@ -167,7 +139,8 @@ FSE_PUBLIC_API unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize | |||
'normalizedCounter' is a table of short, of minimum size (maxSymbolValue+1). | |||
@return : tableLog, | |||
or an errorCode, which can be tested using FSE_isError() */ | |||
FSE_PUBLIC_API size_t FSE_normalizeCount(short* normalizedCounter, unsigned tableLog, const unsigned* count, size_t srcSize, unsigned maxSymbolValue); | |||
FSE_PUBLIC_API size_t FSE_normalizeCount(short* normalizedCounter, unsigned tableLog, | |||
const unsigned* count, size_t srcSize, unsigned maxSymbolValue); | |||
/*! FSE_NCountWriteBound(): | |||
Provides the maximum possible size of an FSE normalized table, given 'maxSymbolValue' and 'tableLog'. | |||
@@ -178,13 +151,14 @@ FSE_PUBLIC_API size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tab | |||
Compactly save 'normalizedCounter' into 'buffer'. | |||
@return : size of the compressed table, | |||
or an errorCode, which can be tested using FSE_isError(). */ | |||
FSE_PUBLIC_API size_t FSE_writeNCount (void* buffer, size_t bufferSize, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog); | |||
FSE_PUBLIC_API size_t FSE_writeNCount (void* buffer, size_t bufferSize, | |||
const short* normalizedCounter, | |||
unsigned maxSymbolValue, unsigned tableLog); | |||
/*! Constructor and Destructor of FSE_CTable. | |||
Note that FSE_CTable size depends on 'tableLog' and 'maxSymbolValue' */ | |||
typedef unsigned FSE_CTable; /* don't allocate that. It's only meant to be more restrictive than void* */ | |||
FSE_PUBLIC_API FSE_CTable* FSE_createCTable (unsigned tableLog, unsigned maxSymbolValue); | |||
FSE_PUBLIC_API FSE_CTable* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog); | |||
FSE_PUBLIC_API void FSE_freeCTable (FSE_CTable* ct); | |||
/*! FSE_buildCTable(): | |||
@@ -250,7 +224,9 @@ If there is an error, the function will return an ErrorCode (which can be tested | |||
@return : size read from 'rBuffer', | |||
or an errorCode, which can be tested using FSE_isError(). | |||
maxSymbolValuePtr[0] and tableLogPtr[0] will also be updated with their respective values */ | |||
FSE_PUBLIC_API size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSymbolValuePtr, unsigned* tableLogPtr, const void* rBuffer, size_t rBuffSize); | |||
FSE_PUBLIC_API size_t FSE_readNCount (short* normalizedCounter, | |||
unsigned* maxSymbolValuePtr, unsigned* tableLogPtr, | |||
const void* rBuffer, size_t rBuffSize); | |||
/*! Constructor and Destructor of FSE_DTable. | |||
Note that its size depends on 'tableLog' */ | |||
@@ -312,7 +288,7 @@ If there is an error, the function will return an error code, which can be teste | |||
*******************************************/ | |||
/* FSE buffer bounds */ | |||
#define FSE_NCOUNTBOUND 512 | |||
#define FSE_BLOCKBOUND(size) (size + (size>>7)) | |||
#define FSE_BLOCKBOUND(size) (size + (size>>7) + 4 /* fse states */ + sizeof(size_t) /* bitContainer */) | |||
#define FSE_COMPRESSBOUND(size) (FSE_NCOUNTBOUND + FSE_BLOCKBOUND(size)) /* Macro version, useful for static allocation */ | |||
/* It is possible to statically allocate FSE CTable/DTable as a table of FSE_CTable/FSE_DTable using below macros */ | |||
@@ -325,33 +301,8 @@ If there is an error, the function will return an error code, which can be teste | |||
/* ***************************************** | |||
* FSE advanced API | |||
*******************************************/ | |||
/* FSE_count_wksp() : | |||
* Same as FSE_count(), but using an externally provided scratch buffer. | |||
* `workSpace` size must be table of >= `1024` unsigned | |||
*/ | |||
size_t FSE_count_wksp(unsigned* count, unsigned* maxSymbolValuePtr, | |||
const void* source, size_t sourceSize, unsigned* workSpace); | |||
/** FSE_countFast() : | |||
* same as FSE_count(), but blindly trusts that all byte values within src are <= *maxSymbolValuePtr | |||
*/ | |||
size_t FSE_countFast(unsigned* count, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize); | |||
/* FSE_countFast_wksp() : | |||
* Same as FSE_countFast(), but using an externally provided scratch buffer. | |||
* `workSpace` must be a table of minimum `1024` unsigned | |||
*/ | |||
size_t FSE_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize, unsigned* workSpace); | |||
/*! FSE_count_simple | |||
* Same as FSE_countFast(), but does not use any additional memory (not even on stack). | |||
* This function is unsafe, and will segfault if any value within `src` is `> *maxSymbolValuePtr` (presuming it's also the size of `count`). | |||
*/ | |||
size_t FSE_count_simple(unsigned* count, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize); | |||
* FSE advanced API | |||
***************************************** */ | |||
unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus); | |||
/**< same as FSE_optimalTableLog(), which used `minus==2` */ | |||
@@ -387,7 +338,7 @@ size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size | |||
typedef enum { | |||
FSE_repeat_none, /**< Cannot use the previous table */ | |||
FSE_repeat_check, /**< Can use the previous table but it must be checked */ | |||
FSE_repeat_valid /**< Can use the previous table and it is asumed to be valid */ | |||
FSE_repeat_valid /**< Can use the previous table and it is assumed to be valid */ | |||
} FSE_repeat; | |||
/* ***************************************** | |||
@@ -541,7 +492,7 @@ MEM_STATIC void FSE_initCState(FSE_CState_t* statePtr, const FSE_CTable* ct) | |||
const U32 tableLog = MEM_read16(ptr); | |||
statePtr->value = (ptrdiff_t)1<<tableLog; | |||
statePtr->stateTable = u16ptr+2; | |||
statePtr->symbolTT = ((const U32*)ct + 1 + (tableLog ? (1<<(tableLog-1)) : 1)); | |||
statePtr->symbolTT = ct + 1 + (tableLog ? (1<<(tableLog-1)) : 1); | |||
statePtr->stateLog = tableLog; | |||
} | |||
@@ -560,7 +511,7 @@ MEM_STATIC void FSE_initCState2(FSE_CState_t* statePtr, const FSE_CTable* ct, U3 | |||
} | |||
} | |||
MEM_STATIC void FSE_encodeSymbol(BIT_CStream_t* bitC, FSE_CState_t* statePtr, U32 symbol) | |||
MEM_STATIC void FSE_encodeSymbol(BIT_CStream_t* bitC, FSE_CState_t* statePtr, unsigned symbol) | |||
{ | |||
FSE_symbolCompressionTransform const symbolTT = ((const FSE_symbolCompressionTransform*)(statePtr->symbolTT))[symbol]; | |||
const U16* const stateTable = (const U16*)(statePtr->stateTable); | |||
@@ -576,6 +527,39 @@ MEM_STATIC void FSE_flushCState(BIT_CStream_t* bitC, const FSE_CState_t* statePt | |||
} | |||
/* FSE_getMaxNbBits() : | |||
* Approximate maximum cost of a symbol, in bits. | |||
* Fractional get rounded up (i.e : a symbol with a normalized frequency of 3 gives the same result as a frequency of 2) | |||
* note 1 : assume symbolValue is valid (<= maxSymbolValue) | |||
* note 2 : if freq[symbolValue]==0, @return a fake cost of tableLog+1 bits */ | |||
MEM_STATIC U32 FSE_getMaxNbBits(const void* symbolTTPtr, U32 symbolValue) | |||
{ | |||
const FSE_symbolCompressionTransform* symbolTT = (const FSE_symbolCompressionTransform*) symbolTTPtr; | |||
return (symbolTT[symbolValue].deltaNbBits + ((1<<16)-1)) >> 16; | |||
} | |||
/* FSE_bitCost() : | |||
* Approximate symbol cost, as fractional value, using fixed-point format (accuracyLog fractional bits) | |||
* note 1 : assume symbolValue is valid (<= maxSymbolValue) | |||
* note 2 : if freq[symbolValue]==0, @return a fake cost of tableLog+1 bits */ | |||
MEM_STATIC U32 FSE_bitCost(const void* symbolTTPtr, U32 tableLog, U32 symbolValue, U32 accuracyLog) | |||
{ | |||
const FSE_symbolCompressionTransform* symbolTT = (const FSE_symbolCompressionTransform*) symbolTTPtr; | |||
U32 const minNbBits = symbolTT[symbolValue].deltaNbBits >> 16; | |||
U32 const threshold = (minNbBits+1) << 16; | |||
assert(tableLog < 16); | |||
assert(accuracyLog < 31-tableLog); /* ensure enough room for renormalization double shift */ | |||
{ U32 const tableSize = 1 << tableLog; | |||
U32 const deltaFromThreshold = threshold - (symbolTT[symbolValue].deltaNbBits + tableSize); | |||
U32 const normalizedDeltaFromThreshold = (deltaFromThreshold << accuracyLog) >> tableLog; /* linear interpolation (very approximate) */ | |||
U32 const bitMultiplier = 1 << accuracyLog; | |||
assert(symbolTT[symbolValue].deltaNbBits + tableSize <= threshold); | |||
assert(normalizedDeltaFromThreshold <= bitMultiplier); | |||
return (minNbBits+1)*bitMultiplier - normalizedDeltaFromThreshold; | |||
} | |||
} | |||
/* ====== Decompression ====== */ | |||
typedef struct { |
@@ -1,35 +1,15 @@ | |||
/* ****************************************************************** | |||
FSE : Finite State Entropy encoder | |||
Copyright (C) 2013-2015, Yann Collet. | |||
BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) | |||
Redistribution and use in source and binary forms, with or without | |||
modification, are permitted provided that the following conditions are | |||
met: | |||
* Redistributions of source code must retain the above copyright | |||
notice, this list of conditions and the following disclaimer. | |||
* Redistributions in binary form must reproduce the above | |||
copyright notice, this list of conditions and the following disclaimer | |||
in the documentation and/or other materials provided with the | |||
distribution. | |||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
You can contact the author at : | |||
- FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy | |||
- Public forum : https://groups.google.com/forum/#!forum/lz4c | |||
* FSE : Finite State Entropy encoder | |||
* Copyright (c) 2013-2020, Yann Collet, Facebook, Inc. | |||
* | |||
* You can contact the author at : | |||
* - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy | |||
* - Public forum : https://groups.google.com/forum/#!forum/lz4c | |||
* | |||
* This source code is licensed under both the BSD-style license (found in the | |||
* LICENSE file in the root directory of this source tree) and the GPLv2 (found | |||
* in the COPYING file in the root directory of this source tree). | |||
* You may select, at your option, one of the above-listed licenses. | |||
****************************************************************** */ | |||
/* ************************************************************** | |||
@@ -37,9 +17,11 @@ | |||
****************************************************************/ | |||
#include <stdlib.h> /* malloc, free, qsort */ | |||
#include <string.h> /* memcpy, memset */ | |||
#include <stdio.h> /* printf (debug) */ | |||
#include "bitstream.h" | |||
#include "compiler.h" | |||
#include "mem.h" /* U32, U16, etc. */ | |||
#include "debug.h" /* assert, DEBUGLOG */ | |||
#include "hist.h" /* HIST_count_wksp */ | |||
#include "bitstream.h" | |||
#define FSE_STATIC_LINKING_ONLY | |||
#include "fse.h" | |||
#include "error_private.h" | |||
@@ -49,7 +31,6 @@ | |||
* Error Management | |||
****************************************************************/ | |||
#define FSE_isError ERR_isError | |||
#define FSE_STATIC_ASSERT(c) { enum { FSE_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */ | |||
/* ************************************************************** | |||
@@ -82,7 +63,9 @@ | |||
* wkspSize should be sized to handle worst case situation, which is `1<<max_tableLog * sizeof(FSE_FUNCTION_TYPE)` | |||
* workSpace must also be properly aligned with FSE_FUNCTION_TYPE requirements | |||
*/ | |||
size_t FSE_buildCTable_wksp(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize) | |||
size_t FSE_buildCTable_wksp(FSE_CTable* ct, | |||
const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, | |||
void* workSpace, size_t wkspSize) | |||
{ | |||
U32 const tableSize = 1 << tableLog; | |||
U32 const tableMask = tableSize - 1; | |||
@@ -100,14 +83,19 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct, const short* normalizedCounter, unsi | |||
if (((size_t)1 << tableLog) * sizeof(FSE_FUNCTION_TYPE) > wkspSize) return ERROR(tableLog_tooLarge); | |||
tableU16[-2] = (U16) tableLog; | |||
tableU16[-1] = (U16) maxSymbolValue; | |||
assert(tableLog < 16); /* required for threshold strategy to work */ | |||
/* For explanations on how to distribute symbol values over the table : | |||
* http://fastcompression.blogspot.fr/2014/02/fse-distributing-symbol-values.html */ | |||
* http://fastcompression.blogspot.fr/2014/02/fse-distributing-symbol-values.html */ | |||
#ifdef __clang_analyzer__ | |||
memset(tableSymbol, 0, sizeof(*tableSymbol) * tableSize); /* useless initialization, just to keep scan-build happy */ | |||
#endif | |||
/* symbol start positions */ | |||
{ U32 u; | |||
cumul[0] = 0; | |||
for (u=1; u<=maxSymbolValue+1; u++) { | |||
for (u=1; u <= maxSymbolValue+1; u++) { | |||
if (normalizedCounter[u-1]==-1) { /* Low proba symbol */ | |||
cumul[u] = cumul[u-1] + 1; | |||
tableSymbol[highThreshold--] = (FSE_FUNCTION_TYPE)(u-1); | |||
@@ -121,14 +109,16 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct, const short* normalizedCounter, unsi | |||
{ U32 position = 0; | |||
U32 symbol; | |||
for (symbol=0; symbol<=maxSymbolValue; symbol++) { | |||
int nbOccurences; | |||
for (nbOccurences=0; nbOccurences<normalizedCounter[symbol]; nbOccurences++) { | |||
int nbOccurrences; | |||
int const freq = normalizedCounter[symbol]; | |||
for (nbOccurrences=0; nbOccurrences<freq; nbOccurrences++) { | |||
tableSymbol[position] = (FSE_FUNCTION_TYPE)symbol; | |||
position = (position + step) & tableMask; | |||
while (position > highThreshold) position = (position + step) & tableMask; /* Low proba area */ | |||
while (position > highThreshold) | |||
position = (position + step) & tableMask; /* Low proba area */ | |||
} } | |||
if (position!=0) return ERROR(GENERIC); /* Must have gone through all positions */ | |||
assert(position==0); /* Must have initialized all positions */ | |||
} | |||
/* Build table */ | |||
@@ -143,7 +133,10 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct, const short* normalizedCounter, unsi | |||
for (s=0; s<=maxSymbolValue; s++) { | |||
switch (normalizedCounter[s]) | |||
{ | |||
case 0: break; | |||
case 0: | |||
/* filling nonetheless, for compatibility with FSE_getMaxNbBits() */ | |||
symbolTT[s].deltaNbBits = ((tableLog+1) << 16) - (1<<tableLog); | |||
break; | |||
case -1: | |||
case 1: | |||
@@ -160,6 +153,18 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct, const short* normalizedCounter, unsi | |||
total += normalizedCounter[s]; | |||
} } } } | |||
#if 0 /* debug : symbol costs */ | |||
DEBUGLOG(5, "\n --- table statistics : "); | |||
{ U32 symbol; | |||
for (symbol=0; symbol<=maxSymbolValue; symbol++) { | |||
DEBUGLOG(5, "%3u: w=%3i, maxBits=%u, fracBits=%.2f", | |||
symbol, normalizedCounter[symbol], | |||
FSE_getMaxNbBits(symbolTT, symbol), | |||
(double)FSE_bitCost(symbolTT, tableLog, symbol, 8) / 256); | |||
} | |||
} | |||
#endif | |||
return 0; | |||
} | |||
@@ -174,8 +179,9 @@ size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned | |||
#ifndef FSE_COMMONDEFS_ONLY | |||
/*-************************************************************** | |||
* FSE NCount encoding-decoding | |||
* FSE NCount encoding | |||
****************************************************************/ | |||
size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog) | |||
{ | |||
@@ -183,9 +189,10 @@ size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog) | |||
return maxSymbolValue ? maxHeaderSize : FSE_NCOUNTBOUND; /* maxSymbolValue==0 ? use default */ | |||
} | |||
static size_t FSE_writeNCount_generic (void* header, size_t headerBufferSize, | |||
const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, | |||
unsigned writeIsSafe) | |||
static size_t | |||
FSE_writeNCount_generic (void* header, size_t headerBufferSize, | |||
const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, | |||
unsigned writeIsSafe) | |||
{ | |||
BYTE* const ostart = (BYTE*) header; | |||
BYTE* out = ostart; | |||
@@ -194,13 +201,12 @@ static size_t FSE_writeNCount_generic (void* header, size_t headerBufferSize, | |||
const int tableSize = 1 << tableLog; | |||
int remaining; | |||
int threshold; | |||
U32 bitStream; | |||
int bitCount; | |||
unsigned charnum = 0; | |||
int previous0 = 0; | |||
U32 bitStream = 0; | |||
int bitCount = 0; | |||
unsigned symbol = 0; | |||
unsigned const alphabetSize = maxSymbolValue + 1; | |||
int previousIs0 = 0; | |||
bitStream = 0; | |||
bitCount = 0; | |||
/* Table Size */ | |||
bitStream += (tableLog-FSE_MIN_TABLELOG) << bitCount; | |||
bitCount += 4; | |||
@@ -210,48 +216,53 @@ static size_t FSE_writeNCount_generic (void* header, size_t headerBufferSize, | |||
threshold = tableSize; | |||
nbBits = tableLog+1; | |||
while (remaining>1) { /* stops at 1 */ | |||
if (previous0) { | |||
unsigned start = charnum; | |||
while (!normalizedCounter[charnum]) charnum++; | |||
while (charnum >= start+24) { | |||
while ((symbol < alphabetSize) && (remaining>1)) { /* stops at 1 */ | |||
if (previousIs0) { | |||
unsigned start = symbol; | |||
while ((symbol < alphabetSize) && !normalizedCounter[symbol]) symbol++; | |||
if (symbol == alphabetSize) break; /* incorrect distribution */ | |||
while (symbol >= start+24) { | |||
start+=24; | |||
bitStream += 0xFFFFU << bitCount; | |||
if ((!writeIsSafe) && (out > oend-2)) return ERROR(dstSize_tooSmall); /* Buffer overflow */ | |||
if ((!writeIsSafe) && (out > oend-2)) | |||
return ERROR(dstSize_tooSmall); /* Buffer overflow */ | |||
out[0] = (BYTE) bitStream; | |||
out[1] = (BYTE)(bitStream>>8); | |||
out+=2; | |||
bitStream>>=16; | |||
} | |||
while (charnum >= start+3) { | |||
while (symbol >= start+3) { | |||
start+=3; | |||
bitStream += 3 << bitCount; | |||
bitCount += 2; | |||
} | |||
bitStream += (charnum-start) << bitCount; | |||
bitStream += (symbol-start) << bitCount; | |||
bitCount += 2; | |||
if (bitCount>16) { | |||
if ((!writeIsSafe) && (out > oend - 2)) return ERROR(dstSize_tooSmall); /* Buffer overflow */ | |||
if ((!writeIsSafe) && (out > oend - 2)) | |||
return ERROR(dstSize_tooSmall); /* Buffer overflow */ | |||
out[0] = (BYTE)bitStream; | |||
out[1] = (BYTE)(bitStream>>8); | |||
out += 2; | |||
bitStream >>= 16; | |||
bitCount -= 16; | |||
} } | |||
{ int count = normalizedCounter[charnum++]; | |||
int const max = (2*threshold-1)-remaining; | |||
{ int count = normalizedCounter[symbol++]; | |||
int const max = (2*threshold-1) - remaining; | |||
remaining -= count < 0 ? -count : count; | |||
count++; /* +1 for extra accuracy */ | |||
if (count>=threshold) count += max; /* [0..max[ [max..threshold[ (...) [threshold+max 2*threshold[ */ | |||
if (count>=threshold) | |||
count += max; /* [0..max[ [max..threshold[ (...) [threshold+max 2*threshold[ */ | |||
bitStream += count << bitCount; | |||
bitCount += nbBits; | |||
bitCount -= (count<max); | |||
previous0 = (count==1); | |||
previousIs0 = (count==1); | |||
if (remaining<1) return ERROR(GENERIC); | |||
while (remaining<threshold) nbBits--, threshold>>=1; | |||
while (remaining<threshold) { nbBits--; threshold>>=1; } | |||
} | |||
if (bitCount>16) { | |||
if ((!writeIsSafe) && (out > oend - 2)) return ERROR(dstSize_tooSmall); /* Buffer overflow */ | |||
if ((!writeIsSafe) && (out > oend - 2)) | |||
return ERROR(dstSize_tooSmall); /* Buffer overflow */ | |||
out[0] = (BYTE)bitStream; | |||
out[1] = (BYTE)(bitStream>>8); | |||
out += 2; | |||
@@ -259,19 +270,23 @@ static size_t FSE_writeNCount_generic (void* header, size_t headerBufferSize, | |||
bitCount -= 16; | |||
} } | |||
if (remaining != 1) | |||
return ERROR(GENERIC); /* incorrect normalized distribution */ | |||
assert(symbol <= alphabetSize); | |||
/* flush remaining bitStream */ | |||
if ((!writeIsSafe) && (out > oend - 2)) return ERROR(dstSize_tooSmall); /* Buffer overflow */ | |||
if ((!writeIsSafe) && (out > oend - 2)) | |||
return ERROR(dstSize_tooSmall); /* Buffer overflow */ | |||
out[0] = (BYTE)bitStream; | |||
out[1] = (BYTE)(bitStream>>8); | |||
out+= (bitCount+7) /8; | |||
if (charnum > maxSymbolValue + 1) return ERROR(GENERIC); | |||
return (out-ostart); | |||
} | |||
size_t FSE_writeNCount (void* buffer, size_t bufferSize, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog) | |||
size_t FSE_writeNCount (void* buffer, size_t bufferSize, | |||
const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog) | |||
{ | |||
if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge); /* Unsupported */ | |||
if (tableLog < FSE_MIN_TABLELOG) return ERROR(GENERIC); /* Unsupported */ | |||
@@ -279,171 +294,13 @@ size_t FSE_writeNCount (void* buffer, size_t bufferSize, const short* normalized | |||
if (bufferSize < FSE_NCountWriteBound(maxSymbolValue, tableLog)) | |||
return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 0); | |||
return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 1); | |||
return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 1 /* write in buffer is safe */); | |||
} | |||
/*-************************************************************** | |||
* Counting histogram | |||
****************************************************************/ | |||
/*! FSE_count_simple | |||
This function counts byte values within `src`, and store the histogram into table `count`. | |||
It doesn't use any additional memory. | |||
But this function is unsafe : it doesn't check that all values within `src` can fit into `count`. | |||
For this reason, prefer using a table `count` with 256 elements. | |||
@return : count of most numerous element | |||
*/ | |||
size_t FSE_count_simple(unsigned* count, unsigned* maxSymbolValuePtr, | |||
const void* src, size_t srcSize) | |||
{ | |||
const BYTE* ip = (const BYTE*)src; | |||
const BYTE* const end = ip + srcSize; | |||
unsigned maxSymbolValue = *maxSymbolValuePtr; | |||
unsigned max=0; | |||
memset(count, 0, (maxSymbolValue+1)*sizeof(*count)); | |||
if (srcSize==0) { *maxSymbolValuePtr = 0; return 0; } | |||
while (ip<end) count[*ip++]++; | |||
while (!count[maxSymbolValue]) maxSymbolValue--; | |||
*maxSymbolValuePtr = maxSymbolValue; | |||
{ U32 s; for (s=0; s<=maxSymbolValue; s++) if (count[s] > max) max = count[s]; } | |||
return (size_t)max; | |||
} | |||
/* FSE_count_parallel_wksp() : | |||
* Same as FSE_count_parallel(), but using an externally provided scratch buffer. | |||
* `workSpace` size must be a minimum of `1024 * sizeof(unsigned)`` */ | |||
static size_t FSE_count_parallel_wksp( | |||
unsigned* count, unsigned* maxSymbolValuePtr, | |||
const void* source, size_t sourceSize, | |||
unsigned checkMax, unsigned* const workSpace) | |||
{ | |||
const BYTE* ip = (const BYTE*)source; | |||
const BYTE* const iend = ip+sourceSize; | |||
unsigned maxSymbolValue = *maxSymbolValuePtr; | |||
unsigned max=0; | |||
U32* const Counting1 = workSpace; | |||
U32* const Counting2 = Counting1 + 256; | |||
U32* const Counting3 = Counting2 + 256; | |||
U32* const Counting4 = Counting3 + 256; | |||
memset(Counting1, 0, 4*256*sizeof(unsigned)); | |||
/* safety checks */ | |||
if (!sourceSize) { | |||
memset(count, 0, maxSymbolValue + 1); | |||
*maxSymbolValuePtr = 0; | |||
return 0; | |||
} | |||
if (!maxSymbolValue) maxSymbolValue = 255; /* 0 == default */ | |||
/* by stripes of 16 bytes */ | |||
{ U32 cached = MEM_read32(ip); ip += 4; | |||
while (ip < iend-15) { | |||
U32 c = cached; cached = MEM_read32(ip); ip += 4; | |||
Counting1[(BYTE) c ]++; | |||
Counting2[(BYTE)(c>>8) ]++; | |||
Counting3[(BYTE)(c>>16)]++; | |||
Counting4[ c>>24 ]++; | |||
c = cached; cached = MEM_read32(ip); ip += 4; | |||
Counting1[(BYTE) c ]++; | |||
Counting2[(BYTE)(c>>8) ]++; | |||
Counting3[(BYTE)(c>>16)]++; | |||
Counting4[ c>>24 ]++; | |||
c = cached; cached = MEM_read32(ip); ip += 4; | |||
Counting1[(BYTE) c ]++; | |||
Counting2[(BYTE)(c>>8) ]++; | |||
Counting3[(BYTE)(c>>16)]++; | |||
Counting4[ c>>24 ]++; | |||
c = cached; cached = MEM_read32(ip); ip += 4; | |||
Counting1[(BYTE) c ]++; | |||
Counting2[(BYTE)(c>>8) ]++; | |||
Counting3[(BYTE)(c>>16)]++; | |||
Counting4[ c>>24 ]++; | |||
} | |||
ip-=4; | |||
} | |||
/* finish last symbols */ | |||
while (ip<iend) Counting1[*ip++]++; | |||
if (checkMax) { /* verify stats will fit into destination table */ | |||
U32 s; for (s=255; s>maxSymbolValue; s--) { | |||
Counting1[s] += Counting2[s] + Counting3[s] + Counting4[s]; | |||
if (Counting1[s]) return ERROR(maxSymbolValue_tooSmall); | |||
} } | |||
{ U32 s; for (s=0; s<=maxSymbolValue; s++) { | |||
count[s] = Counting1[s] + Counting2[s] + Counting3[s] + Counting4[s]; | |||
if (count[s] > max) max = count[s]; | |||
} } | |||
while (!count[maxSymbolValue]) maxSymbolValue--; | |||
*maxSymbolValuePtr = maxSymbolValue; | |||
return (size_t)max; | |||
} | |||
/* FSE_countFast_wksp() : | |||
* Same as FSE_countFast(), but using an externally provided scratch buffer. | |||
* `workSpace` size must be table of >= `1024` unsigned */ | |||
/* FSE_countFast_wksp() :
 * Same as FSE_countFast(), but using an externally provided scratch buffer.
 * `workSpace` must be a table of >= 1024 unsigned. */
size_t FSE_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
                     const void* source, size_t sourceSize, unsigned* workSpace)
{
    /* for large inputs, the 4-way parallel counter amortizes its setup cost */
    if (sourceSize >= 1500)
        return FSE_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, 0 /* no symbol check */, workSpace);
    /* small inputs : the simple single-table loop is faster */
    return FSE_count_simple(count, maxSymbolValuePtr, source, sourceSize);
}
/* fast variant (unsafe : won't check if src contains values beyond count[] limit) */ | |||
/* FSE_countFast() :
 * fast variant (unsafe : won't check if src contains values beyond count[] limit) */
size_t FSE_countFast(unsigned* count, unsigned* maxSymbolValuePtr,
                     const void* source, size_t sourceSize)
{
    unsigned scratch[1024];   /* on-stack workspace for the _wksp variant */
    return FSE_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, scratch);
}
/* FSE_count_wksp() : | |||
* Same as FSE_count(), but using an externally provided scratch buffer. | |||
* `workSpace` size must be table of >= `1024` unsigned */ | |||
/* FSE_count_wksp() :
 * Same as FSE_count(), but using an externally provided scratch buffer.
 * `workSpace` must be a table of >= 1024 unsigned.
 * NOTE(review): the parallel variant primes with a 4-byte read — presumably
 * callers never pass 0 < sourceSize < 4 here; confirm. */
size_t FSE_count_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
                 const void* source, size_t sourceSize, unsigned* workSpace)
{
    if (*maxSymbolValuePtr >= 255) {
        /* full byte alphabet : no symbol-value check needed */
        *maxSymbolValuePtr = 255;
        return FSE_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, workSpace);
    }
    /* restricted alphabet : must verify no byte exceeds the declared maximum */
    return FSE_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, 1, workSpace);
}
/* FSE_count() :
 * same as FSE_count_wksp(), using an on-stack scratch buffer */
size_t FSE_count(unsigned* count, unsigned* maxSymbolValuePtr,
                 const void* src, size_t srcSize)
{
    unsigned scratch[1024];   /* scratch area for the _wksp variant */
    return FSE_count_wksp(count, maxSymbolValuePtr, src, srcSize, scratch);
}
/*-************************************************************** | |||
* FSE Compression Code | |||
****************************************************************/ | |||
/*! FSE_sizeof_CTable() : | |||
FSE_CTable is a variable size structure which contains : | |||
`U16 tableLog;` | |||
`U16 maxSymbolValue;` | |||
`U16 nextStateNumber[1 << tableLog];` // This size is variable | |||
`FSE_symbolCompressionTransform symbolTT[maxSymbolValue+1];` // This size is variable | |||
Allocation is manual (C standard does not support variable-size structures). | |||
*/ | |||
size_t FSE_sizeof_CTable (unsigned maxSymbolValue, unsigned tableLog) | |||
{ | |||
if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge); | |||
return FSE_CTABLE_SIZE_U32 (tableLog, maxSymbolValue) * sizeof(U32); | |||
} | |||
FSE_CTable* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog) | |||
{ | |||
@@ -458,7 +315,7 @@ void FSE_freeCTable (FSE_CTable* ct) { free(ct); } | |||
/* provides the minimum logSize to safely represent a distribution */ | |||
static unsigned FSE_minTableLog(size_t srcSize, unsigned maxSymbolValue) | |||
{ | |||
U32 minBitsSrc = BIT_highbit32((U32)(srcSize - 1)) + 1; | |||
U32 minBitsSrc = BIT_highbit32((U32)(srcSize)) + 1; | |||
U32 minBitsSymbols = BIT_highbit32(maxSymbolValue) + 2; | |||
U32 minBits = minBitsSrc < minBitsSymbols ? minBitsSrc : minBitsSymbols; | |||
assert(srcSize > 1); /* Not supported, RLE should be used instead */ | |||
@@ -521,6 +378,9 @@ static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count, | |||
} | |||
ToDistribute = (1 << tableLog) - distributed; | |||
if (ToDistribute == 0) | |||
return 0; | |||
if ((total / ToDistribute) > lowOne) { | |||
/* risk of rounding to zero */ | |||
lowOne = (U32)((total * 3) / (ToDistribute * 2)); | |||
@@ -540,7 +400,7 @@ static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count, | |||
find max, then give all remaining points to max */ | |||
U32 maxV = 0, maxC = 0; | |||
for (s=0; s<=maxSymbolValue; s++) | |||
if (count[s] > maxC) maxV=s, maxC=count[s]; | |||
if (count[s] > maxC) { maxV=s; maxC=count[s]; } | |||
norm[maxV] += (short)ToDistribute; | |||
return 0; | |||
} | |||
@@ -548,7 +408,7 @@ static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count, | |||
if (total == 0) { | |||
/* all of the symbols were low enough for the lowOne or lowThreshold */ | |||
for (s=0; ToDistribute > 0; s = (s+1)%(maxSymbolValue+1)) | |||
if (norm[s] > 0) ToDistribute--, norm[s]++; | |||
if (norm[s] > 0) { ToDistribute--; norm[s]++; } | |||
return 0; | |||
} | |||
@@ -582,7 +442,7 @@ size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog, | |||
if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge); /* Unsupported size */ | |||
if (tableLog < FSE_minTableLog(total, maxSymbolValue)) return ERROR(GENERIC); /* Too small tableLog, compression potentially impossible */ | |||
{ U32 const rtbTable[] = { 0, 473195, 504333, 520860, 550000, 700000, 750000, 830000 }; | |||
{ static U32 const rtbTable[] = { 0, 473195, 504333, 520860, 550000, 700000, 750000, 830000 }; | |||
U64 const scale = 62 - tableLog; | |||
U64 const step = ((U64)1<<62) / total; /* <== here, one division ! */ | |||
U64 const vStep = 1ULL<<(scale-20); | |||
@@ -604,7 +464,7 @@ size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog, | |||
U64 restToBeat = vStep * rtbTable[proba]; | |||
proba += (count[s]*step) - ((U64)proba<<scale) > restToBeat; | |||
} | |||
if (proba > largestP) largestP=proba, largest=s; | |||
if (proba > largestP) { largestP=proba; largest=s; } | |||
normalizedCounter[s] = proba; | |||
stillToDistribute -= proba; | |||
} } | |||
@@ -621,11 +481,11 @@ size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog, | |||
U32 s; | |||
U32 nTotal = 0; | |||
for (s=0; s<=maxSymbolValue; s++) | |||
printf("%3i: %4i \n", s, normalizedCounter[s]); | |||
RAWLOG(2, "%3i: %4i \n", s, normalizedCounter[s]); | |||
for (s=0; s<=maxSymbolValue; s++) | |||
nTotal += abs(normalizedCounter[s]); | |||
if (nTotal != (1U<<tableLog)) | |||
printf("Warning !!! Total == %u != %u !!!", nTotal, 1U<<tableLog); | |||
RAWLOG(2, "Warning !!! Total == %u != %u !!!", nTotal, 1U<<tableLog); | |||
getchar(); | |||
} | |||
#endif | |||
@@ -765,9 +625,6 @@ size_t FSE_compress_usingCTable (void* dst, size_t dstSize, | |||
size_t FSE_compressBound(size_t size) { return FSE_COMPRESSBOUND(size); } | |||
#define CHECK_V_F(e, f) size_t const e = f; if (ERR_isError(e)) return e | |||
#define CHECK_F(f) { CHECK_V_F(_var_err__, f); } | |||
/* FSE_compress_wksp() : | |||
* Same as FSE_compress2(), but using an externally allocated scratch buffer (`workSpace`). | |||
* `wkspSize` size must be `(1<<tableLog)`. | |||
@@ -778,7 +635,7 @@ size_t FSE_compress_wksp (void* dst, size_t dstSize, const void* src, size_t src | |||
BYTE* op = ostart; | |||
BYTE* const oend = ostart + dstSize; | |||
U32 count[FSE_MAX_SYMBOL_VALUE+1]; | |||
unsigned count[FSE_MAX_SYMBOL_VALUE+1]; | |||
S16 norm[FSE_MAX_SYMBOL_VALUE+1]; | |||
FSE_CTable* CTable = (FSE_CTable*)workSpace; | |||
size_t const CTableSize = FSE_CTABLE_SIZE_U32(tableLog, maxSymbolValue); | |||
@@ -792,7 +649,7 @@ size_t FSE_compress_wksp (void* dst, size_t dstSize, const void* src, size_t src | |||
if (!tableLog) tableLog = FSE_DEFAULT_TABLELOG; | |||
/* Scan input and build symbol stats */ | |||
{ CHECK_V_F(maxCount, FSE_count_wksp(count, &maxSymbolValue, src, srcSize, (unsigned*)scratchBuffer) ); | |||
{ CHECK_V_F(maxCount, HIST_count_wksp(count, &maxSymbolValue, src, srcSize, scratchBuffer, scratchBufferSize) ); | |||
if (maxCount == srcSize) return 1; /* only a single symbol in src : rle */ | |||
if (maxCount == 1) return 0; /* each symbol present maximum once => not compressible */ | |||
if (maxCount < (srcSize >> 7)) return 0; /* Heuristic : not compressible enough */ | |||
@@ -827,7 +684,7 @@ typedef struct { | |||
size_t FSE_compress2 (void* dst, size_t dstCapacity, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog) | |||
{ | |||
fseWkspMax_t scratchBuffer; | |||
FSE_STATIC_ASSERT(sizeof(scratchBuffer) >= FSE_WKSP_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)); /* compilation failures here means scratchBuffer is not large enough */ | |||
DEBUG_STATIC_ASSERT(sizeof(scratchBuffer) >= FSE_WKSP_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)); /* compilation failures here means scratchBuffer is not large enough */ | |||
if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge); | |||
return FSE_compress_wksp(dst, dstCapacity, src, srcSize, maxSymbolValue, tableLog, &scratchBuffer, sizeof(scratchBuffer)); | |||
} |
@@ -1,35 +1,15 @@ | |||
/* ****************************************************************** | |||
FSE : Finite State Entropy decoder | |||
Copyright (C) 2013-2015, Yann Collet. | |||
BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) | |||
Redistribution and use in source and binary forms, with or without | |||
modification, are permitted provided that the following conditions are | |||
met: | |||
* Redistributions of source code must retain the above copyright | |||
notice, this list of conditions and the following disclaimer. | |||
* Redistributions in binary form must reproduce the above | |||
copyright notice, this list of conditions and the following disclaimer | |||
in the documentation and/or other materials provided with the | |||
distribution. | |||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
You can contact the author at : | |||
- FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy | |||
- Public forum : https://groups.google.com/forum/#!forum/lz4c | |||
* FSE : Finite State Entropy decoder | |||
* Copyright (c) 2013-2020, Yann Collet, Facebook, Inc. | |||
* | |||
* You can contact the author at : | |||
* - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy | |||
* - Public forum : https://groups.google.com/forum/#!forum/lz4c | |||
* | |||
* This source code is licensed under both the BSD-style license (found in the | |||
* LICENSE file in the root directory of this source tree) and the GPLv2 (found | |||
* in the COPYING file in the root directory of this source tree). | |||
* You may select, at your option, one of the above-listed licenses. | |||
****************************************************************** */ | |||
@@ -49,10 +29,7 @@ | |||
* Error Management | |||
****************************************************************/ | |||
#define FSE_isError ERR_isError | |||
#define FSE_STATIC_ASSERT(c) { enum { FSE_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */ | |||
/* check and forward error code */ | |||
#define CHECK_F(f) { size_t const e = f; if (FSE_isError(e)) return e; } | |||
#define FSE_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c) /* use only *after* variable declarations */ | |||
/* ************************************************************** | |||
@@ -139,8 +116,8 @@ size_t FSE_buildDTable(FSE_DTable* dt, const short* normalizedCounter, unsigned | |||
{ U32 u; | |||
for (u=0; u<tableSize; u++) { | |||
FSE_FUNCTION_TYPE const symbol = (FSE_FUNCTION_TYPE)(tableDecode[u].symbol); | |||
U16 nextState = symbolNext[symbol]++; | |||
tableDecode[u].nbBits = (BYTE) (tableLog - BIT_highbit32 ((U32)nextState) ); | |||
U32 const nextState = symbolNext[symbol]++; | |||
tableDecode[u].nbBits = (BYTE) (tableLog - BIT_highbit32(nextState) ); | |||
tableDecode[u].newState = (U16) ( (nextState << tableDecode[u].nbBits) - tableSize); | |||
} } | |||
@@ -285,7 +262,7 @@ size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size | |||
/* normal FSE decoding mode */ | |||
size_t const NCountLength = FSE_readNCount (counting, &maxSymbolValue, &tableLog, istart, cSrcSize); | |||
if (FSE_isError(NCountLength)) return NCountLength; | |||
//if (NCountLength >= cSrcSize) return ERROR(srcSize_wrong); /* too small input size; supposed to be already checked in NCountLength, only remaining case : NCountLength==cSrcSize */ | |||
/* if (NCountLength >= cSrcSize) return ERROR(srcSize_wrong); */ /* too small input size; supposed to be already checked in NCountLength, only remaining case : NCountLength==cSrcSize */ | |||
if (tableLog > maxLog) return ERROR(tableLog_tooLarge); | |||
ip += NCountLength; | |||
cSrcSize -= NCountLength; |
@@ -0,0 +1,183 @@ | |||
/* ****************************************************************** | |||
* hist : Histogram functions | |||
* part of Finite State Entropy project | |||
* Copyright (c) 2013-2020, Yann Collet, Facebook, Inc. | |||
* | |||
* You can contact the author at : | |||
* - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy | |||
* - Public forum : https://groups.google.com/forum/#!forum/lz4c | |||
* | |||
* This source code is licensed under both the BSD-style license (found in the | |||
* LICENSE file in the root directory of this source tree) and the GPLv2 (found | |||
* in the COPYING file in the root directory of this source tree). | |||
* You may select, at your option, one of the above-listed licenses. | |||
****************************************************************** */ | |||
/* --- dependencies --- */ | |||
#include "mem.h" /* U32, BYTE, etc. */ | |||
#include "debug.h" /* assert, DEBUGLOG */ | |||
#include "error_private.h" /* ERROR */ | |||
#include "hist.h" | |||
/* --- Error management --- */ | |||
unsigned HIST_isError(size_t code) { return ERR_isError(code); } | |||
/*-************************************************************** | |||
* Histogram functions | |||
****************************************************************/ | |||
unsigned HIST_count_simple(unsigned* count, unsigned* maxSymbolValuePtr, | |||
const void* src, size_t srcSize) | |||
{ | |||
const BYTE* ip = (const BYTE*)src; | |||
const BYTE* const end = ip + srcSize; | |||
unsigned maxSymbolValue = *maxSymbolValuePtr; | |||
unsigned largestCount=0; | |||
memset(count, 0, (maxSymbolValue+1) * sizeof(*count)); | |||
if (srcSize==0) { *maxSymbolValuePtr = 0; return 0; } | |||
while (ip<end) { | |||
assert(*ip <= maxSymbolValue); | |||
count[*ip++]++; | |||
} | |||
while (!count[maxSymbolValue]) maxSymbolValue--; | |||
*maxSymbolValuePtr = maxSymbolValue; | |||
{ U32 s; | |||
for (s=0; s<=maxSymbolValue; s++) | |||
if (count[s] > largestCount) largestCount = count[s]; | |||
} | |||
return largestCount; | |||
} | |||
typedef enum { trustInput, checkMaxSymbolValue } HIST_checkInput_e; | |||
/* HIST_count_parallel_wksp() : | |||
* store histogram into 4 intermediate tables, recombined at the end. | |||
* this design makes better use of OoO cpus, | |||
* and is noticeably faster when some values are heavily repeated. | |||
* But it needs some additional workspace for intermediate tables. | |||
* `workSpace` size must be a table of size >= HIST_WKSP_SIZE_U32. | |||
* @return : largest histogram frequency, | |||
* or an error code (notably when histogram would be larger than *maxSymbolValuePtr). */ | |||
static size_t HIST_count_parallel_wksp( | |||
unsigned* count, unsigned* maxSymbolValuePtr, | |||
const void* source, size_t sourceSize, | |||
HIST_checkInput_e check, | |||
U32* const workSpace) | |||
{ | |||
const BYTE* ip = (const BYTE*)source; | |||
const BYTE* const iend = ip+sourceSize; | |||
unsigned maxSymbolValue = *maxSymbolValuePtr; | |||
unsigned max=0; | |||
U32* const Counting1 = workSpace; | |||
U32* const Counting2 = Counting1 + 256; | |||
U32* const Counting3 = Counting2 + 256; | |||
U32* const Counting4 = Counting3 + 256; | |||
memset(workSpace, 0, 4*256*sizeof(unsigned)); | |||
/* safety checks */ | |||
if (!sourceSize) { | |||
memset(count, 0, maxSymbolValue + 1); | |||
*maxSymbolValuePtr = 0; | |||
return 0; | |||
} | |||
if (!maxSymbolValue) maxSymbolValue = 255; /* 0 == default */ | |||
/* by stripes of 16 bytes */ | |||
{ U32 cached = MEM_read32(ip); ip += 4; | |||
while (ip < iend-15) { | |||
U32 c = cached; cached = MEM_read32(ip); ip += 4; | |||
Counting1[(BYTE) c ]++; | |||
Counting2[(BYTE)(c>>8) ]++; | |||
Counting3[(BYTE)(c>>16)]++; | |||
Counting4[ c>>24 ]++; | |||
c = cached; cached = MEM_read32(ip); ip += 4; | |||
Counting1[(BYTE) c ]++; | |||
Counting2[(BYTE)(c>>8) ]++; | |||
Counting3[(BYTE)(c>>16)]++; | |||
Counting4[ c>>24 ]++; | |||
c = cached; cached = MEM_read32(ip); ip += 4; | |||
Counting1[(BYTE) c ]++; | |||
Counting2[(BYTE)(c>>8) ]++; | |||
Counting3[(BYTE)(c>>16)]++; | |||
Counting4[ c>>24 ]++; | |||
c = cached; cached = MEM_read32(ip); ip += 4; | |||
Counting1[(BYTE) c ]++; | |||
Counting2[(BYTE)(c>>8) ]++; | |||
Counting3[(BYTE)(c>>16)]++; | |||
Counting4[ c>>24 ]++; | |||
} | |||
ip-=4; | |||
} | |||
/* finish last symbols */ | |||
while (ip<iend) Counting1[*ip++]++; | |||
if (check) { /* verify stats will fit into destination table */ | |||
U32 s; for (s=255; s>maxSymbolValue; s--) { | |||
Counting1[s] += Counting2[s] + Counting3[s] + Counting4[s]; | |||
if (Counting1[s]) return ERROR(maxSymbolValue_tooSmall); | |||
} } | |||
{ U32 s; | |||
if (maxSymbolValue > 255) maxSymbolValue = 255; | |||
for (s=0; s<=maxSymbolValue; s++) { | |||
count[s] = Counting1[s] + Counting2[s] + Counting3[s] + Counting4[s]; | |||
if (count[s] > max) max = count[s]; | |||
} } | |||
while (!count[maxSymbolValue]) maxSymbolValue--; | |||
*maxSymbolValuePtr = maxSymbolValue; | |||
return (size_t)max; | |||
} | |||
/* HIST_countFast_wksp() : | |||
* Same as HIST_countFast(), but using an externally provided scratch buffer. | |||
* `workSpace` is a writable buffer which must be 4-bytes aligned, | |||
* `workSpaceSize` must be >= HIST_WKSP_SIZE | |||
*/ | |||
size_t HIST_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr, | |||
const void* source, size_t sourceSize, | |||
void* workSpace, size_t workSpaceSize) | |||
{ | |||
if (sourceSize < 1500) /* heuristic threshold */ | |||
return HIST_count_simple(count, maxSymbolValuePtr, source, sourceSize); | |||
if ((size_t)workSpace & 3) return ERROR(GENERIC); /* must be aligned on 4-bytes boundaries */ | |||
if (workSpaceSize < HIST_WKSP_SIZE) return ERROR(workSpace_tooSmall); | |||
return HIST_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, trustInput, (U32*)workSpace); | |||
} | |||
/* fast variant (unsafe : won't check if src contains values beyond count[] limit) */ | |||
size_t HIST_countFast(unsigned* count, unsigned* maxSymbolValuePtr, | |||
const void* source, size_t sourceSize) | |||
{ | |||
unsigned tmpCounters[HIST_WKSP_SIZE_U32]; | |||
return HIST_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, tmpCounters, sizeof(tmpCounters)); | |||
} | |||
/* HIST_count_wksp() : | |||
* Same as HIST_count(), but using an externally provided scratch buffer. | |||
* `workSpace` size must be table of >= HIST_WKSP_SIZE_U32 unsigned */ | |||
size_t HIST_count_wksp(unsigned* count, unsigned* maxSymbolValuePtr, | |||
const void* source, size_t sourceSize, | |||
void* workSpace, size_t workSpaceSize) | |||
{ | |||
if ((size_t)workSpace & 3) return ERROR(GENERIC); /* must be aligned on 4-bytes boundaries */ | |||
if (workSpaceSize < HIST_WKSP_SIZE) return ERROR(workSpace_tooSmall); | |||
if (*maxSymbolValuePtr < 255) | |||
return HIST_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, checkMaxSymbolValue, (U32*)workSpace); | |||
*maxSymbolValuePtr = 255; | |||
return HIST_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, workSpace, workSpaceSize); | |||
} | |||
size_t HIST_count(unsigned* count, unsigned* maxSymbolValuePtr, | |||
const void* src, size_t srcSize) | |||
{ | |||
unsigned tmpCounters[HIST_WKSP_SIZE_U32]; | |||
return HIST_count_wksp(count, maxSymbolValuePtr, src, srcSize, tmpCounters, sizeof(tmpCounters)); | |||
} |
@@ -0,0 +1,75 @@ | |||
/* ****************************************************************** | |||
* hist : Histogram functions | |||
* part of Finite State Entropy project | |||
* Copyright (c) 2013-2020, Yann Collet, Facebook, Inc. | |||
* | |||
* You can contact the author at : | |||
* - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy | |||
* - Public forum : https://groups.google.com/forum/#!forum/lz4c | |||
* | |||
* This source code is licensed under both the BSD-style license (found in the | |||
* LICENSE file in the root directory of this source tree) and the GPLv2 (found | |||
* in the COPYING file in the root directory of this source tree). | |||
* You may select, at your option, one of the above-listed licenses. | |||
****************************************************************** */ | |||
/* --- dependencies --- */ | |||
#include <stddef.h> /* size_t */ | |||
/* --- simple histogram functions --- */ | |||
/*! HIST_count(): | |||
* Provides the precise count of each byte within a table 'count'. | |||
* 'count' is a table of unsigned int, of minimum size (*maxSymbolValuePtr+1). | |||
* Updates *maxSymbolValuePtr with actual largest symbol value detected. | |||
* @return : count of the most frequent symbol (which isn't identified). | |||
* or an error code, which can be tested using HIST_isError(). | |||
* note : if return == srcSize, there is only one symbol. | |||
*/ | |||
size_t HIST_count(unsigned* count, unsigned* maxSymbolValuePtr, | |||
const void* src, size_t srcSize); | |||
unsigned HIST_isError(size_t code); /**< tells if a return value is an error code */ | |||
/* --- advanced histogram functions --- */ | |||
#define HIST_WKSP_SIZE_U32 1024 | |||
#define HIST_WKSP_SIZE (HIST_WKSP_SIZE_U32 * sizeof(unsigned)) | |||
/** HIST_count_wksp() : | |||
* Same as HIST_count(), but using an externally provided scratch buffer. | |||
* Benefit is this function will use very little stack space. | |||
* `workSpace` is a writable buffer which must be 4-bytes aligned, | |||
* `workSpaceSize` must be >= HIST_WKSP_SIZE | |||
*/ | |||
size_t HIST_count_wksp(unsigned* count, unsigned* maxSymbolValuePtr, | |||
const void* src, size_t srcSize, | |||
void* workSpace, size_t workSpaceSize); | |||
/** HIST_countFast() : | |||
* same as HIST_count(), but blindly trusts that all byte values within src are <= *maxSymbolValuePtr. | |||
* This function is unsafe, and will segfault if any value within `src` is `> *maxSymbolValuePtr` | |||
*/ | |||
size_t HIST_countFast(unsigned* count, unsigned* maxSymbolValuePtr, | |||
const void* src, size_t srcSize); | |||
/** HIST_countFast_wksp() : | |||
* Same as HIST_countFast(), but using an externally provided scratch buffer. | |||
* `workSpace` is a writable buffer which must be 4-bytes aligned, | |||
* `workSpaceSize` must be >= HIST_WKSP_SIZE | |||
*/ | |||
size_t HIST_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr, | |||
const void* src, size_t srcSize, | |||
void* workSpace, size_t workSpaceSize); | |||
/*! HIST_count_simple() : | |||
* Same as HIST_countFast(), this function is unsafe, | |||
* and will segfault if any value within `src` is `> *maxSymbolValuePtr`. | |||
* It is also a bit slower for large inputs. | |||
* However, it does not need any additional memory (not even on stack). | |||
* @return : count of the most frequent symbol. | |||
* Note this function doesn't produce any error (i.e. it must succeed). | |||
*/ | |||
unsigned HIST_count_simple(unsigned* count, unsigned* maxSymbolValuePtr, | |||
const void* src, size_t srcSize); |
@@ -1,35 +1,15 @@ | |||
/* ****************************************************************** | |||
Huffman coder, part of New Generation Entropy library | |||
header file | |||
Copyright (C) 2013-2016, Yann Collet. | |||
BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) | |||
Redistribution and use in source and binary forms, with or without | |||
modification, are permitted provided that the following conditions are | |||
met: | |||
* Redistributions of source code must retain the above copyright | |||
notice, this list of conditions and the following disclaimer. | |||
* Redistributions in binary form must reproduce the above | |||
copyright notice, this list of conditions and the following disclaimer | |||
in the documentation and/or other materials provided with the | |||
distribution. | |||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
You can contact the author at : | |||
- Source repository : https://github.com/Cyan4973/FiniteStateEntropy | |||
* huff0 huffman codec, | |||
* part of Finite State Entropy library | |||
* Copyright (c) 2013-2020, Yann Collet, Facebook, Inc. | |||
* | |||
* You can contact the author at : | |||
* - Source repository : https://github.com/Cyan4973/FiniteStateEntropy | |||
* | |||
* This source code is licensed under both the BSD-style license (found in the | |||
* LICENSE file in the root directory of this source tree) and the GPLv2 (found | |||
* in the COPYING file in the root directory of this source tree). | |||
* You may select, at your option, one of the above-listed licenses. | |||
****************************************************************** */ | |||
#if defined (__cplusplus) | |||
@@ -58,32 +38,32 @@ extern "C" { | |||
#endif | |||
/* *** simple functions *** */ | |||
/** | |||
HUF_compress() : | |||
Compress content from buffer 'src', of size 'srcSize', into buffer 'dst'. | |||
'dst' buffer must be already allocated. | |||
Compression runs faster if `dstCapacity` >= HUF_compressBound(srcSize). | |||
`srcSize` must be <= `HUF_BLOCKSIZE_MAX` == 128 KB. | |||
@return : size of compressed data (<= `dstCapacity`). | |||
Special values : if return == 0, srcData is not compressible => Nothing is stored within dst !!! | |||
if return == 1, srcData is a single repeated byte symbol (RLE compression). | |||
if HUF_isError(return), compression failed (more details using HUF_getErrorName()) | |||
*/ | |||
/* ========================== */ | |||
/* *** simple functions *** */ | |||
/* ========================== */ | |||
/** HUF_compress() : | |||
* Compress content from buffer 'src', of size 'srcSize', into buffer 'dst'. | |||
* 'dst' buffer must be already allocated. | |||
* Compression runs faster if `dstCapacity` >= HUF_compressBound(srcSize). | |||
* `srcSize` must be <= `HUF_BLOCKSIZE_MAX` == 128 KB. | |||
* @return : size of compressed data (<= `dstCapacity`). | |||
* Special values : if return == 0, srcData is not compressible => Nothing is stored within dst !!! | |||
* if HUF_isError(return), compression failed (more details using HUF_getErrorName()) | |||
*/ | |||
HUF_PUBLIC_API size_t HUF_compress(void* dst, size_t dstCapacity, | |||
const void* src, size_t srcSize); | |||
/** | |||
HUF_decompress() : | |||
Decompress HUF data from buffer 'cSrc', of size 'cSrcSize', | |||
into already allocated buffer 'dst', of minimum size 'dstSize'. | |||
`originalSize` : **must** be the ***exact*** size of original (uncompressed) data. | |||
Note : in contrast with FSE, HUF_decompress can regenerate | |||
RLE (cSrcSize==1) and uncompressed (cSrcSize==dstSize) data, | |||
because it knows size to regenerate. | |||
@return : size of regenerated data (== originalSize), | |||
or an error code, which can be tested using HUF_isError() | |||
*/ | |||
/** HUF_decompress() : | |||
* Decompress HUF data from buffer 'cSrc', of size 'cSrcSize', | |||
* into already allocated buffer 'dst', of minimum size 'dstSize'. | |||
* `originalSize` : **must** be the ***exact*** size of original (uncompressed) data. | |||
* Note : in contrast with FSE, HUF_decompress can regenerate | |||
* RLE (cSrcSize==1) and uncompressed (cSrcSize==dstSize) data, | |||
* because it knows size to regenerate (originalSize). | |||
* @return : size of regenerated data (== originalSize), | |||
* or an error code, which can be tested using HUF_isError() | |||
*/ | |||
HUF_PUBLIC_API size_t HUF_decompress(void* dst, size_t originalSize, | |||
const void* cSrc, size_t cSrcSize); | |||
@@ -100,39 +80,32 @@ HUF_PUBLIC_API const char* HUF_getErrorName(size_t code); /**< provides error c | |||
/* *** Advanced function *** */ | |||
/** HUF_compress2() : | |||
* Same as HUF_compress(), but offers direct control over `maxSymbolValue` and `tableLog`. | |||
* `tableLog` must be `<= HUF_TABLELOG_MAX` . */ | |||
HUF_PUBLIC_API size_t HUF_compress2 (void* dst, size_t dstCapacity, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog); | |||
* Same as HUF_compress(), but offers control over `maxSymbolValue` and `tableLog`. | |||
* `maxSymbolValue` must be <= HUF_SYMBOLVALUE_MAX . | |||
* `tableLog` must be `<= HUF_TABLELOG_MAX` . */ | |||
HUF_PUBLIC_API size_t HUF_compress2 (void* dst, size_t dstCapacity, | |||
const void* src, size_t srcSize, | |||
unsigned maxSymbolValue, unsigned tableLog); | |||
/** HUF_compress4X_wksp() : | |||
* Same as HUF_compress2(), but uses externally allocated `workSpace`. | |||
* `workspace` must have minimum alignment of 4, and be at least as large as following macro */ | |||
#define HUF_WORKSPACE_SIZE (6 << 10) | |||
* `workspace` must have minimum alignment of 4, and be at least as large as HUF_WORKSPACE_SIZE */ | |||
#define HUF_WORKSPACE_SIZE ((6 << 10) + 256) | |||
#define HUF_WORKSPACE_SIZE_U32 (HUF_WORKSPACE_SIZE / sizeof(U32)) | |||
HUF_PUBLIC_API size_t HUF_compress4X_wksp (void* dst, size_t dstCapacity, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize); | |||
/** | |||
* The minimum workspace size for the `workSpace` used in | |||
* HUF_readDTableX2_wksp() and HUF_readDTableX4_wksp(). | |||
* | |||
* The space used depends on HUF_TABLELOG_MAX, ranging from ~1500 bytes when | |||
* HUF_TABLE_LOG_MAX=12 to ~1850 bytes when HUF_TABLE_LOG_MAX=15. | |||
* Buffer overflow errors may potentially occur if code modifications result in | |||
* a required workspace size greater than that specified in the following | |||
* macro. | |||
*/ | |||
#define HUF_DECOMPRESS_WORKSPACE_SIZE (2 << 10) | |||
#define HUF_DECOMPRESS_WORKSPACE_SIZE_U32 (HUF_DECOMPRESS_WORKSPACE_SIZE / sizeof(U32)) | |||
HUF_PUBLIC_API size_t HUF_compress4X_wksp (void* dst, size_t dstCapacity, | |||
const void* src, size_t srcSize, | |||
unsigned maxSymbolValue, unsigned tableLog, | |||
void* workSpace, size_t wkspSize); | |||
#endif /* HUF_H_298734234 */ | |||
/* ****************************************************************** | |||
* WARNING !! | |||
* The following section contains advanced and experimental definitions | |||
* which shall never be used in the context of dll | |||
* which shall never be used in the context of a dynamic library, | |||
* because they are not guaranteed to remain stable in the future. | |||
* Only consider them in association with static linking. | |||
*******************************************************************/ | |||
* *****************************************************************/ | |||
#if defined(HUF_STATIC_LINKING_ONLY) && !defined(HUF_H_HUF_STATIC_LINKING_ONLY) | |||
#define HUF_H_HUF_STATIC_LINKING_ONLY | |||
@@ -141,11 +114,11 @@ HUF_PUBLIC_API size_t HUF_compress4X_wksp (void* dst, size_t dstCapacity, const | |||
/* *** Constants *** */ | |||
#define HUF_TABLELOG_MAX 12 /* max configured tableLog (for static allocation); can be modified up to HUF_ABSOLUTEMAX_TABLELOG */ | |||
#define HUF_TABLELOG_DEFAULT 11 /* tableLog by default, when not specified */ | |||
#define HUF_TABLELOG_MAX 12 /* max runtime value of tableLog (due to static allocation); can be modified up to HUF_ABSOLUTEMAX_TABLELOG */ | |||
#define HUF_TABLELOG_DEFAULT 11 /* default tableLog value when none specified */ | |||
#define HUF_SYMBOLVALUE_MAX 255 | |||
#define HUF_TABLELOG_ABSOLUTEMAX 15 /* absolute limit of HUF_MAX_TABLELOG. Beyond that value, code does not work */ | |||
#define HUF_TABLELOG_ABSOLUTEMAX 15 /* absolute limit of HUF_MAX_TABLELOG. Beyond that value, code does not work */ | |||
#if (HUF_TABLELOG_MAX > HUF_TABLELOG_ABSOLUTEMAX) | |||
# error "HUF_TABLELOG_MAX is too large !" | |||
#endif | |||
@@ -170,130 +143,195 @@ HUF_PUBLIC_API size_t HUF_compress4X_wksp (void* dst, size_t dstCapacity, const | |||
/* static allocation of HUF's DTable */ | |||
typedef U32 HUF_DTable; | |||
#define HUF_DTABLE_SIZE(maxTableLog) (1 + (1<<(maxTableLog))) | |||
#define HUF_CREATE_STATIC_DTABLEX2(DTable, maxTableLog) \ | |||
#define HUF_CREATE_STATIC_DTABLEX1(DTable, maxTableLog) \ | |||
HUF_DTable DTable[HUF_DTABLE_SIZE((maxTableLog)-1)] = { ((U32)((maxTableLog)-1) * 0x01000001) } | |||
#define HUF_CREATE_STATIC_DTABLEX4(DTable, maxTableLog) \ | |||
#define HUF_CREATE_STATIC_DTABLEX2(DTable, maxTableLog) \ | |||
HUF_DTable DTable[HUF_DTABLE_SIZE(maxTableLog)] = { ((U32)(maxTableLog) * 0x01000001) } | |||
/* **************************************** | |||
* Advanced decompression functions | |||
******************************************/ | |||
size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */ | |||
size_t HUF_decompress4X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */ | |||
size_t HUF_decompress4X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */ | |||
#ifndef HUF_FORCE_DECOMPRESS_X1 | |||
size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */ | |||
#endif | |||
size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< decodes RLE and uncompressed */ | |||
size_t HUF_decompress4X_hufOnly(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< considers RLE and uncompressed as errors */ | |||
size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< considers RLE and uncompressed as errors */ | |||
size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */ | |||
size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< single-symbol decoder */ | |||
size_t HUF_decompress4X4_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */ | |||
size_t HUF_decompress4X4_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< double-symbols decoder */ | |||
size_t HUF_decompress4X1_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */ | |||
size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< single-symbol decoder */ | |||
#ifndef HUF_FORCE_DECOMPRESS_X1 | |||
size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */ | |||
size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< double-symbols decoder */ | |||
#endif | |||
/* **************************************** | |||
* HUF detailed API | |||
******************************************/ | |||
/*! | |||
HUF_compress() does the following: | |||
1. count symbol occurrence from source[] into table count[] using FSE_count() | |||
2. (optional) refine tableLog using HUF_optimalTableLog() | |||
3. build Huffman table from count using HUF_buildCTable() | |||
4. save Huffman table to memory buffer using HUF_writeCTable() | |||
5. encode the data stream using HUF_compress4X_usingCTable() | |||
The following API allows targeting specific sub-functions for advanced tasks. | |||
For example, it's possible to compress several blocks using the same 'CTable', | |||
or to save and regenerate 'CTable' using external methods. | |||
*/ | |||
/* FSE_count() : find it within "fse.h" */ | |||
* HUF detailed API | |||
* ****************************************/ | |||
/*! HUF_compress() does the following: | |||
* 1. count symbol occurrence from source[] into table count[] using FSE_count() (exposed within "fse.h") | |||
* 2. (optional) refine tableLog using HUF_optimalTableLog() | |||
* 3. build Huffman table from count using HUF_buildCTable() | |||
* 4. save Huffman table to memory buffer using HUF_writeCTable() | |||
* 5. encode the data stream using HUF_compress4X_usingCTable() | |||
* | |||
* The following API allows targeting specific sub-functions for advanced tasks. | |||
* For example, it's possible to compress several blocks using the same 'CTable', | |||
* or to save and regenerate 'CTable' using external methods. | |||
*/ | |||
unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue); | |||
typedef struct HUF_CElt_s HUF_CElt; /* incomplete type */ | |||
size_t HUF_buildCTable (HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue, unsigned maxNbBits); | |||
size_t HUF_buildCTable (HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue, unsigned maxNbBits); /* @return : maxNbBits; CTable and count can overlap. In which case, CTable will overwrite count content */ | |||
size_t HUF_writeCTable (void* dst, size_t maxDstSize, const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog); | |||
size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable); | |||
size_t HUF_estimateCompressedSize(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue); | |||
int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue); | |||
typedef enum { | |||
HUF_repeat_none, /**< Cannot use the previous table */ | |||
HUF_repeat_check, /**< Can use the previous table but it must be checked. Note : The previous table must have been constructed by HUF_compress{1, 4}X_repeat */ | |||
HUF_repeat_valid /**< Can use the previous table and it is asumed to be valid */ | |||
HUF_repeat_valid /**< Can use the previous table and it is assumed to be valid */ | |||
} HUF_repeat; | |||
/** HUF_compress4X_repeat() : | |||
* Same as HUF_compress4X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none. | |||
* If it uses hufTable it does not modify hufTable or repeat. | |||
* If it doesn't, it sets *repeat = HUF_repeat_none, and it sets hufTable to the table used. | |||
* If preferRepeat then the old table will always be used if valid. */ | |||
size_t HUF_compress4X_repeat(void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize, HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat); /**< `workSpace` must be a table of at least HUF_WORKSPACE_SIZE_U32 unsigned */ | |||
* Same as HUF_compress4X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none. | |||
* If it uses hufTable it does not modify hufTable or repeat. | |||
* If it doesn't, it sets *repeat = HUF_repeat_none, and it sets hufTable to the table used. | |||
* If preferRepeat then the old table will always be used if valid. */ | |||
size_t HUF_compress4X_repeat(void* dst, size_t dstSize, | |||
const void* src, size_t srcSize, | |||
unsigned maxSymbolValue, unsigned tableLog, | |||
void* workSpace, size_t wkspSize, /**< `workSpace` must be aligned on 4-bytes boundaries, `wkspSize` must be >= HUF_WORKSPACE_SIZE */ | |||
HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2); | |||
/** HUF_buildCTable_wksp() : | |||
* Same as HUF_buildCTable(), but using externally allocated scratch buffer. | |||
* `workSpace` must be aligned on 4-bytes boundaries, and be at least as large as a table of 1024 unsigned. | |||
* `workSpace` must be aligned on 4-bytes boundaries, and its size must be >= HUF_CTABLE_WORKSPACE_SIZE. | |||
*/ | |||
size_t HUF_buildCTable_wksp (HUF_CElt* tree, const U32* count, U32 maxSymbolValue, U32 maxNbBits, void* workSpace, size_t wkspSize); | |||
#define HUF_CTABLE_WORKSPACE_SIZE_U32 (2*HUF_SYMBOLVALUE_MAX +1 +1) | |||
#define HUF_CTABLE_WORKSPACE_SIZE (HUF_CTABLE_WORKSPACE_SIZE_U32 * sizeof(unsigned)) | |||
size_t HUF_buildCTable_wksp (HUF_CElt* tree, | |||
const unsigned* count, U32 maxSymbolValue, U32 maxNbBits, | |||
void* workSpace, size_t wkspSize); | |||
/*! HUF_readStats() : | |||
Read compact Huffman tree, saved by HUF_writeCTable(). | |||
`huffWeight` is destination buffer. | |||
@return : size read from `src` , or an error Code . | |||
Note : Needed by HUF_readCTable() and HUF_readDTableXn() . */ | |||
size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats, | |||
U32* nbSymbolsPtr, U32* tableLogPtr, | |||
* Read compact Huffman tree, saved by HUF_writeCTable(). | |||
* `huffWeight` is destination buffer. | |||
* @return : size read from `src` , or an error Code . | |||
* Note : Needed by HUF_readCTable() and HUF_readDTableXn() . */ | |||
size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, | |||
U32* rankStats, U32* nbSymbolsPtr, U32* tableLogPtr, | |||
const void* src, size_t srcSize); | |||
/** HUF_readCTable() : | |||
* Loading a CTable saved with HUF_writeCTable() */ | |||
size_t HUF_readCTable (HUF_CElt* CTable, unsigned maxSymbolValue, const void* src, size_t srcSize); | |||
* Loading a CTable saved with HUF_writeCTable() */ | |||
size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize, unsigned *hasZeroWeights); | |||
/** HUF_getNbBits() : | |||
* Read nbBits from CTable symbolTable, for symbol `symbolValue` presumed <= HUF_SYMBOLVALUE_MAX | |||
* Note 1 : is not inlined, as HUF_CElt definition is private | |||
* Note 2 : const void* used, so that it can provide a statically allocated table as argument (which uses type U32) */ | |||
U32 HUF_getNbBits(const void* symbolTable, U32 symbolValue); | |||
/* | |||
HUF_decompress() does the following: | |||
1. select the decompression algorithm (X2, X4) based on pre-computed heuristics | |||
2. build Huffman table from save, using HUF_readDTableXn() | |||
3. decode 1 or 4 segments in parallel using HUF_decompressSXn_usingDTable | |||
*/ | |||
* HUF_decompress() does the following: | |||
* 1. select the decompression algorithm (X1, X2) based on pre-computed heuristics | |||
* 2. build Huffman table from save, using HUF_readDTableX?() | |||
* 3. decode 1 or 4 segments in parallel using HUF_decompress?X?_usingDTable() | |||
*/ | |||
/** HUF_selectDecoder() : | |||
* Tells which decoder is likely to decode faster, | |||
* based on a set of pre-determined metrics. | |||
* @return : 0==HUF_decompress4X2, 1==HUF_decompress4X4 . | |||
* Assumption : 0 < cSrcSize < dstSize <= 128 KB */ | |||
* Tells which decoder is likely to decode faster, | |||
* based on a set of pre-computed metrics. | |||
* @return : 0==HUF_decompress4X1, 1==HUF_decompress4X2 . | |||
* Assumption : 0 < dstSize <= 128 KB */ | |||
U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize); | |||
/** | |||
* The minimum workspace size for the `workSpace` used in | |||
* HUF_readDTableX1_wksp() and HUF_readDTableX2_wksp(). | |||
* | |||
* The space used depends on HUF_TABLELOG_MAX, ranging from ~1500 bytes when | |||
* HUF_TABLE_LOG_MAX=12 to ~1850 bytes when HUF_TABLE_LOG_MAX=15. | |||
* Buffer overflow errors may potentially occur if code modifications result in | |||
* a required workspace size greater than that specified in the following | |||
* macro. | |||
*/ | |||
#define HUF_DECOMPRESS_WORKSPACE_SIZE (2 << 10) | |||
#define HUF_DECOMPRESS_WORKSPACE_SIZE_U32 (HUF_DECOMPRESS_WORKSPACE_SIZE / sizeof(U32)) | |||
#ifndef HUF_FORCE_DECOMPRESS_X2 | |||
size_t HUF_readDTableX1 (HUF_DTable* DTable, const void* src, size_t srcSize); | |||
size_t HUF_readDTableX1_wksp (HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize); | |||
#endif | |||
#ifndef HUF_FORCE_DECOMPRESS_X1 | |||
size_t HUF_readDTableX2 (HUF_DTable* DTable, const void* src, size_t srcSize); | |||
size_t HUF_readDTableX2_wksp (HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize); | |||
size_t HUF_readDTableX4 (HUF_DTable* DTable, const void* src, size_t srcSize); | |||
size_t HUF_readDTableX4_wksp (HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize); | |||
#endif | |||
size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); | |||
#ifndef HUF_FORCE_DECOMPRESS_X2 | |||
size_t HUF_decompress4X1_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); | |||
#endif | |||
#ifndef HUF_FORCE_DECOMPRESS_X1 | |||
size_t HUF_decompress4X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); | |||
size_t HUF_decompress4X4_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); | |||
#endif | |||
/* ====================== */ | |||
/* single stream variants */ | |||
/* ====================== */ | |||
size_t HUF_compress1X (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog); | |||
size_t HUF_compress1X_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize); /**< `workSpace` must be a table of at least HUF_WORKSPACE_SIZE_U32 unsigned */ | |||
size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable); | |||
/** HUF_compress1X_repeat() : | |||
* Same as HUF_compress1X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none. | |||
* If it uses hufTable it does not modify hufTable or repeat. | |||
* If it doesn't, it sets *repeat = HUF_repeat_none, and it sets hufTable to the table used. | |||
* If preferRepeat then the old table will always be used if valid. */ | |||
size_t HUF_compress1X_repeat(void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize, HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat); /**< `workSpace` must be a table of at least HUF_WORKSPACE_SIZE_U32 unsigned */ | |||
size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* single-symbol decoder */ | |||
size_t HUF_decompress1X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* double-symbol decoder */ | |||
* Same as HUF_compress1X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none. | |||
* If it uses hufTable it does not modify hufTable or repeat. | |||
* If it doesn't, it sets *repeat = HUF_repeat_none, and it sets hufTable to the table used. | |||
* If preferRepeat then the old table will always be used if valid. */ | |||
size_t HUF_compress1X_repeat(void* dst, size_t dstSize, | |||
const void* src, size_t srcSize, | |||
unsigned maxSymbolValue, unsigned tableLog, | |||
void* workSpace, size_t wkspSize, /**< `workSpace` must be aligned on 4-bytes boundaries, `wkspSize` must be >= HUF_WORKSPACE_SIZE */ | |||
HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2); | |||
size_t HUF_decompress1X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* single-symbol decoder */ | |||
#ifndef HUF_FORCE_DECOMPRESS_X1 | |||
size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* double-symbol decoder */ | |||
#endif | |||
size_t HUF_decompress1X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); | |||
size_t HUF_decompress1X_DCtx_wksp (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); | |||
size_t HUF_decompress1X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */ | |||
size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< single-symbol decoder */ | |||
size_t HUF_decompress1X4_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */ | |||
size_t HUF_decompress1X4_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< double-symbols decoder */ | |||
#ifndef HUF_FORCE_DECOMPRESS_X2 | |||
size_t HUF_decompress1X1_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */ | |||
size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< single-symbol decoder */ | |||
#endif | |||
#ifndef HUF_FORCE_DECOMPRESS_X1 | |||
size_t HUF_decompress1X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */ | |||
size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< double-symbols decoder */ | |||
#endif | |||
size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); /**< automatic selection of sing or double symbol decoder, based on DTable */ | |||
#ifndef HUF_FORCE_DECOMPRESS_X2 | |||
size_t HUF_decompress1X1_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); | |||
#endif | |||
#ifndef HUF_FORCE_DECOMPRESS_X1 | |||
size_t HUF_decompress1X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); | |||
size_t HUF_decompress1X4_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); | |||
#endif | |||
/* BMI2 variants. | |||
* If the CPU has BMI2 support, pass bmi2=1, otherwise pass bmi2=0. | |||
*/ | |||
size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2); | |||
#ifndef HUF_FORCE_DECOMPRESS_X2 | |||
size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2); | |||
#endif | |||
size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2); | |||
size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2); | |||
#endif /* HUF_STATIC_LINKING_ONLY */ | |||
@@ -1,35 +1,15 @@ | |||
/* ****************************************************************** | |||
Huffman encoder, part of New Generation Entropy library | |||
Copyright (C) 2013-2016, Yann Collet. | |||
BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) | |||
Redistribution and use in source and binary forms, with or without | |||
modification, are permitted provided that the following conditions are | |||
met: | |||
* Redistributions of source code must retain the above copyright | |||
notice, this list of conditions and the following disclaimer. | |||
* Redistributions in binary form must reproduce the above | |||
copyright notice, this list of conditions and the following disclaimer | |||
in the documentation and/or other materials provided with the | |||
distribution. | |||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
You can contact the author at : | |||
- FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy | |||
- Public forum : https://groups.google.com/forum/#!forum/lz4c | |||
* Huffman encoder, part of New Generation Entropy library | |||
* Copyright (c) 2013-2020, Yann Collet, Facebook, Inc. | |||
* | |||
* You can contact the author at : | |||
* - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy | |||
* - Public forum : https://groups.google.com/forum/#!forum/lz4c | |||
* | |||
* This source code is licensed under both the BSD-style license (found in the | |||
* LICENSE file in the root directory of this source tree) and the GPLv2 (found | |||
* in the COPYING file in the root directory of this source tree). | |||
* You may select, at your option, one of the above-listed licenses. | |||
****************************************************************** */ | |||
/* ************************************************************** | |||
@@ -45,7 +25,9 @@ | |||
****************************************************************/ | |||
#include <string.h> /* memcpy, memset */ | |||
#include <stdio.h> /* printf (debug) */ | |||
#include "compiler.h" | |||
#include "bitstream.h" | |||
#include "hist.h" | |||
#define FSE_STATIC_LINKING_ONLY /* FSE_optimalTableLog_internal */ | |||
#include "fse.h" /* header compression */ | |||
#define HUF_STATIC_LINKING_ONLY | |||
@@ -57,9 +39,7 @@ | |||
* Error Management | |||
****************************************************************/ | |||
#define HUF_isError ERR_isError | |||
#define HUF_STATIC_ASSERT(c) { enum { HUF_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */ | |||
#define CHECK_V_F(e, f) size_t const e = f; if (ERR_isError(e)) return e | |||
#define CHECK_F(f) { CHECK_V_F(_var_err__, f); } | |||
#define HUF_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c) /* use only *after* variable declarations */ | |||
/* ************************************************************** | |||
@@ -80,46 +60,46 @@ unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxS | |||
* Note : all elements within weightTable are supposed to be <= HUF_TABLELOG_MAX. | |||
*/ | |||
#define MAX_FSE_TABLELOG_FOR_HUFF_HEADER 6 | |||
size_t HUF_compressWeights (void* dst, size_t dstSize, const void* weightTable, size_t wtSize) | |||
static size_t HUF_compressWeights (void* dst, size_t dstSize, const void* weightTable, size_t wtSize) | |||
{ | |||
BYTE* const ostart = (BYTE*) dst; | |||
BYTE* op = ostart; | |||
BYTE* const oend = ostart + dstSize; | |||
U32 maxSymbolValue = HUF_TABLELOG_MAX; | |||
unsigned maxSymbolValue = HUF_TABLELOG_MAX; | |||
U32 tableLog = MAX_FSE_TABLELOG_FOR_HUFF_HEADER; | |||
FSE_CTable CTable[FSE_CTABLE_SIZE_U32(MAX_FSE_TABLELOG_FOR_HUFF_HEADER, HUF_TABLELOG_MAX)]; | |||
BYTE scratchBuffer[1<<MAX_FSE_TABLELOG_FOR_HUFF_HEADER]; | |||
U32 count[HUF_TABLELOG_MAX+1]; | |||
unsigned count[HUF_TABLELOG_MAX+1]; | |||
S16 norm[HUF_TABLELOG_MAX+1]; | |||
/* init conditions */ | |||
if (wtSize <= 1) return 0; /* Not compressible */ | |||
/* Scan input and build symbol stats */ | |||
{ CHECK_V_F(maxCount, FSE_count_simple(count, &maxSymbolValue, weightTable, wtSize) ); | |||
{ unsigned const maxCount = HIST_count_simple(count, &maxSymbolValue, weightTable, wtSize); /* never fails */ | |||
if (maxCount == wtSize) return 1; /* only a single symbol in src : rle */ | |||
if (maxCount == 1) return 0; /* each symbol present maximum once => not compressible */ | |||
if (maxCount == 1) return 0; /* each symbol present maximum once => not compressible */ | |||
} | |||
tableLog = FSE_optimalTableLog(tableLog, wtSize, maxSymbolValue); | |||
CHECK_F( FSE_normalizeCount(norm, tableLog, count, wtSize, maxSymbolValue) ); | |||
/* Write table description header */ | |||
{ CHECK_V_F(hSize, FSE_writeNCount(op, oend-op, norm, maxSymbolValue, tableLog) ); | |||
{ CHECK_V_F(hSize, FSE_writeNCount(op, (size_t)(oend-op), norm, maxSymbolValue, tableLog) ); | |||
op += hSize; | |||
} | |||
/* Compress */ | |||
CHECK_F( FSE_buildCTable_wksp(CTable, norm, maxSymbolValue, tableLog, scratchBuffer, sizeof(scratchBuffer)) ); | |||
{ CHECK_V_F(cSize, FSE_compress_usingCTable(op, oend - op, weightTable, wtSize, CTable) ); | |||
{ CHECK_V_F(cSize, FSE_compress_usingCTable(op, (size_t)(oend - op), weightTable, wtSize, CTable) ); | |||
if (cSize == 0) return 0; /* not enough space for compressed data */ | |||
op += cSize; | |||
} | |||
return op-ostart; | |||
return (size_t)(op-ostart); | |||
} | |||
@@ -132,7 +112,7 @@ struct HUF_CElt_s { | |||
`CTable` : Huffman tree to save, using huf representation. | |||
@return : size of saved CTable */ | |||
size_t HUF_writeCTable (void* dst, size_t maxDstSize, | |||
const HUF_CElt* CTable, U32 maxSymbolValue, U32 huffLog) | |||
const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog) | |||
{ | |||
BYTE bitsToWeight[HUF_TABLELOG_MAX + 1]; /* precomputed conversion table */ | |||
BYTE huffWeight[HUF_SYMBOLVALUE_MAX]; | |||
@@ -167,7 +147,7 @@ size_t HUF_writeCTable (void* dst, size_t maxDstSize, | |||
} | |||
size_t HUF_readCTable (HUF_CElt* CTable, U32 maxSymbolValue, const void* src, size_t srcSize) | |||
size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize, unsigned* hasZeroWeights) | |||
{ | |||
BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1]; /* init not required, even though some static analyzer may complain */ | |||
U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1]; /* large enough for values from 0 to 16 */ | |||
@@ -179,7 +159,7 @@ size_t HUF_readCTable (HUF_CElt* CTable, U32 maxSymbolValue, const void* src, si | |||
/* check result */ | |||
if (tableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge); | |||
if (nbSymbols > maxSymbolValue+1) return ERROR(maxSymbolValue_tooSmall); | |||
if (nbSymbols > *maxSymbolValuePtr+1) return ERROR(maxSymbolValue_tooSmall); | |||
/* Prepare base value per rank */ | |||
{ U32 n, nextRankStart = 0; | |||
@@ -190,9 +170,11 @@ size_t HUF_readCTable (HUF_CElt* CTable, U32 maxSymbolValue, const void* src, si | |||
} } | |||
/* fill nbBits */ | |||
*hasZeroWeights = 0; | |||
{ U32 n; for (n=0; n<nbSymbols; n++) { | |||
const U32 w = huffWeight[n]; | |||
CTable[n].nbBits = (BYTE)(tableLog + 1 - w); | |||
*hasZeroWeights |= (w == 0); | |||
CTable[n].nbBits = (BYTE)(tableLog + 1 - w) & -(w != 0); | |||
} } | |||
/* fill val */ | |||
@@ -208,12 +190,20 @@ size_t HUF_readCTable (HUF_CElt* CTable, U32 maxSymbolValue, const void* src, si | |||
min >>= 1; | |||
} } | |||
/* assign value within rank, symbol order */ | |||
{ U32 n; for (n=0; n<=maxSymbolValue; n++) CTable[n].val = valPerRank[CTable[n].nbBits]++; } | |||
{ U32 n; for (n=0; n<nbSymbols; n++) CTable[n].val = valPerRank[CTable[n].nbBits]++; } | |||
} | |||
*maxSymbolValuePtr = nbSymbols - 1; | |||
return readSize; | |||
} | |||
U32 HUF_getNbBits(const void* symbolTable, U32 symbolValue) | |||
{ | |||
const HUF_CElt* table = (const HUF_CElt*)symbolTable; | |||
assert(symbolValue <= HUF_SYMBOLVALUE_MAX); | |||
return table[symbolValue].nbBits; | |||
} | |||
typedef struct nodeElt_s { | |||
U32 count; | |||
@@ -230,7 +220,7 @@ static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits) | |||
/* there are several too large elements (at least >= 2) */ | |||
{ int totalCost = 0; | |||
const U32 baseCost = 1 << (largestBits - maxNbBits); | |||
U32 n = lastNonNull; | |||
int n = (int)lastNonNull; | |||
while (huffNode[n].nbBits > maxNbBits) { | |||
totalCost += baseCost - (1 << (largestBits - huffNode[n].nbBits)); | |||
@@ -245,22 +235,22 @@ static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits) | |||
/* repay normalized cost */ | |||
{ U32 const noSymbol = 0xF0F0F0F0; | |||
U32 rankLast[HUF_TABLELOG_MAX+2]; | |||
int pos; | |||
/* Get pos of last (smallest) symbol per rank */ | |||
memset(rankLast, 0xF0, sizeof(rankLast)); | |||
{ U32 currentNbBits = maxNbBits; | |||
int pos; | |||
for (pos=n ; pos >= 0; pos--) { | |||
if (huffNode[pos].nbBits >= currentNbBits) continue; | |||
currentNbBits = huffNode[pos].nbBits; /* < maxNbBits */ | |||
rankLast[maxNbBits-currentNbBits] = pos; | |||
rankLast[maxNbBits-currentNbBits] = (U32)pos; | |||
} } | |||
while (totalCost > 0) { | |||
U32 nBitsToDecrease = BIT_highbit32(totalCost) + 1; | |||
U32 nBitsToDecrease = BIT_highbit32((U32)totalCost) + 1; | |||
for ( ; nBitsToDecrease > 1; nBitsToDecrease--) { | |||
U32 highPos = rankLast[nBitsToDecrease]; | |||
U32 lowPos = rankLast[nBitsToDecrease-1]; | |||
U32 const highPos = rankLast[nBitsToDecrease]; | |||
U32 const lowPos = rankLast[nBitsToDecrease-1]; | |||
if (highPos == noSymbol) continue; | |||
if (lowPos == noSymbol) break; | |||
{ U32 const highTotal = huffNode[highPos].count; | |||
@@ -287,7 +277,8 @@ static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits) | |||
if (rankLast[1] == noSymbol) { /* special case : no rank 1 symbol (using maxNbBits-1); let's create one from largest rank 0 (using maxNbBits) */ | |||
while (huffNode[n].nbBits == maxNbBits) n--; | |||
huffNode[n+1].nbBits--; | |||
rankLast[1] = n+1; | |||
assert(n >= 0); | |||
rankLast[1] = (U32)(n+1); | |||
totalCost++; | |||
continue; | |||
} | |||
@@ -299,29 +290,39 @@ static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits) | |||
return maxNbBits; | |||
} | |||
typedef struct { | |||
U32 base; | |||
U32 current; | |||
} rankPos; | |||
static void HUF_sort(nodeElt* huffNode, const U32* count, U32 maxSymbolValue) | |||
typedef nodeElt huffNodeTable[HUF_CTABLE_WORKSPACE_SIZE_U32]; | |||
#define RANK_POSITION_TABLE_SIZE 32 | |||
typedef struct { | |||
huffNodeTable huffNodeTbl; | |||
rankPos rankPosition[RANK_POSITION_TABLE_SIZE]; | |||
} HUF_buildCTable_wksp_tables; | |||
static void HUF_sort(nodeElt* huffNode, const unsigned* count, U32 maxSymbolValue, rankPos* rankPosition) | |||
{ | |||
rankPos rank[32]; | |||
U32 n; | |||
memset(rank, 0, sizeof(rank)); | |||
memset(rankPosition, 0, sizeof(*rankPosition) * RANK_POSITION_TABLE_SIZE); | |||
for (n=0; n<=maxSymbolValue; n++) { | |||
U32 r = BIT_highbit32(count[n] + 1); | |||
rank[r].base ++; | |||
rankPosition[r].base ++; | |||
} | |||
for (n=30; n>0; n--) rank[n-1].base += rank[n].base; | |||
for (n=0; n<32; n++) rank[n].current = rank[n].base; | |||
for (n=30; n>0; n--) rankPosition[n-1].base += rankPosition[n].base; | |||
for (n=0; n<32; n++) rankPosition[n].current = rankPosition[n].base; | |||
for (n=0; n<=maxSymbolValue; n++) { | |||
U32 const c = count[n]; | |||
U32 const r = BIT_highbit32(c+1) + 1; | |||
U32 pos = rank[r].current++; | |||
while ((pos > rank[r].base) && (c > huffNode[pos-1].count)) huffNode[pos]=huffNode[pos-1], pos--; | |||
U32 pos = rankPosition[r].current++; | |||
while ((pos > rankPosition[r].base) && (c > huffNode[pos-1].count)) { | |||
huffNode[pos] = huffNode[pos-1]; | |||
pos--; | |||
} | |||
huffNode[pos].count = c; | |||
huffNode[pos].byte = (BYTE)n; | |||
} | |||
@@ -330,44 +331,48 @@ static void HUF_sort(nodeElt* huffNode, const U32* count, U32 maxSymbolValue) | |||
/** HUF_buildCTable_wksp() : | |||
* Same as HUF_buildCTable(), but using externally allocated scratch buffer. | |||
* `workSpace` must be aligned on 4-bytes boundaries, and be at least as large as a table of 1024 unsigned. | |||
* `workSpace` must be aligned on 4-bytes boundaries, and be at least as large as sizeof(HUF_buildCTable_wksp_tables). | |||
*/ | |||
#define STARTNODE (HUF_SYMBOLVALUE_MAX+1) | |||
typedef nodeElt huffNodeTable[2*HUF_SYMBOLVALUE_MAX+1 +1]; | |||
size_t HUF_buildCTable_wksp (HUF_CElt* tree, const U32* count, U32 maxSymbolValue, U32 maxNbBits, void* workSpace, size_t wkspSize) | |||
size_t HUF_buildCTable_wksp (HUF_CElt* tree, const unsigned* count, U32 maxSymbolValue, U32 maxNbBits, void* workSpace, size_t wkspSize) | |||
{ | |||
nodeElt* const huffNode0 = (nodeElt*)workSpace; | |||
HUF_buildCTable_wksp_tables* const wksp_tables = (HUF_buildCTable_wksp_tables*)workSpace; | |||
nodeElt* const huffNode0 = wksp_tables->huffNodeTbl; | |||
nodeElt* const huffNode = huffNode0+1; | |||
U32 n, nonNullRank; | |||
int nonNullRank; | |||
int lowS, lowN; | |||
U16 nodeNb = STARTNODE; | |||
U32 nodeRoot; | |||
int nodeNb = STARTNODE; | |||
int n, nodeRoot; | |||
/* safety checks */ | |||
if (wkspSize < sizeof(huffNodeTable)) return ERROR(GENERIC); /* workSpace is not large enough */ | |||
if (((size_t)workSpace & 3) != 0) return ERROR(GENERIC); /* must be aligned on 4-bytes boundaries */ | |||
if (wkspSize < sizeof(HUF_buildCTable_wksp_tables)) | |||
return ERROR(workSpace_tooSmall); | |||
if (maxNbBits == 0) maxNbBits = HUF_TABLELOG_DEFAULT; | |||
if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(GENERIC); | |||
if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) | |||
return ERROR(maxSymbolValue_tooLarge); | |||
memset(huffNode0, 0, sizeof(huffNodeTable)); | |||
/* sort, decreasing order */ | |||
HUF_sort(huffNode, count, maxSymbolValue); | |||
HUF_sort(huffNode, count, maxSymbolValue, wksp_tables->rankPosition); | |||
/* init for parents */ | |||
nonNullRank = maxSymbolValue; | |||
nonNullRank = (int)maxSymbolValue; | |||
while(huffNode[nonNullRank].count == 0) nonNullRank--; | |||
lowS = nonNullRank; nodeRoot = nodeNb + lowS - 1; lowN = nodeNb; | |||
huffNode[nodeNb].count = huffNode[lowS].count + huffNode[lowS-1].count; | |||
huffNode[lowS].parent = huffNode[lowS-1].parent = nodeNb; | |||
huffNode[lowS].parent = huffNode[lowS-1].parent = (U16)nodeNb; | |||
nodeNb++; lowS-=2; | |||
for (n=nodeNb; n<=nodeRoot; n++) huffNode[n].count = (U32)(1U<<30); | |||
huffNode0[0].count = (U32)(1U<<31); /* fake entry, strong barrier */ | |||
/* create parents */ | |||
while (nodeNb <= nodeRoot) { | |||
U32 n1 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++; | |||
U32 n2 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++; | |||
int const n1 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++; | |||
int const n2 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++; | |||
huffNode[nodeNb].count = huffNode[n1].count + huffNode[n2].count; | |||
huffNode[n1].parent = huffNode[n2].parent = nodeNb; | |||
huffNode[n1].parent = huffNode[n2].parent = (U16)nodeNb; | |||
nodeNb++; | |||
} | |||
@@ -379,24 +384,25 @@ size_t HUF_buildCTable_wksp (HUF_CElt* tree, const U32* count, U32 maxSymbolValu | |||
huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1; | |||
/* enforce maxTableLog */ | |||
maxNbBits = HUF_setMaxHeight(huffNode, nonNullRank, maxNbBits); | |||
maxNbBits = HUF_setMaxHeight(huffNode, (U32)nonNullRank, maxNbBits); | |||
/* fill result into tree (val, nbBits) */ | |||
{ U16 nbPerRank[HUF_TABLELOG_MAX+1] = {0}; | |||
U16 valPerRank[HUF_TABLELOG_MAX+1] = {0}; | |||
int const alphabetSize = (int)(maxSymbolValue + 1); | |||
if (maxNbBits > HUF_TABLELOG_MAX) return ERROR(GENERIC); /* check fit into table */ | |||
for (n=0; n<=nonNullRank; n++) | |||
nbPerRank[huffNode[n].nbBits]++; | |||
/* determine stating value per rank */ | |||
{ U16 min = 0; | |||
for (n=maxNbBits; n>0; n--) { | |||
for (n=(int)maxNbBits; n>0; n--) { | |||
valPerRank[n] = min; /* get starting value within each rank */ | |||
min += nbPerRank[n]; | |||
min >>= 1; | |||
} } | |||
for (n=0; n<=maxSymbolValue; n++) | |||
for (n=0; n<alphabetSize; n++) | |||
tree[huffNode[n].byte].nbBits = huffNode[n].nbBits; /* push nbBits per symbol, symbol order */ | |||
for (n=0; n<=maxSymbolValue; n++) | |||
for (n=0; n<alphabetSize; n++) | |||
tree[n].val = valPerRank[tree[n].nbBits]++; /* assign value within rank, symbol order */ | |||
} | |||
@@ -404,15 +410,16 @@ size_t HUF_buildCTable_wksp (HUF_CElt* tree, const U32* count, U32 maxSymbolValu | |||
} | |||
/** HUF_buildCTable() : | |||
* @return : maxNbBits | |||
* Note : count is used before tree is written, so they can safely overlap | |||
*/ | |||
size_t HUF_buildCTable (HUF_CElt* tree, const U32* count, U32 maxSymbolValue, U32 maxNbBits) | |||
size_t HUF_buildCTable (HUF_CElt* tree, const unsigned* count, unsigned maxSymbolValue, unsigned maxNbBits) | |||
{ | |||
huffNodeTable nodeTable; | |||
return HUF_buildCTable_wksp(tree, count, maxSymbolValue, maxNbBits, nodeTable, sizeof(nodeTable)); | |||
HUF_buildCTable_wksp_tables workspace; | |||
return HUF_buildCTable_wksp(tree, count, maxSymbolValue, maxNbBits, &workspace, sizeof(workspace)); | |||
} | |||
static size_t HUF_estimateCompressedSize(HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue) | |||
size_t HUF_estimateCompressedSize(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue) | |||
{ | |||
size_t nbBits = 0; | |||
int s; | |||
@@ -422,7 +429,7 @@ static size_t HUF_estimateCompressedSize(HUF_CElt* CTable, const unsigned* count | |||
return nbBits >> 3; | |||
} | |||
static int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue) { | |||
int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue) { | |||
int bad = 0; | |||
int s; | |||
for (s = 0; s <= (int)maxSymbolValue; ++s) { | |||
@@ -431,13 +438,14 @@ static int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, uns | |||
return !bad; | |||
} | |||
static void HUF_encodeSymbol(BIT_CStream_t* bitCPtr, U32 symbol, const HUF_CElt* CTable) | |||
size_t HUF_compressBound(size_t size) { return HUF_COMPRESSBOUND(size); } | |||
FORCE_INLINE_TEMPLATE void | |||
HUF_encodeSymbol(BIT_CStream_t* bitCPtr, U32 symbol, const HUF_CElt* CTable) | |||
{ | |||
BIT_addBitsFast(bitCPtr, CTable[symbol].val, CTable[symbol].nbBits); | |||
} | |||
size_t HUF_compressBound(size_t size) { return HUF_COMPRESSBOUND(size); } | |||
#define HUF_FLUSHBITS(s) BIT_flushBits(s) | |||
#define HUF_FLUSHBITS_1(stream) \ | |||
@@ -446,7 +454,10 @@ size_t HUF_compressBound(size_t size) { return HUF_COMPRESSBOUND(size); } | |||
#define HUF_FLUSHBITS_2(stream) \ | |||
if (sizeof((stream)->bitContainer)*8 < HUF_TABLELOG_MAX*4+7) HUF_FLUSHBITS(stream) | |||
size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable) | |||
FORCE_INLINE_TEMPLATE size_t | |||
HUF_compress1X_usingCTable_internal_body(void* dst, size_t dstSize, | |||
const void* src, size_t srcSize, | |||
const HUF_CElt* CTable) | |||
{ | |||
const BYTE* ip = (const BYTE*) src; | |||
BYTE* const ostart = (BYTE*)dst; | |||
@@ -457,7 +468,7 @@ size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, si | |||
/* init */ | |||
if (dstSize < 8) return 0; /* not enough space to compress */ | |||
{ size_t const initErr = BIT_initCStream(&bitC, op, oend-op); | |||
{ size_t const initErr = BIT_initCStream(&bitC, op, (size_t)(oend-op)); | |||
if (HUF_isError(initErr)) return 0; } | |||
n = srcSize & ~3; /* join to mod 4 */ | |||
@@ -490,8 +501,58 @@ size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, si | |||
return BIT_closeCStream(&bitC); | |||
} | |||
#if DYNAMIC_BMI2 | |||
size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable) | |||
static TARGET_ATTRIBUTE("bmi2") size_t | |||
HUF_compress1X_usingCTable_internal_bmi2(void* dst, size_t dstSize, | |||
const void* src, size_t srcSize, | |||
const HUF_CElt* CTable) | |||
{ | |||
return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable); | |||
} | |||
static size_t | |||
HUF_compress1X_usingCTable_internal_default(void* dst, size_t dstSize, | |||
const void* src, size_t srcSize, | |||
const HUF_CElt* CTable) | |||
{ | |||
return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable); | |||
} | |||
static size_t | |||
HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize, | |||
const void* src, size_t srcSize, | |||
const HUF_CElt* CTable, const int bmi2) | |||
{ | |||
if (bmi2) { | |||
return HUF_compress1X_usingCTable_internal_bmi2(dst, dstSize, src, srcSize, CTable); | |||
} | |||
return HUF_compress1X_usingCTable_internal_default(dst, dstSize, src, srcSize, CTable); | |||
} | |||
#else | |||
static size_t | |||
HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize, | |||
const void* src, size_t srcSize, | |||
const HUF_CElt* CTable, const int bmi2) | |||
{ | |||
(void)bmi2; | |||
return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable); | |||
} | |||
#endif | |||
size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable) | |||
{ | |||
return HUF_compress1X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0); | |||
} | |||
static size_t | |||
HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize, | |||
const void* src, size_t srcSize, | |||
const HUF_CElt* CTable, int bmi2) | |||
{ | |||
size_t const segmentSize = (srcSize+3)/4; /* first 3 segments */ | |||
const BYTE* ip = (const BYTE*) src; | |||
@@ -504,132 +565,162 @@ size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, si | |||
if (srcSize < 12) return 0; /* no saving possible : too small input */ | |||
op += 6; /* jumpTable */ | |||
{ CHECK_V_F(cSize, HUF_compress1X_usingCTable(op, oend-op, ip, segmentSize, CTable) ); | |||
assert(op <= oend); | |||
{ CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) ); | |||
if (cSize==0) return 0; | |||
assert(cSize <= 65535); | |||
MEM_writeLE16(ostart, (U16)cSize); | |||
op += cSize; | |||
} | |||
ip += segmentSize; | |||
{ CHECK_V_F(cSize, HUF_compress1X_usingCTable(op, oend-op, ip, segmentSize, CTable) ); | |||
assert(op <= oend); | |||
{ CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) ); | |||
if (cSize==0) return 0; | |||
assert(cSize <= 65535); | |||
MEM_writeLE16(ostart+2, (U16)cSize); | |||
op += cSize; | |||
} | |||
ip += segmentSize; | |||
{ CHECK_V_F(cSize, HUF_compress1X_usingCTable(op, oend-op, ip, segmentSize, CTable) ); | |||
assert(op <= oend); | |||
{ CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) ); | |||
if (cSize==0) return 0; | |||
assert(cSize <= 65535); | |||
MEM_writeLE16(ostart+4, (U16)cSize); | |||
op += cSize; | |||
} | |||
ip += segmentSize; | |||
{ CHECK_V_F(cSize, HUF_compress1X_usingCTable(op, oend-op, ip, iend-ip, CTable) ); | |||
assert(op <= oend); | |||
assert(ip <= iend); | |||
{ CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, (size_t)(iend-ip), CTable, bmi2) ); | |||
if (cSize==0) return 0; | |||
op += cSize; | |||
} | |||
return op-ostart; | |||
return (size_t)(op-ostart); | |||
} | |||
size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable) | |||
{ | |||
return HUF_compress4X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0); | |||
} | |||
typedef enum { HUF_singleStream, HUF_fourStreams } HUF_nbStreams_e; | |||
static size_t HUF_compressCTable_internal( | |||
BYTE* const ostart, BYTE* op, BYTE* const oend, | |||
const void* src, size_t srcSize, | |||
unsigned singleStream, const HUF_CElt* CTable) | |||
HUF_nbStreams_e nbStreams, const HUF_CElt* CTable, const int bmi2) | |||
{ | |||
size_t const cSize = singleStream ? | |||
HUF_compress1X_usingCTable(op, oend - op, src, srcSize, CTable) : | |||
HUF_compress4X_usingCTable(op, oend - op, src, srcSize, CTable); | |||
size_t const cSize = (nbStreams==HUF_singleStream) ? | |||
HUF_compress1X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable, bmi2) : | |||
HUF_compress4X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable, bmi2); | |||
if (HUF_isError(cSize)) { return cSize; } | |||
if (cSize==0) { return 0; } /* uncompressible */ | |||
op += cSize; | |||
/* check compressibility */ | |||
assert(op >= ostart); | |||
if ((size_t)(op-ostart) >= srcSize-1) { return 0; } | |||
return op-ostart; | |||
return (size_t)(op-ostart); | |||
} | |||
/* `workSpace` must a table of at least 1024 unsigned */ | |||
static size_t HUF_compress_internal ( | |||
void* dst, size_t dstSize, | |||
const void* src, size_t srcSize, | |||
unsigned maxSymbolValue, unsigned huffLog, | |||
unsigned singleStream, | |||
void* workSpace, size_t wkspSize, | |||
HUF_CElt* oldHufTable, HUF_repeat* repeat, int preferRepeat) | |||
typedef struct { | |||
unsigned count[HUF_SYMBOLVALUE_MAX + 1]; | |||
HUF_CElt CTable[HUF_SYMBOLVALUE_MAX + 1]; | |||
HUF_buildCTable_wksp_tables buildCTable_wksp; | |||
} HUF_compress_tables_t; | |||
/* HUF_compress_internal() : | |||
* `workSpace` must a table of at least HUF_WORKSPACE_SIZE_U32 unsigned */ | |||
static size_t | |||
HUF_compress_internal (void* dst, size_t dstSize, | |||
const void* src, size_t srcSize, | |||
unsigned maxSymbolValue, unsigned huffLog, | |||
HUF_nbStreams_e nbStreams, | |||
void* workSpace, size_t wkspSize, | |||
HUF_CElt* oldHufTable, HUF_repeat* repeat, int preferRepeat, | |||
const int bmi2) | |||
{ | |||
HUF_compress_tables_t* const table = (HUF_compress_tables_t*)workSpace; | |||
BYTE* const ostart = (BYTE*)dst; | |||
BYTE* const oend = ostart + dstSize; | |||
BYTE* op = ostart; | |||
U32* count; | |||
size_t const countSize = sizeof(U32) * (HUF_SYMBOLVALUE_MAX + 1); | |||
HUF_CElt* CTable; | |||
size_t const CTableSize = sizeof(HUF_CElt) * (HUF_SYMBOLVALUE_MAX + 1); | |||
HUF_STATIC_ASSERT(sizeof(*table) <= HUF_WORKSPACE_SIZE); | |||
/* checks & inits */ | |||
if (wkspSize < sizeof(huffNodeTable) + countSize + CTableSize) return ERROR(GENERIC); | |||
if (!srcSize) return 0; /* Uncompressed (note : 1 means rle, so first byte must be correct) */ | |||
if (!dstSize) return 0; /* cannot fit within dst budget */ | |||
if (((size_t)workSpace & 3) != 0) return ERROR(GENERIC); /* must be aligned on 4-bytes boundaries */ | |||
if (wkspSize < HUF_WORKSPACE_SIZE) return ERROR(workSpace_tooSmall); | |||
if (!srcSize) return 0; /* Uncompressed */ | |||
if (!dstSize) return 0; /* cannot fit anything within dst budget */ | |||
if (srcSize > HUF_BLOCKSIZE_MAX) return ERROR(srcSize_wrong); /* current block size limit */ | |||
if (huffLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge); | |||
if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge); | |||
if (!maxSymbolValue) maxSymbolValue = HUF_SYMBOLVALUE_MAX; | |||
if (!huffLog) huffLog = HUF_TABLELOG_DEFAULT; | |||
count = (U32*)workSpace; | |||
workSpace = (BYTE*)workSpace + countSize; | |||
wkspSize -= countSize; | |||
CTable = (HUF_CElt*)workSpace; | |||
workSpace = (BYTE*)workSpace + CTableSize; | |||
wkspSize -= CTableSize; | |||
/* Heuristic : If we don't need to check the validity of the old table use the old table for small inputs */ | |||
/* Heuristic : If old table is valid, use it for small inputs */ | |||
if (preferRepeat && repeat && *repeat == HUF_repeat_valid) { | |||
return HUF_compressCTable_internal(ostart, op, oend, src, srcSize, singleStream, oldHufTable); | |||
return HUF_compressCTable_internal(ostart, op, oend, | |||
src, srcSize, | |||
nbStreams, oldHufTable, bmi2); | |||
} | |||
/* Scan input and build symbol stats */ | |||
{ CHECK_V_F(largest, FSE_count_wksp (count, &maxSymbolValue, (const BYTE*)src, srcSize, (U32*)workSpace) ); | |||
{ CHECK_V_F(largest, HIST_count_wksp (table->count, &maxSymbolValue, (const BYTE*)src, srcSize, workSpace, wkspSize) ); | |||
if (largest == srcSize) { *ostart = ((const BYTE*)src)[0]; return 1; } /* single symbol, rle */ | |||
if (largest <= (srcSize >> 7)+1) return 0; /* Fast heuristic : not compressible enough */ | |||
if (largest <= (srcSize >> 7)+4) return 0; /* heuristic : probably not compressible enough */ | |||
} | |||
/* Check validity of previous table */ | |||
if (repeat && *repeat == HUF_repeat_check && !HUF_validateCTable(oldHufTable, count, maxSymbolValue)) { | |||
if ( repeat | |||
&& *repeat == HUF_repeat_check | |||
&& !HUF_validateCTable(oldHufTable, table->count, maxSymbolValue)) { | |||
*repeat = HUF_repeat_none; | |||
} | |||
/* Heuristic : use existing table for small inputs */ | |||
if (preferRepeat && repeat && *repeat != HUF_repeat_none) { | |||
return HUF_compressCTable_internal(ostart, op, oend, src, srcSize, singleStream, oldHufTable); | |||
return HUF_compressCTable_internal(ostart, op, oend, | |||
src, srcSize, | |||
nbStreams, oldHufTable, bmi2); | |||
} | |||
/* Build Huffman Tree */ | |||
huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue); | |||
{ CHECK_V_F(maxBits, HUF_buildCTable_wksp (CTable, count, maxSymbolValue, huffLog, workSpace, wkspSize) ); | |||
{ size_t const maxBits = HUF_buildCTable_wksp(table->CTable, table->count, | |||
maxSymbolValue, huffLog, | |||
&table->buildCTable_wksp, sizeof(table->buildCTable_wksp)); | |||
CHECK_F(maxBits); | |||
huffLog = (U32)maxBits; | |||
/* Zero the unused symbols so we can check it for validity */ | |||
memset(CTable + maxSymbolValue + 1, 0, CTableSize - (maxSymbolValue + 1) * sizeof(HUF_CElt)); | |||
/* Zero unused symbols in CTable, so we can check it for validity */ | |||
memset(table->CTable + (maxSymbolValue + 1), 0, | |||
sizeof(table->CTable) - ((maxSymbolValue + 1) * sizeof(HUF_CElt))); | |||
} | |||
/* Write table description header */ | |||
{ CHECK_V_F(hSize, HUF_writeCTable (op, dstSize, CTable, maxSymbolValue, huffLog) ); | |||
/* Check if using the previous table will be beneficial */ | |||
{ CHECK_V_F(hSize, HUF_writeCTable (op, dstSize, table->CTable, maxSymbolValue, huffLog) ); | |||
/* Check if using previous huffman table is beneficial */ | |||
if (repeat && *repeat != HUF_repeat_none) { | |||
size_t const oldSize = HUF_estimateCompressedSize(oldHufTable, count, maxSymbolValue); | |||
size_t const newSize = HUF_estimateCompressedSize(CTable, count, maxSymbolValue); | |||
size_t const oldSize = HUF_estimateCompressedSize(oldHufTable, table->count, maxSymbolValue); | |||
size_t const newSize = HUF_estimateCompressedSize(table->CTable, table->count, maxSymbolValue); | |||
if (oldSize <= hSize + newSize || hSize + 12 >= srcSize) { | |||
return HUF_compressCTable_internal(ostart, op, oend, src, srcSize, singleStream, oldHufTable); | |||
} | |||
} | |||
/* Use the new table */ | |||
return HUF_compressCTable_internal(ostart, op, oend, | |||
src, srcSize, | |||
nbStreams, oldHufTable, bmi2); | |||
} } | |||
/* Use the new huffman table */ | |||
if (hSize + 12ul >= srcSize) { return 0; } | |||
op += hSize; | |||
if (repeat) { *repeat = HUF_repeat_none; } | |||
if (oldHufTable) { memcpy(oldHufTable, CTable, CTableSize); } /* Save the new table */ | |||
if (oldHufTable) | |||
memcpy(oldHufTable, table->CTable, sizeof(table->CTable)); /* Save new table */ | |||
} | |||
return HUF_compressCTable_internal(ostart, op, oend, src, srcSize, singleStream, CTable); | |||
return HUF_compressCTable_internal(ostart, op, oend, | |||
src, srcSize, | |||
nbStreams, table->CTable, bmi2); | |||
} | |||
@@ -638,52 +729,70 @@ size_t HUF_compress1X_wksp (void* dst, size_t dstSize, | |||
unsigned maxSymbolValue, unsigned huffLog, | |||
void* workSpace, size_t wkspSize) | |||
{ | |||
return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 1 /* single stream */, workSpace, wkspSize, NULL, NULL, 0); | |||
return HUF_compress_internal(dst, dstSize, src, srcSize, | |||
maxSymbolValue, huffLog, HUF_singleStream, | |||
workSpace, wkspSize, | |||
NULL, NULL, 0, 0 /*bmi2*/); | |||
} | |||
size_t HUF_compress1X_repeat (void* dst, size_t dstSize, | |||
const void* src, size_t srcSize, | |||
unsigned maxSymbolValue, unsigned huffLog, | |||
void* workSpace, size_t wkspSize, | |||
HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat) | |||
HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2) | |||
{ | |||
return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 1 /* single stream */, workSpace, wkspSize, hufTable, repeat, preferRepeat); | |||
return HUF_compress_internal(dst, dstSize, src, srcSize, | |||
maxSymbolValue, huffLog, HUF_singleStream, | |||
workSpace, wkspSize, hufTable, | |||
repeat, preferRepeat, bmi2); | |||
} | |||
size_t HUF_compress1X (void* dst, size_t dstSize, | |||
const void* src, size_t srcSize, | |||
unsigned maxSymbolValue, unsigned huffLog) | |||
{ | |||
unsigned workSpace[1024]; | |||
unsigned workSpace[HUF_WORKSPACE_SIZE_U32]; | |||
return HUF_compress1X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace, sizeof(workSpace)); | |||
} | |||
/* HUF_compress4X_repeat(): | |||
* compress input using 4 streams. | |||
* provide workspace to generate compression tables */ | |||
size_t HUF_compress4X_wksp (void* dst, size_t dstSize, | |||
const void* src, size_t srcSize, | |||
unsigned maxSymbolValue, unsigned huffLog, | |||
void* workSpace, size_t wkspSize) | |||
{ | |||
return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 0 /* 4 streams */, workSpace, wkspSize, NULL, NULL, 0); | |||
return HUF_compress_internal(dst, dstSize, src, srcSize, | |||
maxSymbolValue, huffLog, HUF_fourStreams, | |||
workSpace, wkspSize, | |||
NULL, NULL, 0, 0 /*bmi2*/); | |||
} | |||
/* HUF_compress4X_repeat(): | |||
* compress input using 4 streams. | |||
* re-use an existing huffman compression table */ | |||
size_t HUF_compress4X_repeat (void* dst, size_t dstSize, | |||
const void* src, size_t srcSize, | |||
unsigned maxSymbolValue, unsigned huffLog, | |||
void* workSpace, size_t wkspSize, | |||
HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat) | |||
HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2) | |||
{ | |||
return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 0 /* 4 streams */, workSpace, wkspSize, hufTable, repeat, preferRepeat); | |||
return HUF_compress_internal(dst, dstSize, src, srcSize, | |||
maxSymbolValue, huffLog, HUF_fourStreams, | |||
workSpace, wkspSize, | |||
hufTable, repeat, preferRepeat, bmi2); | |||
} | |||
size_t HUF_compress2 (void* dst, size_t dstSize, | |||
const void* src, size_t srcSize, | |||
unsigned maxSymbolValue, unsigned huffLog) | |||
{ | |||
unsigned workSpace[1024]; | |||
unsigned workSpace[HUF_WORKSPACE_SIZE_U32]; | |||
return HUF_compress4X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace, sizeof(workSpace)); | |||
} | |||
size_t HUF_compress (void* dst, size_t maxDstSize, const void* src, size_t srcSize) | |||
{ | |||
return HUF_compress2(dst, maxDstSize, src, (U32)srcSize, 255, HUF_TABLELOG_DEFAULT); | |||
return HUF_compress2(dst, maxDstSize, src, srcSize, 255, HUF_TABLELOG_DEFAULT); | |||
} |
@@ -1,5 +1,5 @@ | |||
/* | |||
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc. | |||
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. | |||
* All rights reserved. | |||
* | |||
* This source code is licensed under both the BSD-style license (found in the | |||
@@ -39,10 +39,87 @@ extern "C" { | |||
# define MEM_STATIC static /* this version may generate warnings for unused static functions; disable the relevant warning */ | |||
#endif | |||
#ifndef __has_builtin | |||
# define __has_builtin(x) 0 /* compat. with non-clang compilers */ | |||
#endif | |||
/* code only tested on 32 and 64 bits systems */ | |||
#define MEM_STATIC_ASSERT(c) { enum { MEM_static_assert = 1/(int)(!!(c)) }; } | |||
MEM_STATIC void MEM_check(void) { MEM_STATIC_ASSERT((sizeof(size_t)==4) || (sizeof(size_t)==8)); } | |||
/* detects whether we are being compiled under msan */ | |||
#if defined (__has_feature) | |||
# if __has_feature(memory_sanitizer) | |||
# define MEMORY_SANITIZER 1 | |||
# endif | |||
#endif | |||
#if defined (MEMORY_SANITIZER) | |||
/* Not all platforms that support msan provide sanitizers/msan_interface.h. | |||
* We therefore declare the functions we need ourselves, rather than trying to | |||
* include the header file... */ | |||
#include <stdint.h> /* intptr_t */ | |||
/* Make memory region fully initialized (without changing its contents). */ | |||
void __msan_unpoison(const volatile void *a, size_t size); | |||
/* Make memory region fully uninitialized (without changing its contents). | |||
This is a legacy interface that does not update origin information. Use | |||
__msan_allocated_memory() instead. */ | |||
void __msan_poison(const volatile void *a, size_t size); | |||
/* Returns the offset of the first (at least partially) poisoned byte in the | |||
memory range, or -1 if the whole range is good. */ | |||
intptr_t __msan_test_shadow(const volatile void *x, size_t size); | |||
#endif | |||
/* detects whether we are being compiled under asan */ | |||
#if defined (__has_feature) | |||
# if __has_feature(address_sanitizer) | |||
# define ADDRESS_SANITIZER 1 | |||
# endif | |||
#elif defined(__SANITIZE_ADDRESS__) | |||
# define ADDRESS_SANITIZER 1 | |||
#endif | |||
#if defined (ADDRESS_SANITIZER) | |||
/* Not all platforms that support asan provide sanitizers/asan_interface.h. | |||
* We therefore declare the functions we need ourselves, rather than trying to | |||
* include the header file... */ | |||
/** | |||
* Marks a memory region (<c>[addr, addr+size)</c>) as unaddressable. | |||
* | |||
* This memory must be previously allocated by your program. Instrumented | |||
* code is forbidden from accessing addresses in this region until it is | |||
* unpoisoned. This function is not guaranteed to poison the entire region - | |||
* it could poison only a subregion of <c>[addr, addr+size)</c> due to ASan | |||
* alignment restrictions. | |||
* | |||
* \note This function is not thread-safe because no two threads can poison or | |||
* unpoison memory in the same memory region simultaneously. | |||
* | |||
* \param addr Start of memory region. | |||
* \param size Size of memory region. */ | |||
void __asan_poison_memory_region(void const volatile *addr, size_t size); | |||
/** | |||
* Marks a memory region (<c>[addr, addr+size)</c>) as addressable. | |||
* | |||
* This memory must be previously allocated by your program. Accessing | |||
* addresses in this region is allowed until this region is poisoned again. | |||
* This function could unpoison a super-region of <c>[addr, addr+size)</c> due | |||
* to ASan alignment restrictions. | |||
* | |||
* \note This function is not thread-safe because no two threads can | |||
* poison or unpoison memory in the same memory region simultaneously. | |||
* | |||
* \param addr Start of memory region. | |||
* \param size Size of memory region. */ | |||
void __asan_unpoison_memory_region(void const volatile *addr, size_t size); | |||
#endif | |||
/*-************************************************************** | |||
* Basic Types | |||
@@ -56,18 +133,26 @@ MEM_STATIC void MEM_check(void) { MEM_STATIC_ASSERT((sizeof(size_t)==4) || (size | |||
typedef int32_t S32; | |||
typedef uint64_t U64; | |||
typedef int64_t S64; | |||
typedef intptr_t iPtrDiff; | |||
typedef uintptr_t uPtrDiff; | |||
#else | |||
# include <limits.h> | |||
#if CHAR_BIT != 8 | |||
# error "this implementation requires char to be exactly 8-bit type" | |||
#endif | |||
typedef unsigned char BYTE; | |||
#if USHRT_MAX != 65535 | |||
# error "this implementation requires short to be exactly 16-bit type" | |||
#endif | |||
typedef unsigned short U16; | |||
typedef signed short S16; | |||
#if UINT_MAX != 4294967295 | |||
# error "this implementation requires int to be exactly 32-bit type" | |||
#endif | |||
typedef unsigned int U32; | |||
typedef signed int S32; | |||
/* note : there are no limits defined for long long type in C90. | |||
* limits exist in C99, however, in such case, <stdint.h> is preferred */ | |||
typedef unsigned long long U64; | |||
typedef signed long long S64; | |||
typedef ptrdiff_t iPtrDiff; | |||
typedef size_t uPtrDiff; | |||
#endif | |||
@@ -90,7 +175,7 @@ MEM_STATIC void MEM_check(void) { MEM_STATIC_ASSERT((sizeof(size_t)==4) || (size | |||
#ifndef MEM_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */ | |||
# if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) ) | |||
# define MEM_FORCE_MEMORY_ACCESS 2 | |||
# elif defined(__INTEL_COMPILER) || defined(__GNUC__) | |||
# elif defined(__INTEL_COMPILER) || defined(__GNUC__) || defined(__ICCARM__) | |||
# define MEM_FORCE_MEMORY_ACCESS 1 | |||
# endif | |||
#endif | |||
@@ -123,20 +208,26 @@ MEM_STATIC void MEM_write64(void* memPtr, U64 value) { *(U64*)memPtr = value; } | |||
/* currently only defined for gcc and icc */ | |||
#if defined(_MSC_VER) || (defined(__INTEL_COMPILER) && defined(WIN32)) | |||
__pragma( pack(push, 1) ) | |||
typedef union { U16 u16; U32 u32; U64 u64; size_t st; } unalign; | |||
typedef struct { U16 v; } unalign16; | |||
typedef struct { U32 v; } unalign32; | |||
typedef struct { U64 v; } unalign64; | |||
typedef struct { size_t v; } unalignArch; | |||
__pragma( pack(pop) ) | |||
#else | |||
typedef union { U16 u16; U32 u32; U64 u64; size_t st; } __attribute__((packed)) unalign; | |||
typedef struct { U16 v; } __attribute__((packed)) unalign16; | |||
typedef struct { U32 v; } __attribute__((packed)) unalign32; | |||
typedef struct { U64 v; } __attribute__((packed)) unalign64; | |||
typedef struct { size_t v; } __attribute__((packed)) unalignArch; | |||
#endif | |||
MEM_STATIC U16 MEM_read16(const void* ptr) { return ((const unalign*)ptr)->u16; } | |||
MEM_STATIC U32 MEM_read32(const void* ptr) { return ((const unalign*)ptr)->u32; } | |||
MEM_STATIC U64 MEM_read64(const void* ptr) { return ((const unalign*)ptr)->u64; } | |||
MEM_STATIC size_t MEM_readST(const void* ptr) { return ((const unalign*)ptr)->st; } | |||
MEM_STATIC U16 MEM_read16(const void* ptr) { return ((const unalign16*)ptr)->v; } | |||
MEM_STATIC U32 MEM_read32(const void* ptr) { return ((const unalign32*)ptr)->v; } | |||
MEM_STATIC U64 MEM_read64(const void* ptr) { return ((const unalign64*)ptr)->v; } | |||
MEM_STATIC size_t MEM_readST(const void* ptr) { return ((const unalignArch*)ptr)->v; } | |||
MEM_STATIC void MEM_write16(void* memPtr, U16 value) { ((unalign*)memPtr)->u16 = value; } | |||
MEM_STATIC void MEM_write32(void* memPtr, U32 value) { ((unalign*)memPtr)->u32 = value; } | |||
MEM_STATIC void MEM_write64(void* memPtr, U64 value) { ((unalign*)memPtr)->u64 = value; } | |||
MEM_STATIC void MEM_write16(void* memPtr, U16 value) { ((unalign16*)memPtr)->v = value; } | |||
MEM_STATIC void MEM_write32(void* memPtr, U32 value) { ((unalign32*)memPtr)->v = value; } | |||
MEM_STATIC void MEM_write64(void* memPtr, U64 value) { ((unalign64*)memPtr)->v = value; } | |||
#else | |||
@@ -184,7 +275,8 @@ MEM_STATIC U32 MEM_swap32(U32 in) | |||
{ | |||
#if defined(_MSC_VER) /* Visual Studio */ | |||
return _byteswap_ulong(in); | |||
#elif defined (__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__ >= 403) | |||
#elif (defined (__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__ >= 403)) \ | |||
|| (defined(__clang__) && __has_builtin(__builtin_bswap32)) | |||
return __builtin_bswap32(in); | |||
#else | |||
return ((in << 24) & 0xff000000 ) | | |||
@@ -198,7 +290,8 @@ MEM_STATIC U64 MEM_swap64(U64 in) | |||
{ | |||
#if defined(_MSC_VER) /* Visual Studio */ | |||
return _byteswap_uint64(in); | |||
#elif defined (__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__ >= 403) | |||
#elif (defined (__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__ >= 403)) \ | |||
|| (defined(__clang__) && __has_builtin(__builtin_bswap64)) | |||
return __builtin_bswap64(in); | |||
#else | |||
return ((in << 56) & 0xff00000000000000ULL) | |
@@ -1,5 +1,5 @@ | |||
/* | |||
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc. | |||
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. | |||
* All rights reserved. | |||
* | |||
* This source code is licensed under both the BSD-style license (found in the | |||
@@ -10,8 +10,9 @@ | |||
/* ====== Dependencies ======= */ | |||
#include <stddef.h> /* size_t */ | |||
#include <stdlib.h> /* malloc, calloc, free */ | |||
#include <stddef.h> /* size_t */ | |||
#include "debug.h" /* assert */ | |||
#include "zstd_internal.h" /* ZSTD_malloc, ZSTD_free */ | |||
#include "pool.h" | |||
/* ====== Compiler specifics ====== */ | |||
@@ -33,8 +34,9 @@ typedef struct POOL_job_s { | |||
struct POOL_ctx_s { | |||
ZSTD_customMem customMem; | |||
/* Keep track of the threads */ | |||
pthread_t *threads; | |||
size_t numThreads; | |||
ZSTD_pthread_t* threads; | |||
size_t threadCapacity; | |||
size_t threadLimit; | |||
/* The queue is a circular buffer */ | |||
POOL_job *queue; | |||
@@ -48,34 +50,37 @@ struct POOL_ctx_s { | |||
int queueEmpty; | |||
/* The mutex protects the queue */ | |||
pthread_mutex_t queueMutex; | |||
ZSTD_pthread_mutex_t queueMutex; | |||
/* Condition variable for pushers to wait on when the queue is full */ | |||
pthread_cond_t queuePushCond; | |||
ZSTD_pthread_cond_t queuePushCond; | |||
/* Condition variables for poppers to wait on when the queue is empty */ | |||
pthread_cond_t queuePopCond; | |||
ZSTD_pthread_cond_t queuePopCond; | |||
/* Indicates if the queue is shutting down */ | |||
int shutdown; | |||
}; | |||
/* POOL_thread() : | |||
Work thread for the thread pool. | |||
Waits for jobs and executes them. | |||
@returns : NULL on failure else non-null. | |||
*/ | |||
* Work thread for the thread pool. | |||
* Waits for jobs and executes them. | |||
* @returns : NULL on failure else non-null. | |||
*/ | |||
static void* POOL_thread(void* opaque) { | |||
POOL_ctx* const ctx = (POOL_ctx*)opaque; | |||
if (!ctx) { return NULL; } | |||
for (;;) { | |||
/* Lock the mutex and wait for a non-empty queue or until shutdown */ | |||
pthread_mutex_lock(&ctx->queueMutex); | |||
while (ctx->queueEmpty && !ctx->shutdown) { | |||
pthread_cond_wait(&ctx->queuePopCond, &ctx->queueMutex); | |||
} | |||
/* empty => shutting down: so stop */ | |||
if (ctx->queueEmpty) { | |||
pthread_mutex_unlock(&ctx->queueMutex); | |||
return opaque; | |||
ZSTD_pthread_mutex_lock(&ctx->queueMutex); | |||
while ( ctx->queueEmpty | |||
|| (ctx->numThreadsBusy >= ctx->threadLimit) ) { | |||
if (ctx->shutdown) { | |||
/* even if !queueEmpty, (possible if numThreadsBusy >= threadLimit), | |||
* a few threads will be shutdown while !queueEmpty, | |||
* but enough threads will remain active to finish the queue */ | |||
ZSTD_pthread_mutex_unlock(&ctx->queueMutex); | |||
return opaque; | |||
} | |||
ZSTD_pthread_cond_wait(&ctx->queuePopCond, &ctx->queueMutex); | |||
} | |||
/* Pop a job off the queue */ | |||
{ POOL_job const job = ctx->queue[ctx->queueHead]; | |||
@@ -83,62 +88,69 @@ static void* POOL_thread(void* opaque) { | |||
ctx->numThreadsBusy++; | |||
ctx->queueEmpty = ctx->queueHead == ctx->queueTail; | |||
/* Unlock the mutex, signal a pusher, and run the job */ | |||
pthread_mutex_unlock(&ctx->queueMutex); | |||
pthread_cond_signal(&ctx->queuePushCond); | |||
ZSTD_pthread_cond_signal(&ctx->queuePushCond); | |||
ZSTD_pthread_mutex_unlock(&ctx->queueMutex); | |||
job.function(job.opaque); | |||
/* If the intended queue size was 0, signal after finishing job */ | |||
ZSTD_pthread_mutex_lock(&ctx->queueMutex); | |||
ctx->numThreadsBusy--; | |||
if (ctx->queueSize == 1) { | |||
pthread_mutex_lock(&ctx->queueMutex); | |||
ctx->numThreadsBusy--; | |||
pthread_mutex_unlock(&ctx->queueMutex); | |||
pthread_cond_signal(&ctx->queuePushCond); | |||
} } | |||
ZSTD_pthread_cond_signal(&ctx->queuePushCond); | |||
} | |||
ZSTD_pthread_mutex_unlock(&ctx->queueMutex); | |||
} | |||
} /* for (;;) */ | |||
/* Unreachable */ | |||
assert(0); /* Unreachable */ | |||
} | |||
POOL_ctx *POOL_create(size_t numThreads, size_t queueSize) { | |||
POOL_ctx* POOL_create(size_t numThreads, size_t queueSize) { | |||
return POOL_create_advanced(numThreads, queueSize, ZSTD_defaultCMem); | |||
} | |||
POOL_ctx *POOL_create_advanced(size_t numThreads, size_t queueSize, ZSTD_customMem customMem) { | |||
POOL_ctx *ctx; | |||
/* Check the parameters */ | |||
POOL_ctx* POOL_create_advanced(size_t numThreads, size_t queueSize, | |||
ZSTD_customMem customMem) { | |||
POOL_ctx* ctx; | |||
/* Check parameters */ | |||
if (!numThreads) { return NULL; } | |||
/* Allocate the context and zero initialize */ | |||
ctx = (POOL_ctx *)ZSTD_calloc(sizeof(POOL_ctx), customMem); | |||
ctx = (POOL_ctx*)ZSTD_calloc(sizeof(POOL_ctx), customMem); | |||
if (!ctx) { return NULL; } | |||
/* Initialize the job queue. | |||
* It needs one extra space since one space is wasted to differentiate empty | |||
* and full queues. | |||
* It needs one extra space since one space is wasted to differentiate | |||
* empty and full queues. | |||
*/ | |||
ctx->queueSize = queueSize + 1; | |||
ctx->queue = (POOL_job*) malloc(ctx->queueSize * sizeof(POOL_job)); | |||
ctx->queue = (POOL_job*)ZSTD_malloc(ctx->queueSize * sizeof(POOL_job), customMem); | |||
ctx->queueHead = 0; | |||
ctx->queueTail = 0; | |||
ctx->numThreadsBusy = 0; | |||
ctx->queueEmpty = 1; | |||
(void)pthread_mutex_init(&ctx->queueMutex, NULL); | |||
(void)pthread_cond_init(&ctx->queuePushCond, NULL); | |||
(void)pthread_cond_init(&ctx->queuePopCond, NULL); | |||
{ | |||
int error = 0; | |||
error |= ZSTD_pthread_mutex_init(&ctx->queueMutex, NULL); | |||
error |= ZSTD_pthread_cond_init(&ctx->queuePushCond, NULL); | |||
error |= ZSTD_pthread_cond_init(&ctx->queuePopCond, NULL); | |||
if (error) { POOL_free(ctx); return NULL; } | |||
} | |||
ctx->shutdown = 0; | |||
/* Allocate space for the thread handles */ | |||
ctx->threads = (pthread_t*)ZSTD_malloc(numThreads * sizeof(pthread_t), customMem); | |||
ctx->numThreads = 0; | |||
ctx->threads = (ZSTD_pthread_t*)ZSTD_malloc(numThreads * sizeof(ZSTD_pthread_t), customMem); | |||
ctx->threadCapacity = 0; | |||
ctx->customMem = customMem; | |||
/* Check for errors */ | |||
if (!ctx->threads || !ctx->queue) { POOL_free(ctx); return NULL; } | |||
/* Initialize the threads */ | |||
{ size_t i; | |||
for (i = 0; i < numThreads; ++i) { | |||
if (pthread_create(&ctx->threads[i], NULL, &POOL_thread, ctx)) { | |||
ctx->numThreads = i; | |||
if (ZSTD_pthread_create(&ctx->threads[i], NULL, &POOL_thread, ctx)) { | |||
ctx->threadCapacity = i; | |||
POOL_free(ctx); | |||
return NULL; | |||
} } | |||
ctx->numThreads = numThreads; | |||
ctx->threadCapacity = numThreads; | |||
ctx->threadLimit = numThreads; | |||
} | |||
return ctx; | |||
} | |||
@@ -146,80 +158,146 @@ POOL_ctx *POOL_create_advanced(size_t numThreads, size_t queueSize, ZSTD_customM | |||
/*! POOL_join() : | |||
Shutdown the queue, wake any sleeping threads, and join all of the threads. | |||
*/ | |||
static void POOL_join(POOL_ctx *ctx) { | |||
static void POOL_join(POOL_ctx* ctx) { | |||
/* Shut down the queue */ | |||
pthread_mutex_lock(&ctx->queueMutex); | |||
ZSTD_pthread_mutex_lock(&ctx->queueMutex); | |||
ctx->shutdown = 1; | |||
pthread_mutex_unlock(&ctx->queueMutex); | |||
ZSTD_pthread_mutex_unlock(&ctx->queueMutex); | |||
/* Wake up sleeping threads */ | |||
pthread_cond_broadcast(&ctx->queuePushCond); | |||
pthread_cond_broadcast(&ctx->queuePopCond); | |||
ZSTD_pthread_cond_broadcast(&ctx->queuePushCond); | |||
ZSTD_pthread_cond_broadcast(&ctx->queuePopCond); | |||
/* Join all of the threads */ | |||
{ size_t i; | |||
for (i = 0; i < ctx->numThreads; ++i) { | |||
pthread_join(ctx->threads[i], NULL); | |||
for (i = 0; i < ctx->threadCapacity; ++i) { | |||
ZSTD_pthread_join(ctx->threads[i], NULL); /* note : could fail */ | |||
} } | |||
} | |||
void POOL_free(POOL_ctx *ctx) { | |||
if (!ctx) { return; } | |||
POOL_join(ctx); | |||
pthread_mutex_destroy(&ctx->queueMutex); | |||
pthread_cond_destroy(&ctx->queuePushCond); | |||
pthread_cond_destroy(&ctx->queuePopCond); | |||
ZSTD_pthread_mutex_destroy(&ctx->queueMutex); | |||
ZSTD_pthread_cond_destroy(&ctx->queuePushCond); | |||
ZSTD_pthread_cond_destroy(&ctx->queuePopCond); | |||
ZSTD_free(ctx->queue, ctx->customMem); | |||
ZSTD_free(ctx->threads, ctx->customMem); | |||
ZSTD_free(ctx, ctx->customMem); | |||
} | |||
size_t POOL_sizeof(POOL_ctx *ctx) { | |||
if (ctx==NULL) return 0; /* supports sizeof NULL */ | |||
return sizeof(*ctx) | |||
+ ctx->queueSize * sizeof(POOL_job) | |||
+ ctx->numThreads * sizeof(pthread_t); | |||
+ ctx->threadCapacity * sizeof(ZSTD_pthread_t); | |||
} | |||
/* @return : 0 on success, 1 on error */ | |||
static int POOL_resize_internal(POOL_ctx* ctx, size_t numThreads) | |||
{ | |||
if (numThreads <= ctx->threadCapacity) { | |||
if (!numThreads) return 1; | |||
ctx->threadLimit = numThreads; | |||
return 0; | |||
} | |||
/* numThreads > threadCapacity */ | |||
{ ZSTD_pthread_t* const threadPool = (ZSTD_pthread_t*)ZSTD_malloc(numThreads * sizeof(ZSTD_pthread_t), ctx->customMem); | |||
if (!threadPool) return 1; | |||
/* replace existing thread pool */ | |||
memcpy(threadPool, ctx->threads, ctx->threadCapacity * sizeof(*threadPool)); | |||
ZSTD_free(ctx->threads, ctx->customMem); | |||
ctx->threads = threadPool; | |||
/* Initialize additional threads */ | |||
{ size_t threadId; | |||
for (threadId = ctx->threadCapacity; threadId < numThreads; ++threadId) { | |||
if (ZSTD_pthread_create(&threadPool[threadId], NULL, &POOL_thread, ctx)) { | |||
ctx->threadCapacity = threadId; | |||
return 1; | |||
} } | |||
} } | |||
/* successfully expanded */ | |||
ctx->threadCapacity = numThreads; | |||
ctx->threadLimit = numThreads; | |||
return 0; | |||
} | |||
/* @return : 0 on success, 1 on error */ | |||
int POOL_resize(POOL_ctx* ctx, size_t numThreads) | |||
{ | |||
int result; | |||
if (ctx==NULL) return 1; | |||
ZSTD_pthread_mutex_lock(&ctx->queueMutex); | |||
result = POOL_resize_internal(ctx, numThreads); | |||
ZSTD_pthread_cond_broadcast(&ctx->queuePopCond); | |||
ZSTD_pthread_mutex_unlock(&ctx->queueMutex); | |||
return result; | |||
} | |||
/** | |||
* Returns 1 if the queue is full and 0 otherwise. | |||
* | |||
* If the queueSize is 1 (the pool was created with an intended queueSize of 0), | |||
* then a queue is empty if there is a thread free and no job is waiting. | |||
* When queueSize is 1 (pool was created with an intended queueSize of 0), | |||
* then a queue is empty if there is a thread free _and_ no job is waiting. | |||
*/ | |||
static int isQueueFull(POOL_ctx const* ctx) { | |||
if (ctx->queueSize > 1) { | |||
return ctx->queueHead == ((ctx->queueTail + 1) % ctx->queueSize); | |||
} else { | |||
return ctx->numThreadsBusy == ctx->numThreads || | |||
return (ctx->numThreadsBusy == ctx->threadLimit) || | |||
!ctx->queueEmpty; | |||
} | |||
} | |||
void POOL_add(void* ctxVoid, POOL_function function, void *opaque) { | |||
POOL_ctx* const ctx = (POOL_ctx*)ctxVoid; | |||
if (!ctx) { return; } | |||
pthread_mutex_lock(&ctx->queueMutex); | |||
{ POOL_job const job = {function, opaque}; | |||
static void POOL_add_internal(POOL_ctx* ctx, POOL_function function, void *opaque) | |||
{ | |||
POOL_job const job = {function, opaque}; | |||
assert(ctx != NULL); | |||
if (ctx->shutdown) return; | |||
/* Wait until there is space in the queue for the new job */ | |||
while (isQueueFull(ctx) && !ctx->shutdown) { | |||
pthread_cond_wait(&ctx->queuePushCond, &ctx->queueMutex); | |||
} | |||
/* The queue is still going => there is space */ | |||
if (!ctx->shutdown) { | |||
ctx->queueEmpty = 0; | |||
ctx->queue[ctx->queueTail] = job; | |||
ctx->queueTail = (ctx->queueTail + 1) % ctx->queueSize; | |||
} | |||
ctx->queueEmpty = 0; | |||
ctx->queue[ctx->queueTail] = job; | |||
ctx->queueTail = (ctx->queueTail + 1) % ctx->queueSize; | |||
ZSTD_pthread_cond_signal(&ctx->queuePopCond); | |||
} | |||
void POOL_add(POOL_ctx* ctx, POOL_function function, void* opaque) | |||
{ | |||
assert(ctx != NULL); | |||
ZSTD_pthread_mutex_lock(&ctx->queueMutex); | |||
/* Wait until there is space in the queue for the new job */ | |||
while (isQueueFull(ctx) && (!ctx->shutdown)) { | |||
ZSTD_pthread_cond_wait(&ctx->queuePushCond, &ctx->queueMutex); | |||
} | |||
pthread_mutex_unlock(&ctx->queueMutex); | |||
pthread_cond_signal(&ctx->queuePopCond); | |||
POOL_add_internal(ctx, function, opaque); | |||
ZSTD_pthread_mutex_unlock(&ctx->queueMutex); | |||
} | |||
int POOL_tryAdd(POOL_ctx* ctx, POOL_function function, void* opaque) | |||
{ | |||
assert(ctx != NULL); | |||
ZSTD_pthread_mutex_lock(&ctx->queueMutex); | |||
if (isQueueFull(ctx)) { | |||
ZSTD_pthread_mutex_unlock(&ctx->queueMutex); | |||
return 0; | |||
} | |||
POOL_add_internal(ctx, function, opaque); | |||
ZSTD_pthread_mutex_unlock(&ctx->queueMutex); | |||
return 1; | |||
} | |||
#else /* ZSTD_MULTITHREAD not defined */ | |||
/* ========================== */ | |||
/* No multi-threading support */ | |||
/* ========================== */ | |||
/* We don't need any data, but if it is empty malloc() might return NULL. */ | |||
/* We don't need any data, but if it is empty, malloc() might return NULL. */ | |||
struct POOL_ctx_s { | |||
int dummy; | |||
}; | |||
@@ -241,9 +319,20 @@ void POOL_free(POOL_ctx* ctx) { | |||
(void)ctx; | |||
} | |||
void POOL_add(void* ctx, POOL_function function, void* opaque) { | |||
int POOL_resize(POOL_ctx* ctx, size_t numThreads) { | |||
(void)ctx; (void)numThreads; | |||
return 0; | |||
} | |||
void POOL_add(POOL_ctx* ctx, POOL_function function, void* opaque) { | |||
(void)ctx; | |||
function(opaque); | |||
} | |||
int POOL_tryAdd(POOL_ctx* ctx, POOL_function function, void* opaque) { | |||
(void)ctx; | |||
function(opaque); | |||
return 1; | |||
} | |||
size_t POOL_sizeof(POOL_ctx* ctx) { |
@@ -1,5 +1,5 @@ | |||
/* | |||
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc. | |||
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. | |||
* All rights reserved. | |||
* | |||
* This source code is licensed under both the BSD-style license (found in the | |||
@@ -17,7 +17,8 @@ extern "C" { | |||
#include <stddef.h> /* size_t */ | |||
#include "zstd_internal.h" /* ZSTD_customMem */ | |||
#define ZSTD_STATIC_LINKING_ONLY /* ZSTD_customMem */ | |||
#include "../zstd.h" | |||
typedef struct POOL_ctx_s POOL_ctx; | |||
@@ -27,35 +28,53 @@ typedef struct POOL_ctx_s POOL_ctx; | |||
* The maximum number of queued jobs before blocking is `queueSize`. | |||
* @return : POOL_ctx pointer on success, else NULL. | |||
*/ | |||
POOL_ctx *POOL_create(size_t numThreads, size_t queueSize); | |||
POOL_ctx* POOL_create(size_t numThreads, size_t queueSize); | |||
POOL_ctx *POOL_create_advanced(size_t numThreads, size_t queueSize, ZSTD_customMem customMem); | |||
POOL_ctx* POOL_create_advanced(size_t numThreads, size_t queueSize, | |||
ZSTD_customMem customMem); | |||
/*! POOL_free() : | |||
Free a thread pool returned by POOL_create(). | |||
*/ | |||
void POOL_free(POOL_ctx *ctx); | |||
* Free a thread pool returned by POOL_create(). | |||
*/ | |||
void POOL_free(POOL_ctx* ctx); | |||
/*! POOL_resize() : | |||
* Expands or shrinks pool's number of threads. | |||
* This is more efficient than releasing + creating a new context, | |||
* since it tries to preserve and re-use existing threads. | |||
* `numThreads` must be at least 1. | |||
* @return : 0 when resize was successful, | |||
* !0 (typically 1) if there is an error. | |||
* note : only numThreads can be resized, queueSize remains unchanged. | |||
*/ | |||
int POOL_resize(POOL_ctx* ctx, size_t numThreads); | |||
/*! POOL_sizeof() : | |||
return memory usage of pool returned by POOL_create(). | |||
*/ | |||
size_t POOL_sizeof(POOL_ctx *ctx); | |||
* @return threadpool memory usage | |||
* note : compatible with NULL (returns 0 in this case) | |||
*/ | |||
size_t POOL_sizeof(POOL_ctx* ctx); | |||
/*! POOL_function : | |||
The function type that can be added to a thread pool. | |||
*/ | |||
typedef void (*POOL_function)(void *); | |||
/*! POOL_add_function : | |||
The function type for a generic thread pool add function. | |||
*/ | |||
typedef void (*POOL_add_function)(void *, POOL_function, void *); | |||
* The function type that can be added to a thread pool. | |||
*/ | |||
typedef void (*POOL_function)(void*); | |||
/*! POOL_add() : | |||
Add the job `function(opaque)` to the thread pool. | |||
Possibly blocks until there is room in the queue. | |||
Note : The function may be executed asynchronously, so `opaque` must live until the function has been completed. | |||
*/ | |||
void POOL_add(void *ctx, POOL_function function, void *opaque); | |||
* Add the job `function(opaque)` to the thread pool. `ctx` must be valid. | |||
* Possibly blocks until there is room in the queue. | |||
* Note : The function may be executed asynchronously, | |||
* therefore, `opaque` must live until function has been completed. | |||
*/ | |||
void POOL_add(POOL_ctx* ctx, POOL_function function, void* opaque); | |||
/*! POOL_tryAdd() : | |||
* Add the job `function(opaque)` to thread pool _if_ a worker is available. | |||
* Returns immediately even if not (does not block). | |||
* @return : 1 if successful, 0 if not. | |||
*/ | |||
int POOL_tryAdd(POOL_ctx* ctx, POOL_function function, void* opaque); | |||
#if defined (__cplusplus) |
@@ -2,20 +2,23 @@ | |||
* Copyright (c) 2016 Tino Reichardt | |||
* All rights reserved. | |||
* | |||
* You can contact the author at: | |||
* - zstdmt source repository: https://github.com/mcmilk/zstdmt | |||
* | |||
* This source code is licensed under both the BSD-style license (found in the | |||
* LICENSE file in the root directory of this source tree) and the GPLv2 (found | |||
* in the COPYING file in the root directory of this source tree). | |||
* | |||
* You can contact the author at: | |||
* - zstdmt source repository: https://github.com/mcmilk/zstdmt | |||
* You may select, at your option, one of the above-listed licenses. | |||
*/ | |||
/** | |||
* This file will hold wrapper for systems, which do not support pthreads | |||
*/ | |||
/* create fake symbol to avoid empty trnaslation unit warning */ | |||
int g_ZSTD_threading_useles_symbol; | |||
#include "threading.h" | |||
/* create fake symbol to avoid empty translation unit warning */ | |||
int g_ZSTD_threading_useless_symbol; | |||
#if defined(ZSTD_MULTITHREAD) && defined(_WIN32) | |||
@@ -28,19 +31,18 @@ int g_ZSTD_threading_useles_symbol; | |||
/* === Dependencies === */ | |||
#include <process.h> | |||
#include <errno.h> | |||
#include "threading.h" | |||
/* === Implementation === */ | |||
static unsigned __stdcall worker(void *arg) | |||
{ | |||
pthread_t* const thread = (pthread_t*) arg; | |||
ZSTD_pthread_t* const thread = (ZSTD_pthread_t*) arg; | |||
thread->arg = thread->start_routine(thread->arg); | |||
return 0; | |||
} | |||
int pthread_create(pthread_t* thread, const void* unused, | |||
int ZSTD_pthread_create(ZSTD_pthread_t* thread, const void* unused, | |||
void* (*start_routine) (void*), void* arg) | |||
{ | |||
(void)unused; | |||
@@ -54,16 +56,16 @@ int pthread_create(pthread_t* thread, const void* unused, | |||
return 0; | |||
} | |||
int _pthread_join(pthread_t * thread, void **value_ptr) | |||
int ZSTD_pthread_join(ZSTD_pthread_t thread, void **value_ptr) | |||
{ | |||
DWORD result; | |||
if (!thread->handle) return 0; | |||
if (!thread.handle) return 0; | |||
result = WaitForSingleObject(thread->handle, INFINITE); | |||
result = WaitForSingleObject(thread.handle, INFINITE); | |||
switch (result) { | |||
case WAIT_OBJECT_0: | |||
if (value_ptr) *value_ptr = thread->arg; | |||
if (value_ptr) *value_ptr = thread.arg; | |||
return 0; | |||
case WAIT_ABANDONED: | |||
return EINVAL; | |||
@@ -73,3 +75,47 @@ int _pthread_join(pthread_t * thread, void **value_ptr) | |||
} | |||
#endif /* ZSTD_MULTITHREAD */ | |||
#if defined(ZSTD_MULTITHREAD) && DEBUGLEVEL >= 1 && !defined(_WIN32) | |||
#include <stdlib.h> | |||
int ZSTD_pthread_mutex_init(ZSTD_pthread_mutex_t* mutex, pthread_mutexattr_t const* attr) | |||
{ | |||
*mutex = (pthread_mutex_t*)malloc(sizeof(pthread_mutex_t)); | |||
if (!*mutex) | |||
return 1; | |||
return pthread_mutex_init(*mutex, attr); | |||
} | |||
int ZSTD_pthread_mutex_destroy(ZSTD_pthread_mutex_t* mutex) | |||
{ | |||
if (!*mutex) | |||
return 0; | |||
{ | |||
int const ret = pthread_mutex_destroy(*mutex); | |||
free(*mutex); | |||
return ret; | |||
} | |||
} | |||
int ZSTD_pthread_cond_init(ZSTD_pthread_cond_t* cond, pthread_condattr_t const* attr) | |||
{ | |||
*cond = (pthread_cond_t*)malloc(sizeof(pthread_cond_t)); | |||
if (!*cond) | |||
return 1; | |||
return pthread_cond_init(*cond, attr); | |||
} | |||
int ZSTD_pthread_cond_destroy(ZSTD_pthread_cond_t* cond) | |||
{ | |||
if (!*cond) | |||
return 0; | |||
{ | |||
int const ret = pthread_cond_destroy(*cond); | |||
free(*cond); | |||
return ret; | |||
} | |||
} | |||
#endif |
@@ -2,17 +2,20 @@ | |||
* Copyright (c) 2016 Tino Reichardt | |||
* All rights reserved. | |||
* | |||
* You can contact the author at: | |||
* - zstdmt source repository: https://github.com/mcmilk/zstdmt | |||
* | |||
* This source code is licensed under both the BSD-style license (found in the | |||
* LICENSE file in the root directory of this source tree) and the GPLv2 (found | |||
* in the COPYING file in the root directory of this source tree). | |||
* | |||
* You can contact the author at: | |||
* - zstdmt source repository: https://github.com/mcmilk/zstdmt | |||
* You may select, at your option, one of the above-listed licenses. | |||
*/ | |||
#ifndef THREADING_H_938743 | |||
#define THREADING_H_938743 | |||
#include "debug.h" | |||
#if defined (__cplusplus) | |||
extern "C" { | |||
#endif | |||
@@ -44,59 +47,104 @@ extern "C" { | |||
/* mutex */ | |||
#define pthread_mutex_t CRITICAL_SECTION | |||
#define pthread_mutex_init(a,b) (InitializeCriticalSection((a)), 0) | |||
#define pthread_mutex_destroy(a) DeleteCriticalSection((a)) | |||
#define pthread_mutex_lock(a) EnterCriticalSection((a)) | |||
#define pthread_mutex_unlock(a) LeaveCriticalSection((a)) | |||
#define ZSTD_pthread_mutex_t CRITICAL_SECTION | |||
#define ZSTD_pthread_mutex_init(a, b) ((void)(b), InitializeCriticalSection((a)), 0) | |||
#define ZSTD_pthread_mutex_destroy(a) DeleteCriticalSection((a)) | |||
#define ZSTD_pthread_mutex_lock(a) EnterCriticalSection((a)) | |||
#define ZSTD_pthread_mutex_unlock(a) LeaveCriticalSection((a)) | |||
/* condition variable */ | |||
#define pthread_cond_t CONDITION_VARIABLE | |||
#define pthread_cond_init(a, b) (InitializeConditionVariable((a)), 0) | |||
#define pthread_cond_destroy(a) /* No delete */ | |||
#define pthread_cond_wait(a, b) SleepConditionVariableCS((a), (b), INFINITE) | |||
#define pthread_cond_signal(a) WakeConditionVariable((a)) | |||
#define pthread_cond_broadcast(a) WakeAllConditionVariable((a)) | |||
/* pthread_create() and pthread_join() */ | |||
#define ZSTD_pthread_cond_t CONDITION_VARIABLE | |||
#define ZSTD_pthread_cond_init(a, b) ((void)(b), InitializeConditionVariable((a)), 0) | |||
#define ZSTD_pthread_cond_destroy(a) ((void)(a)) | |||
#define ZSTD_pthread_cond_wait(a, b) SleepConditionVariableCS((a), (b), INFINITE) | |||
#define ZSTD_pthread_cond_signal(a) WakeConditionVariable((a)) | |||
#define ZSTD_pthread_cond_broadcast(a) WakeAllConditionVariable((a)) | |||
/* ZSTD_pthread_create() and ZSTD_pthread_join() */ | |||
typedef struct { | |||
HANDLE handle; | |||
void* (*start_routine)(void*); | |||
void* arg; | |||
} pthread_t; | |||
} ZSTD_pthread_t; | |||
int pthread_create(pthread_t* thread, const void* unused, | |||
int ZSTD_pthread_create(ZSTD_pthread_t* thread, const void* unused, | |||
void* (*start_routine) (void*), void* arg); | |||
#define pthread_join(a, b) _pthread_join(&(a), (b)) | |||
int _pthread_join(pthread_t* thread, void** value_ptr); | |||
int ZSTD_pthread_join(ZSTD_pthread_t thread, void** value_ptr); | |||
/** | |||
* add here more wrappers as required | |||
*/ | |||
#elif defined(ZSTD_MULTITHREAD) /* posix assumed ; need a better detection method */ | |||
#elif defined(ZSTD_MULTITHREAD) /* posix assumed ; need a better detection method */ | |||
/* === POSIX Systems === */ | |||
# include <pthread.h> | |||
#if DEBUGLEVEL < 1 | |||
#define ZSTD_pthread_mutex_t pthread_mutex_t | |||
#define ZSTD_pthread_mutex_init(a, b) pthread_mutex_init((a), (b)) | |||
#define ZSTD_pthread_mutex_destroy(a) pthread_mutex_destroy((a)) | |||
#define ZSTD_pthread_mutex_lock(a) pthread_mutex_lock((a)) | |||
#define ZSTD_pthread_mutex_unlock(a) pthread_mutex_unlock((a)) | |||
#define ZSTD_pthread_cond_t pthread_cond_t | |||
#define ZSTD_pthread_cond_init(a, b) pthread_cond_init((a), (b)) | |||
#define ZSTD_pthread_cond_destroy(a) pthread_cond_destroy((a)) | |||
#define ZSTD_pthread_cond_wait(a, b) pthread_cond_wait((a), (b)) | |||
#define ZSTD_pthread_cond_signal(a) pthread_cond_signal((a)) | |||
#define ZSTD_pthread_cond_broadcast(a) pthread_cond_broadcast((a)) | |||
#define ZSTD_pthread_t pthread_t | |||
#define ZSTD_pthread_create(a, b, c, d) pthread_create((a), (b), (c), (d)) | |||
#define ZSTD_pthread_join(a, b) pthread_join((a),(b)) | |||
#else /* DEBUGLEVEL >= 1 */ | |||
/* Debug implementation of threading. | |||
* In this implementation we use pointers for mutexes and condition variables. | |||
* This way, if we forget to init/destroy them the program will crash or ASAN | |||
* will report leaks. | |||
*/ | |||
#define ZSTD_pthread_mutex_t pthread_mutex_t* | |||
int ZSTD_pthread_mutex_init(ZSTD_pthread_mutex_t* mutex, pthread_mutexattr_t const* attr); | |||
int ZSTD_pthread_mutex_destroy(ZSTD_pthread_mutex_t* mutex); | |||
#define ZSTD_pthread_mutex_lock(a) pthread_mutex_lock(*(a)) | |||
#define ZSTD_pthread_mutex_unlock(a) pthread_mutex_unlock(*(a)) | |||
#define ZSTD_pthread_cond_t pthread_cond_t* | |||
int ZSTD_pthread_cond_init(ZSTD_pthread_cond_t* cond, pthread_condattr_t const* attr); | |||
int ZSTD_pthread_cond_destroy(ZSTD_pthread_cond_t* cond); | |||
#define ZSTD_pthread_cond_wait(a, b) pthread_cond_wait(*(a), *(b)) | |||
#define ZSTD_pthread_cond_signal(a) pthread_cond_signal(*(a)) | |||
#define ZSTD_pthread_cond_broadcast(a) pthread_cond_broadcast(*(a)) | |||
#define ZSTD_pthread_t pthread_t | |||
#define ZSTD_pthread_create(a, b, c, d) pthread_create((a), (b), (c), (d)) | |||
#define ZSTD_pthread_join(a, b) pthread_join((a),(b)) | |||
#endif | |||
#else /* ZSTD_MULTITHREAD not defined */ | |||
/* No multithreading support */ | |||
#define pthread_mutex_t int /* #define rather than typedef, because sometimes pthread support is implicit, resulting in duplicated symbols */ | |||
#define pthread_mutex_init(a,b) ((void)a, 0) | |||
#define pthread_mutex_destroy(a) | |||
#define pthread_mutex_lock(a) | |||
#define pthread_mutex_unlock(a) | |||
typedef int ZSTD_pthread_mutex_t; | |||
#define ZSTD_pthread_mutex_init(a, b) ((void)(a), (void)(b), 0) | |||
#define ZSTD_pthread_mutex_destroy(a) ((void)(a)) | |||
#define ZSTD_pthread_mutex_lock(a) ((void)(a)) | |||
#define ZSTD_pthread_mutex_unlock(a) ((void)(a)) | |||
#define pthread_cond_t int | |||
#define pthread_cond_init(a,b) ((void)a, 0) | |||
#define pthread_cond_destroy(a) | |||
#define pthread_cond_wait(a,b) | |||
#define pthread_cond_signal(a) | |||
#define pthread_cond_broadcast(a) | |||
typedef int ZSTD_pthread_cond_t; | |||
#define ZSTD_pthread_cond_init(a, b) ((void)(a), (void)(b), 0) | |||
#define ZSTD_pthread_cond_destroy(a) ((void)(a)) | |||
#define ZSTD_pthread_cond_wait(a, b) ((void)(a), (void)(b)) | |||
#define ZSTD_pthread_cond_signal(a) ((void)(a)) | |||
#define ZSTD_pthread_cond_broadcast(a) ((void)(a)) | |||
/* do not use pthread_t */ | |||
/* do not use ZSTD_pthread_t */ | |||
#endif /* ZSTD_MULTITHREAD */ | |||
@@ -1,211 +0,0 @@ | |||
/* | |||
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc. | |||
* All rights reserved. | |||
* | |||
* This source code is licensed under both the BSD-style license (found in the | |||
* LICENSE file in the root directory of this source tree) and the GPLv2 (found | |||
* in the COPYING file in the root directory of this source tree). | |||
* You may select, at your option, one of the above-listed licenses. | |||
*/ | |||
#ifndef DICTBUILDER_H_001 | |||
#define DICTBUILDER_H_001 | |||
#if defined (__cplusplus) | |||
extern "C" { | |||
#endif | |||
/*====== Dependencies ======*/ | |||
#include <stddef.h> /* size_t */ | |||
/* ===== ZDICTLIB_API : control library symbols visibility ===== */ | |||
#ifndef ZDICTLIB_VISIBILITY | |||
# if defined(__GNUC__) && (__GNUC__ >= 4) | |||
# define ZDICTLIB_VISIBILITY __attribute__ ((visibility ("default"))) | |||
# else | |||
# define ZDICTLIB_VISIBILITY | |||
# endif | |||
#endif | |||
#if defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1) | |||
# define ZDICTLIB_API __declspec(dllexport) ZDICTLIB_VISIBILITY | |||
#elif defined(ZSTD_DLL_IMPORT) && (ZSTD_DLL_IMPORT==1) | |||
# define ZDICTLIB_API __declspec(dllimport) ZDICTLIB_VISIBILITY /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/ | |||
#else | |||
# define ZDICTLIB_API ZDICTLIB_VISIBILITY | |||
#endif | |||
/*! ZDICT_trainFromBuffer(): | |||
* Train a dictionary from an array of samples. | |||
* Uses ZDICT_optimizeTrainFromBuffer_cover() single-threaded, with d=8 and steps=4. | |||
* Samples must be stored concatenated in a single flat buffer `samplesBuffer`, | |||
* supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order. | |||
* The resulting dictionary will be saved into `dictBuffer`. | |||
* @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`) | |||
* or an error code, which can be tested with ZDICT_isError(). | |||
* Note: ZDICT_trainFromBuffer() requires about 9 bytes of memory for each input byte. | |||
* Tips: In general, a reasonable dictionary has a size of ~ 100 KB. | |||
* It's obviously possible to target smaller or larger ones, just by specifying different `dictBufferCapacity`. | |||
* In general, it's recommended to provide a few thousands samples, but this can vary a lot. | |||
* It's recommended that total size of all samples be about ~x100 times the target size of dictionary. | |||
*/ | |||
ZDICTLIB_API size_t ZDICT_trainFromBuffer(void* dictBuffer, size_t dictBufferCapacity, | |||
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples); | |||
/*====== Helper functions ======*/ | |||
ZDICTLIB_API unsigned ZDICT_getDictID(const void* dictBuffer, size_t dictSize); /**< extracts dictID; @return zero if error (not a valid dictionary) */ | |||
ZDICTLIB_API unsigned ZDICT_isError(size_t errorCode); | |||
ZDICTLIB_API const char* ZDICT_getErrorName(size_t errorCode); | |||
#ifdef ZDICT_STATIC_LINKING_ONLY | |||
/* ==================================================================================== | |||
* The definitions in this section are considered experimental. | |||
* They should never be used with a dynamic library, as they may change in the future. | |||
* They are provided for advanced usages. | |||
* Use them only in association with static linking. | |||
* ==================================================================================== */ | |||
typedef struct { | |||
int compressionLevel; /* 0 means default; target a specific zstd compression level */ | |||
unsigned notificationLevel; /* Write to stderr; 0 = none (default); 1 = errors; 2 = progression; 3 = details; 4 = debug; */ | |||
unsigned dictID; /* 0 means auto mode (32-bits random value); other : force dictID value */ | |||
} ZDICT_params_t; | |||
/*! ZDICT_cover_params_t: | |||
* For all values 0 means default. | |||
* k and d are the only required parameters. | |||
*/ | |||
typedef struct { | |||
unsigned k; /* Segment size : constraint: 0 < k : Reasonable range [16, 2048+] */ | |||
unsigned d; /* dmer size : constraint: 0 < d <= k : Reasonable range [6, 16] */ | |||
unsigned steps; /* Number of steps : Only used for optimization : 0 means default (32) : Higher means more parameters checked */ | |||
unsigned nbThreads; /* Number of threads : constraint: 0 < nbThreads : 1 means single-threaded : Only used for optimization : Ignored if ZSTD_MULTITHREAD is not defined */ | |||
ZDICT_params_t zParams; | |||
} ZDICT_cover_params_t; | |||
/*! ZDICT_trainFromBuffer_cover(): | |||
* Train a dictionary from an array of samples using the COVER algorithm. | |||
* Samples must be stored concatenated in a single flat buffer `samplesBuffer`, | |||
* supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order. | |||
* The resulting dictionary will be saved into `dictBuffer`. | |||
* @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`) | |||
* or an error code, which can be tested with ZDICT_isError(). | |||
* Note: ZDICT_trainFromBuffer_cover() requires about 9 bytes of memory for each input byte. | |||
* Tips: In general, a reasonable dictionary has a size of ~ 100 KB. | |||
* It's obviously possible to target smaller or larger ones, just by specifying different `dictBufferCapacity`. | |||
* In general, it's recommended to provide a few thousands samples, but this can vary a lot. | |||
* It's recommended that total size of all samples be about ~x100 times the target size of dictionary. | |||
*/ | |||
ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover( | |||
void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer, | |||
const size_t *samplesSizes, unsigned nbSamples, | |||
ZDICT_cover_params_t parameters); | |||
/*! ZDICT_optimizeTrainFromBuffer_cover(): | |||
* The same requirements as above hold for all the parameters except `parameters`. | |||
* This function tries many parameter combinations and picks the best parameters. | |||
* `*parameters` is filled with the best parameters found, and the dictionary | |||
* constructed with those parameters is stored in `dictBuffer`. | |||
* | |||
* All of the parameters d, k, steps are optional. | |||
* If d is non-zero then we don't check multiple values of d, otherwise we check d = {6, 8, 10, 12, 14, 16}. | |||
 * If steps is zero, the default number of steps is used.
 * If k is non-zero then we don't check multiple values of k, otherwise we check k values in [16, 2048].
* | |||
* @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`) | |||
* or an error code, which can be tested with ZDICT_isError(). | |||
* On success `*parameters` contains the parameters selected. | |||
* Note: ZDICT_optimizeTrainFromBuffer_cover() requires about 8 bytes of memory for each input byte and additionally another 5 bytes of memory for each byte of memory for each thread. | |||
*/ | |||
ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover( | |||
void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer, | |||
const size_t *samplesSizes, unsigned nbSamples, | |||
ZDICT_cover_params_t *parameters); | |||
/*! ZDICT_finalizeDictionary(): | |||
* Given a custom content as a basis for dictionary, and a set of samples, | |||
* finalize dictionary by adding headers and statistics. | |||
* | |||
* Samples must be stored concatenated in a flat buffer `samplesBuffer`, | |||
* supplied with an array of sizes `samplesSizes`, providing the size of each sample in order. | |||
* | |||
* dictContentSize must be >= ZDICT_CONTENTSIZE_MIN bytes. | |||
* maxDictSize must be >= dictContentSize, and must be >= ZDICT_DICTSIZE_MIN bytes. | |||
* | |||
* @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`), | |||
* or an error code, which can be tested by ZDICT_isError(). | |||
* Note: ZDICT_finalizeDictionary() will push notifications into stderr if instructed to, using notificationLevel>0. | |||
* Note 2: dictBuffer and dictContent can overlap | |||
*/ | |||
#define ZDICT_CONTENTSIZE_MIN 128 | |||
#define ZDICT_DICTSIZE_MIN 256 | |||
ZDICTLIB_API size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBufferCapacity, | |||
const void* dictContent, size_t dictContentSize, | |||
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples, | |||
ZDICT_params_t parameters); | |||
/*! ZDICT_legacy_params_t:
 *  Parameters for ZDICT_trainFromBuffer_legacy().
 *  All values 0 means default. */
typedef struct {
    unsigned selectivityLevel;   /* 0 means default; larger => select more => larger dictionary */
    ZDICT_params_t zParams;      /* common training parameters (level, notifications, dictID) */
} ZDICT_legacy_params_t;
/*! ZDICT_trainFromBuffer_legacy(): | |||
* Train a dictionary from an array of samples. | |||
* Samples must be stored concatenated in a single flat buffer `samplesBuffer`, | |||
* supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order. | |||
* The resulting dictionary will be saved into `dictBuffer`. | |||
* `parameters` is optional and can be provided with values set to 0 to mean "default". | |||
* @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`) | |||
* or an error code, which can be tested with ZDICT_isError(). | |||
* Tips: In general, a reasonable dictionary has a size of ~ 100 KB. | |||
* It's obviously possible to target smaller or larger ones, just by specifying different `dictBufferCapacity`. | |||
* In general, it's recommended to provide a few thousands samples, but this can vary a lot. | |||
* It's recommended that total size of all samples be about ~x100 times the target size of dictionary. | |||
* Note: ZDICT_trainFromBuffer_legacy() will send notifications into stderr if instructed to, using notificationLevel>0. | |||
*/ | |||
ZDICTLIB_API size_t ZDICT_trainFromBuffer_legacy( | |||
void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer, | |||
const size_t *samplesSizes, unsigned nbSamples, ZDICT_legacy_params_t parameters); | |||
/* Deprecation warnings */ | |||
/* It is generally possible to disable deprecation warnings from compiler, | |||
for example with -Wno-deprecated-declarations for gcc | |||
or _CRT_SECURE_NO_WARNINGS in Visual. | |||
Otherwise, it's also possible to manually define ZDICT_DISABLE_DEPRECATE_WARNINGS */ | |||
#ifdef ZDICT_DISABLE_DEPRECATE_WARNINGS | |||
# define ZDICT_DEPRECATED(message) ZDICTLIB_API /* disable deprecation warnings */ | |||
#else | |||
# define ZDICT_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) | |||
# if defined (__cplusplus) && (__cplusplus >= 201402) /* C++14 or greater */ | |||
# define ZDICT_DEPRECATED(message) [[deprecated(message)]] ZDICTLIB_API | |||
# elif (ZDICT_GCC_VERSION >= 405) || defined(__clang__) | |||
# define ZDICT_DEPRECATED(message) ZDICTLIB_API __attribute__((deprecated(message))) | |||
# elif (ZDICT_GCC_VERSION >= 301) | |||
# define ZDICT_DEPRECATED(message) ZDICTLIB_API __attribute__((deprecated)) | |||
# elif defined(_MSC_VER) | |||
# define ZDICT_DEPRECATED(message) ZDICTLIB_API __declspec(deprecated(message)) | |||
# else | |||
# pragma message("WARNING: You need to implement ZDICT_DEPRECATED for this compiler") | |||
# define ZDICT_DEPRECATED(message) ZDICTLIB_API | |||
# endif | |||
#endif /* ZDICT_DISABLE_DEPRECATE_WARNINGS */ | |||
ZDICT_DEPRECATED("use ZDICT_finalizeDictionary() instead") | |||
size_t ZDICT_addEntropyTablesFromBuffer(void* dictBuffer, size_t dictContentSize, size_t dictBufferCapacity, | |||
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples); | |||
#endif /* ZDICT_STATIC_LINKING_ONLY */ | |||
#if defined (__cplusplus) | |||
} | |||
#endif | |||
#endif /* DICTBUILDER_H_001 */ |
@@ -1,5 +1,5 @@ | |||
/* | |||
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc. | |||
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. | |||
* All rights reserved. | |||
* | |||
* This source code is licensed under both the BSD-style license (found in the | |||
@@ -30,23 +30,26 @@ const char* ZSTD_versionString(void) { return ZSTD_VERSION_STRING; } | |||
/*-**************************************** | |||
* ZSTD Error Management | |||
******************************************/ | |||
#undef ZSTD_isError /* defined within zstd_internal.h */
/*! ZSTD_isError() :
 *  tells if a return value is an error code
 *  symbol is required for external callers */
unsigned ZSTD_isError(size_t code) { return ERR_isError(code); }

/*! ZSTD_getErrorName() :
 *  provides error code string from function result (useful for debugging) */
const char* ZSTD_getErrorName(size_t code) { return ERR_getErrorName(code); }

/*! ZSTD_getError() :
 *  convert a `size_t` function result into a proper ZSTD_errorCode enum */
ZSTD_ErrorCode ZSTD_getErrorCode(size_t code) { return ERR_getErrorCode(code); }

/*! ZSTD_getErrorString() :
 *  provides error code string from enum */
const char* ZSTD_getErrorString(ZSTD_ErrorCode code) { return ERR_getErrorString(code); }
/*=************************************************************** | |||
* Custom allocator | |||
****************************************************************/ |
@@ -0,0 +1,158 @@ | |||
/* | |||
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. | |||
* All rights reserved. | |||
* | |||
* This source code is licensed under both the BSD-style license (found in the | |||
* LICENSE file in the root directory of this source tree) and the GPLv2 (found | |||
* in the COPYING file in the root directory of this source tree). | |||
* You may select, at your option, one of the above-listed licenses. | |||
*/ | |||
/*-************************************* | |||
* Dependencies | |||
***************************************/ | |||
#include "zstd_compress_literals.h" | |||
size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize) | |||
{ | |||
BYTE* const ostart = (BYTE* const)dst; | |||
U32 const flSize = 1 + (srcSize>31) + (srcSize>4095); | |||
RETURN_ERROR_IF(srcSize + flSize > dstCapacity, dstSize_tooSmall, ""); | |||
switch(flSize) | |||
{ | |||
case 1: /* 2 - 1 - 5 */ | |||
ostart[0] = (BYTE)((U32)set_basic + (srcSize<<3)); | |||
break; | |||
case 2: /* 2 - 2 - 12 */ | |||
MEM_writeLE16(ostart, (U16)((U32)set_basic + (1<<2) + (srcSize<<4))); | |||
break; | |||
case 3: /* 2 - 2 - 20 */ | |||
MEM_writeLE32(ostart, (U32)((U32)set_basic + (3<<2) + (srcSize<<4))); | |||
break; | |||
default: /* not necessary : flSize is {1,2,3} */ | |||
assert(0); | |||
} | |||
memcpy(ostart + flSize, src, srcSize); | |||
DEBUGLOG(5, "Raw literals: %u -> %u", (U32)srcSize, (U32)(srcSize + flSize)); | |||
return srcSize + flSize; | |||
} | |||
size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize) | |||
{ | |||
BYTE* const ostart = (BYTE* const)dst; | |||
U32 const flSize = 1 + (srcSize>31) + (srcSize>4095); | |||
(void)dstCapacity; /* dstCapacity already guaranteed to be >=4, hence large enough */ | |||
switch(flSize) | |||
{ | |||
case 1: /* 2 - 1 - 5 */ | |||
ostart[0] = (BYTE)((U32)set_rle + (srcSize<<3)); | |||
break; | |||
case 2: /* 2 - 2 - 12 */ | |||
MEM_writeLE16(ostart, (U16)((U32)set_rle + (1<<2) + (srcSize<<4))); | |||
break; | |||
case 3: /* 2 - 2 - 20 */ | |||
MEM_writeLE32(ostart, (U32)((U32)set_rle + (3<<2) + (srcSize<<4))); | |||
break; | |||
default: /* not necessary : flSize is {1,2,3} */ | |||
assert(0); | |||
} | |||
ostart[flSize] = *(const BYTE*)src; | |||
DEBUGLOG(5, "RLE literals: %u -> %u", (U32)srcSize, (U32)flSize + 1); | |||
return flSize+1; | |||
} | |||
/** ZSTD_compressLiterals() :
 *  Compresses the literals section of a block with Huffman,
 *  falling back to raw (set_basic) or RLE storage whenever
 *  compression is disabled, not attempted, or not worth it.
 *  Side effect : *nextHuf is updated with the table state for the next block
 *  (restored to *prevHuf on any fallback path).
 * @return : total size written into dst (header + literals payload),
 *           or an error code, testable with ZSTD_isError(). */
size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
                              ZSTD_hufCTables_t* nextHuf,
                              ZSTD_strategy strategy, int disableLiteralCompression,
                              void* dst, size_t dstCapacity,
                              const void* src, size_t srcSize,
                              void* entropyWorkspace, size_t entropyWorkspaceSize,
                              const int bmi2)
{
    size_t const minGain = ZSTD_minGain(srcSize, strategy);
    /* literals section header : 3 bytes up to 1 KB of literals, then 4, then 5 */
    size_t const lhSize = 3 + (srcSize >= 1 KB) + (srcSize >= 16 KB);
    BYTE* const ostart = (BYTE*)dst;
    /* small inputs use a single Huffman stream; larger ones use 4 interleaved streams */
    U32 singleStream = srcSize < 256;
    symbolEncodingType_e hType = set_compressed;
    size_t cLitSize;

    DEBUGLOG(5,"ZSTD_compressLiterals (disableLiteralCompression=%i srcSize=%u)",
                disableLiteralCompression, (U32)srcSize);

    /* Prepare nextEntropy assuming reusing the existing table */
    memcpy(nextHuf, prevHuf, sizeof(*prevHuf));

    if (disableLiteralCompression)
        return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);

    /* small ? don't even attempt compression (speed opt) */
#   define COMPRESS_LITERALS_SIZE_MIN 63
    /* a valid repeat table lowers the threshold : no table header needs to be amortized */
    {   size_t const minLitSize = (prevHuf->repeatMode == HUF_repeat_valid) ? 6 : COMPRESS_LITERALS_SIZE_MIN;
        if (srcSize <= minLitSize) return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
    }

    RETURN_ERROR_IF(dstCapacity < lhSize+1, dstSize_tooSmall, "not enough space for compression");
    {   HUF_repeat repeat = prevHuf->repeatMode;
        /* fast strategies prefer reusing the previous table on small inputs */
        int const preferRepeat = strategy < ZSTD_lazy ? srcSize <= 1024 : 0;
        if (repeat == HUF_repeat_valid && lhSize == 3) singleStream = 1;
        cLitSize = singleStream ?
            HUF_compress1X_repeat(
                ostart+lhSize, dstCapacity-lhSize, src, srcSize,
                HUF_SYMBOLVALUE_MAX, HUF_TABLELOG_DEFAULT, entropyWorkspace, entropyWorkspaceSize,
                (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2) :
            HUF_compress4X_repeat(
                ostart+lhSize, dstCapacity-lhSize, src, srcSize,
                HUF_SYMBOLVALUE_MAX, HUF_TABLELOG_DEFAULT, entropyWorkspace, entropyWorkspaceSize,
                (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2);
        if (repeat != HUF_repeat_none) {
            /* reused the existing table */
            DEBUGLOG(5, "Reusing previous huffman table");
            hType = set_repeat;
        }
    }

    /* not compressible, insufficient gain, or error : store raw and restore nextHuf */
    if ((cLitSize==0) | (cLitSize >= srcSize - minGain) | ERR_isError(cLitSize)) {
        memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
        return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
    }
    /* compressed down to a single byte : RLE is cheaper */
    if (cLitSize==1) {
        memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
        return ZSTD_compressRleLiteralsBlock(dst, dstCapacity, src, srcSize);
    }

    if (hType == set_compressed) {
        /* using a newly constructed table */
        nextHuf->repeatMode = HUF_repeat_check;
    }

    /* Build header */
    switch(lhSize)
    {
    case 3: /* 2 - 2 - 10 - 10 */
        {   U32 const lhc = hType + ((!singleStream) << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<14);
            MEM_writeLE24(ostart, lhc);
            break;
        }
    case 4: /* 2 - 2 - 14 - 14 */
        {   U32 const lhc = hType + (2 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<18);
            MEM_writeLE32(ostart, lhc);
            break;
        }
    case 5: /* 2 - 2 - 18 - 18 */
        {   U32 const lhc = hType + (3 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<22);
            MEM_writeLE32(ostart, lhc);
            ostart[4] = (BYTE)(cLitSize >> 10);  /* top bits of cLitSize spill into a 5th byte */
            break;
        }
    default:  /* not possible : lhSize is {3,4,5} */
        assert(0);
    }
    DEBUGLOG(5, "Compressed literals: %u -> %u", (U32)srcSize, (U32)(lhSize+cLitSize));
    return lhSize+cLitSize;
}
@@ -0,0 +1,29 @@ | |||
/* | |||
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. | |||
* All rights reserved. | |||
* | |||
* This source code is licensed under both the BSD-style license (found in the | |||
* LICENSE file in the root directory of this source tree) and the GPLv2 (found | |||
* in the COPYING file in the root directory of this source tree). | |||
* You may select, at your option, one of the above-listed licenses. | |||
*/ | |||
#ifndef ZSTD_COMPRESS_LITERALS_H
#define ZSTD_COMPRESS_LITERALS_H

#include "zstd_compress_internal.h" /* ZSTD_hufCTables_t, ZSTD_minGain() */

/* Stores literals uncompressed, prefixed with a raw-literals block header.
 * @return : header size + srcSize, or an error code (test with ZSTD_isError()). */
size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize);

/* Stores a single repeated byte as an RLE literals block.
 * Caller must guarantee dstCapacity >= 4 (see implementation note).
 * @return : header size + 1. */
size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize);

/* Compresses literals with Huffman, falling back to raw / RLE storage
 * when compression is disabled or does not provide enough gain.
 * *nextHuf receives the Huffman table state for the next block.
 * @return : total size written into dst, or an error code. */
size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
                              ZSTD_hufCTables_t* nextHuf,
                              ZSTD_strategy strategy, int disableLiteralCompression,
                              void* dst, size_t dstCapacity,
                              const void* src, size_t srcSize,
                              void* entropyWorkspace, size_t entropyWorkspaceSize,
                              const int bmi2);

#endif /* ZSTD_COMPRESS_LITERALS_H */
@@ -0,0 +1,419 @@ | |||
/* | |||
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. | |||
* All rights reserved. | |||
* | |||
* This source code is licensed under both the BSD-style license (found in the | |||
* LICENSE file in the root directory of this source tree) and the GPLv2 (found | |||
* in the COPYING file in the root directory of this source tree). | |||
* You may select, at your option, one of the above-listed licenses. | |||
*/ | |||
/*-************************************* | |||
* Dependencies | |||
***************************************/ | |||
#include "zstd_compress_sequences.h" | |||
/**
 * -log2(x / 256) lookup table for x in [0, 256).
 * If x == 0: Return 0
 * Else: Return floor(-log2(x / 256) * 256)
 * i.e. entry x is the cost, in 1/256th-of-a-bit units, of a symbol whose
 * normalized probability is x/256 (used by the entropy estimators below).
 */
static unsigned const kInverseProbabilityLog256[256] = {
    0,    2048, 1792, 1642, 1536, 1453, 1386, 1329, 1280, 1236, 1197, 1162,
    1130, 1100, 1073, 1047, 1024, 1001, 980,  960,  941,  923,  906,  889,
    874,  859,  844,  830,  817,  804,  791,  779,  768,  756,  745,  734,
    724,  714,  704,  694,  685,  676,  667,  658,  650,  642,  633,  626,
    618,  610,  603,  595,  588,  581,  574,  567,  561,  554,  548,  542,
    535,  529,  523,  517,  512,  506,  500,  495,  489,  484,  478,  473,
    468,  463,  458,  453,  448,  443,  438,  434,  429,  424,  420,  415,
    411,  407,  402,  398,  394,  390,  386,  382,  377,  373,  370,  366,
    362,  358,  354,  350,  347,  343,  339,  336,  332,  329,  325,  322,
    318,  315,  311,  308,  305,  302,  298,  295,  292,  289,  286,  282,
    279,  276,  273,  270,  267,  264,  261,  258,  256,  253,  250,  247,
    244,  241,  239,  236,  233,  230,  228,  225,  222,  220,  217,  215,
    212,  209,  207,  204,  202,  199,  197,  194,  192,  190,  187,  185,
    182,  180,  178,  175,  173,  171,  168,  166,  164,  162,  159,  157,
    155,  153,  151,  149,  146,  144,  142,  140,  138,  136,  134,  132,
    130,  128,  126,  123,  121,  119,  117,  115,  114,  112,  110,  108,
    106,  104,  102,  100,  98,   96,   94,   93,   91,   89,   87,   85,
    83,   82,   80,   78,   76,   74,   73,   71,   69,   67,   66,   64,
    62,   61,   59,   57,   55,   54,   52,   50,   49,   47,   46,   44,
    42,   41,   39,   37,   36,   34,   33,   31,   30,   28,   26,   25,
    23,   22,   20,   19,   17,   16,   14,   13,   11,   10,   8,    7,
    5,    4,    2,    1,
};
/* Extracts maxSymbolValue from an FSE_CTable :
 * it is stored in the second U16 of the table header. */
static unsigned ZSTD_getFSEMaxSymbolValue(FSE_CTable const* ctable) {
    void const* const start = ctable;           /* route through void* to avoid an aliasing cast */
    U16 const* const header = (U16 const*)start;
    return (unsigned)MEM_read16(header + 1);
}
/** | |||
* Returns the cost in bytes of encoding the normalized count header. | |||
* Returns an error if any of the helper functions return an error. | |||
*/ | |||
static size_t ZSTD_NCountCost(unsigned const* count, unsigned const max, | |||
size_t const nbSeq, unsigned const FSELog) | |||
{ | |||
BYTE wksp[FSE_NCOUNTBOUND]; | |||
S16 norm[MaxSeq + 1]; | |||
const U32 tableLog = FSE_optimalTableLog(FSELog, nbSeq, max); | |||
FORWARD_IF_ERROR(FSE_normalizeCount(norm, tableLog, count, nbSeq, max), ""); | |||
return FSE_writeNCount(wksp, sizeof(wksp), norm, max, tableLog); | |||
} | |||
/** | |||
* Returns the cost in bits of encoding the distribution described by count | |||
* using the entropy bound. | |||
*/ | |||
static size_t ZSTD_entropyCost(unsigned const* count, unsigned const max, size_t const total) | |||
{ | |||
unsigned cost = 0; | |||
unsigned s; | |||
for (s = 0; s <= max; ++s) { | |||
unsigned norm = (unsigned)((256 * count[s]) / total); | |||
if (count[s] != 0 && norm == 0) | |||
norm = 1; | |||
assert(count[s] < total); | |||
cost += count[s] * kInverseProbabilityLog256[norm]; | |||
} | |||
return cost >> 8; | |||
} | |||
/** | |||
* Returns the cost in bits of encoding the distribution in count using ctable. | |||
* Returns an error if ctable cannot represent all the symbols in count. | |||
*/ | |||
size_t ZSTD_fseBitCost( | |||
FSE_CTable const* ctable, | |||
unsigned const* count, | |||
unsigned const max) | |||
{ | |||
unsigned const kAccuracyLog = 8; | |||
size_t cost = 0; | |||
unsigned s; | |||
FSE_CState_t cstate; | |||
FSE_initCState(&cstate, ctable); | |||
if (ZSTD_getFSEMaxSymbolValue(ctable) < max) { | |||
DEBUGLOG(5, "Repeat FSE_CTable has maxSymbolValue %u < %u", | |||
ZSTD_getFSEMaxSymbolValue(ctable), max); | |||
return ERROR(GENERIC); | |||
} | |||
for (s = 0; s <= max; ++s) { | |||
unsigned const tableLog = cstate.stateLog; | |||
unsigned const badCost = (tableLog + 1) << kAccuracyLog; | |||
unsigned const bitCost = FSE_bitCost(cstate.symbolTT, tableLog, s, kAccuracyLog); | |||
if (count[s] == 0) | |||
continue; | |||
if (bitCost >= badCost) { | |||
DEBUGLOG(5, "Repeat FSE_CTable has Prob[%u] == 0", s); | |||
return ERROR(GENERIC); | |||
} | |||
cost += (size_t)count[s] * bitCost; | |||
} | |||
return cost >> kAccuracyLog; | |||
} | |||
/** | |||
* Returns the cost in bits of encoding the distribution in count using the | |||
* table described by norm. The max symbol support by norm is assumed >= max. | |||
* norm must be valid for every symbol with non-zero probability in count. | |||
*/ | |||
size_t ZSTD_crossEntropyCost(short const* norm, unsigned accuracyLog, | |||
unsigned const* count, unsigned const max) | |||
{ | |||
unsigned const shift = 8 - accuracyLog; | |||
size_t cost = 0; | |||
unsigned s; | |||
assert(accuracyLog <= 8); | |||
for (s = 0; s <= max; ++s) { | |||
unsigned const normAcc = (norm[s] != -1) ? (unsigned)norm[s] : 1; | |||
unsigned const norm256 = normAcc << shift; | |||
assert(norm256 > 0); | |||
assert(norm256 < 256); | |||
cost += count[s] * kInverseProbabilityLog256[norm256]; | |||
} | |||
return cost >> 8; | |||
} | |||
/** ZSTD_selectEncodingType() :
 *  Chooses how a sequence-symbol distribution will be encoded :
 *  set_rle, set_basic (predefined table), set_repeat (previous table),
 *  or set_compressed (newly built FSE table).
 *  Updates *repeatMode to reflect the decision.
 * @param mostFrequent : occurrence count of the most frequent symbol
 * @param isDefaultAllowed : whether the predefined default distribution may be used
 * @return : the selected symbolEncodingType_e */
symbolEncodingType_e
ZSTD_selectEncodingType(
        FSE_repeat* repeatMode, unsigned const* count, unsigned const max,
        size_t const mostFrequent, size_t nbSeq, unsigned const FSELog,
        FSE_CTable const* prevCTable,
        short const* defaultNorm, U32 defaultNormLog,
        ZSTD_defaultPolicy_e const isDefaultAllowed,
        ZSTD_strategy const strategy)
{
    ZSTD_STATIC_ASSERT(ZSTD_defaultDisallowed == 0 && ZSTD_defaultAllowed != 0);
    if (mostFrequent == nbSeq) {
        /* every sequence uses the same symbol */
        *repeatMode = FSE_repeat_none;
        if (isDefaultAllowed && nbSeq <= 2) {
            /* Prefer set_basic over set_rle when there are 2 or less symbols,
             * since RLE uses 1 byte, but set_basic uses 5-6 bits per symbol.
             * If basic encoding isn't possible, always choose RLE.
             */
            DEBUGLOG(5, "Selected set_basic");
            return set_basic;
        }
        DEBUGLOG(5, "Selected set_rle");
        return set_rle;
    }
    if (strategy < ZSTD_lazy) {
        /* fast strategies : use cheap heuristics instead of exact cost evaluation */
        if (isDefaultAllowed) {
            size_t const staticFse_nbSeq_max = 1000;
            size_t const mult = 10 - strategy;
            size_t const baseLog = 3;
            size_t const dynamicFse_nbSeq_min = (((size_t)1 << defaultNormLog) * mult) >> baseLog;  /* 28-36 for offset, 56-72 for lengths */
            assert(defaultNormLog >= 5 && defaultNormLog <= 6);  /* xx_DEFAULTNORMLOG */
            assert(mult <= 9 && mult >= 7);
            if ( (*repeatMode == FSE_repeat_valid)
              && (nbSeq < staticFse_nbSeq_max) ) {
                DEBUGLOG(5, "Selected set_repeat");
                return set_repeat;
            }
            /* too few sequences (table header not amortized) or distribution
             * close to uniform : the predefined table is good enough */
            if ( (nbSeq < dynamicFse_nbSeq_min)
              || (mostFrequent < (nbSeq >> (defaultNormLog-1))) ) {
                DEBUGLOG(5, "Selected set_basic");
                /* The format allows default tables to be repeated, but it isn't useful.
                 * When using simple heuristics to select encoding type, we don't want
                 * to confuse these tables with dictionaries. When running more careful
                 * analysis, we don't need to waste time checking both repeating tables
                 * and default tables.
                 */
                *repeatMode = FSE_repeat_none;
                return set_basic;
            }
        }
    } else {
        /* strong strategies : estimate the real bit cost of every viable mode;
         * error codes act as +infinity sentinels in the min-comparisons below */
        size_t const basicCost = isDefaultAllowed ? ZSTD_crossEntropyCost(defaultNorm, defaultNormLog, count, max) : ERROR(GENERIC);
        size_t const repeatCost = *repeatMode != FSE_repeat_none ? ZSTD_fseBitCost(prevCTable, count, max) : ERROR(GENERIC);
        size_t const NCountCost = ZSTD_NCountCost(count, max, nbSeq, FSELog);
        size_t const compressedCost = (NCountCost << 3) + ZSTD_entropyCost(count, max, nbSeq);  /* header bytes (in bits) + entropy bound */
        if (isDefaultAllowed) {
            assert(!ZSTD_isError(basicCost));
            assert(!(*repeatMode == FSE_repeat_valid && ZSTD_isError(repeatCost)));
        }
        assert(!ZSTD_isError(NCountCost));
        assert(compressedCost < ERROR(maxCode));
        DEBUGLOG(5, "Estimated bit costs: basic=%u\trepeat=%u\tcompressed=%u",
                    (unsigned)basicCost, (unsigned)repeatCost, (unsigned)compressedCost);
        if (basicCost <= repeatCost && basicCost <= compressedCost) {
            DEBUGLOG(5, "Selected set_basic");
            assert(isDefaultAllowed);
            *repeatMode = FSE_repeat_none;
            return set_basic;
        }
        if (repeatCost <= compressedCost) {
            DEBUGLOG(5, "Selected set_repeat");
            assert(!ZSTD_isError(repeatCost));
            return set_repeat;
        }
        assert(compressedCost < basicCost && compressedCost < repeatCost);
    }
    DEBUGLOG(5, "Selected set_compressed");
    /* a newly built table must be validated before it can be repeated */
    *repeatMode = FSE_repeat_check;
    return set_compressed;
}
/** ZSTD_buildCTable() :
 *  Builds the FSE compression table nextCTable according to the selected
 *  encoding type, writing the table description (if any) into dst.
 * @return : number of header bytes written into dst
 *           (1 for set_rle, 0 for set_repeat / set_basic,
 *            NCount size for set_compressed), or an error code. */
size_t
ZSTD_buildCTable(void* dst, size_t dstCapacity,
                FSE_CTable* nextCTable, U32 FSELog, symbolEncodingType_e type,
                unsigned* count, U32 max,
                const BYTE* codeTable, size_t nbSeq,
                const S16* defaultNorm, U32 defaultNormLog, U32 defaultMax,
                const FSE_CTable* prevCTable, size_t prevCTableSize,
                void* entropyWorkspace, size_t entropyWorkspaceSize)
{
    BYTE* op = (BYTE*)dst;
    const BYTE* const oend = op + dstCapacity;
    DEBUGLOG(6, "ZSTD_buildCTable (dstCapacity=%u)", (unsigned)dstCapacity);

    switch (type) {
    case set_rle:
        FORWARD_IF_ERROR(FSE_buildCTable_rle(nextCTable, (BYTE)max), "");
        RETURN_ERROR_IF(dstCapacity==0, dstSize_tooSmall, "not enough space");
        /* RLE : the single repeated symbol is stored directly in the block */
        *op = codeTable[0];
        return 1;
    case set_repeat:
        /* reuse the previous table as-is; nothing written to dst */
        memcpy(nextCTable, prevCTable, prevCTableSize);
        return 0;
    case set_basic:
        /* build the predefined default table; nothing written to dst */
        FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, defaultNorm, defaultMax, defaultNormLog, entropyWorkspace, entropyWorkspaceSize), "");  /* note : could be pre-calculated */
        return 0;
    case set_compressed: {
        S16 norm[MaxSeq + 1];
        size_t nbSeq_1 = nbSeq;
        const U32 tableLog = FSE_optimalTableLog(FSELog, nbSeq, max);
        /* NOTE(review): the last coded symbol's count is reduced by one when > 1 —
         * presumably so it keeps a non-zero probability after normalization;
         * confirm against FSE_normalizeCount() semantics */
        if (count[codeTable[nbSeq-1]] > 1) {
            count[codeTable[nbSeq-1]]--;
            nbSeq_1--;
        }
        assert(nbSeq_1 > 1);
        FORWARD_IF_ERROR(FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max), "");
        {   size_t const NCountSize = FSE_writeNCount(op, oend - op, norm, max, tableLog);   /* overflow protected */
            FORWARD_IF_ERROR(NCountSize, "FSE_writeNCount failed");
            FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, norm, max, tableLog, entropyWorkspace, entropyWorkspaceSize), "");
            return NCountSize;
        }
    }
    default: assert(0); RETURN_ERROR(GENERIC, "impossible to reach");
    }
}
/** ZSTD_encodeSequences_body() :
 *  Core loop writing the FSE-compressed Sequences bitstream into dst.
 *  Shared template, instantiated by the default and BMI2 variants below.
 *  Sequences are encoded in reverse order (last sequence first), because
 *  the decoder reads the bitstream backwards, starting from its last byte.
 * @param longOffsets when non-zero, an offset may carry more bits than the
 *        bitstream accumulator can hold at once; its low "extra" bits are
 *        then emitted and flushed separately before the high bits.
 * @return : size of the bitstream written into dst,
 *           or an error code (notably dstSize_tooSmall). */
FORCE_INLINE_TEMPLATE size_t
ZSTD_encodeSequences_body(
            void* dst, size_t dstCapacity,
            FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
            FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
            FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
            seqDef const* sequences, size_t nbSeq, int longOffsets)
{
    BIT_CStream_t blockStream;
    FSE_CState_t stateMatchLength;
    FSE_CState_t stateOffsetBits;
    FSE_CState_t stateLitLength;

    RETURN_ERROR_IF(
        ERR_isError(BIT_initCStream(&blockStream, dst, dstCapacity)),
        dstSize_tooSmall, "not enough space remaining");
    DEBUGLOG(6, "available space for bitstream : %i (dstCapacity=%u)",
                (int)(blockStream.endPtr - blockStream.startPtr),
                (unsigned)dstCapacity);

    /* first symbols : initialize each FSE state from the last sequence's codes */
    FSE_initCState2(&stateMatchLength, CTable_MatchLength, mlCodeTable[nbSeq-1]);
    FSE_initCState2(&stateOffsetBits, CTable_OffsetBits, ofCodeTable[nbSeq-1]);
    FSE_initCState2(&stateLitLength, CTable_LitLength, llCodeTable[nbSeq-1]);
    BIT_addBits(&blockStream, sequences[nbSeq-1].litLength, LL_bits[llCodeTable[nbSeq-1]]);
    if (MEM_32bits()) BIT_flushBits(&blockStream);
    BIT_addBits(&blockStream, sequences[nbSeq-1].matchLength, ML_bits[mlCodeTable[nbSeq-1]]);
    if (MEM_32bits()) BIT_flushBits(&blockStream);
    if (longOffsets) {
        /* offset may exceed accumulator capacity :
         * emit low extraBits first, flush, then the remaining high bits */
        U32 const ofBits = ofCodeTable[nbSeq-1];
        unsigned const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1);
        if (extraBits) {
            BIT_addBits(&blockStream, sequences[nbSeq-1].offset, extraBits);
            BIT_flushBits(&blockStream);
        }
        BIT_addBits(&blockStream, sequences[nbSeq-1].offset >> extraBits,
                    ofBits - extraBits);
    } else {
        BIT_addBits(&blockStream, sequences[nbSeq-1].offset, ofCodeTable[nbSeq-1]);
    }
    BIT_flushBits(&blockStream);

    {   size_t n;
        for (n=nbSeq-2 ; n<nbSeq ; n--) {    /* intentional underflow : size_t n wraps past 0, ending the loop */
            BYTE const llCode = llCodeTable[n];
            BYTE const ofCode = ofCodeTable[n];
            BYTE const mlCode = mlCodeTable[n];
            U32  const llBits = LL_bits[llCode];
            U32  const ofBits = ofCode;   /* offset code is also its nb of extra bits */
            U32  const mlBits = ML_bits[mlCode];
            DEBUGLOG(6, "encoding: litlen:%2u - matchlen:%2u - offCode:%7u",
                        (unsigned)sequences[n].litLength,
                        (unsigned)sequences[n].matchLength + MINMATCH,
                        (unsigned)sequences[n].offset);
            /* The margin comments below track worst-case nb of bits pending in
             * the accumulator on 32-bit / 64-bit targets, proving each flush
             * happens before the accumulator can overflow. */
                                                                            /* 32b*/  /* 64b*/
                                                                            /* (7)*/  /* (7)*/
            FSE_encodeSymbol(&blockStream, &stateOffsetBits, ofCode);       /* 15 */  /* 15 */
            FSE_encodeSymbol(&blockStream, &stateMatchLength, mlCode);      /* 24 */  /* 24 */
            if (MEM_32bits()) BIT_flushBits(&blockStream);                  /* (7)*/
            FSE_encodeSymbol(&blockStream, &stateLitLength, llCode);        /* 16 */  /* 33 */
            if (MEM_32bits() || (ofBits+mlBits+llBits >= 64-7-(LLFSELog+MLFSELog+OffFSELog)))
                BIT_flushBits(&blockStream);                                /* (7)*/
            BIT_addBits(&blockStream, sequences[n].litLength, llBits);
            if (MEM_32bits() && ((llBits+mlBits)>24)) BIT_flushBits(&blockStream);
            BIT_addBits(&blockStream, sequences[n].matchLength, mlBits);
            if (MEM_32bits() || (ofBits+mlBits+llBits > 56)) BIT_flushBits(&blockStream);
            if (longOffsets) {
                unsigned const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1);
                if (extraBits) {
                    BIT_addBits(&blockStream, sequences[n].offset, extraBits);
                    BIT_flushBits(&blockStream);                            /* (7)*/
                }
                BIT_addBits(&blockStream, sequences[n].offset >> extraBits,
                            ofBits - extraBits);                            /* 31 */
            } else {
                BIT_addBits(&blockStream, sequences[n].offset, ofBits);     /* 31 */
            }
            BIT_flushBits(&blockStream);                                    /* (7)*/
            DEBUGLOG(7, "remaining space : %i", (int)(blockStream.endPtr - blockStream.ptr));
    }   }

    /* flush the final FSE states; order matters (read back by the decoder) */
    DEBUGLOG(6, "ZSTD_encodeSequences: flushing ML state with %u bits", stateMatchLength.stateLog);
    FSE_flushCState(&blockStream, &stateMatchLength);
    DEBUGLOG(6, "ZSTD_encodeSequences: flushing Off state with %u bits", stateOffsetBits.stateLog);
    FSE_flushCState(&blockStream, &stateOffsetBits);
    DEBUGLOG(6, "ZSTD_encodeSequences: flushing LL state with %u bits", stateLitLength.stateLog);
    FSE_flushCState(&blockStream, &stateLitLength);

    {   size_t const streamSize = BIT_closeCStream(&blockStream);
        RETURN_ERROR_IF(streamSize==0, dstSize_tooSmall, "not enough space");  /* 0 means "could not fully flush" */
        return streamSize;
    }
}
static size_t | |||
ZSTD_encodeSequences_default( | |||
void* dst, size_t dstCapacity, | |||
FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable, | |||
FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable, | |||
FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable, | |||
seqDef const* sequences, size_t nbSeq, int longOffsets) | |||
{ | |||
return ZSTD_encodeSequences_body(dst, dstCapacity, | |||
CTable_MatchLength, mlCodeTable, | |||
CTable_OffsetBits, ofCodeTable, | |||
CTable_LitLength, llCodeTable, | |||
sequences, nbSeq, longOffsets); | |||
} | |||
#if DYNAMIC_BMI2

/* ZSTD_encodeSequences_bmi2() :
 * BMI2-targeted instantiation of the shared encoding template.
 * Compiled only when runtime BMI2 dispatch is enabled; the function
 * attribute lets the compiler emit BMI2 instructions in this body. */
static TARGET_ATTRIBUTE("bmi2") size_t
ZSTD_encodeSequences_bmi2(
            void* dst, size_t dstCapacity,
            FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
            FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
            FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
            seqDef const* sequences, size_t nbSeq, int longOffsets)
{
    /* simple pass-through : all the work happens in the template body */
    return ZSTD_encodeSequences_body(
                dst, dstCapacity,
                CTable_MatchLength, mlCodeTable,
                CTable_OffsetBits, ofCodeTable,
                CTable_LitLength, llCodeTable,
                sequences, nbSeq, longOffsets);
}

#endif
size_t ZSTD_encodeSequences( | |||
void* dst, size_t dstCapacity, | |||
FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable, | |||
FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable, | |||
FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable, | |||
seqDef const* sequences, size_t nbSeq, int longOffsets, int bmi2) | |||
{ | |||
DEBUGLOG(5, "ZSTD_encodeSequences: dstCapacity = %u", (unsigned)dstCapacity); | |||
#if DYNAMIC_BMI2 | |||
if (bmi2) { | |||
return ZSTD_encodeSequences_bmi2(dst, dstCapacity, | |||
CTable_MatchLength, mlCodeTable, | |||
CTable_OffsetBits, ofCodeTable, | |||
CTable_LitLength, llCodeTable, | |||
sequences, nbSeq, longOffsets); | |||
} | |||
#endif | |||
(void)bmi2; | |||
return ZSTD_encodeSequences_default(dst, dstCapacity, | |||
CTable_MatchLength, mlCodeTable, | |||
CTable_OffsetBits, ofCodeTable, | |||
CTable_LitLength, llCodeTable, | |||
sequences, nbSeq, longOffsets); | |||
} |
@@ -0,0 +1,54 @@ | |||
/* | |||
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. | |||
* All rights reserved. | |||
* | |||
* This source code is licensed under both the BSD-style license (found in the | |||
* LICENSE file in the root directory of this source tree) and the GPLv2 (found | |||
* in the COPYING file in the root directory of this source tree). | |||
* You may select, at your option, one of the above-listed licenses. | |||
*/ | |||
#ifndef ZSTD_COMPRESS_SEQUENCES_H
#define ZSTD_COMPRESS_SEQUENCES_H

#include "fse.h"            /* FSE_repeat, FSE_CTable */
#include "zstd_internal.h"  /* symbolEncodingType_e, ZSTD_strategy */

/* Policy telling whether a predefined (default) distribution table
 * may be selected for the current symbol set. */
typedef enum {
    ZSTD_defaultDisallowed = 0,
    ZSTD_defaultAllowed = 1
} ZSTD_defaultPolicy_e;

/* ZSTD_selectEncodingType() :
 * Selects how the symbol type should be encoded (rle, repeat previous table,
 * predefined table, or freshly compressed table), based on the symbol
 * histogram (count / max / mostFrequent). Updates *repeatMode accordingly. */
symbolEncodingType_e
ZSTD_selectEncodingType(
        FSE_repeat* repeatMode, unsigned const* count, unsigned const max,
        size_t const mostFrequent, size_t nbSeq, unsigned const FSELog,
        FSE_CTable const* prevCTable,
        short const* defaultNorm, U32 defaultNormLog,
        ZSTD_defaultPolicy_e const isDefaultAllowed,
        ZSTD_strategy const strategy);

/* ZSTD_buildCTable() :
 * Builds the FSE CTable for the selected encoding @type into nextCTable,
 * writing its serialized NCount header into dst when type==set_compressed.
 * @return : nb of bytes written into dst, or an error code. */
size_t
ZSTD_buildCTable(void* dst, size_t dstCapacity,
                FSE_CTable* nextCTable, U32 FSELog, symbolEncodingType_e type,
                unsigned* count, U32 max,
                const BYTE* codeTable, size_t nbSeq,
                const S16* defaultNorm, U32 defaultNormLog, U32 defaultMax,
                const FSE_CTable* prevCTable, size_t prevCTableSize,
                void* entropyWorkspace, size_t entropyWorkspaceSize);

/* ZSTD_encodeSequences() :
 * Writes the FSE-compressed Sequences bitstream into dst.
 * @param bmi2 non-zero to use the BMI2-accelerated path when compiled in.
 * @return : size of the bitstream, or an error code. */
size_t ZSTD_encodeSequences(
            void* dst, size_t dstCapacity,
            FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
            FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
            FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
            seqDef const* sequences, size_t nbSeq, int longOffsets, int bmi2);

/* ZSTD_fseBitCost() :
 * Estimates the cost of encoding the histogram (count, max) with ctable
 * (fixed-point bit count; see implementation for the exact scaling). */
size_t ZSTD_fseBitCost(
    FSE_CTable const* ctable,
    unsigned const* count,
    unsigned const max);

/* ZSTD_crossEntropyCost() :
 * Estimates the cost of encoding the histogram (count, max) with the
 * normalized distribution norm (accuracy 2^accuracyLog). */
size_t ZSTD_crossEntropyCost(short const* norm, unsigned accuracyLog,
                             unsigned const* count, unsigned const max);

#endif /* ZSTD_COMPRESS_SEQUENCES_H */
@@ -0,0 +1,845 @@ | |||
/* | |||
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. | |||
* All rights reserved. | |||
* | |||
* This source code is licensed under both the BSD-style license (found in the | |||
* LICENSE file in the root directory of this source tree) and the GPLv2 (found | |||
* in the COPYING file in the root directory of this source tree). | |||
* You may select, at your option, one of the above-listed licenses. | |||
*/ | |||
/*-************************************* | |||
* Dependencies | |||
***************************************/ | |||
#include "zstd_compress_superblock.h" | |||
#include "zstd_internal.h" /* ZSTD_getSequenceLength */ | |||
#include "hist.h" /* HIST_countFast_wksp */ | |||
#include "zstd_compress_internal.h" | |||
#include "zstd_compress_sequences.h" | |||
#include "zstd_compress_literals.h" | |||
/*-************************************* | |||
* Superblock entropy buffer structs | |||
***************************************/ | |||
/** ZSTD_hufCTablesMetadata_t : | |||
* Stores Literals Block Type for a super-block in hType, and | |||
* huffman tree description in hufDesBuffer. | |||
* hufDesSize refers to the size of huffman tree description in bytes. | |||
* This metadata is populated in ZSTD_buildSuperBlockEntropy_literal() */ | |||
typedef struct { | |||
symbolEncodingType_e hType; | |||
BYTE hufDesBuffer[500]; /* TODO give name to this value */ | |||
size_t hufDesSize; | |||
} ZSTD_hufCTablesMetadata_t; | |||
/** ZSTD_fseCTablesMetadata_t : | |||
* Stores symbol compression modes for a super-block in {ll, ol, ml}Type, and | |||
* fse tables in fseTablesBuffer. | |||
* fseTablesSize refers to the size of fse tables in bytes. | |||
* This metadata is populated in ZSTD_buildSuperBlockEntropy_sequences() */ | |||
typedef struct { | |||
symbolEncodingType_e llType; | |||
symbolEncodingType_e ofType; | |||
symbolEncodingType_e mlType; | |||
BYTE fseTablesBuffer[500]; /* TODO give name to this value */ | |||
size_t fseTablesSize; | |||
size_t lastCountSize; /* This is to account for bug in 1.3.4. More detail in ZSTD_compressSubBlock_sequences() */ | |||
} ZSTD_fseCTablesMetadata_t; | |||
typedef struct { | |||
ZSTD_hufCTablesMetadata_t hufMetadata; | |||
ZSTD_fseCTablesMetadata_t fseMetadata; | |||
} ZSTD_entropyCTablesMetadata_t; | |||
/** ZSTD_buildSuperBlockEntropy_literal() :
 *  Builds entropy for the super-block literals.
 *  Stores literals block type (raw, rle, compressed, repeat) and
 *  huffman description table to hufMetadata.
 *  @param disableLiteralsCompression when non-zero, always select set_basic (raw).
 *  @param workspace shared scratch area; layout :
 *         [symbol counts : (HUF_SYMBOLVALUE_MAX+1) * sizeof(unsigned)][huffman node area : remainder]
 *  @return : size of huffman description table, or 0 when no table is emitted
 *            (set_basic / set_rle / set_repeat), or an error code */
static size_t ZSTD_buildSuperBlockEntropy_literal(void* const src, size_t srcSize,
                                                  const ZSTD_hufCTables_t* prevHuf,
                                                  ZSTD_hufCTables_t* nextHuf,
                                                  ZSTD_hufCTablesMetadata_t* hufMetadata,
                                                  const int disableLiteralsCompression,
                                                  void* workspace, size_t wkspSize)
{
    /* carve the workspace : counts first, huffman build area after */
    BYTE* const wkspStart = (BYTE*)workspace;
    BYTE* const wkspEnd = wkspStart + wkspSize;
    BYTE* const countWkspStart = wkspStart;
    unsigned* const countWksp = (unsigned*)workspace;
    const size_t countWkspSize = (HUF_SYMBOLVALUE_MAX + 1) * sizeof(unsigned);
    BYTE* const nodeWksp = countWkspStart + countWkspSize;
    const size_t nodeWkspSize = wkspEnd-nodeWksp;
    unsigned maxSymbolValue = 255;
    unsigned huffLog = HUF_TABLELOG_DEFAULT;
    HUF_repeat repeat = prevHuf->repeatMode;

    DEBUGLOG(5, "ZSTD_buildSuperBlockEntropy_literal (srcSize=%zu)", srcSize);

    /* Prepare nextEntropy assuming reusing the existing table */
    memcpy(nextHuf, prevHuf, sizeof(*prevHuf));

    if (disableLiteralsCompression) {
        DEBUGLOG(5, "set_basic - disabled");
        hufMetadata->hType = set_basic;
        return 0;
    }

    /* small ? don't even attempt compression (speed opt) */
#   define COMPRESS_LITERALS_SIZE_MIN 63
    {   /* threshold is lower when a valid previous table can be reused for free */
        size_t const minLitSize = (prevHuf->repeatMode == HUF_repeat_valid) ? 6 : COMPRESS_LITERALS_SIZE_MIN;
        if (srcSize <= minLitSize) {
            DEBUGLOG(5, "set_basic - too small");
            hufMetadata->hType = set_basic;
            return 0;
        }
    }

    /* Scan input and build symbol stats */
    {   size_t const largest = HIST_count_wksp (countWksp, &maxSymbolValue, (const BYTE*)src, srcSize, workspace, wkspSize);
        FORWARD_IF_ERROR(largest, "HIST_count_wksp failed");
        if (largest == srcSize) {
            /* single repeated byte : RLE */
            DEBUGLOG(5, "set_rle");
            hufMetadata->hType = set_rle;
            return 0;
        }
        if (largest <= (srcSize >> 7)+4) {
            /* heuristic : distribution too flat, huffman would not gain enough */
            DEBUGLOG(5, "set_basic - no gain");
            hufMetadata->hType = set_basic;
            return 0;
        }
    }

    /* Validate the previous Huffman table */
    if (repeat == HUF_repeat_check && !HUF_validateCTable((HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue)) {
        repeat = HUF_repeat_none;   /* previous table cannot encode current symbols */
    }

    /* Build Huffman Tree */
    memset(nextHuf->CTable, 0, sizeof(nextHuf->CTable));
    huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue);
    {   size_t const maxBits = HUF_buildCTable_wksp((HUF_CElt*)nextHuf->CTable, countWksp,
                                                    maxSymbolValue, huffLog,
                                                    nodeWksp, nodeWkspSize);
        FORWARD_IF_ERROR(maxBits, "HUF_buildCTable_wksp");
        huffLog = (U32)maxBits;
        {   /* Build and write the CTable */
            size_t const newCSize = HUF_estimateCompressedSize(
                    (HUF_CElt*)nextHuf->CTable, countWksp, maxSymbolValue);
            size_t const hSize = HUF_writeCTable(
                    hufMetadata->hufDesBuffer, sizeof(hufMetadata->hufDesBuffer),
                    (HUF_CElt*)nextHuf->CTable, maxSymbolValue, huffLog);
            /* Check against repeating the previous CTable :
             * repeating avoids paying hSize for a new tree description */
            if (repeat != HUF_repeat_none) {
                size_t const oldCSize = HUF_estimateCompressedSize(
                        (HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue);
                if (oldCSize < srcSize && (oldCSize <= hSize + newCSize || hSize + 12 >= srcSize)) {
                    DEBUGLOG(5, "set_repeat - smaller");
                    memcpy(nextHuf, prevHuf, sizeof(*prevHuf));   /* discard the new table */
                    hufMetadata->hType = set_repeat;
                    return 0;
                }
            }
            if (newCSize + hSize >= srcSize) {
                /* new table + payload would not beat raw literals */
                DEBUGLOG(5, "set_basic - no gains");
                memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
                hufMetadata->hType = set_basic;
                return 0;
            }
            DEBUGLOG(5, "set_compressed (hSize=%u)", (U32)hSize);
            hufMetadata->hType = set_compressed;
            nextHuf->repeatMode = HUF_repeat_check;   /* new table must be validated before next reuse */
            return hSize;
        }
    }
}
/** ZSTD_buildSuperBlockEntropy_sequences() :
 *  Builds entropy for the super-block sequences.
 *  Builds the three FSE CTables (literal lengths, offsets, match lengths)
 *  into nextEntropy, stores the selected symbol compression modes in
 *  fseMetadata->{ll,of,ml}Type, and concatenates the serialized NCount
 *  headers into fseMetadata->fseTablesBuffer.
 *  @param workspace shared scratch area; layout :
 *         [symbol counts : (MaxSeq+1) * sizeof(unsigned)][CTable build area : remainder]
 *  @return : total size of the serialized fse tables (bytes) or error code */
static size_t ZSTD_buildSuperBlockEntropy_sequences(seqStore_t* seqStorePtr,
                                                    const ZSTD_fseCTables_t* prevEntropy,
                                                    ZSTD_fseCTables_t* nextEntropy,
                                                    const ZSTD_CCtx_params* cctxParams,
                                                    ZSTD_fseCTablesMetadata_t* fseMetadata,
                                                    void* workspace, size_t wkspSize)
{
    BYTE* const wkspStart = (BYTE*)workspace;
    BYTE* const wkspEnd = wkspStart + wkspSize;
    BYTE* const countWkspStart = wkspStart;
    unsigned* const countWksp = (unsigned*)workspace;
    const size_t countWkspSize = (MaxSeq + 1) * sizeof(unsigned);
    BYTE* const cTableWksp = countWkspStart + countWkspSize;
    const size_t cTableWkspSize = wkspEnd-cTableWksp;
    ZSTD_strategy const strategy = cctxParams->cParams.strategy;
    FSE_CTable* CTable_LitLength = nextEntropy->litlengthCTable;
    FSE_CTable* CTable_OffsetBits = nextEntropy->offcodeCTable;
    FSE_CTable* CTable_MatchLength = nextEntropy->matchlengthCTable;
    const BYTE* const ofCodeTable = seqStorePtr->ofCode;
    const BYTE* const llCodeTable = seqStorePtr->llCode;
    const BYTE* const mlCodeTable = seqStorePtr->mlCode;
    size_t const nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart;
    /* output range : NCount headers accumulate into fseMetadata's buffer */
    BYTE* const ostart = fseMetadata->fseTablesBuffer;
    BYTE* const oend = ostart + sizeof(fseMetadata->fseTablesBuffer);
    BYTE* op = ostart;

    assert(cTableWkspSize >= (1 << MaxFSELog) * sizeof(FSE_FUNCTION_TYPE));
    DEBUGLOG(5, "ZSTD_buildSuperBlockEntropy_sequences (nbSeq=%zu)", nbSeq);
    memset(workspace, 0, wkspSize);

    fseMetadata->lastCountSize = 0;
    /* convert length/distances into codes */
    ZSTD_seqToCodes(seqStorePtr);
    /* build CTable for Literal Lengths */
    {   U32 LLtype;
        unsigned max = MaxLL;
        size_t const mostFrequent = HIST_countFast_wksp(countWksp, &max, llCodeTable, nbSeq, workspace, wkspSize);  /* can't fail */
        DEBUGLOG(5, "Building LL table");
        nextEntropy->litlength_repeatMode = prevEntropy->litlength_repeatMode;
        LLtype = ZSTD_selectEncodingType(&nextEntropy->litlength_repeatMode,
                                        countWksp, max, mostFrequent, nbSeq,
                                        LLFSELog, prevEntropy->litlengthCTable,
                                        LL_defaultNorm, LL_defaultNormLog,
                                        ZSTD_defaultAllowed, strategy);
        assert(set_basic < set_compressed && set_rle < set_compressed);
        assert(!(LLtype < set_compressed && nextEntropy->litlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */
        {   size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_LitLength, LLFSELog, (symbolEncodingType_e)LLtype,
                                                    countWksp, max, llCodeTable, nbSeq, LL_defaultNorm, LL_defaultNormLog, MaxLL,
                                                    prevEntropy->litlengthCTable, sizeof(prevEntropy->litlengthCTable),
                                                    cTableWksp, cTableWkspSize);
            FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for LitLens failed");
            if (LLtype == set_compressed)
                fseMetadata->lastCountSize = countSize;   /* track last NCount size (1.3.4 decoder workaround) */
            op += countSize;
            fseMetadata->llType = (symbolEncodingType_e) LLtype;
    }   }
    /* build CTable for Offsets */
    {   U32 Offtype;
        unsigned max = MaxOff;
        size_t const mostFrequent = HIST_countFast_wksp(countWksp, &max, ofCodeTable, nbSeq, workspace, wkspSize);  /* can't fail */
        /* We can only use the basic table if max <= DefaultMaxOff, otherwise the offsets are too large */
        ZSTD_defaultPolicy_e const defaultPolicy = (max <= DefaultMaxOff) ? ZSTD_defaultAllowed : ZSTD_defaultDisallowed;
        DEBUGLOG(5, "Building OF table");
        nextEntropy->offcode_repeatMode = prevEntropy->offcode_repeatMode;
        Offtype = ZSTD_selectEncodingType(&nextEntropy->offcode_repeatMode,
                                        countWksp, max, mostFrequent, nbSeq,
                                        OffFSELog, prevEntropy->offcodeCTable,
                                        OF_defaultNorm, OF_defaultNormLog,
                                        defaultPolicy, strategy);
        assert(!(Offtype < set_compressed && nextEntropy->offcode_repeatMode != FSE_repeat_none)); /* We don't copy tables */
        {   size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)Offtype,
                                                    countWksp, max, ofCodeTable, nbSeq, OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff,
                                                    prevEntropy->offcodeCTable, sizeof(prevEntropy->offcodeCTable),
                                                    cTableWksp, cTableWkspSize);
            FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for Offsets failed");
            if (Offtype == set_compressed)
                fseMetadata->lastCountSize = countSize;
            op += countSize;
            fseMetadata->ofType = (symbolEncodingType_e) Offtype;
    }   }
    /* build CTable for MatchLengths */
    {   U32 MLtype;
        unsigned max = MaxML;
        size_t const mostFrequent = HIST_countFast_wksp(countWksp, &max, mlCodeTable, nbSeq, workspace, wkspSize);  /* can't fail */
        DEBUGLOG(5, "Building ML table (remaining space : %i)", (int)(oend-op));
        nextEntropy->matchlength_repeatMode = prevEntropy->matchlength_repeatMode;
        MLtype = ZSTD_selectEncodingType(&nextEntropy->matchlength_repeatMode,
                                        countWksp, max, mostFrequent, nbSeq,
                                        MLFSELog, prevEntropy->matchlengthCTable,
                                        ML_defaultNorm, ML_defaultNormLog,
                                        ZSTD_defaultAllowed, strategy);
        assert(!(MLtype < set_compressed && nextEntropy->matchlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */
        {   size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_MatchLength, MLFSELog, (symbolEncodingType_e)MLtype,
                                                    countWksp, max, mlCodeTable, nbSeq, ML_defaultNorm, ML_defaultNormLog, MaxML,
                                                    prevEntropy->matchlengthCTable, sizeof(prevEntropy->matchlengthCTable),
                                                    cTableWksp, cTableWkspSize);
            FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for MatchLengths failed");
            if (MLtype == set_compressed)
                fseMetadata->lastCountSize = countSize;
            op += countSize;
            fseMetadata->mlType = (symbolEncodingType_e) MLtype;
    }   }

    assert((size_t) (op-ostart) <= sizeof(fseMetadata->fseTablesBuffer));
    return op-ostart;
}
/** ZSTD_buildSuperBlockEntropy() : | |||
* Builds entropy for the super-block. | |||
* @return : 0 on success or error code */ | |||
static size_t | |||
ZSTD_buildSuperBlockEntropy(seqStore_t* seqStorePtr, | |||
const ZSTD_entropyCTables_t* prevEntropy, | |||
ZSTD_entropyCTables_t* nextEntropy, | |||
const ZSTD_CCtx_params* cctxParams, | |||
ZSTD_entropyCTablesMetadata_t* entropyMetadata, | |||
void* workspace, size_t wkspSize) | |||
{ | |||
size_t const litSize = seqStorePtr->lit - seqStorePtr->litStart; | |||
DEBUGLOG(5, "ZSTD_buildSuperBlockEntropy"); | |||
entropyMetadata->hufMetadata.hufDesSize = | |||
ZSTD_buildSuperBlockEntropy_literal(seqStorePtr->litStart, litSize, | |||
&prevEntropy->huf, &nextEntropy->huf, | |||
&entropyMetadata->hufMetadata, | |||
ZSTD_disableLiteralsCompression(cctxParams), | |||
workspace, wkspSize); | |||
FORWARD_IF_ERROR(entropyMetadata->hufMetadata.hufDesSize, "ZSTD_buildSuperBlockEntropy_literal failed"); | |||
entropyMetadata->fseMetadata.fseTablesSize = | |||
ZSTD_buildSuperBlockEntropy_sequences(seqStorePtr, | |||
&prevEntropy->fse, &nextEntropy->fse, | |||
cctxParams, | |||
&entropyMetadata->fseMetadata, | |||
workspace, wkspSize); | |||
FORWARD_IF_ERROR(entropyMetadata->fseMetadata.fseTablesSize, "ZSTD_buildSuperBlockEntropy_sequences failed"); | |||
return 0; | |||
} | |||
/** ZSTD_compressSubBlock_literal() :
 *  Compresses literals section for a sub-block.
 *  When we have to write the Huffman table we will sometimes choose a header
 *  size larger than necessary. This is because we have to pick the header size
 *  before we know the table size + compressed size, so we have a bound on the
 *  table size. If we guessed incorrectly, we fall back to uncompressed literals.
 *
 *  We write the header when writeEntropy=1 and set entropyWritten=1 when we succeeded
 *  in writing the header, otherwise it is set to 0.
 *
 *  hufMetadata->hType has literals block type info.
 *      If it is set_basic, all sub-blocks literals section will be Raw_Literals_Block.
 *      If it is set_rle, all sub-blocks literals section will be RLE_Literals_Block.
 *      If it is set_compressed, first sub-block's literals section will be Compressed_Literals_Block
 *      If it is set_repeat, first sub-block's literals section will be Treeless_Literals_Block
 *      and the following sub-blocks' literals sections will be Treeless_Literals_Block.
 *  @return : compressed size of literals section of a sub-block
 *            Or 0 if it unable to compress.
 *            Or error code */
static size_t ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable,
                                    const ZSTD_hufCTablesMetadata_t* hufMetadata,
                                    const BYTE* literals, size_t litSize,
                                    void* dst, size_t dstSize,
                                    const int bmi2, int writeEntropy, int* entropyWritten)
{
    /* reserve room in the header size fields for the table description when it is emitted */
    size_t const header = writeEntropy ? 200 : 0;
    /* literals header is 3, 4 or 5 bytes, depending on encoded sizes */
    size_t const lhSize = 3 + (litSize >= (1 KB - header)) + (litSize >= (16 KB - header));
    BYTE* const ostart = (BYTE*)dst;
    BYTE* const oend = ostart + dstSize;
    BYTE* op = ostart + lhSize;
    U32 const singleStream = lhSize == 3;   /* 3-byte header implies single-stream huffman */
    symbolEncodingType_e hType = writeEntropy ? hufMetadata->hType : set_repeat;
    size_t cLitSize = 0;

    (void)bmi2; /* TODO bmi2... */

    DEBUGLOG(5, "ZSTD_compressSubBlock_literal (litSize=%zu, lhSize=%zu, writeEntropy=%d)", litSize, lhSize, writeEntropy);

    *entropyWritten = 0;
    if (litSize == 0 || hufMetadata->hType == set_basic) {
      DEBUGLOG(5, "ZSTD_compressSubBlock_literal using raw literal");
      return ZSTD_noCompressLiterals(dst, dstSize, literals, litSize);
    } else if (hufMetadata->hType == set_rle) {
      DEBUGLOG(5, "ZSTD_compressSubBlock_literal using rle literal");
      return ZSTD_compressRleLiteralsBlock(dst, dstSize, literals, litSize);
    }

    assert(litSize > 0);
    assert(hufMetadata->hType == set_compressed || hufMetadata->hType == set_repeat);

    if (writeEntropy && hufMetadata->hType == set_compressed) {
        /* emit the serialized huffman tree description before the compressed streams */
        memcpy(op, hufMetadata->hufDesBuffer, hufMetadata->hufDesSize);
        op += hufMetadata->hufDesSize;
        cLitSize += hufMetadata->hufDesSize;
        DEBUGLOG(5, "ZSTD_compressSubBlock_literal (hSize=%zu)", hufMetadata->hufDesSize);
    }

    /* TODO bmi2 */
    {   const size_t cSize = singleStream ? HUF_compress1X_usingCTable(op, oend-op, literals, litSize, hufTable)
                                          : HUF_compress4X_usingCTable(op, oend-op, literals, litSize, hufTable);
        op += cSize;   /* harmless even on error : op is not used before the check below returns */
        cLitSize += cSize;
        if (cSize == 0 || ERR_isError(cSize)) {
            DEBUGLOG(5, "Failed to write entropy tables %s", ZSTD_getErrorName(cSize));
            return 0;
        }
        /* If we expand and we aren't writing a header then emit uncompressed */
        if (!writeEntropy && cLitSize >= litSize) {
            DEBUGLOG(5, "ZSTD_compressSubBlock_literal using raw literal because uncompressible");
            return ZSTD_noCompressLiterals(dst, dstSize, literals, litSize);
        }
        /* If we are writing headers then allow expansion that doesn't change our header size. */
        if (lhSize < (size_t)(3 + (cLitSize >= 1 KB) + (cLitSize >= 16 KB))) {
            assert(cLitSize > litSize);
            DEBUGLOG(5, "Literals expanded beyond allowed header size");
            return ZSTD_noCompressLiterals(dst, dstSize, literals, litSize);
        }
        DEBUGLOG(5, "ZSTD_compressSubBlock_literal (cSize=%zu)", cSize);
    }

    /* Build header : block type + size format + regenerated & compressed sizes */
    switch(lhSize)
    {
    case 3: /* 2 - 2 - 10 - 10 */
        {   U32 const lhc = hType + ((!singleStream) << 2) + ((U32)litSize<<4) + ((U32)cLitSize<<14);
            MEM_writeLE24(ostart, lhc);
            break;
        }
    case 4: /* 2 - 2 - 14 - 14 */
        {   U32 const lhc = hType + (2 << 2) + ((U32)litSize<<4) + ((U32)cLitSize<<18);
            MEM_writeLE32(ostart, lhc);
            break;
        }
    case 5: /* 2 - 2 - 18 - 18 */
        {   U32 const lhc = hType + (3 << 2) + ((U32)litSize<<4) + ((U32)cLitSize<<22);
            MEM_writeLE32(ostart, lhc);
            ostart[4] = (BYTE)(cLitSize >> 10);
            break;
        }
    default:  /* not possible : lhSize is {3,4,5} */
        assert(0);
    }
    *entropyWritten = 1;
    DEBUGLOG(5, "Compressed literals: %u -> %u", (U32)litSize, (U32)(op-ostart));
    return op-ostart;
}
/** ZSTD_seqDecompressedSize() :
 *  Computes the number of bytes this run of sequences regenerates on
 *  decompression : sum of the match lengths plus the provided litSize.
 *  @param litSize      total literals attached to this run; must equal the sum
 *                      of the sequences' literal lengths, except for the last
 *                      chunk which may carry additional trailing literals.
 *  @param lastSequence non-zero when the run ends with the block's last sequence
 *                      (relaxes the litSize equality check above).
 *  @return : decompressed size contributed by these sequences + literals. */
static size_t ZSTD_seqDecompressedSize(seqStore_t const* seqStore, const seqDef* sequences, size_t nbSeq, size_t litSize, int lastSequence) {
    const seqDef* const sstart = sequences;
    const seqDef* const send = sequences + nbSeq;
    const seqDef* sp = sstart;
    size_t matchLengthSum = 0;
    size_t litLengthSum = 0;
    while (send-sp > 0) {
        /* ZSTD_getSequenceLength() resolves the raw seqDef fields into actual lengths */
        ZSTD_sequenceLength const seqLen = ZSTD_getSequenceLength(seqStore, sp);
        litLengthSum += seqLen.litLength;
        matchLengthSum += seqLen.matchLength;
        sp++;
    }
    assert(litLengthSum <= litSize);
    if (!lastSequence) {
        assert(litLengthSum == litSize);
    }
    /* litLengthSum is only read by the asserts above; without this, release
     * (NDEBUG) builds emit a set-but-unused-variable warning. */
    (void)litLengthSum;
    return matchLengthSum + litSize;
}
/** ZSTD_compressSubBlock_sequences() :
 *  Compresses sequences section for a sub-block.
 *  fseMetadata->llType, fseMetadata->ofType, and fseMetadata->mlType have
 *  symbol compression modes for the super-block.
 *  The first successfully compressed block will have these in its header.
 *  We set entropyWritten=1 when we succeed in compressing the sequences.
 *  The following sub-blocks will always have repeat mode.
 *  @return : compressed size of sequences section of a sub-block
 *            Or 0 if it is unable to compress
 *            Or error code. */
static size_t ZSTD_compressSubBlock_sequences(const ZSTD_fseCTables_t* fseTables,
                                              const ZSTD_fseCTablesMetadata_t* fseMetadata,
                                              const seqDef* sequences, size_t nbSeq,
                                              const BYTE* llCode, const BYTE* mlCode, const BYTE* ofCode,
                                              const ZSTD_CCtx_params* cctxParams,
                                              void* dst, size_t dstCapacity,
                                              const int bmi2, int writeEntropy, int* entropyWritten)
{
    const int longOffsets = cctxParams->cParams.windowLog > STREAM_ACCUMULATOR_MIN;
    BYTE* const ostart = (BYTE*)dst;
    BYTE* const oend = ostart + dstCapacity;
    BYTE* op = ostart;
    BYTE* seqHead;

    DEBUGLOG(5, "ZSTD_compressSubBlock_sequences (nbSeq=%zu, writeEntropy=%d, longOffsets=%d)", nbSeq, writeEntropy, longOffsets);

    *entropyWritten = 0;
    /* Sequences Header : nbSeq uses a 1, 2 or 3 byte variable-length encoding */
    RETURN_ERROR_IF((oend-op) < 3 /*max nbSeq Size*/ + 1 /*seqHead*/,
                    dstSize_tooSmall, "");
    if (nbSeq < 0x7F)
        *op++ = (BYTE)nbSeq;
    else if (nbSeq < LONGNBSEQ)
        op[0] = (BYTE)((nbSeq>>8) + 0x80), op[1] = (BYTE)nbSeq, op+=2;
    else
        op[0]=0xFF, MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ)), op+=3;
    if (nbSeq==0) {
        return op - ostart;   /* nothing else to encode */
    }

    /* seqHead : flags for FSE encoding type */
    seqHead = op++;

    DEBUGLOG(5, "ZSTD_compressSubBlock_sequences (seqHeadSize=%u)", (unsigned)(op-ostart));

    if (writeEntropy) {
        /* first sub-block : declare the super-block's encoding modes
         * and emit the serialized NCount headers */
        const U32 LLtype = fseMetadata->llType;
        const U32 Offtype = fseMetadata->ofType;
        const U32 MLtype = fseMetadata->mlType;
        DEBUGLOG(5, "ZSTD_compressSubBlock_sequences (fseTablesSize=%zu)", fseMetadata->fseTablesSize);
        *seqHead = (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2));
        memcpy(op, fseMetadata->fseTablesBuffer, fseMetadata->fseTablesSize);
        op += fseMetadata->fseTablesSize;
    } else {
        /* subsequent sub-blocks reuse the tables of the first one */
        const U32 repeat = set_repeat;
        *seqHead = (BYTE)((repeat<<6) + (repeat<<4) + (repeat<<2));
    }

    {   size_t const bitstreamSize = ZSTD_encodeSequences(
                                        op, oend - op,
                                        fseTables->matchlengthCTable, mlCode,
                                        fseTables->offcodeCTable, ofCode,
                                        fseTables->litlengthCTable, llCode,
                                        sequences, nbSeq,
                                        longOffsets, bmi2);
        FORWARD_IF_ERROR(bitstreamSize, "ZSTD_encodeSequences failed");
        op += bitstreamSize;
        /* zstd versions <= 1.3.4 mistakenly report corruption when
         * FSE_readNCount() receives a buffer < 4 bytes.
         * Fixed by https://github.com/facebook/zstd/pull/1146.
         * This can happen when the last set_compressed table present is 2
         * bytes and the bitstream is only one byte.
         * In this exceedingly rare case, we will simply emit an uncompressed
         * block, since it isn't worth optimizing.
         */
#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
        if (writeEntropy && fseMetadata->lastCountSize && fseMetadata->lastCountSize + bitstreamSize < 4) {
            /* NCountSize >= 2 && bitstreamSize > 0 ==> lastCountSize == 3 */
            assert(fseMetadata->lastCountSize + bitstreamSize == 3);
            DEBUGLOG(5, "Avoiding bug in zstd decoder in versions <= 1.3.4 by "
                        "emitting an uncompressed block.");
            return 0;
        }
#endif
        DEBUGLOG(5, "ZSTD_compressSubBlock_sequences (bitstreamSize=%zu)", bitstreamSize);
    }

    /* zstd versions <= 1.4.0 mistakenly report error when
     * sequences section body size is less than 3 bytes.
     * Fixed by https://github.com/facebook/zstd/pull/1664.
     * This can happen when the previous sequences section block is compressed
     * with rle mode and the current block's sequences section is compressed
     * with repeat mode where sequences section body size can be 1 byte.
     */
#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
    if (op-seqHead < 4) {
        DEBUGLOG(5, "Avoiding bug in zstd decoder in versions <= 1.4.0 by emitting "
                    "an uncompressed block when sequences are < 4 bytes");
        return 0;
    }
#endif

    *entropyWritten = 1;
    return op - ostart;
}
/** ZSTD_compressSubBlock() :
 *  Compresses a single sub-block : writes the literals section, then the
 *  sequences section, then backfills the 3-byte block header once the total
 *  compressed size is known.
 *  @param litEntropyWritten, seqEntropyWritten : out-params, set by the
 *         literal / sequence sub-encoders when they emit their entropy tables.
 *  @return : compressed size of the sub-block (block header included),
 *  Or 0 if it failed to compress. */
static size_t ZSTD_compressSubBlock(const ZSTD_entropyCTables_t* entropy,
                                    const ZSTD_entropyCTablesMetadata_t* entropyMetadata,
                                    const seqDef* sequences, size_t nbSeq,
                                    const BYTE* literals, size_t litSize,
                                    const BYTE* llCode, const BYTE* mlCode, const BYTE* ofCode,
                                    const ZSTD_CCtx_params* cctxParams,
                                    void* dst, size_t dstCapacity,
                                    const int bmi2,
                                    int writeLitEntropy, int writeSeqEntropy,
                                    int* litEntropyWritten, int* seqEntropyWritten,
                                    U32 lastBlock)
{
    BYTE* const ostart = (BYTE*)dst;
    BYTE* const oend = ostart + dstCapacity;
    BYTE* op = ostart + ZSTD_blockHeaderSize;  /* reserve room for the block header, written last */
    DEBUGLOG(5, "ZSTD_compressSubBlock (litSize=%zu, nbSeq=%zu, writeLitEntropy=%d, writeSeqEntropy=%d, lastBlock=%d)",
                litSize, nbSeq, writeLitEntropy, writeSeqEntropy, lastBlock);
    /* literals section */
    {   size_t cLitSize = ZSTD_compressSubBlock_literal((const HUF_CElt*)entropy->huf.CTable,
                                                        &entropyMetadata->hufMetadata, literals, litSize,
                                                        op, oend-op, bmi2, writeLitEntropy, litEntropyWritten);
        FORWARD_IF_ERROR(cLitSize, "ZSTD_compressSubBlock_literal failed");
        if (cLitSize == 0) return 0;  /* 0 == "failed to compress" : propagate to caller */
        op += cLitSize;
    }
    /* sequences section */
    {   size_t cSeqSize = ZSTD_compressSubBlock_sequences(&entropy->fse,
                                                          &entropyMetadata->fseMetadata,
                                                          sequences, nbSeq,
                                                          llCode, mlCode, ofCode,
                                                          cctxParams,
                                                          op, oend-op,
                                                          bmi2, writeSeqEntropy, seqEntropyWritten);
        FORWARD_IF_ERROR(cSeqSize, "ZSTD_compressSubBlock_sequences failed");
        if (cSeqSize == 0) return 0;  /* 0 == "failed to compress" : propagate to caller */
        op += cSeqSize;
    }
    /* Write block header (now that the compressed size is known) */
    {   size_t cSize = (op-ostart)-ZSTD_blockHeaderSize;
        U32 const cBlockHeader24 = lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3);
        MEM_writeLE24(ostart, cBlockHeader24);
    }
    return op-ostart;
}
static size_t ZSTD_estimateSubBlockSize_literal(const BYTE* literals, size_t litSize, | |||
const ZSTD_hufCTables_t* huf, | |||
const ZSTD_hufCTablesMetadata_t* hufMetadata, | |||
void* workspace, size_t wkspSize, | |||
int writeEntropy) | |||
{ | |||
unsigned* const countWksp = (unsigned*)workspace; | |||
unsigned maxSymbolValue = 255; | |||
size_t literalSectionHeaderSize = 3; /* Use hard coded size of 3 bytes */ | |||
if (hufMetadata->hType == set_basic) return litSize; | |||
else if (hufMetadata->hType == set_rle) return 1; | |||
else if (hufMetadata->hType == set_compressed || hufMetadata->hType == set_repeat) { | |||
size_t const largest = HIST_count_wksp (countWksp, &maxSymbolValue, (const BYTE*)literals, litSize, workspace, wkspSize); | |||
if (ZSTD_isError(largest)) return litSize; | |||
{ size_t cLitSizeEstimate = HUF_estimateCompressedSize((const HUF_CElt*)huf->CTable, countWksp, maxSymbolValue); | |||
if (writeEntropy) cLitSizeEstimate += hufMetadata->hufDesSize; | |||
return cLitSizeEstimate + literalSectionHeaderSize; | |||
} } | |||
assert(0); /* impossible */ | |||
return 0; | |||
} | |||
/* Estimates, in bytes, the encoded size of one symbol-type stream
 * (literal-length, match-length, or offset codes) of the sequences section,
 * including the additional raw bits that follow each code.
 * @param additionalBits : per-code extra-bit table; NULL for offsets, where
 *        the code value itself is the number of additional bits. */
static size_t ZSTD_estimateSubBlockSize_symbolType(symbolEncodingType_e type,
                        const BYTE* codeTable, unsigned maxCode,
                        size_t nbSeq, const FSE_CTable* fseCTable,
                        const U32* additionalBits,
                        short const* defaultNorm, U32 defaultNormLog,
                        void* workspace, size_t wkspSize)
{
    unsigned* const countWksp = (unsigned*)workspace;
    const BYTE* ctp = codeTable;
    const BYTE* const ctStart = ctp;
    const BYTE* const ctEnd = ctStart + nbSeq;
    /* accumulates bits; may transiently hold an error code (checked below) */
    size_t cSymbolTypeSizeEstimateInBits = 0;
    unsigned max = maxCode;
    HIST_countFast_wksp(countWksp, &max, codeTable, nbSeq, workspace, wkspSize);  /* can't fail */
    if (type == set_basic) {
        /* cost of coding this histogram with the predefined distribution */
        cSymbolTypeSizeEstimateInBits = ZSTD_crossEntropyCost(defaultNorm, defaultNormLog, countWksp, max);
    } else if (type == set_rle) {
        cSymbolTypeSizeEstimateInBits = 0;  /* counted as zero bits here */
    } else if (type == set_compressed || type == set_repeat) {
        cSymbolTypeSizeEstimateInBits = ZSTD_fseBitCost(fseCTable, countWksp, max);
    }
    /* the cost helpers report errors in-band : fall back to 10 bits/sequence */
    if (ZSTD_isError(cSymbolTypeSizeEstimateInBits)) return nbSeq * 10;
    /* add the raw additional bits carried after each code */
    while (ctp < ctEnd) {
        if (additionalBits) cSymbolTypeSizeEstimateInBits += additionalBits[*ctp];
        else cSymbolTypeSizeEstimateInBits += *ctp; /* for offset, offset code is also the number of additional bits */
        ctp++;
    }
    return cSymbolTypeSizeEstimateInBits / 8;  /* bits -> bytes (rounded down) */
}
static size_t ZSTD_estimateSubBlockSize_sequences(const BYTE* ofCodeTable, | |||
const BYTE* llCodeTable, | |||
const BYTE* mlCodeTable, | |||
size_t nbSeq, | |||
const ZSTD_fseCTables_t* fseTables, | |||
const ZSTD_fseCTablesMetadata_t* fseMetadata, | |||
void* workspace, size_t wkspSize, | |||
int writeEntropy) | |||
{ | |||
size_t sequencesSectionHeaderSize = 3; /* Use hard coded size of 3 bytes */ | |||
size_t cSeqSizeEstimate = 0; | |||
cSeqSizeEstimate += ZSTD_estimateSubBlockSize_symbolType(fseMetadata->ofType, ofCodeTable, MaxOff, | |||
nbSeq, fseTables->offcodeCTable, NULL, | |||
OF_defaultNorm, OF_defaultNormLog, | |||
workspace, wkspSize); | |||
cSeqSizeEstimate += ZSTD_estimateSubBlockSize_symbolType(fseMetadata->llType, llCodeTable, MaxLL, | |||
nbSeq, fseTables->litlengthCTable, LL_bits, | |||
LL_defaultNorm, LL_defaultNormLog, | |||
workspace, wkspSize); | |||
cSeqSizeEstimate += ZSTD_estimateSubBlockSize_symbolType(fseMetadata->mlType, mlCodeTable, MaxML, | |||
nbSeq, fseTables->matchlengthCTable, ML_bits, | |||
ML_defaultNorm, ML_defaultNormLog, | |||
workspace, wkspSize); | |||
if (writeEntropy) cSeqSizeEstimate += fseMetadata->fseTablesSize; | |||
return cSeqSizeEstimate + sequencesSectionHeaderSize; | |||
} | |||
static size_t ZSTD_estimateSubBlockSize(const BYTE* literals, size_t litSize, | |||
const BYTE* ofCodeTable, | |||
const BYTE* llCodeTable, | |||
const BYTE* mlCodeTable, | |||
size_t nbSeq, | |||
const ZSTD_entropyCTables_t* entropy, | |||
const ZSTD_entropyCTablesMetadata_t* entropyMetadata, | |||
void* workspace, size_t wkspSize, | |||
int writeLitEntropy, int writeSeqEntropy) { | |||
size_t cSizeEstimate = 0; | |||
cSizeEstimate += ZSTD_estimateSubBlockSize_literal(literals, litSize, | |||
&entropy->huf, &entropyMetadata->hufMetadata, | |||
workspace, wkspSize, writeLitEntropy); | |||
cSizeEstimate += ZSTD_estimateSubBlockSize_sequences(ofCodeTable, llCodeTable, mlCodeTable, | |||
nbSeq, &entropy->fse, &entropyMetadata->fseMetadata, | |||
workspace, wkspSize, writeSeqEntropy); | |||
return cSizeEstimate + ZSTD_blockHeaderSize; | |||
} | |||
/* Returns non-zero when any of the three sequence streams requires its
 * entropy table (or RLE byte) to be present in the bitstream, i.e. when
 * failing to write them would produce an undecodable block. */
static int ZSTD_needSequenceEntropyTables(ZSTD_fseCTablesMetadata_t const* fseMetadata)
{
    symbolEncodingType_e const ll = fseMetadata->llType;
    symbolEncodingType_e const ml = fseMetadata->mlType;
    symbolEncodingType_e const of = fseMetadata->ofType;
    return (ll == set_compressed || ll == set_rle)
        || (ml == set_compressed || ml == set_rle)
        || (of == set_compressed || of == set_rle);
}
/** ZSTD_compressSubBlock_multi() :
 *  Breaks super-block into multiple sub-blocks and compresses them.
 *  Entropy will be written to the first block.
 *  The following blocks will use repeat mode to compress.
 *  All sub-blocks are compressed blocks (no raw or rle blocks).
 *  Sub-blocks that fail to compress (or don't save space) are not committed;
 *  their input is re-emitted at the end as a single uncompressed block.
 *  @return : compressed size of the super block (which is multiple ZSTD blocks)
 *            Or 0 if it failed to compress. */
static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr,
                            const ZSTD_compressedBlockState_t* prevCBlock,
                            ZSTD_compressedBlockState_t* nextCBlock,
                            const ZSTD_entropyCTablesMetadata_t* entropyMetadata,
                            const ZSTD_CCtx_params* cctxParams,
                            void* dst, size_t dstCapacity,
                            const void* src, size_t srcSize,
                            const int bmi2, U32 lastBlock,
                            void* workspace, size_t wkspSize)
{
    const seqDef* const sstart = seqStorePtr->sequencesStart;
    const seqDef* const send = seqStorePtr->sequences;
    const seqDef* sp = sstart;              /* first sequence not yet committed to a sub-block */
    const BYTE* const lstart = seqStorePtr->litStart;
    const BYTE* const lend = seqStorePtr->lit;
    const BYTE* lp = lstart;                /* first literal not yet committed */
    BYTE const* ip = (BYTE const*)src;      /* tracks how much raw input has been committed */
    BYTE const* const iend = ip + srcSize;
    BYTE* const ostart = (BYTE*)dst;
    BYTE* const oend = ostart + dstCapacity;
    BYTE* op = ostart;
    const BYTE* llCodePtr = seqStorePtr->llCode;
    const BYTE* mlCodePtr = seqStorePtr->mlCode;
    const BYTE* ofCodePtr = seqStorePtr->ofCode;
    size_t targetCBlockSize = cctxParams->targetCBlockSize;
    size_t litSize, seqCount;               /* accumulated for the current candidate sub-block */
    int writeLitEntropy = entropyMetadata->hufMetadata.hType == set_compressed;
    int writeSeqEntropy = 1;
    int lastSequence = 0;
    DEBUGLOG(5, "ZSTD_compressSubBlock_multi (litSize=%u, nbSeq=%u)",
                (unsigned)(lend-lp), (unsigned)(send-sstart));
    litSize = 0;
    seqCount = 0;
    do {
        size_t cBlockSizeEstimate = 0;
        if (sstart == send) {
            /* no sequences at all : a single literals-only sub-block */
            lastSequence = 1;
        } else {
            /* grow the candidate sub-block by one sequence */
            const seqDef* const sequence = sp + seqCount;
            lastSequence = sequence == send - 1;
            litSize += ZSTD_getSequenceLength(seqStorePtr, sequence).litLength;
            seqCount++;
        }
        if (lastSequence) {
            /* the last sub-block also carries the trailing literals
             * (those after the final sequence) */
            assert(lp <= lend);
            assert(litSize <= (size_t)(lend - lp));
            litSize = (size_t)(lend - lp);
        }
        /* I think there is an optimization opportunity here.
         * Calling ZSTD_estimateSubBlockSize for every sequence can be wasteful
         * since it recalculates estimate from scratch.
         * For example, it would recount literal distribution and symbol codes everytime.
         */
        cBlockSizeEstimate = ZSTD_estimateSubBlockSize(lp, litSize, ofCodePtr, llCodePtr, mlCodePtr, seqCount,
                                                       &nextCBlock->entropy, entropyMetadata,
                                                       workspace, wkspSize, writeLitEntropy, writeSeqEntropy);
        /* flush the candidate once it's estimated to exceed the target size,
         * or when the input is exhausted */
        if (cBlockSizeEstimate > targetCBlockSize || lastSequence) {
            int litEntropyWritten = 0;
            int seqEntropyWritten = 0;
            const size_t decompressedSize = ZSTD_seqDecompressedSize(seqStorePtr, sp, seqCount, litSize, lastSequence);
            const size_t cSize = ZSTD_compressSubBlock(&nextCBlock->entropy, entropyMetadata,
                                                       sp, seqCount,
                                                       lp, litSize,
                                                       llCodePtr, mlCodePtr, ofCodePtr,
                                                       cctxParams,
                                                       op, oend-op,
                                                       bmi2, writeLitEntropy, writeSeqEntropy,
                                                       &litEntropyWritten, &seqEntropyWritten,
                                                       lastBlock && lastSequence);
            FORWARD_IF_ERROR(cSize, "ZSTD_compressSubBlock failed");
            /* commit only if the sub-block actually saved space;
             * otherwise keep accumulating (or fall through to the raw tail) */
            if (cSize > 0 && cSize < decompressedSize) {
                DEBUGLOG(5, "Committed the sub-block");
                assert(ip + decompressedSize <= iend);
                ip += decompressedSize;
                sp += seqCount;
                lp += litSize;
                op += cSize;
                llCodePtr += seqCount;
                mlCodePtr += seqCount;
                ofCodePtr += seqCount;
                litSize = 0;
                seqCount = 0;
                /* Entropy only needs to be written once */
                if (litEntropyWritten) {
                    writeLitEntropy = 0;
                }
                if (seqEntropyWritten) {
                    writeSeqEntropy = 0;
                }
            }
        }
    } while (!lastSequence);
    if (writeLitEntropy) {
        /* literal tables were never emitted : restore the previous block's
         * Huffman tables so the next block's repeat mode stays consistent */
        DEBUGLOG(5, "ZSTD_compressSubBlock_multi has literal entropy tables unwritten");
        memcpy(&nextCBlock->entropy.huf, &prevCBlock->entropy.huf, sizeof(prevCBlock->entropy.huf));
    }
    if (writeSeqEntropy && ZSTD_needSequenceEntropyTables(&entropyMetadata->fseMetadata)) {
        /* If we haven't written our entropy tables, then we've violated our contract and
         * must emit an uncompressed block.
         */
        DEBUGLOG(5, "ZSTD_compressSubBlock_multi has sequence entropy tables unwritten");
        return 0;
    }
    if (ip < iend) {
        /* some input was never committed to a compressed sub-block : emit it raw */
        size_t const cSize = ZSTD_noCompressBlock(op, oend - op, ip, iend - ip, lastBlock);
        DEBUGLOG(5, "ZSTD_compressSubBlock_multi last sub-block uncompressed, %zu bytes", (size_t)(iend - ip));
        FORWARD_IF_ERROR(cSize, "ZSTD_noCompressBlock failed");
        assert(cSize != 0);
        op += cSize;
        /* We have to regenerate the repcodes because we've skipped some sequences */
        if (sp < send) {
            seqDef const* seq;
            repcodes_t rep;
            memcpy(&rep, prevCBlock->rep, sizeof(rep));
            /* replay only the sequences that were actually emitted ([sstart, sp)) */
            for (seq = sstart; seq < sp; ++seq) {
                rep = ZSTD_updateRep(rep.rep, seq->offset - 1, ZSTD_getSequenceLength(seqStorePtr, seq).litLength == 0);
            }
            memcpy(nextCBlock->rep, &rep, sizeof(rep));
        }
    }
    DEBUGLOG(5, "ZSTD_compressSubBlock_multi compressed");
    return op-ostart;
}
/* ZSTD_compressSuperBlock() :
 * Entry point for targetCBlockSize mode :
 * 1) builds the entropy tables + metadata for the whole super-block,
 * 2) splits it into multiple sub-blocks via ZSTD_compressSubBlock_multi().
 * @return : compressed size of the super block, 0 if it failed to compress,
 *           or an error code (forwarded from the helpers). */
size_t ZSTD_compressSuperBlock(ZSTD_CCtx* zc,
                               void* dst, size_t dstCapacity,
                               void const* src, size_t srcSize,
                               unsigned lastBlock) {
    ZSTD_entropyCTablesMetadata_t entropyMetadata;
    FORWARD_IF_ERROR(ZSTD_buildSuperBlockEntropy(&zc->seqStore,
          &zc->blockState.prevCBlock->entropy,
          &zc->blockState.nextCBlock->entropy,
          &zc->appliedParams,
          &entropyMetadata,
          zc->entropyWorkspace, HUF_WORKSPACE_SIZE /* statically allocated in resetCCtx */), "");
    return ZSTD_compressSubBlock_multi(&zc->seqStore,
            zc->blockState.prevCBlock,
            zc->blockState.nextCBlock,
            &entropyMetadata,
            &zc->appliedParams,
            dst, dstCapacity,
            src, srcSize,
            zc->bmi2, lastBlock,
            zc->entropyWorkspace, HUF_WORKSPACE_SIZE /* statically allocated in resetCCtx */);
}
@@ -0,0 +1,32 @@ | |||
/* | |||
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. | |||
* All rights reserved. | |||
* | |||
* This source code is licensed under both the BSD-style license (found in the | |||
* LICENSE file in the root directory of this source tree) and the GPLv2 (found | |||
* in the COPYING file in the root directory of this source tree). | |||
* You may select, at your option, one of the above-listed licenses. | |||
*/ | |||
#ifndef ZSTD_COMPRESS_ADVANCED_H
#define ZSTD_COMPRESS_ADVANCED_H

/*-*************************************
*  Dependencies
***************************************/

#include "../zstd.h" /* ZSTD_CCtx */

/*-*************************************
*  Target Compressed Block Size
***************************************/

/* ZSTD_compressSuperBlock() :
 * Used to compress a super block when targetCBlockSize is being used.
 * The given block will be compressed into multiple sub blocks that are around targetCBlockSize.
 * @return : compressed size of the super block, 0 if it failed to compress,
 *           or an error code. */
size_t ZSTD_compressSuperBlock(ZSTD_CCtx* zc,
                               void* dst, size_t dstCapacity,
                               void const* src, size_t srcSize,
                               unsigned lastBlock);

#endif /* ZSTD_COMPRESS_ADVANCED_H */
@@ -0,0 +1,525 @@ | |||
/* | |||
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. | |||
* All rights reserved. | |||
* | |||
* This source code is licensed under both the BSD-style license (found in the | |||
* LICENSE file in the root directory of this source tree) and the GPLv2 (found | |||
* in the COPYING file in the root directory of this source tree). | |||
* You may select, at your option, one of the above-listed licenses. | |||
*/ | |||
#ifndef ZSTD_CWKSP_H | |||
#define ZSTD_CWKSP_H | |||
/*-************************************* | |||
* Dependencies | |||
***************************************/ | |||
#include "zstd_internal.h" | |||
#if defined (__cplusplus) | |||
extern "C" { | |||
#endif | |||
/*-************************************* | |||
* Constants | |||
***************************************/ | |||
/* Since the workspace is effectively its own little malloc implementation / | |||
* arena, when we run under ASAN, we should similarly insert redzones between | |||
* each internal element of the workspace, so ASAN will catch overruns that | |||
* reach outside an object but that stay inside the workspace. | |||
* | |||
* This defines the size of that redzone. | |||
*/ | |||
#ifndef ZSTD_CWKSP_ASAN_REDZONE_SIZE | |||
#define ZSTD_CWKSP_ASAN_REDZONE_SIZE 128 | |||
#endif | |||
/*-************************************* | |||
* Structures | |||
***************************************/ | |||
/* Allocation phases. They only ever advance (see
 * ZSTD_cwksp_internal_advance_phase()); once a later phase starts,
 * earlier-phase reservations are no longer possible. */
typedef enum {
    ZSTD_cwksp_alloc_objects,  /* phase 1 : fixed-size objects (ZSTD_cwksp_reserve_object) */
    ZSTD_cwksp_alloc_buffers,  /* phase 2 : unaligned buffers (ZSTD_cwksp_reserve_buffer) */
    ZSTD_cwksp_alloc_aligned   /* phase 3 : aligned allocs and tables */
} ZSTD_cwksp_alloc_phase_e;
/** | |||
* Zstd fits all its internal datastructures into a single continuous buffer, | |||
* so that it only needs to perform a single OS allocation (or so that a buffer | |||
* can be provided to it and it can perform no allocations at all). This buffer | |||
* is called the workspace. | |||
* | |||
* Several optimizations complicate that process of allocating memory ranges | |||
* from this workspace for each internal datastructure: | |||
* | |||
* - These different internal datastructures have different setup requirements: | |||
* | |||
* - The static objects need to be cleared once and can then be trivially | |||
* reused for each compression. | |||
* | |||
* - Various buffers don't need to be initialized at all--they are always | |||
* written into before they're read. | |||
* | |||
* - The matchstate tables have a unique requirement that they don't need | |||
* their memory to be totally cleared, but they do need the memory to have | |||
* some bound, i.e., a guarantee that all values in the memory they've been | |||
* allocated is less than some maximum value (which is the starting value | |||
* for the indices that they will then use for compression). When this | |||
* guarantee is provided to them, they can use the memory without any setup | |||
* work. When it can't, they have to clear the area. | |||
* | |||
* - These buffers also have different alignment requirements. | |||
* | |||
* - We would like to reuse the objects in the workspace for multiple | |||
* compressions without having to perform any expensive reallocation or | |||
* reinitialization work. | |||
* | |||
* - We would like to be able to efficiently reuse the workspace across | |||
* multiple compressions **even when the compression parameters change** and | |||
* we need to resize some of the objects (where possible). | |||
* | |||
* To attempt to manage this buffer, given these constraints, the ZSTD_cwksp | |||
* abstraction was created. It works as follows: | |||
* | |||
* Workspace Layout: | |||
* | |||
* [ ... workspace ... ] | |||
* [objects][tables ... ->] free space [<- ... aligned][<- ... buffers] | |||
* | |||
* The various objects that live in the workspace are divided into the | |||
* following categories, and are allocated separately: | |||
* | |||
* - Static objects: this is optionally the enclosing ZSTD_CCtx or ZSTD_CDict, | |||
* so that literally everything fits in a single buffer. Note: if present, | |||
* this must be the first object in the workspace, since ZSTD_free{CCtx, | |||
* CDict}() rely on a pointer comparison to see whether one or two frees are | |||
* required. | |||
* | |||
* - Fixed size objects: these are fixed-size, fixed-count objects that are | |||
* nonetheless "dynamically" allocated in the workspace so that we can | |||
* control how they're initialized separately from the broader ZSTD_CCtx. | |||
* Examples: | |||
* - Entropy Workspace | |||
* - 2 x ZSTD_compressedBlockState_t | |||
* - CDict dictionary contents | |||
* | |||
* - Tables: these are any of several different datastructures (hash tables, | |||
* chain tables, binary trees) that all respect a common format: they are | |||
* uint32_t arrays, all of whose values are between 0 and (nextSrc - base). | |||
* Their sizes depend on the cparams. | |||
* | |||
* - Aligned: these buffers are used for various purposes that require 4 byte | |||
* alignment, but don't require any initialization before they're used. | |||
* | |||
* - Buffers: these buffers are used for various purposes that don't require | |||
* any alignment or initialization before they're used. This means they can | |||
* be moved around at no cost for a new compression. | |||
* | |||
* Allocating Memory: | |||
* | |||
* The various types of objects must be allocated in order, so they can be | |||
* correctly packed into the workspace buffer. That order is: | |||
* | |||
* 1. Objects | |||
* 2. Buffers | |||
* 3. Aligned | |||
* 4. Tables | |||
* | |||
* Attempts to reserve objects of different types out of order will fail. | |||
*/ | |||
typedef struct {
    void* workspace;      /* start of the managed buffer */
    void* workspaceEnd;   /* one past the end of the managed buffer */
    void* objectEnd;      /* end of the objects region; tables grow up from here */
    void* tableEnd;       /* current end of table allocations */
    void* tableValidEnd;  /* tables in [objectEnd, tableValidEnd) keep the bounded-values guarantee */
    void* allocStart;     /* buffers/aligned allocs grow down from workspaceEnd to here */
    int allocFailed;           /* sticky flag : set when any reservation fails, reset by ZSTD_cwksp_clear() */
    int workspaceOversizedDuration;  /* consecutive "too large" observations (see ZSTD_cwksp_bump_oversized_duration) */
    ZSTD_cwksp_alloc_phase_e phase;  /* current allocation phase; only advances until cleared */
} ZSTD_cwksp;
/*-************************************* | |||
* Functions | |||
***************************************/ | |||
MEM_STATIC size_t ZSTD_cwksp_available_space(ZSTD_cwksp* ws); | |||
/* Sanity-checks the ordering invariants between the workspace's internal
 * pointers. Debug builds only : compiles to nothing under NDEBUG. */
MEM_STATIC void ZSTD_cwksp_assert_internal_consistency(ZSTD_cwksp* ws) {
    (void)ws;  /* silence unused-parameter warning when asserts are disabled */
    assert(ws->workspace <= ws->objectEnd);
    assert(ws->objectEnd <= ws->tableEnd);
    assert(ws->objectEnd <= ws->tableValidEnd);
    assert(ws->tableEnd <= ws->allocStart);
    assert(ws->tableValidEnd <= ws->allocStart);
    assert(ws->allocStart <= ws->workspaceEnd);
}
/** | |||
* Align must be a power of 2. | |||
*/ | |||
MEM_STATIC size_t ZSTD_cwksp_align(size_t size, size_t const align) { | |||
size_t const mask = align - 1; | |||
assert((align & mask) == 0); | |||
return (size + mask) & ~mask; | |||
} | |||
/**
 * Use this to determine how much space in the workspace we will consume to
 * allocate this object. (Normally it should be exactly the size of the object,
 * but under special conditions, like ASAN, where we pad each object, it might
 * be larger.)
 *
 * Since tables aren't currently redzoned, you don't need to call through this
 * to figure out how much space you need for the matchState tables. Everything
 * else is though.
 */
MEM_STATIC size_t ZSTD_cwksp_alloc_size(size_t size) {
#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
    /* one redzone before the object and one after it */
    return size + 2 * ZSTD_CWKSP_ASAN_REDZONE_SIZE;
#else
    return size;
#endif
}
/* Advances the workspace into `phase`, applying the one-time adjustments a
 * phase transition requires. Phases only move forward (asserted below);
 * requesting the current phase is a no-op. */
MEM_STATIC void ZSTD_cwksp_internal_advance_phase(
        ZSTD_cwksp* ws, ZSTD_cwksp_alloc_phase_e phase) {
    assert(phase >= ws->phase);
    if (phase > ws->phase) {
        if (ws->phase < ZSTD_cwksp_alloc_buffers &&
                phase >= ZSTD_cwksp_alloc_buffers) {
            /* entering the buffers phase : reset the valid-table watermark
             * to the end of the objects region */
            ws->tableValidEnd = ws->objectEnd;
        }
        if (ws->phase < ZSTD_cwksp_alloc_aligned &&
                phase >= ZSTD_cwksp_alloc_aligned) {
            /* If unaligned allocations down from a too-large top have left us
             * unaligned, we need to realign our alloc ptr. Technically, this
             * can consume space that is unaccounted for in the neededSpace
             * calculation. However, I believe this can only happen when the
             * workspace is too large, and specifically when it is too large
             * by a larger margin than the space that will be consumed. */
            /* TODO: cleaner, compiler warning friendly way to do this??? */
            ws->allocStart = (BYTE*)ws->allocStart - ((size_t)ws->allocStart & (sizeof(U32)-1));
            if (ws->allocStart < ws->tableValidEnd) {
                ws->tableValidEnd = ws->allocStart;
            }
        }
        ws->phase = phase;
    }
}
/** | |||
* Returns whether this object/buffer/etc was allocated in this workspace. | |||
*/ | |||
MEM_STATIC int ZSTD_cwksp_owns_buffer(const ZSTD_cwksp* ws, const void* ptr) { | |||
return (ptr != NULL) && (ws->workspace <= ptr) && (ptr <= ws->workspaceEnd); | |||
} | |||
/**
 * Internal function. Do not use directly.
 * Reserves `bytes` from the top of the workspace (allocStart moves down);
 * serves both the buffers and aligned phases.
 * @return pointer to the reserved region, or NULL (with ws->allocFailed set)
 *         when the region would collide with the tables growing up from below.
 */
MEM_STATIC void* ZSTD_cwksp_reserve_internal(
        ZSTD_cwksp* ws, size_t bytes, ZSTD_cwksp_alloc_phase_e phase) {
    void* alloc;
    void* bottom = ws->tableEnd;
    ZSTD_cwksp_internal_advance_phase(ws, phase);
    /* NOTE(review): if bytes exceeds the space above tableEnd, this pointer
     * subtraction lands below the valid region before the bounds check below
     * catches it — presumably callers pre-size the workspace; confirm. */
    alloc = (BYTE *)ws->allocStart - bytes;
#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
    /* over-reserve space */
    alloc = (BYTE *)alloc - 2 * ZSTD_CWKSP_ASAN_REDZONE_SIZE;
#endif
    DEBUGLOG(5, "cwksp: reserving %p %zd bytes, %zd bytes remaining",
        alloc, bytes, ZSTD_cwksp_available_space(ws) - bytes);
    ZSTD_cwksp_assert_internal_consistency(ws);
    /* debug builds trap here; release builds fall through to the check below */
    assert(alloc >= bottom);
    if (alloc < bottom) {
        DEBUGLOG(4, "cwksp: alloc failed!");
        ws->allocFailed = 1;
        return NULL;
    }
    /* the new allocation overlaps the (formerly) valid table area */
    if (alloc < ws->tableValidEnd) {
        ws->tableValidEnd = alloc;
    }
    ws->allocStart = alloc;
#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
    /* Move alloc so there's ZSTD_CWKSP_ASAN_REDZONE_SIZE unused space on
     * either size. */
    alloc = (BYTE *)alloc + ZSTD_CWKSP_ASAN_REDZONE_SIZE;
    __asan_unpoison_memory_region(alloc, bytes);
#endif
    return alloc;
}
/** | |||
* Reserves and returns unaligned memory. | |||
*/ | |||
MEM_STATIC BYTE* ZSTD_cwksp_reserve_buffer(ZSTD_cwksp* ws, size_t bytes) { | |||
return (BYTE*)ZSTD_cwksp_reserve_internal(ws, bytes, ZSTD_cwksp_alloc_buffers); | |||
} | |||
/** | |||
* Reserves and returns memory sized on and aligned on sizeof(unsigned). | |||
*/ | |||
MEM_STATIC void* ZSTD_cwksp_reserve_aligned(ZSTD_cwksp* ws, size_t bytes) { | |||
assert((bytes & (sizeof(U32)-1)) == 0); | |||
return ZSTD_cwksp_reserve_internal(ws, ZSTD_cwksp_align(bytes, sizeof(U32)), ZSTD_cwksp_alloc_aligned); | |||
} | |||
/**
 * Aligned on sizeof(unsigned). These buffers have the special property that
 * their values remain constrained, allowing us to re-use them without
 * memset()-ing them.
 * Unlike the other reserve functions, tables grow *up* from tableEnd and
 * fail when they would collide with allocStart coming down from the top.
 */
MEM_STATIC void* ZSTD_cwksp_reserve_table(ZSTD_cwksp* ws, size_t bytes) {
    const ZSTD_cwksp_alloc_phase_e phase = ZSTD_cwksp_alloc_aligned;
    void* alloc = ws->tableEnd;
    void* end = (BYTE *)alloc + bytes;
    void* top = ws->allocStart;
    DEBUGLOG(5, "cwksp: reserving %p table %zd bytes, %zd bytes remaining",
        alloc, bytes, ZSTD_cwksp_available_space(ws) - bytes);
    assert((bytes & (sizeof(U32)-1)) == 0);  /* caller must pass a 4-byte multiple */
    ZSTD_cwksp_internal_advance_phase(ws, phase);
    ZSTD_cwksp_assert_internal_consistency(ws);
    /* debug builds trap; release builds fall through to the runtime check */
    assert(end <= top);
    if (end > top) {
        DEBUGLOG(4, "cwksp: table alloc failed!");
        ws->allocFailed = 1;
        return NULL;
    }
    ws->tableEnd = end;
#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
    __asan_unpoison_memory_region(alloc, bytes);
#endif
    return alloc;
}
/**
 * Aligned on sizeof(void*).
 * Objects can only be reserved while still in the first (objects) phase;
 * since the phase never goes backwards, this fails after any buffer,
 * aligned, or table reservation has been made.
 */
MEM_STATIC void* ZSTD_cwksp_reserve_object(ZSTD_cwksp* ws, size_t bytes) {
    size_t roundedBytes = ZSTD_cwksp_align(bytes, sizeof(void*));
    void* alloc = ws->objectEnd;
    void* end = (BYTE*)alloc + roundedBytes;
#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
    /* over-reserve space */
    end = (BYTE *)end + 2 * ZSTD_CWKSP_ASAN_REDZONE_SIZE;
#endif
    DEBUGLOG(5,
        "cwksp: reserving %p object %zd bytes (rounded to %zd), %zd bytes remaining",
        alloc, bytes, roundedBytes, ZSTD_cwksp_available_space(ws) - roundedBytes);
    assert(((size_t)alloc & (sizeof(void*)-1)) == 0);
    assert((bytes & (sizeof(void*)-1)) == 0);
    ZSTD_cwksp_assert_internal_consistency(ws);
    /* we must be in the first phase, no advance is possible */
    if (ws->phase != ZSTD_cwksp_alloc_objects || end > ws->workspaceEnd) {
        DEBUGLOG(4, "cwksp: object alloc failed!");
        ws->allocFailed = 1;
        return NULL;
    }
    /* objects also raise the floor from which tables will grow */
    ws->objectEnd = end;
    ws->tableEnd = end;
    ws->tableValidEnd = end;
#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
    /* Move alloc so there's ZSTD_CWKSP_ASAN_REDZONE_SIZE unused space on
     * either size. */
    alloc = (BYTE *)alloc + ZSTD_CWKSP_ASAN_REDZONE_SIZE;
    __asan_unpoison_memory_region(alloc, bytes);
#endif
    return alloc;
}
/* Declares the whole table area "dirty" : its contents may no longer satisfy
 * the bounded-values guarantee, so it must be cleaned (or zeroed) before the
 * tables are re-used. */
MEM_STATIC void ZSTD_cwksp_mark_tables_dirty(ZSTD_cwksp* ws) {
    DEBUGLOG(4, "cwksp: ZSTD_cwksp_mark_tables_dirty");
#if defined (MEMORY_SANITIZER) && !defined (ZSTD_MSAN_DONT_POISON_WORKSPACE)
    /* To validate that the table re-use logic is sound, and that we don't
     * access table space that we haven't cleaned, we re-"poison" the table
     * space every time we mark it dirty. */
    {
        size_t size = (BYTE*)ws->tableValidEnd - (BYTE*)ws->objectEnd;
        assert(__msan_test_shadow(ws->objectEnd, size) == -1);
        __msan_poison(ws->objectEnd, size);
    }
#endif
    assert(ws->tableValidEnd >= ws->objectEnd);
    assert(ws->tableValidEnd <= ws->allocStart);
    ws->tableValidEnd = ws->objectEnd;  /* nothing in the table area is valid anymore */
    ZSTD_cwksp_assert_internal_consistency(ws);
}
/* Declares all currently allocated tables valid : raises the valid-table
 * watermark up to tableEnd (it is never lowered here). */
MEM_STATIC void ZSTD_cwksp_mark_tables_clean(ZSTD_cwksp* ws) {
    DEBUGLOG(4, "cwksp: ZSTD_cwksp_mark_tables_clean");
    assert(ws->tableValidEnd >= ws->objectEnd);
    assert(ws->tableValidEnd <= ws->allocStart);
    ws->tableValidEnd = (ws->tableValidEnd < ws->tableEnd) ? ws->tableEnd : ws->tableValidEnd;
    ZSTD_cwksp_assert_internal_consistency(ws);
}
/** | |||
* Zero the part of the allocated tables not already marked clean. | |||
*/ | |||
MEM_STATIC void ZSTD_cwksp_clean_tables(ZSTD_cwksp* ws) { | |||
DEBUGLOG(4, "cwksp: ZSTD_cwksp_clean_tables"); | |||
assert(ws->tableValidEnd >= ws->objectEnd); | |||
assert(ws->tableValidEnd <= ws->allocStart); | |||
if (ws->tableValidEnd < ws->tableEnd) { | |||
memset(ws->tableValidEnd, 0, (BYTE*)ws->tableEnd - (BYTE*)ws->tableValidEnd); | |||
} | |||
ZSTD_cwksp_mark_tables_clean(ws); | |||
} | |||
/**
 * Invalidates table allocations.
 * All other allocations remain valid.
 */
MEM_STATIC void ZSTD_cwksp_clear_tables(ZSTD_cwksp* ws) {
    DEBUGLOG(4, "cwksp: clearing tables!");
#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
    /* re-poison the dropped table region so stale accesses are caught */
    {
        size_t size = (BYTE*)ws->tableValidEnd - (BYTE*)ws->objectEnd;
        __asan_poison_memory_region(ws->objectEnd, size);
    }
#endif
    ws->tableEnd = ws->objectEnd;  /* tables now occupy zero bytes */
    ZSTD_cwksp_assert_internal_consistency(ws);
}
/**
 * Invalidates all buffer, aligned, and table allocations.
 * Object allocations remain valid.
 */
MEM_STATIC void ZSTD_cwksp_clear(ZSTD_cwksp* ws) {
    DEBUGLOG(4, "cwksp: clearing!");
#if defined (MEMORY_SANITIZER) && !defined (ZSTD_MSAN_DONT_POISON_WORKSPACE)
    /* To validate that the context re-use logic is sound, and that we don't
     * access stuff that this compression hasn't initialized, we re-"poison"
     * the workspace (or at least the non-static, non-table parts of it)
     * every time we start a new compression. */
    {
        size_t size = (BYTE*)ws->workspaceEnd - (BYTE*)ws->tableValidEnd;
        __msan_poison(ws->tableValidEnd, size);
    }
#endif
#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
    {
        size_t size = (BYTE*)ws->workspaceEnd - (BYTE*)ws->objectEnd;
        __asan_poison_memory_region(ws->objectEnd, size);
    }
#endif
    ws->tableEnd = ws->objectEnd;       /* drop all tables */
    ws->allocStart = ws->workspaceEnd;  /* drop all buffers / aligned allocs */
    ws->allocFailed = 0;
    if (ws->phase > ZSTD_cwksp_alloc_buffers) {
        ws->phase = ZSTD_cwksp_alloc_buffers;  /* buffers may be reserved again */
    }
    ZSTD_cwksp_assert_internal_consistency(ws);
}
/**
 * The provided workspace takes ownership of the buffer [start, start+size).
 * Any existing values in the workspace are ignored (the previously managed
 * buffer, if present, must be separately freed).
 */
MEM_STATIC void ZSTD_cwksp_init(ZSTD_cwksp* ws, void* start, size_t size) {
    DEBUGLOG(4, "cwksp: init'ing workspace with %zd bytes", size);
    assert(((size_t)start & (sizeof(void*)-1)) == 0); /* ensure correct alignment */
    ws->workspace = start;
    ws->workspaceEnd = (BYTE*)start + size;
    ws->objectEnd = ws->workspace;
    ws->tableValidEnd = ws->objectEnd;
    ws->phase = ZSTD_cwksp_alloc_objects;
    /* ZSTD_cwksp_clear() initializes the remaining fields
     * (tableEnd, allocStart, allocFailed) */
    ZSTD_cwksp_clear(ws);
    ws->workspaceOversizedDuration = 0;
    ZSTD_cwksp_assert_internal_consistency(ws);
}
/* Allocates a buffer of `size` bytes via customMem and hands it to the
 * workspace. Returns 0 on success, or a memory_allocation error code. */
MEM_STATIC size_t ZSTD_cwksp_create(ZSTD_cwksp* ws, size_t size, ZSTD_customMem customMem) {
    void* const buffer = ZSTD_malloc(size, customMem);
    DEBUGLOG(4, "cwksp: creating new workspace with %zd bytes", size);
    RETURN_ERROR_IF(buffer == NULL, memory_allocation, "NULL pointer!");
    ZSTD_cwksp_init(ws, buffer, size);
    return 0;
}
/* Releases the managed buffer via customMem and scrubs the workspace struct
 * so stale pointers cannot be misused. */
MEM_STATIC void ZSTD_cwksp_free(ZSTD_cwksp* ws, ZSTD_customMem customMem) {
    void* const buffer = ws->workspace;  /* saved before the struct is scrubbed */
    DEBUGLOG(4, "cwksp: freeing workspace");
    memset(ws, 0, sizeof(ZSTD_cwksp));
    ZSTD_free(buffer, customMem);
}
/**
 * Moves the management of a workspace from one cwksp to another. The src cwksp
 * is left in an invalid state (src must be re-init()'ed before its used again).
 */
MEM_STATIC void ZSTD_cwksp_move(ZSTD_cwksp* dst, ZSTD_cwksp* src) {
    *dst = *src;                          /* shallow copy: dst now manages the buffer */
    memset(src, 0, sizeof(ZSTD_cwksp));   /* wipe src so its stale pointers cannot be reused */
}
/** @return : total number of bytes under workspace management. */
MEM_STATIC size_t ZSTD_cwksp_sizeof(const ZSTD_cwksp* ws) {
    BYTE* const begin = (BYTE*)ws->workspace;
    BYTE* const end   = (BYTE*)ws->workspaceEnd;
    return (size_t)(end - begin);
}
MEM_STATIC int ZSTD_cwksp_reserve_failed(const ZSTD_cwksp* ws) { | |||
return ws->allocFailed; | |||
} | |||
/*-************************************* | |||
* Functions Checking Free Space | |||
***************************************/ | |||
/** @return : bytes still free between the table area (growing up) and the
 *  aligned-alloc area (growing down). */
MEM_STATIC size_t ZSTD_cwksp_available_space(ZSTD_cwksp* ws) {
    BYTE* const lowMark  = (BYTE*)ws->tableEnd;
    BYTE* const highMark = (BYTE*)ws->allocStart;
    return (size_t)(highMark - lowMark);
}
/** @return : nonzero when the free region can absorb `additionalNeededSpace`. */
MEM_STATIC int ZSTD_cwksp_check_available(ZSTD_cwksp* ws, size_t additionalNeededSpace) {
    return additionalNeededSpace <= ZSTD_cwksp_available_space(ws);
}
/** A workspace counts as oversized when it could hold
 *  ZSTD_WORKSPACETOOLARGE_FACTOR times the currently-needed space. */
MEM_STATIC int ZSTD_cwksp_check_too_large(ZSTD_cwksp* ws, size_t additionalNeededSpace) {
    size_t const oversizedThreshold = additionalNeededSpace * ZSTD_WORKSPACETOOLARGE_FACTOR;
    return ZSTD_cwksp_check_available(ws, oversizedThreshold);
}
/** Wasteful = oversized for longer than the tolerated number of compressions. */
MEM_STATIC int ZSTD_cwksp_check_wasteful(ZSTD_cwksp* ws, size_t additionalNeededSpace) {
    if (!ZSTD_cwksp_check_too_large(ws, additionalNeededSpace)) return 0;
    return ws->workspaceOversizedDuration > ZSTD_WORKSPACETOOLARGE_MAXDURATION;
}
MEM_STATIC void ZSTD_cwksp_bump_oversized_duration( | |||
ZSTD_cwksp* ws, size_t additionalNeededSpace) { | |||
if (ZSTD_cwksp_check_too_large(ws, additionalNeededSpace)) { | |||
ws->workspaceOversizedDuration++; | |||
} else { | |||
ws->workspaceOversizedDuration = 0; | |||
} | |||
} | |||
#if defined (__cplusplus) | |||
} | |||
#endif | |||
#endif /* ZSTD_CWKSP_H */ |
@@ -0,0 +1,244 @@ | |||
/* | |||
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. | |||
* All rights reserved. | |||
* | |||
* This source code is licensed under both the BSD-style license (found in the | |||
* LICENSE file in the root directory of this source tree) and the GPLv2 (found | |||
* in the COPYING file in the root directory of this source tree). | |||
* You may select, at your option, one of the above-listed licenses. | |||
*/ | |||
/* zstd_ddict.c : | |||
* concentrates all logic that needs to know the internals of ZSTD_DDict object */ | |||
/*-******************************************************* | |||
* Dependencies | |||
*********************************************************/ | |||
#include <string.h> /* memcpy, memmove, memset */ | |||
#include "cpu.h" /* bmi2 */ | |||
#include "mem.h" /* low level memory routines */ | |||
#define FSE_STATIC_LINKING_ONLY | |||
#include "fse.h" | |||
#define HUF_STATIC_LINKING_ONLY | |||
#include "huf.h" | |||
#include "zstd_decompress_internal.h" | |||
#include "zstd_ddict.h" | |||
#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1) | |||
# include "../legacy/zstd_legacy.h" | |||
#endif | |||
/*-******************************************************* | |||
* Types | |||
*********************************************************/ | |||
struct ZSTD_DDict_s {
    void* dictBuffer;               /* internal copy of the dictionary; NULL when loaded byRef */
    const void* dictContent;        /* points at dictBuffer, or at the caller's buffer when byRef */
    size_t dictSize;                /* number of bytes at dictContent */
    ZSTD_entropyDTables_t entropy;  /* pre-decoded entropy tables; valid only when entropyPresent */
    U32 dictID;                     /* 0 when the dictionary is non-conformant (content-only) */
    U32 entropyPresent;             /* 1 once ZSTD_loadDEntropy() succeeded on this content */
    ZSTD_customMem cMem;            /* allocator used for this DDict and its internal buffer */
};  /* typedef'd to ZSTD_DDict within "zstd.h" */
const void* ZSTD_DDict_dictContent(const ZSTD_DDict* ddict) | |||
{ | |||
assert(ddict != NULL); | |||
return ddict->dictContent; | |||
} | |||
size_t ZSTD_DDict_dictSize(const ZSTD_DDict* ddict) | |||
{ | |||
assert(ddict != NULL); | |||
return ddict->dictSize; | |||
} | |||
/*! ZSTD_copyDDictParameters() :
 *  Points dctx's decoding state at the ddict's content and pre-loaded
 *  entropy tables. Only pointers are shared; the dictionary body is not copied. */
void ZSTD_copyDDictParameters(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict)
{
    DEBUGLOG(4, "ZSTD_copyDDictParameters");
    assert(dctx != NULL);
    assert(ddict != NULL);
    dctx->dictID = ddict->dictID;
    dctx->prefixStart = ddict->dictContent;
    dctx->virtualStart = ddict->dictContent;
    dctx->dictEnd = (const BYTE*)ddict->dictContent + ddict->dictSize;
    dctx->previousDstEnd = dctx->dictEnd;
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
    dctx->dictContentBeginForFuzzing = dctx->prefixStart;
    dctx->dictContentEndForFuzzing = dctx->previousDstEnd;
#endif
    if (ddict->entropyPresent) {
        /* reference the ddict's pre-built tables instead of rebuilding them */
        dctx->litEntropy = 1;
        dctx->fseEntropy = 1;
        dctx->LLTptr = ddict->entropy.LLTable;
        dctx->MLTptr = ddict->entropy.MLTable;
        dctx->OFTptr = ddict->entropy.OFTable;
        dctx->HUFptr = ddict->entropy.hufTable;
        dctx->entropy.rep[0] = ddict->entropy.rep[0];
        dctx->entropy.rep[1] = ddict->entropy.rep[1];
        dctx->entropy.rep[2] = ddict->entropy.rep[2];
    } else {
        /* no entropy tables were loaded from this dictionary */
        dctx->litEntropy = 0;
        dctx->fseEntropy = 0;
    }
}
static size_t | |||
ZSTD_loadEntropy_intoDDict(ZSTD_DDict* ddict, | |||
ZSTD_dictContentType_e dictContentType) | |||
{ | |||
ddict->dictID = 0; | |||
ddict->entropyPresent = 0; | |||
if (dictContentType == ZSTD_dct_rawContent) return 0; | |||
if (ddict->dictSize < 8) { | |||
if (dictContentType == ZSTD_dct_fullDict) | |||
return ERROR(dictionary_corrupted); /* only accept specified dictionaries */ | |||
return 0; /* pure content mode */ | |||
} | |||
{ U32 const magic = MEM_readLE32(ddict->dictContent); | |||
if (magic != ZSTD_MAGIC_DICTIONARY) { | |||
if (dictContentType == ZSTD_dct_fullDict) | |||
return ERROR(dictionary_corrupted); /* only accept specified dictionaries */ | |||
return 0; /* pure content mode */ | |||
} | |||
} | |||
ddict->dictID = MEM_readLE32((const char*)ddict->dictContent + ZSTD_FRAMEIDSIZE); | |||
/* load entropy tables */ | |||
RETURN_ERROR_IF(ZSTD_isError(ZSTD_loadDEntropy( | |||
&ddict->entropy, ddict->dictContent, ddict->dictSize)), | |||
dictionary_corrupted, ""); | |||
ddict->entropyPresent = 1; | |||
return 0; | |||
} | |||
static size_t ZSTD_initDDict_internal(ZSTD_DDict* ddict, | |||
const void* dict, size_t dictSize, | |||
ZSTD_dictLoadMethod_e dictLoadMethod, | |||
ZSTD_dictContentType_e dictContentType) | |||
{ | |||
if ((dictLoadMethod == ZSTD_dlm_byRef) || (!dict) || (!dictSize)) { | |||
ddict->dictBuffer = NULL; | |||
ddict->dictContent = dict; | |||
if (!dict) dictSize = 0; | |||
} else { | |||
void* const internalBuffer = ZSTD_malloc(dictSize, ddict->cMem); | |||
ddict->dictBuffer = internalBuffer; | |||
ddict->dictContent = internalBuffer; | |||
if (!internalBuffer) return ERROR(memory_allocation); | |||
memcpy(internalBuffer, dict, dictSize); | |||
} | |||
ddict->dictSize = dictSize; | |||
ddict->entropy.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001); /* cover both little and big endian */ | |||
/* parse dictionary content */ | |||
FORWARD_IF_ERROR( ZSTD_loadEntropy_intoDDict(ddict, dictContentType) , ""); | |||
return 0; | |||
} | |||
ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize, | |||
ZSTD_dictLoadMethod_e dictLoadMethod, | |||
ZSTD_dictContentType_e dictContentType, | |||
ZSTD_customMem customMem) | |||
{ | |||
if (!customMem.customAlloc ^ !customMem.customFree) return NULL; | |||
{ ZSTD_DDict* const ddict = (ZSTD_DDict*) ZSTD_malloc(sizeof(ZSTD_DDict), customMem); | |||
if (ddict == NULL) return NULL; | |||
ddict->cMem = customMem; | |||
{ size_t const initResult = ZSTD_initDDict_internal(ddict, | |||
dict, dictSize, | |||
dictLoadMethod, dictContentType); | |||
if (ZSTD_isError(initResult)) { | |||
ZSTD_freeDDict(ddict); | |||
return NULL; | |||
} } | |||
return ddict; | |||
} | |||
} | |||
/*! ZSTD_createDDict() : | |||
* Create a digested dictionary, to start decompression without startup delay. | |||
* `dict` content is copied inside DDict. | |||
* Consequently, `dict` can be released after `ZSTD_DDict` creation */ | |||
ZSTD_DDict* ZSTD_createDDict(const void* dict, size_t dictSize) | |||
{ | |||
ZSTD_customMem const allocator = { NULL, NULL, NULL }; | |||
return ZSTD_createDDict_advanced(dict, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto, allocator); | |||
} | |||
/*! ZSTD_createDDict_byReference() : | |||
* Create a digested dictionary, to start decompression without startup delay. | |||
* Dictionary content is simply referenced, it will be accessed during decompression. | |||
* Warning : dictBuffer must outlive DDict (DDict must be freed before dictBuffer) */ | |||
ZSTD_DDict* ZSTD_createDDict_byReference(const void* dictBuffer, size_t dictSize) | |||
{ | |||
ZSTD_customMem const allocator = { NULL, NULL, NULL }; | |||
return ZSTD_createDDict_advanced(dictBuffer, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto, allocator); | |||
} | |||
const ZSTD_DDict* ZSTD_initStaticDDict( | |||
void* sBuffer, size_t sBufferSize, | |||
const void* dict, size_t dictSize, | |||
ZSTD_dictLoadMethod_e dictLoadMethod, | |||
ZSTD_dictContentType_e dictContentType) | |||
{ | |||
size_t const neededSpace = sizeof(ZSTD_DDict) | |||
+ (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize); | |||
ZSTD_DDict* const ddict = (ZSTD_DDict*)sBuffer; | |||
assert(sBuffer != NULL); | |||
assert(dict != NULL); | |||
if ((size_t)sBuffer & 7) return NULL; /* 8-aligned */ | |||
if (sBufferSize < neededSpace) return NULL; | |||
if (dictLoadMethod == ZSTD_dlm_byCopy) { | |||
memcpy(ddict+1, dict, dictSize); /* local copy */ | |||
dict = ddict+1; | |||
} | |||
if (ZSTD_isError( ZSTD_initDDict_internal(ddict, | |||
dict, dictSize, | |||
ZSTD_dlm_byRef, dictContentType) )) | |||
return NULL; | |||
return ddict; | |||
} | |||
/*! ZSTD_freeDDict() :
 *  Releases a DDict created by ZSTD_createDDict*() along with its internal
 *  buffer (if any). Accepts NULL. Always returns 0. */
size_t ZSTD_freeDDict(ZSTD_DDict* ddict)
{
    ZSTD_customMem cMem;
    if (ddict == NULL) return 0;   /* support free on NULL */
    cMem = ddict->cMem;            /* copy the allocator before releasing ddict itself */
    ZSTD_free(ddict->dictBuffer, cMem);
    ZSTD_free(ddict, cMem);
    return 0;
}
/*! ZSTD_estimateDDictSize() : | |||
* Estimate amount of memory that will be needed to create a dictionary for decompression. | |||
* Note : dictionary created by reference using ZSTD_dlm_byRef are smaller */ | |||
size_t ZSTD_estimateDDictSize(size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod) | |||
{ | |||
return sizeof(ZSTD_DDict) + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize); | |||
} | |||
size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict) | |||
{ | |||
if (ddict==NULL) return 0; /* support sizeof on NULL */ | |||
return sizeof(*ddict) + (ddict->dictBuffer ? ddict->dictSize : 0) ; | |||
} | |||
/*! ZSTD_getDictID_fromDDict() : | |||
* Provides the dictID of the dictionary loaded into `ddict`. | |||
* If @return == 0, the dictionary is not conformant to Zstandard specification, or empty. | |||
* Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */ | |||
unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict) | |||
{ | |||
if (ddict==NULL) return 0; | |||
return ZSTD_getDictID_fromDict(ddict->dictContent, ddict->dictSize); | |||
} |
@@ -0,0 +1,44 @@ | |||
/* | |||
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. | |||
* All rights reserved. | |||
* | |||
* This source code is licensed under both the BSD-style license (found in the | |||
* LICENSE file in the root directory of this source tree) and the GPLv2 (found | |||
* in the COPYING file in the root directory of this source tree). | |||
* You may select, at your option, one of the above-listed licenses. | |||
*/ | |||
#ifndef ZSTD_DDICT_H | |||
#define ZSTD_DDICT_H | |||
/*-******************************************************* | |||
* Dependencies | |||
*********************************************************/ | |||
#include <stddef.h> /* size_t */ | |||
#include "../zstd.h" /* ZSTD_DDict, and several public functions */ | |||
/*-******************************************************* | |||
* Interface | |||
*********************************************************/ | |||
/* note: several prototypes are already published in `zstd.h` : | |||
* ZSTD_createDDict() | |||
* ZSTD_createDDict_byReference() | |||
* ZSTD_createDDict_advanced() | |||
* ZSTD_freeDDict() | |||
* ZSTD_initStaticDDict() | |||
* ZSTD_sizeof_DDict() | |||
* ZSTD_estimateDDictSize() | |||
* ZSTD_getDictID_fromDict() | |||
*/ | |||
const void* ZSTD_DDict_dictContent(const ZSTD_DDict* ddict); | |||
size_t ZSTD_DDict_dictSize(const ZSTD_DDict* ddict); | |||
void ZSTD_copyDDictParameters(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict); | |||
#endif /* ZSTD_DDICT_H */ |
@@ -0,0 +1,59 @@ | |||
/* | |||
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. | |||
* All rights reserved. | |||
* | |||
* This source code is licensed under both the BSD-style license (found in the | |||
* LICENSE file in the root directory of this source tree) and the GPLv2 (found | |||
* in the COPYING file in the root directory of this source tree). | |||
* You may select, at your option, one of the above-listed licenses. | |||
*/ | |||
#ifndef ZSTD_DEC_BLOCK_H | |||
#define ZSTD_DEC_BLOCK_H | |||
/*-******************************************************* | |||
* Dependencies | |||
*********************************************************/ | |||
#include <stddef.h> /* size_t */ | |||
#include "zstd.h" /* DCtx, and some public functions */ | |||
#include "zstd_internal.h" /* blockProperties_t, and some public functions */ | |||
#include "zstd_decompress_internal.h" /* ZSTD_seqSymbol */ | |||
/* === Prototypes === */ | |||
/* note: prototypes already published within `zstd.h` : | |||
* ZSTD_decompressBlock() | |||
*/ | |||
/* note: prototypes already published within `zstd_internal.h` : | |||
* ZSTD_getcBlockSize() | |||
* ZSTD_decodeSeqHeaders() | |||
*/ | |||
/* ZSTD_decompressBlock_internal() : | |||
* decompress block, starting at `src`, | |||
* into destination buffer `dst`. | |||
* @return : decompressed block size, | |||
* or an error code (which can be tested using ZSTD_isError()) | |||
*/ | |||
size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx, | |||
void* dst, size_t dstCapacity, | |||
const void* src, size_t srcSize, const int frame); | |||
/* ZSTD_buildFSETable() : | |||
* generate FSE decoding table for one symbol (ll, ml or off) | |||
* this function must be called with valid parameters only | |||
* (dt is large enough, normalizedCounter distribution total is a power of 2, max is within range, etc.) | |||
* in which case it cannot fail. | |||
* Internal use only. | |||
*/ | |||
void ZSTD_buildFSETable(ZSTD_seqSymbol* dt, | |||
const short* normalizedCounter, unsigned maxSymbolValue, | |||
const U32* baseValue, const U32* nbAdditionalBits, | |||
unsigned tableLog); | |||
#endif /* ZSTD_DEC_BLOCK_H */ |
@@ -0,0 +1,189 @@ | |||
/* | |||
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. | |||
* All rights reserved. | |||
* | |||
* This source code is licensed under both the BSD-style license (found in the | |||
* LICENSE file in the root directory of this source tree) and the GPLv2 (found | |||
* in the COPYING file in the root directory of this source tree). | |||
* You may select, at your option, one of the above-listed licenses. | |||
*/ | |||
/* zstd_decompress_internal: | |||
* objects and definitions shared within lib/decompress modules */ | |||
#ifndef ZSTD_DECOMPRESS_INTERNAL_H | |||
#define ZSTD_DECOMPRESS_INTERNAL_H | |||
/*-******************************************************* | |||
* Dependencies | |||
*********************************************************/ | |||
#include "mem.h" /* BYTE, U16, U32 */ | |||
#include "zstd_internal.h" /* ZSTD_seqSymbol */ | |||
/*-******************************************************* | |||
* Constants | |||
*********************************************************/ | |||
/* Baseline value for each literal-length code; the decoded length is the
 * base plus the extra bits read from the stream (Zstandard format, RFC 8878). */
static const U32 LL_base[MaxLL+1] = {
                 0,    1,    2,     3,     4,     5,     6,      7,
                 8,    9,   10,    11,    12,    13,    14,     15,
                16,   18,   20,    22,    24,    28,    32,     40,
                48,   64, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000,
                0x2000, 0x4000, 0x8000, 0x10000 };

/* Baseline offset value for each offset code. */
static const U32 OF_base[MaxOff+1] = {
                 0,        1,       1,       5,     0xD,     0x1D,     0x3D,     0x7D,
                 0xFD,   0x1FD,   0x3FD,   0x7FD,   0xFFD,   0x1FFD,   0x3FFD,   0x7FFD,
                 0xFFFD, 0x1FFFD, 0x3FFFD, 0x7FFFD, 0xFFFFD, 0x1FFFFD, 0x3FFFFD, 0x7FFFFD,
                 0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD, 0x1FFFFFFD, 0x3FFFFFFD, 0x7FFFFFFD };

/* Number of extra bits to read for each offset code (code n reads n bits). */
static const U32 OF_bits[MaxOff+1] = {
                     0,  1,  2,  3,  4,  5,  6,  7,
                     8,  9, 10, 11, 12, 13, 14, 15,
                    16, 17, 18, 19, 20, 21, 22, 23,
                    24, 25, 26, 27, 28, 29, 30, 31 };

/* Baseline value for each match-length code (minimum match length is 3). */
static const U32 ML_base[MaxML+1] = {
                     3,  4,  5,    6,     7,     8,     9,    10,
                    11, 12, 13,   14,    15,    16,    17,    18,
                    19, 20, 21,   22,    23,    24,    25,    26,
                    27, 28, 29,   30,    31,    32,    33,    34,
                    35, 37, 39,   41,    43,    47,    51,    59,
                    67, 83, 99, 0x83, 0x103, 0x203, 0x403, 0x803,
                    0x1003, 0x2003, 0x4003, 0x8003, 0x10003 };
/*-******************************************************* | |||
* Decompression types | |||
*********************************************************/ | |||
/* Header of a decoded FSE sequence table. */
typedef struct {
    U32 fastMode;   /* nonzero selects the fast decoding variant - NOTE(review): confirm at use sites */
    U32 tableLog;   /* log2 of the table size */
} ZSTD_seqSymbol_header;

/* One decoding-table entry for a sequence symbol (ll, ml or off). */
typedef struct {
    U16  nextState;         /* base for the next FSE state */
    BYTE nbAdditionalBits;  /* extra value bits to read from the stream */
    BYTE nbBits;            /* state-transition bits to read */
    U32  baseValue;         /* baseline added to the extra bits */
} ZSTD_seqSymbol;

/* +1 slot for the table header stored at index 0. */
#define SEQSYMBOL_TABLE_SIZE(log)   (1 + (1 << (log)))

typedef struct {
    ZSTD_seqSymbol LLTable[SEQSYMBOL_TABLE_SIZE(LLFSELog)];    /* Note : Space reserved for FSE Tables */
    ZSTD_seqSymbol OFTable[SEQSYMBOL_TABLE_SIZE(OffFSELog)];   /* is also used as temporary workspace while building hufTable during DDict creation */
    ZSTD_seqSymbol MLTable[SEQSYMBOL_TABLE_SIZE(MLFSELog)];    /* and therefore must be at least HUF_DECOMPRESS_WORKSPACE_SIZE large */
    HUF_DTable hufTable[HUF_DTABLE_SIZE(HufLog)];  /* can accommodate HUF_decompress4X */
    U32 rep[ZSTD_REP_NUM];                         /* repeat-offset history */
} ZSTD_entropyDTables_t;
/* Frame-level decoding stages, advanced by ZSTD_decompressContinue(). */
typedef enum { ZSTDds_getFrameHeaderSize, ZSTDds_decodeFrameHeader,
               ZSTDds_decodeBlockHeader, ZSTDds_decompressBlock,
               ZSTDds_decompressLastBlock, ZSTDds_checkChecksum,
               ZSTDds_decodeSkippableHeader, ZSTDds_skipFrame } ZSTD_dStage;

/* Streaming-decompression stages. */
typedef enum { zdss_init=0, zdss_loadHeader,
               zdss_read, zdss_load, zdss_flush } ZSTD_dStreamStage;

/* Lifetime policy of the currently-referenced dictionary. */
typedef enum {
    ZSTD_use_indefinitely = -1, /* Use the dictionary indefinitely */
    ZSTD_dont_use = 0,          /* Do not use the dictionary (if one exists free it) */
    ZSTD_use_once = 1           /* Use the dictionary once and set to ZSTD_dont_use */
} ZSTD_dictUses_e;

/* Whether the DCtx buffers output or writes directly into a caller-stable buffer. */
typedef enum {
    ZSTD_obm_buffered = 0,  /* Buffer the output */
    ZSTD_obm_stable = 1     /* ZSTD_outBuffer is stable */
} ZSTD_outBufferMode_e;
/* Full decompression-context state, shared across lib/decompress modules. */
struct ZSTD_DCtx_s
{
    /* active entropy tables: point either into `entropy` below or into a DDict's tables */
    const ZSTD_seqSymbol* LLTptr;
    const ZSTD_seqSymbol* MLTptr;
    const ZSTD_seqSymbol* OFTptr;
    const HUF_DTable* HUFptr;
    ZSTD_entropyDTables_t entropy;            /* locally-built entropy tables */
    U32 workspace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];   /* space needed when building huffman tables */
    const void* previousDstEnd;   /* detect continuity */
    const void* prefixStart;      /* start of current segment */
    const void* virtualStart;     /* virtual start of previous segment if it was just before current one */
    const void* dictEnd;          /* end of previous segment */
    size_t expected;              /* NOTE(review): presumably nb of input bytes expected next - confirm at use sites */
    ZSTD_frameHeader fParams;
    U64 decodedSize;
    blockType_e bType;            /* used in ZSTD_decompressContinue(), store blockType between block header decoding and block decompression stages */
    ZSTD_dStage stage;
    U32 litEntropy;
    U32 fseEntropy;
    XXH64_state_t xxhState;       /* running XXH64 content checksum */
    size_t headerSize;
    ZSTD_format_e format;
    const BYTE* litPtr;
    ZSTD_customMem customMem;     /* allocator callbacks for this context */
    size_t litSize;
    size_t rleSize;
    size_t staticSize;            /* nonzero marks a statically-allocated context - NOTE(review): confirm */
    int bmi2;                     /* == 1 if the CPU supports BMI2 and 0 otherwise. CPU support is determined dynamically once per context lifetime. */

    /* dictionary */
    ZSTD_DDict* ddictLocal;
    const ZSTD_DDict* ddict;      /* set by ZSTD_initDStream_usingDDict(), or ZSTD_DCtx_refDDict() */
    U32 dictID;
    int ddictIsCold;              /* if == 1 : dictionary is "new" for working context, and presumed "cold" (not in cpu cache) */
    ZSTD_dictUses_e dictUses;

    /* streaming */
    ZSTD_dStreamStage streamStage;
    char*  inBuff;
    size_t inBuffSize;
    size_t inPos;
    size_t maxWindowSize;
    char*  outBuff;
    size_t outBuffSize;
    size_t outStart;
    size_t outEnd;
    size_t lhSize;
    void* legacyContext;
    U32 previousLegacyVersion;
    U32 legacyVersion;
    U32 hostageByte;
    int noForwardProgress;
    ZSTD_outBufferMode_e outBufferMode;
    ZSTD_outBuffer expectedOutBuffer;

    /* workspace */
    BYTE litBuffer[ZSTD_BLOCKSIZE_MAX + WILDCOPY_OVERLENGTH];   /* decoded literals for the current block */
    BYTE headerBuffer[ZSTD_FRAMEHEADERSIZE_MAX];
    size_t oversizedDuration;
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
    void const* dictContentBeginForFuzzing;
    void const* dictContentEndForFuzzing;
#endif
};  /* typedef'd to ZSTD_DCtx within "zstd.h" */
/*-******************************************************* | |||
* Shared internal functions | |||
*********************************************************/ | |||
/*! ZSTD_loadDEntropy() : | |||
* dict : must point at beginning of a valid zstd dictionary. | |||
* @return : size of dictionary header (size of magic number + dict ID + entropy tables) */ | |||
size_t ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy, | |||
const void* const dict, size_t const dictSize); | |||
/*! ZSTD_checkContinuity() : | |||
* check if next `dst` follows previous position, where decompression ended. | |||
* If yes, do nothing (continue on current segment). | |||
* If not, classify previous segment as "external dictionary", and start a new segment. | |||
* This function cannot fail. */ | |||
void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst); | |||
#endif /* ZSTD_DECOMPRESS_INTERNAL_H */ |
@@ -1,5 +1,5 @@ | |||
/* | |||
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc. | |||
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. | |||
* All rights reserved. | |||
* | |||
* This source code is licensed under both the BSD-style license (found in the | |||
@@ -8,270 +8,481 @@ | |||
* You may select, at your option, one of the above-listed licenses. | |||
*/ | |||
#include "zstd_compress_internal.h" | |||
#include "zstd_double_fast.h" | |||
void ZSTD_fillDoubleHashTable(ZSTD_CCtx* cctx, const void* end, const U32 mls) | |||
void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms, | |||
void const* end, ZSTD_dictTableLoadMethod_e dtlm) | |||
{ | |||
U32* const hashLarge = cctx->hashTable; | |||
U32 const hBitsL = cctx->appliedParams.cParams.hashLog; | |||
U32* const hashSmall = cctx->chainTable; | |||
U32 const hBitsS = cctx->appliedParams.cParams.chainLog; | |||
const BYTE* const base = cctx->base; | |||
const BYTE* ip = base + cctx->nextToUpdate; | |||
const ZSTD_compressionParameters* const cParams = &ms->cParams; | |||
U32* const hashLarge = ms->hashTable; | |||
U32 const hBitsL = cParams->hashLog; | |||
U32 const mls = cParams->minMatch; | |||
U32* const hashSmall = ms->chainTable; | |||
U32 const hBitsS = cParams->chainLog; | |||
const BYTE* const base = ms->window.base; | |||
const BYTE* ip = base + ms->nextToUpdate; | |||
const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE; | |||
const size_t fastHashFillStep = 3; | |||
while(ip <= iend) { | |||
hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip - base); | |||
hashLarge[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip - base); | |||
ip += fastHashFillStep; | |||
} | |||
const U32 fastHashFillStep = 3; | |||
/* Always insert every fastHashFillStep position into the hash tables. | |||
* Insert the other positions into the large hash table if their entry | |||
* is empty. | |||
*/ | |||
for (; ip + fastHashFillStep - 1 <= iend; ip += fastHashFillStep) { | |||
U32 const current = (U32)(ip - base); | |||
U32 i; | |||
for (i = 0; i < fastHashFillStep; ++i) { | |||
size_t const smHash = ZSTD_hashPtr(ip + i, hBitsS, mls); | |||
size_t const lgHash = ZSTD_hashPtr(ip + i, hBitsL, 8); | |||
if (i == 0) | |||
hashSmall[smHash] = current + i; | |||
if (i == 0 || hashLarge[lgHash] == 0) | |||
hashLarge[lgHash] = current + i; | |||
/* Only load extra positions for ZSTD_dtlm_full */ | |||
if (dtlm == ZSTD_dtlm_fast) | |||
break; | |||
} } | |||
} | |||
FORCE_INLINE_TEMPLATE | |||
size_t ZSTD_compressBlock_doubleFast_generic(ZSTD_CCtx* cctx, | |||
const void* src, size_t srcSize, | |||
const U32 mls) | |||
size_t ZSTD_compressBlock_doubleFast_generic( | |||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], | |||
void const* src, size_t srcSize, | |||
U32 const mls /* template */, ZSTD_dictMode_e const dictMode) | |||
{ | |||
U32* const hashLong = cctx->hashTable; | |||
const U32 hBitsL = cctx->appliedParams.cParams.hashLog; | |||
U32* const hashSmall = cctx->chainTable; | |||
const U32 hBitsS = cctx->appliedParams.cParams.chainLog; | |||
seqStore_t* seqStorePtr = &(cctx->seqStore); | |||
const BYTE* const base = cctx->base; | |||
ZSTD_compressionParameters const* cParams = &ms->cParams; | |||
U32* const hashLong = ms->hashTable; | |||
const U32 hBitsL = cParams->hashLog; | |||
U32* const hashSmall = ms->chainTable; | |||
const U32 hBitsS = cParams->chainLog; | |||
const BYTE* const base = ms->window.base; | |||
const BYTE* const istart = (const BYTE*)src; | |||
const BYTE* ip = istart; | |||
const BYTE* anchor = istart; | |||
const U32 lowestIndex = cctx->dictLimit; | |||
const BYTE* const lowest = base + lowestIndex; | |||
const U32 endIndex = (U32)((size_t)(istart - base) + srcSize); | |||
/* presumes that, if there is a dictionary, it must be using Attach mode */ | |||
const U32 prefixLowestIndex = ZSTD_getLowestPrefixIndex(ms, endIndex, cParams->windowLog); | |||
const BYTE* const prefixLowest = base + prefixLowestIndex; | |||
const BYTE* const iend = istart + srcSize; | |||
const BYTE* const ilimit = iend - HASH_READ_SIZE; | |||
U32 offset_1=seqStorePtr->rep[0], offset_2=seqStorePtr->rep[1]; | |||
U32 offset_1=rep[0], offset_2=rep[1]; | |||
U32 offsetSaved = 0; | |||
const ZSTD_matchState_t* const dms = ms->dictMatchState; | |||
const ZSTD_compressionParameters* const dictCParams = | |||
dictMode == ZSTD_dictMatchState ? | |||
&dms->cParams : NULL; | |||
const U32* const dictHashLong = dictMode == ZSTD_dictMatchState ? | |||
dms->hashTable : NULL; | |||
const U32* const dictHashSmall = dictMode == ZSTD_dictMatchState ? | |||
dms->chainTable : NULL; | |||
const U32 dictStartIndex = dictMode == ZSTD_dictMatchState ? | |||
dms->window.dictLimit : 0; | |||
const BYTE* const dictBase = dictMode == ZSTD_dictMatchState ? | |||
dms->window.base : NULL; | |||
const BYTE* const dictStart = dictMode == ZSTD_dictMatchState ? | |||
dictBase + dictStartIndex : NULL; | |||
const BYTE* const dictEnd = dictMode == ZSTD_dictMatchState ? | |||
dms->window.nextSrc : NULL; | |||
const U32 dictIndexDelta = dictMode == ZSTD_dictMatchState ? | |||
prefixLowestIndex - (U32)(dictEnd - dictBase) : | |||
0; | |||
const U32 dictHBitsL = dictMode == ZSTD_dictMatchState ? | |||
dictCParams->hashLog : hBitsL; | |||
const U32 dictHBitsS = dictMode == ZSTD_dictMatchState ? | |||
dictCParams->chainLog : hBitsS; | |||
const U32 dictAndPrefixLength = (U32)((ip - prefixLowest) + (dictEnd - dictStart)); | |||
DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_generic"); | |||
assert(dictMode == ZSTD_noDict || dictMode == ZSTD_dictMatchState); | |||
/* if a dictionary is attached, it must be within window range */ | |||
if (dictMode == ZSTD_dictMatchState) { | |||
assert(ms->window.dictLimit + (1U << cParams->windowLog) >= endIndex); | |||
} | |||
/* init */ | |||
ip += (ip==lowest); | |||
{ U32 const maxRep = (U32)(ip-lowest); | |||
ip += (dictAndPrefixLength == 0); | |||
if (dictMode == ZSTD_noDict) { | |||
U32 const current = (U32)(ip - base); | |||
U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, current, cParams->windowLog); | |||
U32 const maxRep = current - windowLow; | |||
if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0; | |||
if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0; | |||
} | |||
if (dictMode == ZSTD_dictMatchState) { | |||
/* dictMatchState repCode checks don't currently handle repCode == 0 | |||
* disabling. */ | |||
assert(offset_1 <= dictAndPrefixLength); | |||
assert(offset_2 <= dictAndPrefixLength); | |||
} | |||
/* Main Search Loop */ | |||
while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */ | |||
size_t mLength; | |||
U32 offset; | |||
size_t const h2 = ZSTD_hashPtr(ip, hBitsL, 8); | |||
size_t const h = ZSTD_hashPtr(ip, hBitsS, mls); | |||
size_t const dictHL = ZSTD_hashPtr(ip, dictHBitsL, 8); | |||
size_t const dictHS = ZSTD_hashPtr(ip, dictHBitsS, mls); | |||
U32 const current = (U32)(ip-base); | |||
U32 const matchIndexL = hashLong[h2]; | |||
U32 const matchIndexS = hashSmall[h]; | |||
U32 matchIndexS = hashSmall[h]; | |||
const BYTE* matchLong = base + matchIndexL; | |||
const BYTE* match = base + matchIndexS; | |||
const U32 repIndex = current + 1 - offset_1; | |||
const BYTE* repMatch = (dictMode == ZSTD_dictMatchState | |||
&& repIndex < prefixLowestIndex) ? | |||
dictBase + (repIndex - dictIndexDelta) : | |||
base + repIndex; | |||
hashLong[h2] = hashSmall[h] = current; /* update hash tables */ | |||
assert(offset_1 <= current); /* supposed guaranteed by construction */ | |||
if ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1))) { | |||
/* favor repcode */ | |||
/* check dictMatchState repcode */ | |||
if (dictMode == ZSTD_dictMatchState | |||
&& ((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */) | |||
&& (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { | |||
const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend; | |||
mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4; | |||
ip++; | |||
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH); | |||
goto _match_stored; | |||
} | |||
/* check noDict repcode */ | |||
if ( dictMode == ZSTD_noDict | |||
&& ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1)))) { | |||
mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4; | |||
ip++; | |||
ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, 0, mLength-MINMATCH); | |||
} else { | |||
U32 offset; | |||
if ( (matchIndexL > lowestIndex) && (MEM_read64(matchLong) == MEM_read64(ip)) ) { | |||
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH); | |||
goto _match_stored; | |||
} | |||
if (matchIndexL > prefixLowestIndex) { | |||
/* check prefix long match */ | |||
if (MEM_read64(matchLong) == MEM_read64(ip)) { | |||
mLength = ZSTD_count(ip+8, matchLong+8, iend) + 8; | |||
offset = (U32)(ip-matchLong); | |||
while (((ip>anchor) & (matchLong>lowest)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */ | |||
} else if ( (matchIndexS > lowestIndex) && (MEM_read32(match) == MEM_read32(ip)) ) { | |||
size_t const hl3 = ZSTD_hashPtr(ip+1, hBitsL, 8); | |||
U32 const matchIndexL3 = hashLong[hl3]; | |||
const BYTE* matchL3 = base + matchIndexL3; | |||
hashLong[hl3] = current + 1; | |||
if ( (matchIndexL3 > lowestIndex) && (MEM_read64(matchL3) == MEM_read64(ip+1)) ) { | |||
while (((ip>anchor) & (matchLong>prefixLowest)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */ | |||
goto _match_found; | |||
} | |||
} else if (dictMode == ZSTD_dictMatchState) { | |||
/* check dictMatchState long match */ | |||
U32 const dictMatchIndexL = dictHashLong[dictHL]; | |||
const BYTE* dictMatchL = dictBase + dictMatchIndexL; | |||
assert(dictMatchL < dictEnd); | |||
if (dictMatchL > dictStart && MEM_read64(dictMatchL) == MEM_read64(ip)) { | |||
mLength = ZSTD_count_2segments(ip+8, dictMatchL+8, iend, dictEnd, prefixLowest) + 8; | |||
offset = (U32)(current - dictMatchIndexL - dictIndexDelta); | |||
while (((ip>anchor) & (dictMatchL>dictStart)) && (ip[-1] == dictMatchL[-1])) { ip--; dictMatchL--; mLength++; } /* catch up */ | |||
goto _match_found; | |||
} } | |||
if (matchIndexS > prefixLowestIndex) { | |||
/* check prefix short match */ | |||
if (MEM_read32(match) == MEM_read32(ip)) { | |||
goto _search_next_long; | |||
} | |||
} else if (dictMode == ZSTD_dictMatchState) { | |||
/* check dictMatchState short match */ | |||
U32 const dictMatchIndexS = dictHashSmall[dictHS]; | |||
match = dictBase + dictMatchIndexS; | |||
matchIndexS = dictMatchIndexS + dictIndexDelta; | |||
if (match > dictStart && MEM_read32(match) == MEM_read32(ip)) { | |||
goto _search_next_long; | |||
} } | |||
ip += ((ip-anchor) >> kSearchStrength) + 1; | |||
#if defined(__aarch64__) | |||
PREFETCH_L1(ip+256); | |||
#endif | |||
continue; | |||
_search_next_long: | |||
{ size_t const hl3 = ZSTD_hashPtr(ip+1, hBitsL, 8); | |||
size_t const dictHLNext = ZSTD_hashPtr(ip+1, dictHBitsL, 8); | |||
U32 const matchIndexL3 = hashLong[hl3]; | |||
const BYTE* matchL3 = base + matchIndexL3; | |||
hashLong[hl3] = current + 1; | |||
/* check prefix long +1 match */ | |||
if (matchIndexL3 > prefixLowestIndex) { | |||
if (MEM_read64(matchL3) == MEM_read64(ip+1)) { | |||
mLength = ZSTD_count(ip+9, matchL3+8, iend) + 8; | |||
ip++; | |||
offset = (U32)(ip-matchL3); | |||
while (((ip>anchor) & (matchL3>lowest)) && (ip[-1] == matchL3[-1])) { ip--; matchL3--; mLength++; } /* catch up */ | |||
} else { | |||
mLength = ZSTD_count(ip+4, match+4, iend) + 4; | |||
offset = (U32)(ip-match); | |||
while (((ip>anchor) & (match>lowest)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ | |||
while (((ip>anchor) & (matchL3>prefixLowest)) && (ip[-1] == matchL3[-1])) { ip--; matchL3--; mLength++; } /* catch up */ | |||
goto _match_found; | |||
} | |||
} else { | |||
ip += ((ip-anchor) >> g_searchStrength) + 1; | |||
continue; | |||
} | |||
} else if (dictMode == ZSTD_dictMatchState) { | |||
/* check dict long +1 match */ | |||
U32 const dictMatchIndexL3 = dictHashLong[dictHLNext]; | |||
const BYTE* dictMatchL3 = dictBase + dictMatchIndexL3; | |||
assert(dictMatchL3 < dictEnd); | |||
if (dictMatchL3 > dictStart && MEM_read64(dictMatchL3) == MEM_read64(ip+1)) { | |||
mLength = ZSTD_count_2segments(ip+1+8, dictMatchL3+8, iend, dictEnd, prefixLowest) + 8; | |||
ip++; | |||
offset = (U32)(current + 1 - dictMatchIndexL3 - dictIndexDelta); | |||
while (((ip>anchor) & (dictMatchL3>dictStart)) && (ip[-1] == dictMatchL3[-1])) { ip--; dictMatchL3--; mLength++; } /* catch up */ | |||
goto _match_found; | |||
} } } | |||
/* if no long +1 match, explore the short match we found */ | |||
if (dictMode == ZSTD_dictMatchState && matchIndexS < prefixLowestIndex) { | |||
mLength = ZSTD_count_2segments(ip+4, match+4, iend, dictEnd, prefixLowest) + 4; | |||
offset = (U32)(current - matchIndexS); | |||
while (((ip>anchor) & (match>dictStart)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ | |||
} else { | |||
mLength = ZSTD_count(ip+4, match+4, iend) + 4; | |||
offset = (U32)(ip - match); | |||
while (((ip>anchor) & (match>prefixLowest)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ | |||
} | |||
offset_2 = offset_1; | |||
offset_1 = offset; | |||
/* fall-through */ | |||
ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH); | |||
} | |||
_match_found: | |||
offset_2 = offset_1; | |||
offset_1 = offset; | |||
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH); | |||
_match_stored: | |||
/* match found */ | |||
ip += mLength; | |||
anchor = ip; | |||
if (ip <= ilimit) { | |||
/* Fill Table */ | |||
hashLong[ZSTD_hashPtr(base+current+2, hBitsL, 8)] = | |||
hashSmall[ZSTD_hashPtr(base+current+2, hBitsS, mls)] = current+2; /* here because current+2 could be > iend-8 */ | |||
hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = | |||
hashSmall[ZSTD_hashPtr(ip-2, hBitsS, mls)] = (U32)(ip-2-base); | |||
/* Complementary insertion */ | |||
/* done after iLimit test, as candidates could be > iend-8 */ | |||
{ U32 const indexToInsert = current+2; | |||
hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert; | |||
hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base); | |||
hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert; | |||
hashSmall[ZSTD_hashPtr(ip-1, hBitsS, mls)] = (U32)(ip-1-base); | |||
} | |||
/* check immediate repcode */ | |||
while ( (ip <= ilimit) | |||
&& ( (offset_2>0) | |||
& (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) { | |||
/* store sequence */ | |||
size_t const rLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4; | |||
{ U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; } /* swap offset_2 <=> offset_1 */ | |||
hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip-base); | |||
hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip-base); | |||
ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, rLength-MINMATCH); | |||
ip += rLength; | |||
anchor = ip; | |||
continue; /* faster when present ... (?) */ | |||
} } } | |||
if (dictMode == ZSTD_dictMatchState) { | |||
while (ip <= ilimit) { | |||
U32 const current2 = (U32)(ip-base); | |||
U32 const repIndex2 = current2 - offset_2; | |||
const BYTE* repMatch2 = dictMode == ZSTD_dictMatchState | |||
&& repIndex2 < prefixLowestIndex ? | |||
dictBase + repIndex2 - dictIndexDelta : | |||
base + repIndex2; | |||
if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */) | |||
&& (MEM_read32(repMatch2) == MEM_read32(ip)) ) { | |||
const BYTE* const repEnd2 = repIndex2 < prefixLowestIndex ? dictEnd : iend; | |||
size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixLowest) + 4; | |||
U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */ | |||
ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2-MINMATCH); | |||
hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2; | |||
hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2; | |||
ip += repLength2; | |||
anchor = ip; | |||
continue; | |||
} | |||
break; | |||
} } | |||
if (dictMode == ZSTD_noDict) { | |||
while ( (ip <= ilimit) | |||
&& ( (offset_2>0) | |||
& (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) { | |||
/* store sequence */ | |||
size_t const rLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4; | |||
U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; /* swap offset_2 <=> offset_1 */ | |||
hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip-base); | |||
hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip-base); | |||
ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, rLength-MINMATCH); | |||
ip += rLength; | |||
anchor = ip; | |||
continue; /* faster when present ... (?) */ | |||
} } } | |||
} /* while (ip < ilimit) */ | |||
/* save reps for next block */ | |||
seqStorePtr->repToConfirm[0] = offset_1 ? offset_1 : offsetSaved; | |||
seqStorePtr->repToConfirm[1] = offset_2 ? offset_2 : offsetSaved; | |||
rep[0] = offset_1 ? offset_1 : offsetSaved; | |||
rep[1] = offset_2 ? offset_2 : offsetSaved; | |||
/* Return the last literals size */ | |||
return iend - anchor; | |||
return (size_t)(iend - anchor); | |||
} | |||
size_t ZSTD_compressBlock_doubleFast(ZSTD_CCtx* ctx, const void* src, size_t srcSize) | |||
size_t ZSTD_compressBlock_doubleFast( | |||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], | |||
void const* src, size_t srcSize) | |||
{ | |||
const U32 mls = ctx->appliedParams.cParams.searchLength; | |||
const U32 mls = ms->cParams.minMatch; | |||
switch(mls) | |||
{ | |||
default: /* includes case 3 */ | |||
case 4 : | |||
return ZSTD_compressBlock_doubleFast_generic(ctx, src, srcSize, 4); | |||
return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 4, ZSTD_noDict); | |||
case 5 : | |||
return ZSTD_compressBlock_doubleFast_generic(ctx, src, srcSize, 5); | |||
return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 5, ZSTD_noDict); | |||
case 6 : | |||
return ZSTD_compressBlock_doubleFast_generic(ctx, src, srcSize, 6); | |||
return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 6, ZSTD_noDict); | |||
case 7 : | |||
return ZSTD_compressBlock_doubleFast_generic(ctx, src, srcSize, 7); | |||
return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 7, ZSTD_noDict); | |||
} | |||
} | |||
static size_t ZSTD_compressBlock_doubleFast_extDict_generic(ZSTD_CCtx* ctx, | |||
const void* src, size_t srcSize, | |||
const U32 mls) | |||
size_t ZSTD_compressBlock_doubleFast_dictMatchState( | |||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], | |||
void const* src, size_t srcSize) | |||
{ | |||
U32* const hashLong = ctx->hashTable; | |||
U32 const hBitsL = ctx->appliedParams.cParams.hashLog; | |||
U32* const hashSmall = ctx->chainTable; | |||
U32 const hBitsS = ctx->appliedParams.cParams.chainLog; | |||
seqStore_t* seqStorePtr = &(ctx->seqStore); | |||
const BYTE* const base = ctx->base; | |||
const BYTE* const dictBase = ctx->dictBase; | |||
const U32 mls = ms->cParams.minMatch; | |||
switch(mls) | |||
{ | |||
default: /* includes case 3 */ | |||
case 4 : | |||
return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 4, ZSTD_dictMatchState); | |||
case 5 : | |||
return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 5, ZSTD_dictMatchState); | |||
case 6 : | |||
return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 6, ZSTD_dictMatchState); | |||
case 7 : | |||
return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 7, ZSTD_dictMatchState); | |||
} | |||
} | |||
static size_t ZSTD_compressBlock_doubleFast_extDict_generic( | |||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], | |||
void const* src, size_t srcSize, | |||
U32 const mls /* template */) | |||
{ | |||
ZSTD_compressionParameters const* cParams = &ms->cParams; | |||
U32* const hashLong = ms->hashTable; | |||
U32 const hBitsL = cParams->hashLog; | |||
U32* const hashSmall = ms->chainTable; | |||
U32 const hBitsS = cParams->chainLog; | |||
const BYTE* const istart = (const BYTE*)src; | |||
const BYTE* ip = istart; | |||
const BYTE* anchor = istart; | |||
const U32 lowestIndex = ctx->lowLimit; | |||
const BYTE* const dictStart = dictBase + lowestIndex; | |||
const U32 dictLimit = ctx->dictLimit; | |||
const BYTE* const lowPrefixPtr = base + dictLimit; | |||
const BYTE* const dictEnd = dictBase + dictLimit; | |||
const BYTE* const iend = istart + srcSize; | |||
const BYTE* const ilimit = iend - 8; | |||
U32 offset_1=seqStorePtr->rep[0], offset_2=seqStorePtr->rep[1]; | |||
const BYTE* const base = ms->window.base; | |||
const U32 endIndex = (U32)((size_t)(istart - base) + srcSize); | |||
const U32 lowLimit = ZSTD_getLowestMatchIndex(ms, endIndex, cParams->windowLog); | |||
const U32 dictStartIndex = lowLimit; | |||
const U32 dictLimit = ms->window.dictLimit; | |||
const U32 prefixStartIndex = (dictLimit > lowLimit) ? dictLimit : lowLimit; | |||
const BYTE* const prefixStart = base + prefixStartIndex; | |||
const BYTE* const dictBase = ms->window.dictBase; | |||
const BYTE* const dictStart = dictBase + dictStartIndex; | |||
const BYTE* const dictEnd = dictBase + prefixStartIndex; | |||
U32 offset_1=rep[0], offset_2=rep[1]; | |||
DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_extDict_generic (srcSize=%zu)", srcSize); | |||
/* if extDict is invalidated due to maxDistance, switch to "regular" variant */ | |||
if (prefixStartIndex == dictStartIndex) | |||
return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, mls, ZSTD_noDict); | |||
/* Search Loop */ | |||
while (ip < ilimit) { /* < instead of <=, because (ip+1) */ | |||
const size_t hSmall = ZSTD_hashPtr(ip, hBitsS, mls); | |||
const U32 matchIndex = hashSmall[hSmall]; | |||
const BYTE* matchBase = matchIndex < dictLimit ? dictBase : base; | |||
const BYTE* const matchBase = matchIndex < prefixStartIndex ? dictBase : base; | |||
const BYTE* match = matchBase + matchIndex; | |||
const size_t hLong = ZSTD_hashPtr(ip, hBitsL, 8); | |||
const U32 matchLongIndex = hashLong[hLong]; | |||
const BYTE* matchLongBase = matchLongIndex < dictLimit ? dictBase : base; | |||
const BYTE* const matchLongBase = matchLongIndex < prefixStartIndex ? dictBase : base; | |||
const BYTE* matchLong = matchLongBase + matchLongIndex; | |||
const U32 current = (U32)(ip-base); | |||
const U32 repIndex = current + 1 - offset_1; /* offset_1 expected <= current +1 */ | |||
const BYTE* repBase = repIndex < dictLimit ? dictBase : base; | |||
const BYTE* repMatch = repBase + repIndex; | |||
const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base; | |||
const BYTE* const repMatch = repBase + repIndex; | |||
size_t mLength; | |||
hashSmall[hSmall] = hashLong[hLong] = current; /* update hash table */ | |||
if ( (((U32)((dictLimit-1) - repIndex) >= 3) /* intentional underflow */ & (repIndex > lowestIndex)) | |||
&& (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { | |||
const BYTE* repMatchEnd = repIndex < dictLimit ? dictEnd : iend; | |||
mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, lowPrefixPtr) + 4; | |||
if ((((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex doesn't overlap dict + prefix */ | |||
& (repIndex > dictStartIndex)) | |||
&& (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { | |||
const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend; | |||
mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4; | |||
ip++; | |||
ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, 0, mLength-MINMATCH); | |||
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH); | |||
} else { | |||
if ((matchLongIndex > lowestIndex) && (MEM_read64(matchLong) == MEM_read64(ip))) { | |||
const BYTE* matchEnd = matchLongIndex < dictLimit ? dictEnd : iend; | |||
const BYTE* lowMatchPtr = matchLongIndex < dictLimit ? dictStart : lowPrefixPtr; | |||
if ((matchLongIndex > dictStartIndex) && (MEM_read64(matchLong) == MEM_read64(ip))) { | |||
const BYTE* const matchEnd = matchLongIndex < prefixStartIndex ? dictEnd : iend; | |||
const BYTE* const lowMatchPtr = matchLongIndex < prefixStartIndex ? dictStart : prefixStart; | |||
U32 offset; | |||
mLength = ZSTD_count_2segments(ip+8, matchLong+8, iend, matchEnd, lowPrefixPtr) + 8; | |||
mLength = ZSTD_count_2segments(ip+8, matchLong+8, iend, matchEnd, prefixStart) + 8; | |||
offset = current - matchLongIndex; | |||
while (((ip>anchor) & (matchLong>lowMatchPtr)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */ | |||
offset_2 = offset_1; | |||
offset_1 = offset; | |||
ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH); | |||
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH); | |||
} else if ((matchIndex > lowestIndex) && (MEM_read32(match) == MEM_read32(ip))) { | |||
} else if ((matchIndex > dictStartIndex) && (MEM_read32(match) == MEM_read32(ip))) { | |||
size_t const h3 = ZSTD_hashPtr(ip+1, hBitsL, 8); | |||
U32 const matchIndex3 = hashLong[h3]; | |||
const BYTE* const match3Base = matchIndex3 < dictLimit ? dictBase : base; | |||
const BYTE* const match3Base = matchIndex3 < prefixStartIndex ? dictBase : base; | |||
const BYTE* match3 = match3Base + matchIndex3; | |||
U32 offset; | |||
hashLong[h3] = current + 1; | |||
if ( (matchIndex3 > lowestIndex) && (MEM_read64(match3) == MEM_read64(ip+1)) ) { | |||
const BYTE* matchEnd = matchIndex3 < dictLimit ? dictEnd : iend; | |||
const BYTE* lowMatchPtr = matchIndex3 < dictLimit ? dictStart : lowPrefixPtr; | |||
mLength = ZSTD_count_2segments(ip+9, match3+8, iend, matchEnd, lowPrefixPtr) + 8; | |||
if ( (matchIndex3 > dictStartIndex) && (MEM_read64(match3) == MEM_read64(ip+1)) ) { | |||
const BYTE* const matchEnd = matchIndex3 < prefixStartIndex ? dictEnd : iend; | |||
const BYTE* const lowMatchPtr = matchIndex3 < prefixStartIndex ? dictStart : prefixStart; | |||
mLength = ZSTD_count_2segments(ip+9, match3+8, iend, matchEnd, prefixStart) + 8; | |||
ip++; | |||
offset = current+1 - matchIndex3; | |||
while (((ip>anchor) & (match3>lowMatchPtr)) && (ip[-1] == match3[-1])) { ip--; match3--; mLength++; } /* catch up */ | |||
} else { | |||
const BYTE* matchEnd = matchIndex < dictLimit ? dictEnd : iend; | |||
const BYTE* lowMatchPtr = matchIndex < dictLimit ? dictStart : lowPrefixPtr; | |||
mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, lowPrefixPtr) + 4; | |||
const BYTE* const matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend; | |||
const BYTE* const lowMatchPtr = matchIndex < prefixStartIndex ? dictStart : prefixStart; | |||
mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4; | |||
offset = current - matchIndex; | |||
while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ | |||
} | |||
offset_2 = offset_1; | |||
offset_1 = offset; | |||
ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH); | |||
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH); | |||
} else { | |||
ip += ((ip-anchor) >> g_searchStrength) + 1; | |||
ip += ((ip-anchor) >> kSearchStrength) + 1; | |||
continue; | |||
} } | |||
/* found a match : store it */ | |||
/* move to next sequence start */ | |||
ip += mLength; | |||
anchor = ip; | |||
if (ip <= ilimit) { | |||
/* Fill Table */ | |||
hashSmall[ZSTD_hashPtr(base+current+2, hBitsS, mls)] = current+2; | |||
hashLong[ZSTD_hashPtr(base+current+2, hBitsL, 8)] = current+2; | |||
hashSmall[ZSTD_hashPtr(ip-2, hBitsS, mls)] = (U32)(ip-2-base); | |||
hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base); | |||
/* Complementary insertion */ | |||
/* done after iLimit test, as candidates could be > iend-8 */ | |||
{ U32 const indexToInsert = current+2; | |||
hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert; | |||
hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base); | |||
hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert; | |||
hashSmall[ZSTD_hashPtr(ip-1, hBitsS, mls)] = (U32)(ip-1-base); | |||
} | |||
/* check immediate repcode */ | |||
while (ip <= ilimit) { | |||
U32 const current2 = (U32)(ip-base); | |||
U32 const repIndex2 = current2 - offset_2; | |||
const BYTE* repMatch2 = repIndex2 < dictLimit ? dictBase + repIndex2 : base + repIndex2; | |||
if ( (((U32)((dictLimit-1) - repIndex2) >= 3) & (repIndex2 > lowestIndex)) /* intentional overflow */ | |||
&& (MEM_read32(repMatch2) == MEM_read32(ip)) ) { | |||
const BYTE* const repEnd2 = repIndex2 < dictLimit ? dictEnd : iend; | |||
size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, lowPrefixPtr) + 4; | |||
U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */ | |||
ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, repLength2-MINMATCH); | |||
const BYTE* repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2; | |||
if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) /* intentional overflow : ensure repIndex2 doesn't overlap dict + prefix */ | |||
& (repIndex2 > dictStartIndex)) | |||
&& (MEM_read32(repMatch2) == MEM_read32(ip)) ) { | |||
const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend; | |||
size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4; | |||
U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */ | |||
ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2-MINMATCH); | |||
hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2; | |||
hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2; | |||
ip += repLength2; | |||
@@ -282,27 +493,29 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(ZSTD_CCtx* ctx, | |||
} } } | |||
/* save reps for next block */ | |||
seqStorePtr->repToConfirm[0] = offset_1; seqStorePtr->repToConfirm[1] = offset_2; | |||
rep[0] = offset_1; | |||
rep[1] = offset_2; | |||
/* Return the last literals size */ | |||
return iend - anchor; | |||
return (size_t)(iend - anchor); | |||
} | |||
size_t ZSTD_compressBlock_doubleFast_extDict(ZSTD_CCtx* ctx, | |||
const void* src, size_t srcSize) | |||
size_t ZSTD_compressBlock_doubleFast_extDict( | |||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], | |||
void const* src, size_t srcSize) | |||
{ | |||
U32 const mls = ctx->appliedParams.cParams.searchLength; | |||
U32 const mls = ms->cParams.minMatch; | |||
switch(mls) | |||
{ | |||
default: /* includes case 3 */ | |||
case 4 : | |||
return ZSTD_compressBlock_doubleFast_extDict_generic(ctx, src, srcSize, 4); | |||
return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 4); | |||
case 5 : | |||
return ZSTD_compressBlock_doubleFast_extDict_generic(ctx, src, srcSize, 5); | |||
return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 5); | |||
case 6 : | |||
return ZSTD_compressBlock_doubleFast_extDict_generic(ctx, src, srcSize, 6); | |||
return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 6); | |||
case 7 : | |||
return ZSTD_compressBlock_doubleFast_extDict_generic(ctx, src, srcSize, 7); | |||
return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 7); | |||
} | |||
} |
@@ -1,5 +1,5 @@ | |||
/* | |||
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc. | |||
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. | |||
* All rights reserved. | |||
* | |||
* This source code is licensed under both the BSD-style license (found in the | |||
@@ -11,15 +11,25 @@ | |||
#ifndef ZSTD_DOUBLE_FAST_H | |||
#define ZSTD_DOUBLE_FAST_H | |||
#include "zstd_compress.h" | |||
#if defined (__cplusplus) | |||
extern "C" { | |||
#endif | |||
void ZSTD_fillDoubleHashTable(ZSTD_CCtx* cctx, const void* end, const U32 mls); | |||
size_t ZSTD_compressBlock_doubleFast(ZSTD_CCtx* ctx, const void* src, size_t srcSize); | |||
size_t ZSTD_compressBlock_doubleFast_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize); | |||
#include "mem.h" /* U32 */ | |||
#include "zstd_compress_internal.h" /* ZSTD_CCtx, size_t */ | |||
void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms, | |||
void const* end, ZSTD_dictTableLoadMethod_e dtlm); | |||
size_t ZSTD_compressBlock_doubleFast( | |||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], | |||
void const* src, size_t srcSize); | |||
size_t ZSTD_compressBlock_doubleFast_dictMatchState( | |||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], | |||
void const* src, size_t srcSize); | |||
size_t ZSTD_compressBlock_doubleFast_extDict( | |||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], | |||
void const* src, size_t srcSize); | |||
#if defined (__cplusplus) | |||
} |
@@ -1,5 +1,5 @@ | |||
/* | |||
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc. | |||
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. | |||
* All rights reserved. | |||
* | |||
* This source code is licensed under both the BSD-style license (found in the | |||
@@ -35,12 +35,20 @@ extern "C" { | |||
# define ZSTDERRORLIB_API ZSTDERRORLIB_VISIBILITY | |||
#endif | |||
/*-**************************************** | |||
* error codes list | |||
* note : this API is still considered unstable | |||
* and shall not be used with a dynamic library. | |||
* only static linking is allowed | |||
******************************************/ | |||
/*-********************************************* | |||
* Error codes list | |||
*-********************************************* | |||
* Error codes _values_ are pinned down since v1.3.1 only. | |||
* Therefore, don't rely on values if you may link to any version < v1.3.1. | |||
* | |||
* Only values < 100 are considered stable. | |||
* | |||
* note 1 : this API shall be used with static linking only. | |||
* dynamic linking is not yet officially supported. | |||
* note 2 : Prefer relying on the enum than on its value whenever possible | |||
* This is the only supported way to use the error list < v1.3.1 | |||
* note 3 : ZSTD_isError() is always correct, whatever the library version. | |||
**********************************************/ | |||
typedef enum { | |||
ZSTD_error_no_error = 0, | |||
ZSTD_error_GENERIC = 1, | |||
@@ -61,11 +69,15 @@ typedef enum { | |||
ZSTD_error_stage_wrong = 60, | |||
ZSTD_error_init_missing = 62, | |||
ZSTD_error_memory_allocation = 64, | |||
ZSTD_error_workSpace_tooSmall= 66, | |||
ZSTD_error_dstSize_tooSmall = 70, | |||
ZSTD_error_srcSize_wrong = 72, | |||
ZSTD_error_dstBuffer_null = 74, | |||
/* following error codes are __NOT STABLE__, they can be removed or changed in future versions */ | |||
ZSTD_error_frameIndex_tooLarge = 100, | |||
ZSTD_error_seekableIO = 102, | |||
ZSTD_error_maxCode = 120 /* never EVER use this value directly, it may change in future versions! Use ZSTD_isError() instead */ | |||
ZSTD_error_dstBuffer_wrong = 104, | |||
ZSTD_error_maxCode = 120 /* never EVER use this value directly, it can change in future versions! Use ZSTD_isError() instead */ | |||
} ZSTD_ErrorCode; | |||
/*! ZSTD_getErrorCode() : |
@@ -1,5 +1,5 @@ | |||
/* | |||
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc. | |||
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. | |||
* All rights reserved. | |||
* | |||
* This source code is licensed under both the BSD-style license (found in the | |||
@@ -8,77 +8,306 @@ | |||
* You may select, at your option, one of the above-listed licenses. | |||
*/ | |||
#include "zstd_compress_internal.h" /* ZSTD_hashPtr, ZSTD_count, ZSTD_storeSeq */ | |||
#include "zstd_fast.h" | |||
void ZSTD_fillHashTable (ZSTD_CCtx* zc, const void* end, const U32 mls) | |||
void ZSTD_fillHashTable(ZSTD_matchState_t* ms, | |||
const void* const end, | |||
ZSTD_dictTableLoadMethod_e dtlm) | |||
{ | |||
U32* const hashTable = zc->hashTable; | |||
U32 const hBits = zc->appliedParams.cParams.hashLog; | |||
const BYTE* const base = zc->base; | |||
const BYTE* ip = base + zc->nextToUpdate; | |||
const ZSTD_compressionParameters* const cParams = &ms->cParams; | |||
U32* const hashTable = ms->hashTable; | |||
U32 const hBits = cParams->hashLog; | |||
U32 const mls = cParams->minMatch; | |||
const BYTE* const base = ms->window.base; | |||
const BYTE* ip = base + ms->nextToUpdate; | |||
const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE; | |||
const size_t fastHashFillStep = 3; | |||
const U32 fastHashFillStep = 3; | |||
while(ip <= iend) { | |||
hashTable[ZSTD_hashPtr(ip, hBits, mls)] = (U32)(ip - base); | |||
ip += fastHashFillStep; | |||
} | |||
/* Always insert every fastHashFillStep position into the hash table. | |||
* Insert the other positions if their hash entry is empty. | |||
*/ | |||
for ( ; ip + fastHashFillStep < iend + 2; ip += fastHashFillStep) { | |||
U32 const current = (U32)(ip - base); | |||
size_t const hash0 = ZSTD_hashPtr(ip, hBits, mls); | |||
hashTable[hash0] = current; | |||
if (dtlm == ZSTD_dtlm_fast) continue; | |||
/* Only load extra positions for ZSTD_dtlm_full */ | |||
{ U32 p; | |||
for (p = 1; p < fastHashFillStep; ++p) { | |||
size_t const hash = ZSTD_hashPtr(ip + p, hBits, mls); | |||
if (hashTable[hash] == 0) { /* not yet filled */ | |||
hashTable[hash] = current + p; | |||
} } } } | |||
} | |||
FORCE_INLINE_TEMPLATE | |||
size_t ZSTD_compressBlock_fast_generic(ZSTD_CCtx* cctx, | |||
const void* src, size_t srcSize, | |||
const U32 mls) | |||
FORCE_INLINE_TEMPLATE size_t | |||
ZSTD_compressBlock_fast_generic( | |||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], | |||
void const* src, size_t srcSize, | |||
U32 const mls) | |||
{ | |||
U32* const hashTable = cctx->hashTable; | |||
U32 const hBits = cctx->appliedParams.cParams.hashLog; | |||
seqStore_t* seqStorePtr = &(cctx->seqStore); | |||
const BYTE* const base = cctx->base; | |||
const ZSTD_compressionParameters* const cParams = &ms->cParams; | |||
U32* const hashTable = ms->hashTable; | |||
U32 const hlog = cParams->hashLog; | |||
/* support stepSize of 0 */ | |||
size_t const stepSize = cParams->targetLength + !(cParams->targetLength) + 1; | |||
const BYTE* const base = ms->window.base; | |||
const BYTE* const istart = (const BYTE*)src; | |||
const BYTE* ip = istart; | |||
/* We check ip0 (ip + 0) and ip1 (ip + 1) each loop */ | |||
const BYTE* ip0 = istart; | |||
const BYTE* ip1; | |||
const BYTE* anchor = istart; | |||
const U32 lowestIndex = cctx->dictLimit; | |||
const BYTE* const lowest = base + lowestIndex; | |||
const U32 endIndex = (U32)((size_t)(istart - base) + srcSize); | |||
const U32 prefixStartIndex = ZSTD_getLowestPrefixIndex(ms, endIndex, cParams->windowLog); | |||
const BYTE* const prefixStart = base + prefixStartIndex; | |||
const BYTE* const iend = istart + srcSize; | |||
const BYTE* const ilimit = iend - HASH_READ_SIZE; | |||
U32 offset_1=seqStorePtr->rep[0], offset_2=seqStorePtr->rep[1]; | |||
U32 offset_1=rep[0], offset_2=rep[1]; | |||
U32 offsetSaved = 0; | |||
/* init */ | |||
ip += (ip==lowest); | |||
{ U32 const maxRep = (U32)(ip-lowest); | |||
DEBUGLOG(5, "ZSTD_compressBlock_fast_generic"); | |||
ip0 += (ip0 == prefixStart); | |||
ip1 = ip0 + 1; | |||
{ U32 const current = (U32)(ip0 - base); | |||
U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, current, cParams->windowLog); | |||
U32 const maxRep = current - windowLow; | |||
if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0; | |||
if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0; | |||
} | |||
/* Main Search Loop */ | |||
#ifdef __INTEL_COMPILER | |||
/* From intel 'The vector pragma indicates that the loop should be | |||
* vectorized if it is legal to do so'. Can be used together with | |||
* #pragma ivdep (but have opted to exclude that because intel | |||
* warns against using it).*/ | |||
#pragma vector always | |||
#endif | |||
while (ip1 < ilimit) { /* < instead of <=, because check at ip0+2 */ | |||
size_t mLength; | |||
BYTE const* ip2 = ip0 + 2; | |||
size_t const h0 = ZSTD_hashPtr(ip0, hlog, mls); | |||
U32 const val0 = MEM_read32(ip0); | |||
size_t const h1 = ZSTD_hashPtr(ip1, hlog, mls); | |||
U32 const val1 = MEM_read32(ip1); | |||
U32 const current0 = (U32)(ip0-base); | |||
U32 const current1 = (U32)(ip1-base); | |||
U32 const matchIndex0 = hashTable[h0]; | |||
U32 const matchIndex1 = hashTable[h1]; | |||
BYTE const* repMatch = ip2 - offset_1; | |||
const BYTE* match0 = base + matchIndex0; | |||
const BYTE* match1 = base + matchIndex1; | |||
U32 offcode; | |||
#if defined(__aarch64__) | |||
PREFETCH_L1(ip0+256); | |||
#endif | |||
hashTable[h0] = current0; /* update hash table */ | |||
hashTable[h1] = current1; /* update hash table */ | |||
assert(ip0 + 1 == ip1); | |||
if ((offset_1 > 0) & (MEM_read32(repMatch) == MEM_read32(ip2))) { | |||
mLength = (ip2[-1] == repMatch[-1]) ? 1 : 0; | |||
ip0 = ip2 - mLength; | |||
match0 = repMatch - mLength; | |||
mLength += 4; | |||
offcode = 0; | |||
goto _match; | |||
} | |||
if ((matchIndex0 > prefixStartIndex) && MEM_read32(match0) == val0) { | |||
/* found a regular match */ | |||
goto _offset; | |||
} | |||
if ((matchIndex1 > prefixStartIndex) && MEM_read32(match1) == val1) { | |||
/* found a regular match after one literal */ | |||
ip0 = ip1; | |||
match0 = match1; | |||
goto _offset; | |||
} | |||
{ size_t const step = ((size_t)(ip0-anchor) >> (kSearchStrength - 1)) + stepSize; | |||
assert(step >= 2); | |||
ip0 += step; | |||
ip1 += step; | |||
continue; | |||
} | |||
_offset: /* Requires: ip0, match0 */ | |||
/* Compute the offset code */ | |||
offset_2 = offset_1; | |||
offset_1 = (U32)(ip0-match0); | |||
offcode = offset_1 + ZSTD_REP_MOVE; | |||
mLength = 4; | |||
/* Count the backwards match length */ | |||
while (((ip0>anchor) & (match0>prefixStart)) | |||
&& (ip0[-1] == match0[-1])) { ip0--; match0--; mLength++; } /* catch up */ | |||
_match: /* Requires: ip0, match0, offcode */ | |||
/* Count the forward length */ | |||
mLength += ZSTD_count(ip0+mLength, match0+mLength, iend); | |||
ZSTD_storeSeq(seqStore, (size_t)(ip0-anchor), anchor, iend, offcode, mLength-MINMATCH); | |||
/* match found */ | |||
ip0 += mLength; | |||
anchor = ip0; | |||
if (ip0 <= ilimit) { | |||
/* Fill Table */ | |||
assert(base+current0+2 > istart); /* check base overflow */ | |||
hashTable[ZSTD_hashPtr(base+current0+2, hlog, mls)] = current0+2; /* here because current+2 could be > iend-8 */ | |||
hashTable[ZSTD_hashPtr(ip0-2, hlog, mls)] = (U32)(ip0-2-base); | |||
if (offset_2 > 0) { /* offset_2==0 means offset_2 is invalidated */ | |||
while ( (ip0 <= ilimit) && (MEM_read32(ip0) == MEM_read32(ip0 - offset_2)) ) { | |||
/* store sequence */ | |||
size_t const rLength = ZSTD_count(ip0+4, ip0+4-offset_2, iend) + 4; | |||
{ U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; } /* swap offset_2 <=> offset_1 */ | |||
hashTable[ZSTD_hashPtr(ip0, hlog, mls)] = (U32)(ip0-base); | |||
ip0 += rLength; | |||
ZSTD_storeSeq(seqStore, 0 /*litLen*/, anchor, iend, 0 /*offCode*/, rLength-MINMATCH); | |||
anchor = ip0; | |||
continue; /* faster when present (confirmed on gcc-8) ... (?) */ | |||
} } } | |||
ip1 = ip0 + 1; | |||
} | |||
/* save reps for next block */ | |||
rep[0] = offset_1 ? offset_1 : offsetSaved; | |||
rep[1] = offset_2 ? offset_2 : offsetSaved; | |||
/* Return the last literals size */ | |||
return (size_t)(iend - anchor); | |||
} | |||
size_t ZSTD_compressBlock_fast( | |||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], | |||
void const* src, size_t srcSize) | |||
{ | |||
U32 const mls = ms->cParams.minMatch; | |||
assert(ms->dictMatchState == NULL); | |||
switch(mls) | |||
{ | |||
default: /* includes case 3 */ | |||
case 4 : | |||
return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 4); | |||
case 5 : | |||
return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 5); | |||
case 6 : | |||
return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 6); | |||
case 7 : | |||
return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 7); | |||
} | |||
} | |||
FORCE_INLINE_TEMPLATE | |||
size_t ZSTD_compressBlock_fast_dictMatchState_generic( | |||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], | |||
void const* src, size_t srcSize, U32 const mls) | |||
{ | |||
const ZSTD_compressionParameters* const cParams = &ms->cParams; | |||
U32* const hashTable = ms->hashTable; | |||
U32 const hlog = cParams->hashLog; | |||
/* support stepSize of 0 */ | |||
U32 const stepSize = cParams->targetLength + !(cParams->targetLength); | |||
const BYTE* const base = ms->window.base; | |||
const BYTE* const istart = (const BYTE*)src; | |||
const BYTE* ip = istart; | |||
const BYTE* anchor = istart; | |||
const U32 prefixStartIndex = ms->window.dictLimit; | |||
const BYTE* const prefixStart = base + prefixStartIndex; | |||
const BYTE* const iend = istart + srcSize; | |||
const BYTE* const ilimit = iend - HASH_READ_SIZE; | |||
U32 offset_1=rep[0], offset_2=rep[1]; | |||
U32 offsetSaved = 0; | |||
const ZSTD_matchState_t* const dms = ms->dictMatchState; | |||
const ZSTD_compressionParameters* const dictCParams = &dms->cParams ; | |||
const U32* const dictHashTable = dms->hashTable; | |||
const U32 dictStartIndex = dms->window.dictLimit; | |||
const BYTE* const dictBase = dms->window.base; | |||
const BYTE* const dictStart = dictBase + dictStartIndex; | |||
const BYTE* const dictEnd = dms->window.nextSrc; | |||
const U32 dictIndexDelta = prefixStartIndex - (U32)(dictEnd - dictBase); | |||
const U32 dictAndPrefixLength = (U32)(ip - prefixStart + dictEnd - dictStart); | |||
const U32 dictHLog = dictCParams->hashLog; | |||
/* if a dictionary is still attached, it necessarily means that | |||
* it is within window size. So we just check it. */ | |||
const U32 maxDistance = 1U << cParams->windowLog; | |||
const U32 endIndex = (U32)((size_t)(ip - base) + srcSize); | |||
assert(endIndex - prefixStartIndex <= maxDistance); | |||
(void)maxDistance; (void)endIndex; /* these variables are not used when assert() is disabled */ | |||
/* ensure there will be no no underflow | |||
* when translating a dict index into a local index */ | |||
assert(prefixStartIndex >= (U32)(dictEnd - dictBase)); | |||
/* init */ | |||
DEBUGLOG(5, "ZSTD_compressBlock_fast_dictMatchState_generic"); | |||
ip += (dictAndPrefixLength == 0); | |||
/* dictMatchState repCode checks don't currently handle repCode == 0 | |||
* disabling. */ | |||
assert(offset_1 <= dictAndPrefixLength); | |||
assert(offset_2 <= dictAndPrefixLength); | |||
/* Main Search Loop */ | |||
while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */ | |||
size_t mLength; | |||
size_t const h = ZSTD_hashPtr(ip, hBits, mls); | |||
size_t const h = ZSTD_hashPtr(ip, hlog, mls); | |||
U32 const current = (U32)(ip-base); | |||
U32 const matchIndex = hashTable[h]; | |||
const BYTE* match = base + matchIndex; | |||
const U32 repIndex = current + 1 - offset_1; | |||
const BYTE* repMatch = (repIndex < prefixStartIndex) ? | |||
dictBase + (repIndex - dictIndexDelta) : | |||
base + repIndex; | |||
hashTable[h] = current; /* update hash table */ | |||
if ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1))) { | |||
mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4; | |||
if ( ((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex isn't overlapping dict + prefix */ | |||
&& (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { | |||
const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend; | |||
mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4; | |||
ip++; | |||
ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, 0, mLength-MINMATCH); | |||
} else { | |||
U32 offset; | |||
if ( (matchIndex <= lowestIndex) || (MEM_read32(match) != MEM_read32(ip)) ) { | |||
ip += ((ip-anchor) >> g_searchStrength) + 1; | |||
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH); | |||
} else if ( (matchIndex <= prefixStartIndex) ) { | |||
size_t const dictHash = ZSTD_hashPtr(ip, dictHLog, mls); | |||
U32 const dictMatchIndex = dictHashTable[dictHash]; | |||
const BYTE* dictMatch = dictBase + dictMatchIndex; | |||
if (dictMatchIndex <= dictStartIndex || | |||
MEM_read32(dictMatch) != MEM_read32(ip)) { | |||
assert(stepSize >= 1); | |||
ip += ((ip-anchor) >> kSearchStrength) + stepSize; | |||
continue; | |||
} else { | |||
/* found a dict match */ | |||
U32 const offset = (U32)(current-dictMatchIndex-dictIndexDelta); | |||
mLength = ZSTD_count_2segments(ip+4, dictMatch+4, iend, dictEnd, prefixStart) + 4; | |||
while (((ip>anchor) & (dictMatch>dictStart)) | |||
&& (ip[-1] == dictMatch[-1])) { | |||
ip--; dictMatch--; mLength++; | |||
} /* catch up */ | |||
offset_2 = offset_1; | |||
offset_1 = offset; | |||
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH); | |||
} | |||
} else if (MEM_read32(match) != MEM_read32(ip)) { | |||
/* it's not a match, and we're not going to check the dictionary */ | |||
assert(stepSize >= 1); | |||
ip += ((ip-anchor) >> kSearchStrength) + stepSize; | |||
continue; | |||
} else { | |||
/* found a regular match */ | |||
U32 const offset = (U32)(ip-match); | |||
mLength = ZSTD_count(ip+4, match+4, iend) + 4; | |||
offset = (U32)(ip-match); | |||
while (((ip>anchor) & (match>lowest)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ | |||
while (((ip>anchor) & (match>prefixStart)) | |||
&& (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ | |||
offset_2 = offset_1; | |||
offset_1 = offset; | |||
ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH); | |||
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH); | |||
} | |||
/* match found */ | |||
@@ -87,127 +316,150 @@ size_t ZSTD_compressBlock_fast_generic(ZSTD_CCtx* cctx, | |||
if (ip <= ilimit) { | |||
/* Fill Table */ | |||
hashTable[ZSTD_hashPtr(base+current+2, hBits, mls)] = current+2; /* here because current+2 could be > iend-8 */ | |||
hashTable[ZSTD_hashPtr(ip-2, hBits, mls)] = (U32)(ip-2-base); | |||
assert(base+current+2 > istart); /* check base overflow */ | |||
hashTable[ZSTD_hashPtr(base+current+2, hlog, mls)] = current+2; /* here because current+2 could be > iend-8 */ | |||
hashTable[ZSTD_hashPtr(ip-2, hlog, mls)] = (U32)(ip-2-base); | |||
/* check immediate repcode */ | |||
while ( (ip <= ilimit) | |||
&& ( (offset_2>0) | |||
& (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) { | |||
/* store sequence */ | |||
size_t const rLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4; | |||
{ U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; } /* swap offset_2 <=> offset_1 */ | |||
hashTable[ZSTD_hashPtr(ip, hBits, mls)] = (U32)(ip-base); | |||
ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, rLength-MINMATCH); | |||
ip += rLength; | |||
anchor = ip; | |||
continue; /* faster when present ... (?) */ | |||
} } } | |||
while (ip <= ilimit) { | |||
U32 const current2 = (U32)(ip-base); | |||
U32 const repIndex2 = current2 - offset_2; | |||
const BYTE* repMatch2 = repIndex2 < prefixStartIndex ? | |||
dictBase - dictIndexDelta + repIndex2 : | |||
base + repIndex2; | |||
if ( ((U32)((prefixStartIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */) | |||
&& (MEM_read32(repMatch2) == MEM_read32(ip)) ) { | |||
const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend; | |||
size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4; | |||
U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */ | |||
ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2-MINMATCH); | |||
hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2; | |||
ip += repLength2; | |||
anchor = ip; | |||
continue; | |||
} | |||
break; | |||
} | |||
} | |||
} | |||
/* save reps for next block */ | |||
seqStorePtr->repToConfirm[0] = offset_1 ? offset_1 : offsetSaved; | |||
seqStorePtr->repToConfirm[1] = offset_2 ? offset_2 : offsetSaved; | |||
rep[0] = offset_1 ? offset_1 : offsetSaved; | |||
rep[1] = offset_2 ? offset_2 : offsetSaved; | |||
/* Return the last literals size */ | |||
return iend - anchor; | |||
return (size_t)(iend - anchor); | |||
} | |||
size_t ZSTD_compressBlock_fast(ZSTD_CCtx* ctx, | |||
const void* src, size_t srcSize) | |||
size_t ZSTD_compressBlock_fast_dictMatchState( | |||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], | |||
void const* src, size_t srcSize) | |||
{ | |||
const U32 mls = ctx->appliedParams.cParams.searchLength; | |||
U32 const mls = ms->cParams.minMatch; | |||
assert(ms->dictMatchState != NULL); | |||
switch(mls) | |||
{ | |||
default: /* includes case 3 */ | |||
case 4 : | |||
return ZSTD_compressBlock_fast_generic(ctx, src, srcSize, 4); | |||
return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 4); | |||
case 5 : | |||
return ZSTD_compressBlock_fast_generic(ctx, src, srcSize, 5); | |||
return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 5); | |||
case 6 : | |||
return ZSTD_compressBlock_fast_generic(ctx, src, srcSize, 6); | |||
return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 6); | |||
case 7 : | |||
return ZSTD_compressBlock_fast_generic(ctx, src, srcSize, 7); | |||
return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 7); | |||
} | |||
} | |||
static size_t ZSTD_compressBlock_fast_extDict_generic(ZSTD_CCtx* ctx, | |||
const void* src, size_t srcSize, | |||
const U32 mls) | |||
static size_t ZSTD_compressBlock_fast_extDict_generic( | |||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], | |||
void const* src, size_t srcSize, U32 const mls) | |||
{ | |||
U32* hashTable = ctx->hashTable; | |||
const U32 hBits = ctx->appliedParams.cParams.hashLog; | |||
seqStore_t* seqStorePtr = &(ctx->seqStore); | |||
const BYTE* const base = ctx->base; | |||
const BYTE* const dictBase = ctx->dictBase; | |||
const ZSTD_compressionParameters* const cParams = &ms->cParams; | |||
U32* const hashTable = ms->hashTable; | |||
U32 const hlog = cParams->hashLog; | |||
/* support stepSize of 0 */ | |||
U32 const stepSize = cParams->targetLength + !(cParams->targetLength); | |||
const BYTE* const base = ms->window.base; | |||
const BYTE* const dictBase = ms->window.dictBase; | |||
const BYTE* const istart = (const BYTE*)src; | |||
const BYTE* ip = istart; | |||
const BYTE* anchor = istart; | |||
const U32 lowestIndex = ctx->lowLimit; | |||
const BYTE* const dictStart = dictBase + lowestIndex; | |||
const U32 dictLimit = ctx->dictLimit; | |||
const BYTE* const lowPrefixPtr = base + dictLimit; | |||
const BYTE* const dictEnd = dictBase + dictLimit; | |||
const U32 endIndex = (U32)((size_t)(istart - base) + srcSize); | |||
const U32 lowLimit = ZSTD_getLowestMatchIndex(ms, endIndex, cParams->windowLog); | |||
const U32 dictStartIndex = lowLimit; | |||
const BYTE* const dictStart = dictBase + dictStartIndex; | |||
const U32 dictLimit = ms->window.dictLimit; | |||
const U32 prefixStartIndex = dictLimit < lowLimit ? lowLimit : dictLimit; | |||
const BYTE* const prefixStart = base + prefixStartIndex; | |||
const BYTE* const dictEnd = dictBase + prefixStartIndex; | |||
const BYTE* const iend = istart + srcSize; | |||
const BYTE* const ilimit = iend - 8; | |||
U32 offset_1=seqStorePtr->rep[0], offset_2=seqStorePtr->rep[1]; | |||
U32 offset_1=rep[0], offset_2=rep[1]; | |||
DEBUGLOG(5, "ZSTD_compressBlock_fast_extDict_generic (offset_1=%u)", offset_1); | |||
/* switch to "regular" variant if extDict is invalidated due to maxDistance */ | |||
if (prefixStartIndex == dictStartIndex) | |||
return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, mls); | |||
/* Search Loop */ | |||
while (ip < ilimit) { /* < instead of <=, because (ip+1) */ | |||
const size_t h = ZSTD_hashPtr(ip, hBits, mls); | |||
const U32 matchIndex = hashTable[h]; | |||
const BYTE* matchBase = matchIndex < dictLimit ? dictBase : base; | |||
const BYTE* match = matchBase + matchIndex; | |||
const U32 current = (U32)(ip-base); | |||
const U32 repIndex = current + 1 - offset_1; /* offset_1 expected <= current +1 */ | |||
const BYTE* repBase = repIndex < dictLimit ? dictBase : base; | |||
const BYTE* repMatch = repBase + repIndex; | |||
size_t mLength; | |||
const size_t h = ZSTD_hashPtr(ip, hlog, mls); | |||
const U32 matchIndex = hashTable[h]; | |||
const BYTE* const matchBase = matchIndex < prefixStartIndex ? dictBase : base; | |||
const BYTE* match = matchBase + matchIndex; | |||
const U32 current = (U32)(ip-base); | |||
const U32 repIndex = current + 1 - offset_1; | |||
const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base; | |||
const BYTE* const repMatch = repBase + repIndex; | |||
hashTable[h] = current; /* update hash table */ | |||
DEBUGLOG(7, "offset_1 = %u , current = %u", offset_1, current); | |||
assert(offset_1 <= current +1); /* check repIndex */ | |||
if ( (((U32)((dictLimit-1) - repIndex) >= 3) /* intentional underflow */ & (repIndex > lowestIndex)) | |||
if ( (((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow */ & (repIndex > dictStartIndex)) | |||
&& (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { | |||
const BYTE* repMatchEnd = repIndex < dictLimit ? dictEnd : iend; | |||
mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, lowPrefixPtr) + 4; | |||
const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend; | |||
size_t const rLength = ZSTD_count_2segments(ip+1 +4, repMatch +4, iend, repMatchEnd, prefixStart) + 4; | |||
ip++; | |||
ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, 0, mLength-MINMATCH); | |||
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, rLength-MINMATCH); | |||
ip += rLength; | |||
anchor = ip; | |||
} else { | |||
if ( (matchIndex < lowestIndex) || | |||
if ( (matchIndex < dictStartIndex) || | |||
(MEM_read32(match) != MEM_read32(ip)) ) { | |||
ip += ((ip-anchor) >> g_searchStrength) + 1; | |||
assert(stepSize >= 1); | |||
ip += ((ip-anchor) >> kSearchStrength) + stepSize; | |||
continue; | |||
} | |||
{ const BYTE* matchEnd = matchIndex < dictLimit ? dictEnd : iend; | |||
const BYTE* lowMatchPtr = matchIndex < dictLimit ? dictStart : lowPrefixPtr; | |||
U32 offset; | |||
mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, lowPrefixPtr) + 4; | |||
{ const BYTE* const matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend; | |||
const BYTE* const lowMatchPtr = matchIndex < prefixStartIndex ? dictStart : prefixStart; | |||
U32 const offset = current - matchIndex; | |||
size_t mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4; | |||
while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ | |||
offset = current - matchIndex; | |||
offset_2 = offset_1; | |||
offset_1 = offset; | |||
ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH); | |||
offset_2 = offset_1; offset_1 = offset; /* update offset history */ | |||
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH); | |||
ip += mLength; | |||
anchor = ip; | |||
} } | |||
/* found a match : store it */ | |||
ip += mLength; | |||
anchor = ip; | |||
if (ip <= ilimit) { | |||
/* Fill Table */ | |||
hashTable[ZSTD_hashPtr(base+current+2, hBits, mls)] = current+2; | |||
hashTable[ZSTD_hashPtr(ip-2, hBits, mls)] = (U32)(ip-2-base); | |||
hashTable[ZSTD_hashPtr(base+current+2, hlog, mls)] = current+2; | |||
hashTable[ZSTD_hashPtr(ip-2, hlog, mls)] = (U32)(ip-2-base); | |||
/* check immediate repcode */ | |||
while (ip <= ilimit) { | |||
U32 const current2 = (U32)(ip-base); | |||
U32 const repIndex2 = current2 - offset_2; | |||
const BYTE* repMatch2 = repIndex2 < dictLimit ? dictBase + repIndex2 : base + repIndex2; | |||
if ( (((U32)((dictLimit-1) - repIndex2) >= 3) & (repIndex2 > lowestIndex)) /* intentional overflow */ | |||
const BYTE* const repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2; | |||
if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) & (repIndex2 > dictStartIndex)) /* intentional overflow */ | |||
&& (MEM_read32(repMatch2) == MEM_read32(ip)) ) { | |||
const BYTE* const repEnd2 = repIndex2 < dictLimit ? dictEnd : iend; | |||
size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, lowPrefixPtr) + 4; | |||
U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */ | |||
ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, repLength2-MINMATCH); | |||
hashTable[ZSTD_hashPtr(ip, hBits, mls)] = current2; | |||
const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend; | |||
size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4; | |||
{ U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; } /* swap offset_2 <=> offset_1 */ | |||
ZSTD_storeSeq(seqStore, 0 /*litlen*/, anchor, iend, 0 /*offcode*/, repLength2-MINMATCH); | |||
hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2; | |||
ip += repLength2; | |||
anchor = ip; | |||
continue; | |||
@@ -216,27 +468,29 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(ZSTD_CCtx* ctx, | |||
} } } | |||
/* save reps for next block */ | |||
seqStorePtr->repToConfirm[0] = offset_1; seqStorePtr->repToConfirm[1] = offset_2; | |||
rep[0] = offset_1; | |||
rep[1] = offset_2; | |||
/* Return the last literals size */ | |||
return iend - anchor; | |||
return (size_t)(iend - anchor); | |||
} | |||
size_t ZSTD_compressBlock_fast_extDict(ZSTD_CCtx* ctx, | |||
const void* src, size_t srcSize) | |||
size_t ZSTD_compressBlock_fast_extDict( | |||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], | |||
void const* src, size_t srcSize) | |||
{ | |||
U32 const mls = ctx->appliedParams.cParams.searchLength; | |||
U32 const mls = ms->cParams.minMatch; | |||
switch(mls) | |||
{ | |||
default: /* includes case 3 */ | |||
case 4 : | |||
return ZSTD_compressBlock_fast_extDict_generic(ctx, src, srcSize, 4); | |||
return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, 4); | |||
case 5 : | |||
return ZSTD_compressBlock_fast_extDict_generic(ctx, src, srcSize, 5); | |||
return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, 5); | |||
case 6 : | |||
return ZSTD_compressBlock_fast_extDict_generic(ctx, src, srcSize, 6); | |||
return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, 6); | |||
case 7 : | |||
return ZSTD_compressBlock_fast_extDict_generic(ctx, src, srcSize, 7); | |||
return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, 7); | |||
} | |||
} |
@@ -1,5 +1,5 @@ | |||
/* | |||
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc. | |||
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. | |||
* All rights reserved. | |||
* | |||
* This source code is licensed under both the BSD-style license (found in the | |||
@@ -11,17 +11,24 @@ | |||
#ifndef ZSTD_FAST_H | |||
#define ZSTD_FAST_H | |||
#include "zstd_compress.h" | |||
#if defined (__cplusplus) | |||
extern "C" { | |||
#endif | |||
void ZSTD_fillHashTable(ZSTD_CCtx* zc, const void* end, const U32 mls); | |||
size_t ZSTD_compressBlock_fast(ZSTD_CCtx* ctx, | |||
const void* src, size_t srcSize); | |||
size_t ZSTD_compressBlock_fast_extDict(ZSTD_CCtx* ctx, | |||
const void* src, size_t srcSize); | |||
#include "mem.h" /* U32 */ | |||
#include "zstd_compress_internal.h" | |||
void ZSTD_fillHashTable(ZSTD_matchState_t* ms, | |||
void const* end, ZSTD_dictTableLoadMethod_e dtlm); | |||
size_t ZSTD_compressBlock_fast( | |||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], | |||
void const* src, size_t srcSize); | |||
size_t ZSTD_compressBlock_fast_dictMatchState( | |||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], | |||
void const* src, size_t srcSize); | |||
size_t ZSTD_compressBlock_fast_extDict( | |||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], | |||
void const* src, size_t srcSize); | |||
#if defined (__cplusplus) | |||
} |
@@ -1,5 +1,5 @@ | |||
/* | |||
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc. | |||
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. | |||
* All rights reserved. | |||
* | |||
* This source code is licensed under both the BSD-style license (found in the | |||
@@ -11,17 +11,25 @@ | |||
#ifndef ZSTD_CCOMMON_H_MODULE | |||
#define ZSTD_CCOMMON_H_MODULE | |||
/* this module contains definitions which must be identical | |||
* across compression, decompression and dictBuilder. | |||
* It also contains a few functions useful to at least 2 of them | |||
* and which benefit from being inlined */ | |||
/*-************************************* | |||
* Dependencies | |||
***************************************/ | |||
#ifdef __aarch64__ | |||
#include <arm_neon.h> | |||
#endif | |||
#include "compiler.h" | |||
#include "mem.h" | |||
#include "debug.h" /* assert, DEBUGLOG, RAWLOG, g_debuglevel */ | |||
#include "error_private.h" | |||
#ifndef ZSTD_STATIC_LINKING_ONLY | |||
#define ZSTD_STATIC_LINKING_ONLY | |||
#endif | |||
#include "zstd.h" | |||
#include "../zstd.h" | |||
#define FSE_STATIC_LINKING_ONLY | |||
#include "fse.h" | |||
#define HUF_STATIC_LINKING_ONLY | |||
@@ -31,43 +39,15 @@ | |||
#endif | |||
#include "xxhash.h" /* XXH_reset, update, digest */ | |||
#if defined (__cplusplus) | |||
extern "C" { | |||
#endif | |||
/* ---- static assert (debug) --- */
/* The old in-file assert()/DEBUGLOG machinery was moved to debug.h (included
 * above); keeping the stale copies here redefined ZSTD_STATIC_ASSERT with a
 * conflicting body and clashed with debug.h's definitions, so only the
 * forwarding definition remains. */
#define ZSTD_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c)
#define ZSTD_isError ERR_isError /* for inlining */ | |||
#define FSE_isError ERR_isError | |||
#define HUF_isError ERR_isError | |||
/*-************************************* | |||
@@ -77,8 +57,81 @@ extern "C" { | |||
#undef MAX | |||
#define MIN(a,b) ((a)<(b) ? (a) : (b)) | |||
#define MAX(a,b) ((a)>(b) ? (a) : (b)) | |||
#define CHECK_F(f) { size_t const errcod = f; if (ERR_isError(errcod)) return errcod; } /* check and Forward error code */ | |||
#define CHECK_E(f, e) { size_t const errcod = f; if (ERR_isError(errcod)) return ERROR(e); } /* check and send Error code */ | |||
/** | |||
* Ignore: this is an internal helper. | |||
* | |||
* This is a helper function to help force C99-correctness during compilation. | |||
* Under strict compilation modes, variadic macro arguments can't be empty. | |||
* However, variadic function arguments can be. Using a function therefore lets | |||
* us statically check that at least one (string) argument was passed, | |||
* independent of the compilation flags. | |||
*/ | |||
static INLINE_KEYWORD UNUSED_ATTR
void _force_has_format_string(const char *format, ...) {
    /* deliberately empty : the call itself is what forces the compiler to
     * check that at least one (string) argument was supplied */
    (void)format;
}
/** | |||
* Ignore: this is an internal helper. | |||
* | |||
* We want to force this function invocation to be syntactically correct, but | |||
* we don't want to force runtime evaluation of its arguments. | |||
*/ | |||
#define _FORCE_HAS_FORMAT_STRING(...) \ | |||
if (0) { \ | |||
_force_has_format_string(__VA_ARGS__); \ | |||
} | |||
/**
 * Return the specified error if the condition evaluates to true.
 *
 * In debug modes, prints additional information.
 * In order to do that (particularly, printing the conditional that failed),
 * this can't just wrap RETURN_ERROR().
 * Wrapped in do { } while(0) so the macro behaves as a single statement
 * (safe inside unbraced if/else chains).
 */
#define RETURN_ERROR_IF(cond, err, ...) \
  do { \
    if (cond) { \
      RAWLOG(3, "%s:%d: ERROR!: check %s failed, returning %s", \
             __FILE__, __LINE__, ZSTD_QUOTE(cond), ZSTD_QUOTE(ERROR(err))); \
      _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \
      RAWLOG(3, ": " __VA_ARGS__); \
      RAWLOG(3, "\n"); \
      return ERROR(err); \
    } \
  } while(0)

/**
 * Unconditionally return the specified error.
 *
 * In debug modes, prints additional information.
 * No trailing semicolon after while(0) : the caller supplies it, which keeps
 * `if (x) RETURN_ERROR(e); else ...` well-formed.
 */
#define RETURN_ERROR(err, ...) \
  do { \
    RAWLOG(3, "%s:%d: ERROR!: unconditional check failed, returning %s", \
           __FILE__, __LINE__, ZSTD_QUOTE(ERROR(err))); \
    _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \
    RAWLOG(3, ": " __VA_ARGS__); \
    RAWLOG(3, "\n"); \
    return ERROR(err); \
  } while(0)

/**
 * If the provided expression evaluates to an error code, returns that error code.
 *
 * In debug modes, prints additional information.
 * No trailing semicolon after while(0) : see RETURN_ERROR.
 */
#define FORWARD_IF_ERROR(err, ...) \
  do { \
    size_t const err_code = (err); \
    if (ERR_isError(err_code)) { \
      RAWLOG(3, "%s:%d: ERROR!: forwarding error in %s: %s", \
             __FILE__, __LINE__, ZSTD_QUOTE(err), ERR_getErrorName(err_code)); \
      _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \
      RAWLOG(3, ": " __VA_ARGS__); \
      RAWLOG(3, "\n"); \
      return err_code; \
    } \
  } while(0)
/*-************************************* | |||
@@ -87,9 +140,7 @@ extern "C" { | |||
#define ZSTD_OPT_NUM (1<<12) | |||
#define ZSTD_REP_NUM 3 /* number of repcodes */ | |||
#define ZSTD_REP_CHECK (ZSTD_REP_NUM) /* number of repcodes to check by the optimal parser */ | |||
#define ZSTD_REP_MOVE (ZSTD_REP_NUM-1) | |||
#define ZSTD_REP_MOVE_OPT (ZSTD_REP_NUM) | |||
static const U32 repStartValue[ZSTD_REP_NUM] = { 1, 4, 8 }; | |||
#define KB *(1 <<10) | |||
@@ -107,10 +158,14 @@ static const U32 repStartValue[ZSTD_REP_NUM] = { 1, 4, 8 }; | |||
static const size_t ZSTD_fcs_fieldSize[4] = { 0, 2, 4, 8 }; | |||
static const size_t ZSTD_did_fieldSize[4] = { 0, 1, 2, 4 }; | |||
#define ZSTD_FRAMEIDSIZE 4 /* magic number size */ | |||
#define ZSTD_BLOCKHEADERSIZE 3 /* C standard doesn't allow `static const` variable to be init using another `static const` variable */ | |||
static const size_t ZSTD_blockHeaderSize = ZSTD_BLOCKHEADERSIZE; | |||
typedef enum { bt_raw, bt_rle, bt_compressed, bt_reserved } blockType_e; | |||
#define ZSTD_FRAMECHECKSUMSIZE 4 | |||
#define MIN_SEQUENCES_SIZE 1 /* nbSeq==0 */ | |||
#define MIN_CBLOCK_SIZE (1 /*litCSize*/ + 1 /* RLE or RAW */ + MIN_SEQUENCES_SIZE /* nbSeq==0 */) /* for a non-null block */ | |||
@@ -123,36 +178,50 @@ typedef enum { set_basic, set_rle, set_compressed, set_repeat } symbolEncodingTy | |||
#define Litbits 8 | |||
#define MaxLit ((1<<Litbits) - 1) | |||
/* maximum symbol values for the three sequence code tables.
 * Duplicated pre-refactor definitions removed : MaxOff was previously
 * defined twice (28, then 31), which is a hard redefinition error;
 * the old default-offset bound now lives in DefaultMaxOff. */
#define MaxML   52
#define MaxLL   35
#define DefaultMaxOff 28
#define MaxOff  31
#define MaxSeq MAX(MaxLL, MaxML) /* Assumption : MaxOff < MaxLL,MaxML */ | |||
#define MLFSELog 9 | |||
#define LLFSELog 9 | |||
#define OffFSELog 8 | |||
#define MaxFSELog MAX(MAX(MLFSELog, LLFSELog), OffFSELog) | |||
/* number of additional bits per literal-length code (old unterminated
 * initializer that had fused with this declaration removed) */
static const U32 LL_bits[MaxLL+1] = { 0, 0, 0, 0, 0, 0, 0, 0,
                                      0, 0, 0, 0, 0, 0, 0, 0,
                                      1, 1, 1, 1, 2, 2, 3, 3,
                                      4, 6, 7, 8, 9,10,11,12,
                                     13,14,15,16 };
/* default normalized frequencies for the literal-length FSE table
 * (old unterminated initializer that had fused with this declaration removed) */
static const S16 LL_defaultNorm[MaxLL+1] = { 4, 3, 2, 2, 2, 2, 2, 2,
                                             2, 2, 2, 2, 2, 1, 1, 1,
                                             2, 2, 2, 2, 2, 2, 2, 2,
                                             2, 3, 2, 1, 1, 1, 1, 1,
                                            -1,-1,-1,-1 };
#define LL_DEFAULTNORMLOG 6 /* for static allocation */ | |||
static const U32 LL_defaultNormLog = LL_DEFAULTNORMLOG; | |||
static const U32 ML_bits[MaxML+1] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |||
1, 1, 1, 1, 2, 2, 3, 3, 4, 4, 5, 7, 8, 9,10,11, | |||
static const U32 ML_bits[MaxML+1] = { 0, 0, 0, 0, 0, 0, 0, 0, | |||
0, 0, 0, 0, 0, 0, 0, 0, | |||
0, 0, 0, 0, 0, 0, 0, 0, | |||
0, 0, 0, 0, 0, 0, 0, 0, | |||
1, 1, 1, 1, 2, 2, 3, 3, | |||
4, 4, 5, 7, 8, 9,10,11, | |||
12,13,14,15,16 }; | |||
static const S16 ML_defaultNorm[MaxML+1] = { 1, 4, 3, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, | |||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1,-1, | |||
static const S16 ML_defaultNorm[MaxML+1] = { 1, 4, 3, 2, 2, 2, 2, 2, | |||
2, 1, 1, 1, 1, 1, 1, 1, | |||
1, 1, 1, 1, 1, 1, 1, 1, | |||
1, 1, 1, 1, 1, 1, 1, 1, | |||
1, 1, 1, 1, 1, 1, 1, 1, | |||
1, 1, 1, 1, 1, 1,-1,-1, | |||
-1,-1,-1,-1,-1 }; | |||
#define ML_DEFAULTNORMLOG 6 /* for static allocation */ | |||
static const U32 ML_defaultNormLog = ML_DEFAULTNORMLOG; | |||
static const S16 OF_defaultNorm[MaxOff+1] = { 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, | |||
1, 1, 1, 1, 1, 1, 1, 1,-1,-1,-1,-1,-1 }; | |||
static const S16 OF_defaultNorm[DefaultMaxOff+1] = { 1, 1, 1, 1, 1, 1, 2, 2, | |||
2, 1, 1, 1, 1, 1, 1, 1, | |||
1, 1, 1, 1, 1, 1, 1, 1, | |||
-1,-1,-1,-1,-1 }; | |||
#define OF_DEFAULTNORMLOG 5 /* for static allocation */ | |||
static const U32 OF_defaultNormLog = OF_DEFAULTNORMLOG; | |||
@@ -160,45 +229,109 @@ static const U32 OF_defaultNormLog = OF_DEFAULTNORMLOG; | |||
/*-******************************************* | |||
* Shared functions to include for inlining | |||
*********************************************/ | |||
static void ZSTD_copy8(void* dst, const void* src) { memcpy(dst, src, 8); } | |||
static void ZSTD_copy8(void* dst, const void* src) { | |||
#ifdef __aarch64__ | |||
vst1_u8((uint8_t*)dst, vld1_u8((const uint8_t*)src)); | |||
#else | |||
memcpy(dst, src, 8); | |||
#endif | |||
} | |||
#define COPY8(d,s) { ZSTD_copy8(d,s); d+=8; s+=8; } | |||
static void ZSTD_copy16(void* dst, const void* src) { | |||
#ifdef __aarch64__ | |||
vst1q_u8((uint8_t*)dst, vld1q_u8((const uint8_t*)src)); | |||
#else | |||
memcpy(dst, src, 16); | |||
#endif | |||
} | |||
#define COPY16(d,s) { ZSTD_copy16(d,s); d+=16; s+=16; } | |||
#define WILDCOPY_OVERLENGTH 32 | |||
#define WILDCOPY_VECLEN 16 | |||
typedef enum { | |||
ZSTD_no_overlap, | |||
ZSTD_overlap_src_before_dst | |||
/* ZSTD_overlap_dst_before_src, */ | |||
} ZSTD_overlap_e; | |||
/*! ZSTD_wildcopy() : | |||
* custom version of memcpy(), can copy up to 7 bytes too many (8 bytes if length==0) */ | |||
#define WILDCOPY_OVERLENGTH 8 | |||
MEM_STATIC void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length) | |||
* Custom version of memcpy(), can over read/write up to WILDCOPY_OVERLENGTH bytes (if length==0) | |||
* @param ovtype controls the overlap detection | |||
* - ZSTD_no_overlap: The source and destination are guaranteed to be at least WILDCOPY_VECLEN bytes apart. | |||
* - ZSTD_overlap_src_before_dst: The src and dst may overlap, but they MUST be at least 8 bytes apart. | |||
* The src buffer must be before the dst buffer. | |||
*/ | |||
MEM_STATIC FORCE_INLINE_ATTR | |||
void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e const ovtype) | |||
{ | |||
ptrdiff_t diff = (BYTE*)dst - (const BYTE*)src; | |||
const BYTE* ip = (const BYTE*)src; | |||
BYTE* op = (BYTE*)dst; | |||
BYTE* const oend = op + length; | |||
do | |||
COPY8(op, ip) | |||
while (op < oend); | |||
assert(diff >= 8 || (ovtype == ZSTD_no_overlap && diff <= -WILDCOPY_VECLEN)); | |||
if (ovtype == ZSTD_overlap_src_before_dst && diff < WILDCOPY_VECLEN) { | |||
/* Handle short offset copies. */ | |||
do { | |||
COPY8(op, ip) | |||
} while (op < oend); | |||
} else { | |||
assert(diff >= WILDCOPY_VECLEN || diff <= -WILDCOPY_VECLEN); | |||
/* Separate out the first COPY16() call because the copy length is | |||
* almost certain to be short, so the branches have different | |||
* probabilities. Since it is almost certain to be short, only do | |||
* one COPY16() in the first call. Then, do two calls per loop since | |||
* at that point it is more likely to have a high trip count. | |||
*/ | |||
#ifndef __aarch64__ | |||
do { | |||
COPY16(op, ip); | |||
} | |||
while (op < oend); | |||
#else | |||
COPY16(op, ip); | |||
if (op >= oend) return; | |||
do { | |||
COPY16(op, ip); | |||
COPY16(op, ip); | |||
} | |||
while (op < oend); | |||
#endif | |||
} | |||
} | |||
MEM_STATIC void ZSTD_wildcopy_e(void* dst, const void* src, void* dstEnd) /* should be faster for decoding, but strangely, not verified on all platform */ | |||
MEM_STATIC size_t ZSTD_limitCopy(void* dst, size_t dstCapacity, const void* src, size_t srcSize) | |||
{ | |||
const BYTE* ip = (const BYTE*)src; | |||
BYTE* op = (BYTE*)dst; | |||
BYTE* const oend = (BYTE*)dstEnd; | |||
do | |||
COPY8(op, ip) | |||
while (op < oend); | |||
size_t const length = MIN(dstCapacity, srcSize); | |||
if (length > 0) { | |||
memcpy(dst, src, length); | |||
} | |||
return length; | |||
} | |||
/* define "workspace is too large" as this number of times larger than needed */ | |||
#define ZSTD_WORKSPACETOOLARGE_FACTOR 3 | |||
/* when workspace is continuously too large | |||
* during at least this number of times, | |||
* context's memory usage is considered wasteful, | |||
* because it's sized to handle a worst case scenario which rarely happens. | |||
* In which case, resize it down to free some memory */ | |||
#define ZSTD_WORKSPACETOOLARGE_MAXDURATION 128 | |||
/*-******************************************* | |||
* Private interfaces | |||
* Private declarations | |||
*********************************************/ | |||
typedef struct ZSTD_stats_s ZSTD_stats_t; | |||
typedef struct seqDef_s { | |||
U32 offset; | |||
U16 litLength; | |||
U16 matchLength; | |||
} seqDef; | |||
typedef struct { | |||
seqDef* sequencesStart; | |||
seqDef* sequences; | |||
@@ -207,105 +340,50 @@ typedef struct { | |||
BYTE* llCode; | |||
BYTE* mlCode; | |||
BYTE* ofCode; | |||
size_t maxNbSeq; | |||
size_t maxNbLit; | |||
U32 longLengthID; /* 0 == no longLength; 1 == Lit.longLength; 2 == Match.longLength; */ | |||
U32 longLengthPos; | |||
U32 rep[ZSTD_REP_NUM]; | |||
U32 repToConfirm[ZSTD_REP_NUM]; | |||
} seqStore_t; | |||
typedef struct { | |||
U32 off; | |||
U32 len; | |||
} ZSTD_match_t; | |||
typedef struct { | |||
U32 price; | |||
U32 off; | |||
U32 mlen; | |||
U32 litlen; | |||
U32 rep[ZSTD_REP_NUM]; | |||
} ZSTD_optimal_t; | |||
typedef struct { | |||
U32* litFreq; | |||
U32* litLengthFreq; | |||
U32* matchLengthFreq; | |||
U32* offCodeFreq; | |||
ZSTD_match_t* matchTable; | |||
ZSTD_optimal_t* priceTable; | |||
U32 matchLengthSum; | |||
U32 matchSum; | |||
U32 litLengthSum; | |||
U32 litSum; | |||
U32 offCodeSum; | |||
U32 log2matchLengthSum; | |||
U32 log2matchSum; | |||
U32 log2litLengthSum; | |||
U32 log2litSum; | |||
U32 log2offCodeSum; | |||
U32 factor; | |||
U32 staticPrices; | |||
U32 cachedPrice; | |||
U32 cachedLitLength; | |||
const BYTE* cachedLiterals; | |||
} optState_t; | |||
typedef struct { | |||
U32 offset; | |||
U32 checksum; | |||
} ldmEntry_t; | |||
typedef struct { | |||
ldmEntry_t* hashTable; | |||
BYTE* bucketOffsets; /* Next position in bucket to insert entry */ | |||
U64 hashPower; /* Used to compute the rolling hash. | |||
* Depends on ldmParams.minMatchLength */ | |||
} ldmState_t; | |||
U32 litLength; | |||
U32 matchLength; | |||
} ZSTD_sequenceLength; | |||
typedef struct { | |||
U32 enableLdm; /* 1 if enable long distance matching */ | |||
U32 hashLog; /* Log size of hashTable */ | |||
U32 bucketSizeLog; /* Log bucket size for collision resolution, at most 8 */ | |||
U32 minMatchLength; /* Minimum match length */ | |||
U32 hashEveryLog; /* Log number of entries to skip */ | |||
} ldmParams_t; | |||
/** | |||
* Returns the ZSTD_sequenceLength for the given sequences. It handles the decoding of long sequences | |||
* indicated by longLengthPos and longLengthID, and adds MINMATCH back to matchLength. | |||
*/ | |||
MEM_STATIC ZSTD_sequenceLength ZSTD_getSequenceLength(seqStore_t const* seqStore, seqDef const* seq) | |||
{ | |||
ZSTD_sequenceLength seqLen; | |||
seqLen.litLength = seq->litLength; | |||
seqLen.matchLength = seq->matchLength + MINMATCH; | |||
if (seqStore->longLengthPos == (U32)(seq - seqStore->sequencesStart)) { | |||
if (seqStore->longLengthID == 1) { | |||
seqLen.litLength += 0xFFFF; | |||
} | |||
if (seqStore->longLengthID == 2) { | |||
seqLen.matchLength += 0xFFFF; | |||
} | |||
} | |||
return seqLen; | |||
} | |||
/** | |||
* Contains the compressed frame size and an upper-bound for the decompressed frame size. | |||
* Note: before using `compressedSize`, check for errors using ZSTD_isError(). | |||
* similarly, before using `decompressedBound`, check for errors using: | |||
* `decompressedBound != ZSTD_CONTENTSIZE_ERROR` | |||
*/ | |||
typedef struct { | |||
U32 hufCTable[HUF_CTABLE_SIZE_U32(255)]; | |||
FSE_CTable offcodeCTable[FSE_CTABLE_SIZE_U32(OffFSELog, MaxOff)]; | |||
FSE_CTable matchlengthCTable[FSE_CTABLE_SIZE_U32(MLFSELog, MaxML)]; | |||
FSE_CTable litlengthCTable[FSE_CTABLE_SIZE_U32(LLFSELog, MaxLL)]; | |||
U32 workspace[HUF_WORKSPACE_SIZE_U32]; | |||
HUF_repeat hufCTable_repeatMode; | |||
FSE_repeat offcode_repeatMode; | |||
FSE_repeat matchlength_repeatMode; | |||
FSE_repeat litlength_repeatMode; | |||
} ZSTD_entropyCTables_t; | |||
struct ZSTD_CCtx_params_s { | |||
ZSTD_compressionParameters cParams; | |||
ZSTD_frameParameters fParams; | |||
int compressionLevel; | |||
U32 forceWindow; /* force back-references to respect limit of | |||
* 1<<wLog, even for dictionary */ | |||
size_t compressedSize; | |||
unsigned long long decompressedBound; | |||
} ZSTD_frameSizeInfo; /* decompress & legacy */ | |||
/* Multithreading: used to pass parameters to mtctx */ | |||
U32 nbThreads; | |||
unsigned jobSize; | |||
unsigned overlapSizeLog; | |||
/* Long distance matching parameters */ | |||
ldmParams_t ldmParams; | |||
/* For use with createCCtxParams() and freeCCtxParams() only */ | |||
ZSTD_customMem customMem; | |||
}; /* typedef'd to ZSTD_CCtx_params within "zstd.h" */ | |||
const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx); | |||
void ZSTD_seqToCodes(const seqStore_t* seqStorePtr); | |||
const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx); /* compress & dictBuilder */ | |||
void ZSTD_seqToCodes(const seqStore_t* seqStorePtr); /* compress, dictBuilder, decodeCorpus (shouldn't get its definition from here) */ | |||
/* custom memory allocation functions */ | |||
void* ZSTD_malloc(size_t size, ZSTD_customMem customMem); | |||
@@ -313,91 +391,57 @@ void* ZSTD_calloc(size_t size, ZSTD_customMem customMem); | |||
void ZSTD_free(void* ptr, ZSTD_customMem customMem); | |||
/*====== common function ======*/ | |||
MEM_STATIC U32 ZSTD_highbit32(U32 val) | |||
MEM_STATIC U32 ZSTD_highbit32(U32 val) /* compress, dictBuilder, decodeCorpus */ | |||
{ | |||
assert(val != 0); | |||
{ | |||
# if defined(_MSC_VER) /* Visual */ | |||
unsigned long r=0; | |||
_BitScanReverse(&r, val); | |||
return (unsigned)r; | |||
return _BitScanReverse(&r, val) ? (unsigned)r : 0; | |||
# elif defined(__GNUC__) && (__GNUC__ >= 3) /* GCC Intrinsic */ | |||
return 31 - __builtin_clz(val); | |||
return __builtin_clz (val) ^ 31; | |||
# elif defined(__ICCARM__) /* IAR Intrinsic */ | |||
return 31 - __CLZ(val); | |||
# else /* Software version */ | |||
static const int DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 }; | |||
static const U32 DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 }; | |||
U32 v = val; | |||
int r; | |||
v |= v >> 1; | |||
v |= v >> 2; | |||
v |= v >> 4; | |||
v |= v >> 8; | |||
v |= v >> 16; | |||
r = DeBruijnClz[(U32)(v * 0x07C4ACDDU) >> 27]; | |||
return r; | |||
return DeBruijnClz[(v * 0x07C4ACDDU) >> 27]; | |||
# endif | |||
} | |||
} | |||
/* hidden functions */ | |||
/* ZSTD_invalidateRepCodes() : | |||
* ensures next compression will not use repcodes from previous block. | |||
* Note : only works with regular variant; | |||
* do not use with extDict variant ! */ | |||
void ZSTD_invalidateRepCodes(ZSTD_CCtx* cctx); | |||
/*! ZSTD_initCStream_internal() : | |||
* Private use only. Init streaming operation. | |||
* expects params to be valid. | |||
* must receive dict, or cdict, or none, but not both. | |||
* @return : 0, or an error code */ | |||
size_t ZSTD_initCStream_internal(ZSTD_CStream* zcs, | |||
const void* dict, size_t dictSize, | |||
const ZSTD_CDict* cdict, | |||
ZSTD_CCtx_params params, unsigned long long pledgedSrcSize); | |||
/*! ZSTD_compressStream_generic() : | |||
* Private use only. To be called from zstdmt_compress.c in single-thread mode. */ | |||
size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs, | |||
ZSTD_outBuffer* output, | |||
ZSTD_inBuffer* input, | |||
ZSTD_EndDirective const flushMode); | |||
/*! ZSTD_getCParamsFromCDict() : | |||
* as the name implies */ | |||
ZSTD_compressionParameters ZSTD_getCParamsFromCDict(const ZSTD_CDict* cdict); | |||
/* ZSTD_compressBegin_advanced_internal() : | |||
* Private use only. To be called from zstdmt_compress.c. */ | |||
size_t ZSTD_compressBegin_advanced_internal(ZSTD_CCtx* cctx, | |||
const void* dict, size_t dictSize, | |||
ZSTD_dictMode_e dictMode, | |||
ZSTD_CCtx_params params, | |||
unsigned long long pledgedSrcSize); | |||
/* ZSTD_compress_advanced_internal() : | |||
* Private use only. To be called from zstdmt_compress.c. */ | |||
size_t ZSTD_compress_advanced_internal(ZSTD_CCtx* cctx, | |||
void* dst, size_t dstCapacity, | |||
const void* src, size_t srcSize, | |||
const void* dict,size_t dictSize, | |||
ZSTD_CCtx_params params); | |||
void ZSTD_invalidateRepCodes(ZSTD_CCtx* cctx); /* zstdmt, adaptive_compression (shouldn't get this definition from here) */ | |||
typedef struct { | |||
blockType_e blockType; | |||
U32 lastBlock; | |||
U32 origSize; | |||
} blockProperties_t; | |||
} blockProperties_t; /* declared here for decompress and fullbench */ | |||
/*! ZSTD_getcBlockSize() : | |||
* Provides the size of compressed block from block header `src` */ | |||
* Provides the size of compressed block from block header `src` */ | |||
/* Used by: decompress, fullbench (does not get its definition from here) */ | |||
size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, | |||
blockProperties_t* bpPtr); | |||
/*! ZSTD_decodeSeqHeaders() : | |||
* decode sequence header from src */ | |||
/* Used by: decompress, fullbench (does not get its definition from here) */ | |||
size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr, | |||
const void* src, size_t srcSize); | |||
#if defined (__cplusplus) | |||
} | |||
#endif |
@@ -1,5 +1,5 @@ | |||
/* | |||
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc. | |||
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. | |||
* All rights reserved. | |||
* | |||
* This source code is licensed under both the BSD-style license (found in the | |||
@@ -11,25 +11,54 @@ | |||
#ifndef ZSTD_LAZY_H | |||
#define ZSTD_LAZY_H | |||
#include "zstd_compress.h" | |||
#if defined (__cplusplus) | |||
extern "C" { | |||
#endif | |||
U32 ZSTD_insertAndFindFirstIndex (ZSTD_CCtx* zc, const BYTE* ip, U32 mls); | |||
void ZSTD_updateTree(ZSTD_CCtx* zc, const BYTE* const ip, const BYTE* const iend, const U32 nbCompares, const U32 mls); | |||
void ZSTD_updateTree_extDict(ZSTD_CCtx* zc, const BYTE* const ip, const BYTE* const iend, const U32 nbCompares, const U32 mls); | |||
#include "zstd_compress_internal.h" | |||
U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip); | |||
void ZSTD_preserveUnsortedMark (U32* const table, U32 const size, U32 const reducerValue); /*! used in ZSTD_reduceIndex(). preemptively increase value of ZSTD_DUBT_UNSORTED_MARK */ | |||
size_t ZSTD_compressBlock_btlazy2( | |||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], | |||
void const* src, size_t srcSize); | |||
size_t ZSTD_compressBlock_lazy2( | |||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], | |||
void const* src, size_t srcSize); | |||
size_t ZSTD_compressBlock_lazy( | |||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], | |||
void const* src, size_t srcSize); | |||
size_t ZSTD_compressBlock_greedy( | |||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], | |||
void const* src, size_t srcSize); | |||
size_t ZSTD_compressBlock_btlazy2(ZSTD_CCtx* ctx, const void* src, size_t srcSize); | |||
size_t ZSTD_compressBlock_lazy2(ZSTD_CCtx* ctx, const void* src, size_t srcSize); | |||
size_t ZSTD_compressBlock_lazy(ZSTD_CCtx* ctx, const void* src, size_t srcSize); | |||
size_t ZSTD_compressBlock_greedy(ZSTD_CCtx* ctx, const void* src, size_t srcSize); | |||
size_t ZSTD_compressBlock_btlazy2_dictMatchState( | |||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], | |||
void const* src, size_t srcSize); | |||
size_t ZSTD_compressBlock_lazy2_dictMatchState( | |||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], | |||
void const* src, size_t srcSize); | |||
size_t ZSTD_compressBlock_lazy_dictMatchState( | |||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], | |||
void const* src, size_t srcSize); | |||
size_t ZSTD_compressBlock_greedy_dictMatchState( | |||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], | |||
void const* src, size_t srcSize); | |||
size_t ZSTD_compressBlock_greedy_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize); | |||
size_t ZSTD_compressBlock_lazy_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize); | |||
size_t ZSTD_compressBlock_lazy2_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize); | |||
size_t ZSTD_compressBlock_btlazy2_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize); | |||
size_t ZSTD_compressBlock_greedy_extDict( | |||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], | |||
void const* src, size_t srcSize); | |||
size_t ZSTD_compressBlock_lazy_extDict( | |||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], | |||
void const* src, size_t srcSize); | |||
size_t ZSTD_compressBlock_lazy2_extDict( | |||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], | |||
void const* src, size_t srcSize); | |||
size_t ZSTD_compressBlock_btlazy2_extDict( | |||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], | |||
void const* src, size_t srcSize); | |||
#if defined (__cplusplus) | |||
} |
@@ -1,48 +1,64 @@ | |||
/* | |||
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc. | |||
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. | |||
* All rights reserved. | |||
* | |||
* This source code is licensed under both the BSD-style license (found in the | |||
* LICENSE file in the root directory of this source tree) and the GPLv2 (found | |||
* in the COPYING file in the root directory of this source tree). | |||
* You may select, at your option, one of the above-listed licenses. | |||
*/ | |||
#include "zstd_ldm.h" | |||
#include "debug.h" | |||
#include "zstd_fast.h" /* ZSTD_fillHashTable() */ | |||
#include "zstd_double_fast.h" /* ZSTD_fillDoubleHashTable() */ | |||
#define LDM_BUCKET_SIZE_LOG 3 | |||
#define LDM_MIN_MATCH_LENGTH 64 | |||
#define LDM_HASH_LOG 20 | |||
#define LDM_HASH_RLOG 7 | |||
#define LDM_HASH_CHAR_OFFSET 10 | |||
size_t ZSTD_ldm_initializeParameters(ldmParams_t* params, U32 enableLdm) | |||
void ZSTD_ldm_adjustParameters(ldmParams_t* params, | |||
ZSTD_compressionParameters const* cParams) | |||
{ | |||
params->windowLog = cParams->windowLog; | |||
ZSTD_STATIC_ASSERT(LDM_BUCKET_SIZE_LOG <= ZSTD_LDM_BUCKETSIZELOG_MAX); | |||
params->enableLdm = enableLdm>0; | |||
params->hashLog = LDM_HASH_LOG; | |||
params->bucketSizeLog = LDM_BUCKET_SIZE_LOG; | |||
params->minMatchLength = LDM_MIN_MATCH_LENGTH; | |||
params->hashEveryLog = ZSTD_LDM_HASHEVERYLOG_NOTSET; | |||
return 0; | |||
DEBUGLOG(4, "ZSTD_ldm_adjustParameters"); | |||
if (!params->bucketSizeLog) params->bucketSizeLog = LDM_BUCKET_SIZE_LOG; | |||
if (!params->minMatchLength) params->minMatchLength = LDM_MIN_MATCH_LENGTH; | |||
if (cParams->strategy >= ZSTD_btopt) { | |||
/* Get out of the way of the optimal parser */ | |||
U32 const minMatch = MAX(cParams->targetLength, params->minMatchLength); | |||
assert(minMatch >= ZSTD_LDM_MINMATCH_MIN); | |||
assert(minMatch <= ZSTD_LDM_MINMATCH_MAX); | |||
params->minMatchLength = minMatch; | |||
} | |||
if (params->hashLog == 0) { | |||
params->hashLog = MAX(ZSTD_HASHLOG_MIN, params->windowLog - LDM_HASH_RLOG); | |||
assert(params->hashLog <= ZSTD_HASHLOG_MAX); | |||
} | |||
if (params->hashRateLog == 0) { | |||
params->hashRateLog = params->windowLog < params->hashLog | |||
? 0 | |||
: params->windowLog - params->hashLog; | |||
} | |||
params->bucketSizeLog = MIN(params->bucketSizeLog, params->hashLog); | |||
} | |||
void ZSTD_ldm_adjustParameters(ldmParams_t* params, U32 windowLog) | |||
size_t ZSTD_ldm_getTableSize(ldmParams_t params) | |||
{ | |||
if (params->hashEveryLog == ZSTD_LDM_HASHEVERYLOG_NOTSET) { | |||
params->hashEveryLog = | |||
windowLog < params->hashLog ? 0 : windowLog - params->hashLog; | |||
} | |||
params->bucketSizeLog = MIN(params->bucketSizeLog, params->hashLog); | |||
size_t const ldmHSize = ((size_t)1) << params.hashLog; | |||
size_t const ldmBucketSizeLog = MIN(params.bucketSizeLog, params.hashLog); | |||
size_t const ldmBucketSize = ((size_t)1) << (params.hashLog - ldmBucketSizeLog); | |||
size_t const totalSize = ZSTD_cwksp_alloc_size(ldmBucketSize) | |||
+ ZSTD_cwksp_alloc_size(ldmHSize * sizeof(ldmEntry_t)); | |||
return params.enableLdm ? totalSize : 0; | |||
} | |||
size_t ZSTD_ldm_getTableSize(U32 hashLog, U32 bucketSizeLog) { | |||
size_t const ldmHSize = ((size_t)1) << hashLog; | |||
size_t const ldmBucketSizeLog = MIN(bucketSizeLog, hashLog); | |||
size_t const ldmBucketSize = | |||
((size_t)1) << (hashLog - ldmBucketSizeLog); | |||
return ldmBucketSize + (ldmHSize * (sizeof(ldmEntry_t))); | |||
size_t ZSTD_ldm_getMaxNbSeq(ldmParams_t params, size_t maxChunkSize) | |||
{ | |||
return params.enableLdm ? (maxChunkSize / params.minMatchLength) : 0; | |||
} | |||
/** ZSTD_ldm_getSmallHash() : | |||
@@ -104,20 +120,20 @@ static void ZSTD_ldm_insertEntry(ldmState_t* ldmState, | |||
* | |||
* Gets the small hash, checksum, and tag from the rollingHash. | |||
* | |||
* If the tag matches (1 << ldmParams.hashEveryLog)-1, then | |||
* If the tag matches (1 << ldmParams.hashRateLog)-1, then | |||
* creates an ldmEntry from the offset, and inserts it into the hash table. | |||
* | |||
* hBits is the length of the small hash, which is the most significant hBits | |||
* of rollingHash. The checksum is the next 32 most significant bits, followed | |||
* by ldmParams.hashEveryLog bits that make up the tag. */ | |||
* by ldmParams.hashRateLog bits that make up the tag. */ | |||
static void ZSTD_ldm_makeEntryAndInsertByTag(ldmState_t* ldmState, | |||
U64 const rollingHash, | |||
U32 const hBits, | |||
U32 const offset, | |||
ldmParams_t const ldmParams) | |||
{ | |||
U32 const tag = ZSTD_ldm_getTag(rollingHash, hBits, ldmParams.hashEveryLog); | |||
U32 const tagMask = ((U32)1 << ldmParams.hashEveryLog) - 1; | |||
U32 const tag = ZSTD_ldm_getTag(rollingHash, hBits, ldmParams.hashRateLog); | |||
U32 const tagMask = ((U32)1 << ldmParams.hashRateLog) - 1; | |||
if (tag == tagMask) { | |||
U32 const hash = ZSTD_ldm_getSmallHash(rollingHash, hBits); | |||
U32 const checksum = ZSTD_ldm_getChecksum(rollingHash, hBits); | |||
@@ -128,55 +144,6 @@ static void ZSTD_ldm_makeEntryAndInsertByTag(ldmState_t* ldmState, | |||
} | |||
} | |||
/** ZSTD_ldm_getRollingHash() : | |||
* Get a 64-bit hash using the first len bytes from buf. | |||
* | |||
* Giving bytes s = s_1, s_2, ... s_k, the hash is defined to be | |||
* H(s) = s_1*(a^(k-1)) + s_2*(a^(k-2)) + ... + s_k*(a^0) | |||
* | |||
* where the constant a is defined to be prime8bytes. | |||
* | |||
* The implementation adds an offset to each byte, so | |||
* H(s) = (s_1 + HASH_CHAR_OFFSET)*(a^(k-1)) + ... */ | |||
static U64 ZSTD_ldm_getRollingHash(const BYTE* buf, U32 len) | |||
{ | |||
U64 ret = 0; | |||
U32 i; | |||
for (i = 0; i < len; i++) { | |||
ret *= prime8bytes; | |||
ret += buf[i] + LDM_HASH_CHAR_OFFSET; | |||
} | |||
return ret; | |||
} | |||
/** ZSTD_ldm_ipow() : | |||
* Return base^exp. */ | |||
static U64 ZSTD_ldm_ipow(U64 base, U64 exp) | |||
{ | |||
U64 ret = 1; | |||
while (exp) { | |||
if (exp & 1) { ret *= base; } | |||
exp >>= 1; | |||
base *= base; | |||
} | |||
return ret; | |||
} | |||
U64 ZSTD_ldm_getHashPower(U32 minMatchLength) { | |||
assert(minMatchLength >= ZSTD_LDM_MINMATCH_MIN); | |||
return ZSTD_ldm_ipow(prime8bytes, minMatchLength - 1); | |||
} | |||
/** ZSTD_ldm_updateHash() : | |||
* Updates hash by removing toRemove and adding toAdd. */ | |||
static U64 ZSTD_ldm_updateHash(U64 hash, BYTE toRemove, BYTE toAdd, U64 hashPower) | |||
{ | |||
hash -= ((toRemove + LDM_HASH_CHAR_OFFSET) * hashPower); | |||
hash *= prime8bytes; | |||
hash += toAdd + LDM_HASH_CHAR_OFFSET; | |||
return hash; | |||
} | |||
/** ZSTD_ldm_countBackwardsMatch() : | |||
* Returns the number of bytes that match backwards before pIn and pMatch. | |||
* | |||
@@ -201,21 +168,19 @@ static size_t ZSTD_ldm_countBackwardsMatch( | |||
* | |||
* The tables for the other strategies are filled within their | |||
* block compressors. */ | |||
static size_t ZSTD_ldm_fillFastTables(ZSTD_CCtx* zc, const void* end) | |||
static size_t ZSTD_ldm_fillFastTables(ZSTD_matchState_t* ms, | |||
void const* end) | |||
{ | |||
const BYTE* const iend = (const BYTE*)end; | |||
const U32 mls = zc->appliedParams.cParams.searchLength; | |||
switch(zc->appliedParams.cParams.strategy) | |||
switch(ms->cParams.strategy) | |||
{ | |||
case ZSTD_fast: | |||
ZSTD_fillHashTable(zc, iend, mls); | |||
zc->nextToUpdate = (U32)(iend - zc->base); | |||
ZSTD_fillHashTable(ms, iend, ZSTD_dtlm_fast); | |||
break; | |||
case ZSTD_dfast: | |||
ZSTD_fillDoubleHashTable(zc, iend, mls); | |||
zc->nextToUpdate = (U32)(iend - zc->base); | |||
ZSTD_fillDoubleHashTable(ms, iend, ZSTD_dtlm_fast); | |||
break; | |||
case ZSTD_greedy: | |||
@@ -224,6 +189,7 @@ static size_t ZSTD_ldm_fillFastTables(ZSTD_CCtx* zc, const void* end) | |||
case ZSTD_btlazy2: | |||
case ZSTD_btopt: | |||
case ZSTD_btultra: | |||
case ZSTD_btultra2: | |||
break; | |||
default: | |||
assert(0); /* not possible : not a valid strategy id */ | |||
@@ -247,9 +213,9 @@ static U64 ZSTD_ldm_fillLdmHashTable(ldmState_t* state, | |||
const BYTE* cur = lastHashed + 1; | |||
while (cur < iend) { | |||
rollingHash = ZSTD_ldm_updateHash(rollingHash, cur[-1], | |||
cur[ldmParams.minMatchLength-1], | |||
state->hashPower); | |||
rollingHash = ZSTD_rollingHash_rotate(rollingHash, cur[-1], | |||
cur[ldmParams.minMatchLength-1], | |||
state->hashPower); | |||
ZSTD_ldm_makeEntryAndInsertByTag(state, | |||
rollingHash, hBits, | |||
(U32)(cur - base), ldmParams); | |||
@@ -258,75 +224,82 @@ static U64 ZSTD_ldm_fillLdmHashTable(ldmState_t* state, | |||
return rollingHash; | |||
} | |||
void ZSTD_ldm_fillHashTable( | |||
ldmState_t* state, const BYTE* ip, | |||
const BYTE* iend, ldmParams_t const* params) | |||
{ | |||
DEBUGLOG(5, "ZSTD_ldm_fillHashTable"); | |||
if ((size_t)(iend - ip) >= params->minMatchLength) { | |||
U64 startingHash = ZSTD_rollingHash_compute(ip, params->minMatchLength); | |||
ZSTD_ldm_fillLdmHashTable( | |||
state, startingHash, ip, iend - params->minMatchLength, state->window.base, | |||
params->hashLog - params->bucketSizeLog, | |||
*params); | |||
} | |||
} | |||
/** ZSTD_ldm_limitTableUpdate() : | |||
* | |||
* Sets cctx->nextToUpdate to a position corresponding closer to anchor | |||
* if it is far way | |||
* (after a long match, only update tables a limited amount). */ | |||
static void ZSTD_ldm_limitTableUpdate(ZSTD_CCtx* cctx, const BYTE* anchor) | |||
static void ZSTD_ldm_limitTableUpdate(ZSTD_matchState_t* ms, const BYTE* anchor) | |||
{ | |||
U32 const current = (U32)(anchor - cctx->base); | |||
if (current > cctx->nextToUpdate + 1024) { | |||
cctx->nextToUpdate = | |||
current - MIN(512, current - cctx->nextToUpdate - 1024); | |||
U32 const current = (U32)(anchor - ms->window.base); | |||
if (current > ms->nextToUpdate + 1024) { | |||
ms->nextToUpdate = | |||
current - MIN(512, current - ms->nextToUpdate - 1024); | |||
} | |||
} | |||
typedef size_t (*ZSTD_blockCompressor) (ZSTD_CCtx* ctx, const void* src, size_t srcSize); | |||
/* defined in zstd_compress.c */ | |||
ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, int extDict); | |||
FORCE_INLINE_TEMPLATE | |||
size_t ZSTD_compressBlock_ldm_generic(ZSTD_CCtx* cctx, | |||
const void* src, size_t srcSize) | |||
static size_t ZSTD_ldm_generateSequences_internal( | |||
ldmState_t* ldmState, rawSeqStore_t* rawSeqStore, | |||
ldmParams_t const* params, void const* src, size_t srcSize) | |||
{ | |||
ldmState_t* const ldmState = &(cctx->ldmState); | |||
const ldmParams_t ldmParams = cctx->appliedParams.ldmParams; | |||
const U64 hashPower = ldmState->hashPower; | |||
const U32 hBits = ldmParams.hashLog - ldmParams.bucketSizeLog; | |||
const U32 ldmBucketSize = ((U32)1 << ldmParams.bucketSizeLog); | |||
const U32 ldmTagMask = ((U32)1 << ldmParams.hashEveryLog) - 1; | |||
seqStore_t* const seqStorePtr = &(cctx->seqStore); | |||
const BYTE* const base = cctx->base; | |||
const BYTE* const istart = (const BYTE*)src; | |||
const BYTE* ip = istart; | |||
const BYTE* anchor = istart; | |||
const U32 lowestIndex = cctx->dictLimit; | |||
const BYTE* const lowest = base + lowestIndex; | |||
const BYTE* const iend = istart + srcSize; | |||
const BYTE* const ilimit = iend - MAX(ldmParams.minMatchLength, HASH_READ_SIZE); | |||
const ZSTD_blockCompressor blockCompressor = | |||
ZSTD_selectBlockCompressor(cctx->appliedParams.cParams.strategy, 0); | |||
U32* const repToConfirm = seqStorePtr->repToConfirm; | |||
U32 savedRep[ZSTD_REP_NUM]; | |||
/* LDM parameters */ | |||
int const extDict = ZSTD_window_hasExtDict(ldmState->window); | |||
U32 const minMatchLength = params->minMatchLength; | |||
U64 const hashPower = ldmState->hashPower; | |||
U32 const hBits = params->hashLog - params->bucketSizeLog; | |||
U32 const ldmBucketSize = 1U << params->bucketSizeLog; | |||
U32 const hashRateLog = params->hashRateLog; | |||
U32 const ldmTagMask = (1U << params->hashRateLog) - 1; | |||
/* Prefix and extDict parameters */ | |||
U32 const dictLimit = ldmState->window.dictLimit; | |||
U32 const lowestIndex = extDict ? ldmState->window.lowLimit : dictLimit; | |||
BYTE const* const base = ldmState->window.base; | |||
BYTE const* const dictBase = extDict ? ldmState->window.dictBase : NULL; | |||
BYTE const* const dictStart = extDict ? dictBase + lowestIndex : NULL; | |||
BYTE const* const dictEnd = extDict ? dictBase + dictLimit : NULL; | |||
BYTE const* const lowPrefixPtr = base + dictLimit; | |||
/* Input bounds */ | |||
BYTE const* const istart = (BYTE const*)src; | |||
BYTE const* const iend = istart + srcSize; | |||
BYTE const* const ilimit = iend - MAX(minMatchLength, HASH_READ_SIZE); | |||
/* Input positions */ | |||
BYTE const* anchor = istart; | |||
BYTE const* ip = istart; | |||
/* Rolling hash */ | |||
BYTE const* lastHashed = NULL; | |||
U64 rollingHash = 0; | |||
const BYTE* lastHashed = NULL; | |||
size_t i, lastLiterals; | |||
/* Save seqStorePtr->rep and copy repToConfirm */ | |||
for (i = 0; i < ZSTD_REP_NUM; i++) | |||
savedRep[i] = repToConfirm[i] = seqStorePtr->rep[i]; | |||
/* Main Search Loop */ | |||
while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */ | |||
while (ip <= ilimit) { | |||
size_t mLength; | |||
U32 const current = (U32)(ip - base); | |||
size_t forwardMatchLength = 0, backwardMatchLength = 0; | |||
ldmEntry_t* bestEntry = NULL; | |||
if (ip != istart) { | |||
rollingHash = ZSTD_ldm_updateHash(rollingHash, lastHashed[0], | |||
lastHashed[ldmParams.minMatchLength], | |||
hashPower); | |||
rollingHash = ZSTD_rollingHash_rotate(rollingHash, lastHashed[0], | |||
lastHashed[minMatchLength], | |||
hashPower); | |||
} else { | |||
rollingHash = ZSTD_ldm_getRollingHash(ip, ldmParams.minMatchLength); | |||
rollingHash = ZSTD_rollingHash_compute(ip, minMatchLength); | |||
} | |||
lastHashed = ip; | |||
/* Do not insert and do not look for a match */ | |||
if (ZSTD_ldm_getTag(rollingHash, hBits, ldmParams.hashEveryLog) != | |||
ldmTagMask) { | |||
if (ZSTD_ldm_getTag(rollingHash, hBits, hashRateLog) != ldmTagMask) { | |||
ip++; | |||
continue; | |||
} | |||
@@ -336,27 +309,49 @@ size_t ZSTD_compressBlock_ldm_generic(ZSTD_CCtx* cctx, | |||
ldmEntry_t* const bucket = | |||
ZSTD_ldm_getBucket(ldmState, | |||
ZSTD_ldm_getSmallHash(rollingHash, hBits), | |||
ldmParams); | |||
*params); | |||
ldmEntry_t* cur; | |||
size_t bestMatchLength = 0; | |||
U32 const checksum = ZSTD_ldm_getChecksum(rollingHash, hBits); | |||
for (cur = bucket; cur < bucket + ldmBucketSize; ++cur) { | |||
const BYTE* const pMatch = cur->offset + base; | |||
size_t curForwardMatchLength, curBackwardMatchLength, | |||
curTotalMatchLength; | |||
if (cur->checksum != checksum || cur->offset <= lowestIndex) { | |||
continue; | |||
} | |||
curForwardMatchLength = ZSTD_count(ip, pMatch, iend); | |||
if (curForwardMatchLength < ldmParams.minMatchLength) { | |||
continue; | |||
if (extDict) { | |||
BYTE const* const curMatchBase = | |||
cur->offset < dictLimit ? dictBase : base; | |||
BYTE const* const pMatch = curMatchBase + cur->offset; | |||
BYTE const* const matchEnd = | |||
cur->offset < dictLimit ? dictEnd : iend; | |||
BYTE const* const lowMatchPtr = | |||
cur->offset < dictLimit ? dictStart : lowPrefixPtr; | |||
curForwardMatchLength = ZSTD_count_2segments( | |||
ip, pMatch, iend, | |||
matchEnd, lowPrefixPtr); | |||
if (curForwardMatchLength < minMatchLength) { | |||
continue; | |||
} | |||
curBackwardMatchLength = | |||
ZSTD_ldm_countBackwardsMatch(ip, anchor, pMatch, | |||
lowMatchPtr); | |||
curTotalMatchLength = curForwardMatchLength + | |||
curBackwardMatchLength; | |||
} else { /* !extDict */ | |||
BYTE const* const pMatch = base + cur->offset; | |||
curForwardMatchLength = ZSTD_count(ip, pMatch, iend); | |||
if (curForwardMatchLength < minMatchLength) { | |||
continue; | |||
} | |||
curBackwardMatchLength = | |||
ZSTD_ldm_countBackwardsMatch(ip, anchor, pMatch, | |||
lowPrefixPtr); | |||
curTotalMatchLength = curForwardMatchLength + | |||
curBackwardMatchLength; | |||
} | |||
curBackwardMatchLength = ZSTD_ldm_countBackwardsMatch( | |||
ip, anchor, pMatch, lowest); | |||
curTotalMatchLength = curForwardMatchLength + | |||
curBackwardMatchLength; | |||
if (curTotalMatchLength > bestMatchLength) { | |||
bestMatchLength = curTotalMatchLength; | |||
@@ -371,7 +366,7 @@ size_t ZSTD_compressBlock_ldm_generic(ZSTD_CCtx* cctx, | |||
if (bestEntry == NULL) { | |||
ZSTD_ldm_makeEntryAndInsertByTag(ldmState, rollingHash, | |||
hBits, current, | |||
ldmParams); | |||
*params); | |||
ip++; | |||
continue; | |||
} | |||
@@ -380,324 +375,245 @@ size_t ZSTD_compressBlock_ldm_generic(ZSTD_CCtx* cctx, | |||
mLength = forwardMatchLength + backwardMatchLength; | |||
ip -= backwardMatchLength; | |||
/* Call the block compressor on the remaining literals */ | |||
{ | |||
/* Store the sequence: | |||
* ip = current - backwardMatchLength | |||
* The match is at (bestEntry->offset - backwardMatchLength) | |||
*/ | |||
U32 const matchIndex = bestEntry->offset; | |||
const BYTE* const match = base + matchIndex - backwardMatchLength; | |||
U32 const offset = (U32)(ip - match); | |||
/* Overwrite rep codes */ | |||
for (i = 0; i < ZSTD_REP_NUM; i++) | |||
seqStorePtr->rep[i] = repToConfirm[i]; | |||
/* Fill tables for block compressor */ | |||
ZSTD_ldm_limitTableUpdate(cctx, anchor); | |||
ZSTD_ldm_fillFastTables(cctx, anchor); | |||
/* Call block compressor and get remaining literals */ | |||
lastLiterals = blockCompressor(cctx, anchor, ip - anchor); | |||
cctx->nextToUpdate = (U32)(ip - base); | |||
/* Update repToConfirm with the new offset */ | |||
for (i = ZSTD_REP_NUM - 1; i > 0; i--) | |||
repToConfirm[i] = repToConfirm[i-1]; | |||
repToConfirm[0] = offset; | |||
/* Store the sequence with the leftover literals */ | |||
ZSTD_storeSeq(seqStorePtr, lastLiterals, ip - lastLiterals, | |||
offset + ZSTD_REP_MOVE, mLength - MINMATCH); | |||
U32 const offset = current - matchIndex; | |||
rawSeq* const seq = rawSeqStore->seq + rawSeqStore->size; | |||
/* Out of sequence storage */ | |||
if (rawSeqStore->size == rawSeqStore->capacity) | |||
return ERROR(dstSize_tooSmall); | |||
seq->litLength = (U32)(ip - anchor); | |||
seq->matchLength = (U32)mLength; | |||
seq->offset = offset; | |||
rawSeqStore->size++; | |||
} | |||
/* Insert the current entry into the hash table */ | |||
ZSTD_ldm_makeEntryAndInsertByTag(ldmState, rollingHash, hBits, | |||
(U32)(lastHashed - base), | |||
ldmParams); | |||
*params); | |||
assert(ip + backwardMatchLength == lastHashed); | |||
/* Fill the hash table from lastHashed+1 to ip+mLength*/ | |||
/* Heuristic: don't need to fill the entire table at end of block */ | |||
if (ip + mLength < ilimit) { | |||
if (ip + mLength <= ilimit) { | |||
rollingHash = ZSTD_ldm_fillLdmHashTable( | |||
ldmState, rollingHash, lastHashed, | |||
ip + mLength, base, hBits, ldmParams); | |||
ip + mLength, base, hBits, *params); | |||
lastHashed = ip + mLength - 1; | |||
} | |||
ip += mLength; | |||
anchor = ip; | |||
/* Check immediate repcode */ | |||
while ( (ip < ilimit) | |||
&& ( (repToConfirm[1] > 0) && (repToConfirm[1] <= (U32)(ip-lowest)) | |||
&& (MEM_read32(ip) == MEM_read32(ip - repToConfirm[1])) )) { | |||
size_t const rLength = ZSTD_count(ip+4, ip+4-repToConfirm[1], | |||
iend) + 4; | |||
/* Swap repToConfirm[1] <=> repToConfirm[0] */ | |||
{ | |||
U32 const tmpOff = repToConfirm[1]; | |||
repToConfirm[1] = repToConfirm[0]; | |||
repToConfirm[0] = tmpOff; | |||
} | |||
ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, rLength-MINMATCH); | |||
/* Fill the hash table from lastHashed+1 to ip+rLength*/ | |||
if (ip + rLength < ilimit) { | |||
rollingHash = ZSTD_ldm_fillLdmHashTable( | |||
ldmState, rollingHash, lastHashed, | |||
ip + rLength, base, hBits, ldmParams); | |||
lastHashed = ip + rLength - 1; | |||
} | |||
ip += rLength; | |||
anchor = ip; | |||
} | |||
} | |||
/* Overwrite rep */ | |||
for (i = 0; i < ZSTD_REP_NUM; i++) | |||
seqStorePtr->rep[i] = repToConfirm[i]; | |||
ZSTD_ldm_limitTableUpdate(cctx, anchor); | |||
ZSTD_ldm_fillFastTables(cctx, anchor); | |||
lastLiterals = blockCompressor(cctx, anchor, iend - anchor); | |||
cctx->nextToUpdate = (U32)(iend - base); | |||
/* Restore seqStorePtr->rep */ | |||
for (i = 0; i < ZSTD_REP_NUM; i++) | |||
seqStorePtr->rep[i] = savedRep[i]; | |||
/* Return the last literals size */ | |||
return lastLiterals; | |||
return iend - anchor; | |||
} | |||
size_t ZSTD_compressBlock_ldm(ZSTD_CCtx* ctx, | |||
const void* src, size_t srcSize) | |||
/*! ZSTD_ldm_reduceTable() : | |||
* reduce table indexes by `reducerValue` */ | |||
static void ZSTD_ldm_reduceTable(ldmEntry_t* const table, U32 const size, | |||
U32 const reducerValue) | |||
{ | |||
return ZSTD_compressBlock_ldm_generic(ctx, src, srcSize); | |||
U32 u; | |||
for (u = 0; u < size; u++) { | |||
if (table[u].offset < reducerValue) table[u].offset = 0; | |||
else table[u].offset -= reducerValue; | |||
} | |||
} | |||
static size_t ZSTD_compressBlock_ldm_extDict_generic( | |||
ZSTD_CCtx* ctx, | |||
const void* src, size_t srcSize) | |||
size_t ZSTD_ldm_generateSequences( | |||
ldmState_t* ldmState, rawSeqStore_t* sequences, | |||
ldmParams_t const* params, void const* src, size_t srcSize) | |||
{ | |||
ldmState_t* const ldmState = &(ctx->ldmState); | |||
const ldmParams_t ldmParams = ctx->appliedParams.ldmParams; | |||
const U64 hashPower = ldmState->hashPower; | |||
const U32 hBits = ldmParams.hashLog - ldmParams.bucketSizeLog; | |||
const U32 ldmBucketSize = ((U32)1 << ldmParams.bucketSizeLog); | |||
const U32 ldmTagMask = ((U32)1 << ldmParams.hashEveryLog) - 1; | |||
seqStore_t* const seqStorePtr = &(ctx->seqStore); | |||
const BYTE* const base = ctx->base; | |||
const BYTE* const dictBase = ctx->dictBase; | |||
const BYTE* const istart = (const BYTE*)src; | |||
const BYTE* ip = istart; | |||
const BYTE* anchor = istart; | |||
const U32 lowestIndex = ctx->lowLimit; | |||
const BYTE* const dictStart = dictBase + lowestIndex; | |||
const U32 dictLimit = ctx->dictLimit; | |||
const BYTE* const lowPrefixPtr = base + dictLimit; | |||
const BYTE* const dictEnd = dictBase + dictLimit; | |||
const BYTE* const iend = istart + srcSize; | |||
const BYTE* const ilimit = iend - MAX(ldmParams.minMatchLength, HASH_READ_SIZE); | |||
const ZSTD_blockCompressor blockCompressor = | |||
ZSTD_selectBlockCompressor(ctx->appliedParams.cParams.strategy, 1); | |||
U32* const repToConfirm = seqStorePtr->repToConfirm; | |||
U32 savedRep[ZSTD_REP_NUM]; | |||
U64 rollingHash = 0; | |||
const BYTE* lastHashed = NULL; | |||
size_t i, lastLiterals; | |||
/* Save seqStorePtr->rep and copy repToConfirm */ | |||
for (i = 0; i < ZSTD_REP_NUM; i++) { | |||
savedRep[i] = repToConfirm[i] = seqStorePtr->rep[i]; | |||
} | |||
/* Search Loop */ | |||
while (ip < ilimit) { /* < instead of <=, because (ip+1) */ | |||
size_t mLength; | |||
const U32 current = (U32)(ip-base); | |||
size_t forwardMatchLength = 0, backwardMatchLength = 0; | |||
ldmEntry_t* bestEntry = NULL; | |||
if (ip != istart) { | |||
rollingHash = ZSTD_ldm_updateHash(rollingHash, lastHashed[0], | |||
lastHashed[ldmParams.minMatchLength], | |||
hashPower); | |||
U32 const maxDist = 1U << params->windowLog; | |||
BYTE const* const istart = (BYTE const*)src; | |||
BYTE const* const iend = istart + srcSize; | |||
size_t const kMaxChunkSize = 1 << 20; | |||
size_t const nbChunks = (srcSize / kMaxChunkSize) + ((srcSize % kMaxChunkSize) != 0); | |||
size_t chunk; | |||
size_t leftoverSize = 0; | |||
assert(ZSTD_CHUNKSIZE_MAX >= kMaxChunkSize); | |||
/* Check that ZSTD_window_update() has been called for this chunk prior | |||
* to passing it to this function. | |||
*/ | |||
assert(ldmState->window.nextSrc >= (BYTE const*)src + srcSize); | |||
/* The input could be very large (in zstdmt), so it must be broken up into | |||
* chunks to enforce the maximum distance and handle overflow correction. | |||
*/ | |||
assert(sequences->pos <= sequences->size); | |||
assert(sequences->size <= sequences->capacity); | |||
for (chunk = 0; chunk < nbChunks && sequences->size < sequences->capacity; ++chunk) { | |||
BYTE const* const chunkStart = istart + chunk * kMaxChunkSize; | |||
size_t const remaining = (size_t)(iend - chunkStart); | |||
BYTE const *const chunkEnd = | |||
(remaining < kMaxChunkSize) ? iend : chunkStart + kMaxChunkSize; | |||
size_t const chunkSize = chunkEnd - chunkStart; | |||
size_t newLeftoverSize; | |||
size_t const prevSize = sequences->size; | |||
assert(chunkStart < iend); | |||
/* 1. Perform overflow correction if necessary. */ | |||
if (ZSTD_window_needOverflowCorrection(ldmState->window, chunkEnd)) { | |||
U32 const ldmHSize = 1U << params->hashLog; | |||
U32 const correction = ZSTD_window_correctOverflow( | |||
&ldmState->window, /* cycleLog */ 0, maxDist, chunkStart); | |||
ZSTD_ldm_reduceTable(ldmState->hashTable, ldmHSize, correction); | |||
/* invalidate dictionaries on overflow correction */ | |||
ldmState->loadedDictEnd = 0; | |||
} | |||
/* 2. We enforce the maximum offset allowed. | |||
* | |||
* kMaxChunkSize should be small enough that we don't lose too much of | |||
* the window through early invalidation. | |||
* TODO: * Test the chunk size. | |||
* * Try invalidation after the sequence generation and test the | |||
* the offset against maxDist directly. | |||
* | |||
* NOTE: Because of dictionaries + sequence splitting we MUST make sure | |||
* that any offset used is valid at the END of the sequence, since it may | |||
* be split into two sequences. This condition holds when using | |||
* ZSTD_window_enforceMaxDist(), but if we move to checking offsets | |||
* against maxDist directly, we'll have to carefully handle that case. | |||
*/ | |||
ZSTD_window_enforceMaxDist(&ldmState->window, chunkEnd, maxDist, &ldmState->loadedDictEnd, NULL); | |||
/* 3. Generate the sequences for the chunk, and get newLeftoverSize. */ | |||
newLeftoverSize = ZSTD_ldm_generateSequences_internal( | |||
ldmState, sequences, params, chunkStart, chunkSize); | |||
if (ZSTD_isError(newLeftoverSize)) | |||
return newLeftoverSize; | |||
/* 4. We add the leftover literals from previous iterations to the first | |||
* newly generated sequence, or add the `newLeftoverSize` if none are | |||
* generated. | |||
*/ | |||
/* Prepend the leftover literals from the last call */ | |||
if (prevSize < sequences->size) { | |||
sequences->seq[prevSize].litLength += (U32)leftoverSize; | |||
leftoverSize = newLeftoverSize; | |||
} else { | |||
rollingHash = ZSTD_ldm_getRollingHash(ip, ldmParams.minMatchLength); | |||
assert(newLeftoverSize == chunkSize); | |||
leftoverSize += chunkSize; | |||
} | |||
lastHashed = ip; | |||
} | |||
return 0; | |||
} | |||
if (ZSTD_ldm_getTag(rollingHash, hBits, ldmParams.hashEveryLog) != | |||
ldmTagMask) { | |||
/* Don't insert and don't look for a match */ | |||
ip++; | |||
continue; | |||
void ZSTD_ldm_skipSequences(rawSeqStore_t* rawSeqStore, size_t srcSize, U32 const minMatch) { | |||
while (srcSize > 0 && rawSeqStore->pos < rawSeqStore->size) { | |||
rawSeq* seq = rawSeqStore->seq + rawSeqStore->pos; | |||
if (srcSize <= seq->litLength) { | |||
/* Skip past srcSize literals */ | |||
seq->litLength -= (U32)srcSize; | |||
return; | |||
} | |||
/* Get the best entry and compute the match lengths */ | |||
{ | |||
ldmEntry_t* const bucket = | |||
ZSTD_ldm_getBucket(ldmState, | |||
ZSTD_ldm_getSmallHash(rollingHash, hBits), | |||
ldmParams); | |||
ldmEntry_t* cur; | |||
size_t bestMatchLength = 0; | |||
U32 const checksum = ZSTD_ldm_getChecksum(rollingHash, hBits); | |||
for (cur = bucket; cur < bucket + ldmBucketSize; ++cur) { | |||
const BYTE* const curMatchBase = | |||
cur->offset < dictLimit ? dictBase : base; | |||
const BYTE* const pMatch = curMatchBase + cur->offset; | |||
const BYTE* const matchEnd = | |||
cur->offset < dictLimit ? dictEnd : iend; | |||
const BYTE* const lowMatchPtr = | |||
cur->offset < dictLimit ? dictStart : lowPrefixPtr; | |||
size_t curForwardMatchLength, curBackwardMatchLength, | |||
curTotalMatchLength; | |||
if (cur->checksum != checksum || cur->offset <= lowestIndex) { | |||
continue; | |||
} | |||
curForwardMatchLength = ZSTD_count_2segments( | |||
ip, pMatch, iend, | |||
matchEnd, lowPrefixPtr); | |||
if (curForwardMatchLength < ldmParams.minMatchLength) { | |||
continue; | |||
} | |||
curBackwardMatchLength = ZSTD_ldm_countBackwardsMatch( | |||
ip, anchor, pMatch, lowMatchPtr); | |||
curTotalMatchLength = curForwardMatchLength + | |||
curBackwardMatchLength; | |||
if (curTotalMatchLength > bestMatchLength) { | |||
bestMatchLength = curTotalMatchLength; | |||
forwardMatchLength = curForwardMatchLength; | |||
backwardMatchLength = curBackwardMatchLength; | |||
bestEntry = cur; | |||
srcSize -= seq->litLength; | |||
seq->litLength = 0; | |||
if (srcSize < seq->matchLength) { | |||
/* Skip past the first srcSize of the match */ | |||
seq->matchLength -= (U32)srcSize; | |||
if (seq->matchLength < minMatch) { | |||
/* The match is too short, omit it */ | |||
if (rawSeqStore->pos + 1 < rawSeqStore->size) { | |||
seq[1].litLength += seq[0].matchLength; | |||
} | |||
rawSeqStore->pos++; | |||
} | |||
return; | |||
} | |||
srcSize -= seq->matchLength; | |||
seq->matchLength = 0; | |||
rawSeqStore->pos++; | |||
} | |||
} | |||
/* No match found -- continue searching */ | |||
if (bestEntry == NULL) { | |||
ZSTD_ldm_makeEntryAndInsertByTag(ldmState, rollingHash, hBits, | |||
(U32)(lastHashed - base), | |||
ldmParams); | |||
ip++; | |||
continue; | |||
/** | |||
* If the sequence length is longer than remaining then the sequence is split | |||
* between this block and the next. | |||
* | |||
* Returns the current sequence to handle, or if the rest of the block should | |||
* be literals, it returns a sequence with offset == 0. | |||
*/ | |||
static rawSeq maybeSplitSequence(rawSeqStore_t* rawSeqStore, | |||
U32 const remaining, U32 const minMatch) | |||
{ | |||
rawSeq sequence = rawSeqStore->seq[rawSeqStore->pos]; | |||
assert(sequence.offset > 0); | |||
/* Likely: No partial sequence */ | |||
if (remaining >= sequence.litLength + sequence.matchLength) { | |||
rawSeqStore->pos++; | |||
return sequence; | |||
} | |||
/* Cut the sequence short (offset == 0 ==> rest is literals). */ | |||
if (remaining <= sequence.litLength) { | |||
sequence.offset = 0; | |||
} else if (remaining < sequence.litLength + sequence.matchLength) { | |||
sequence.matchLength = remaining - sequence.litLength; | |||
if (sequence.matchLength < minMatch) { | |||
sequence.offset = 0; | |||
} | |||
} | |||
/* Skip past `remaining` bytes for the future sequences. */ | |||
ZSTD_ldm_skipSequences(rawSeqStore, remaining, minMatch); | |||
return sequence; | |||
} | |||
/* Match found */ | |||
mLength = forwardMatchLength + backwardMatchLength; | |||
ip -= backwardMatchLength; | |||
/* Call the block compressor on the remaining literals */ | |||
{ | |||
/* ip = current - backwardMatchLength | |||
* The match is at (bestEntry->offset - backwardMatchLength) */ | |||
U32 const matchIndex = bestEntry->offset; | |||
U32 const offset = current - matchIndex; | |||
/* Overwrite rep codes */ | |||
for (i = 0; i < ZSTD_REP_NUM; i++) | |||
seqStorePtr->rep[i] = repToConfirm[i]; | |||
/* Fill the hash table for the block compressor */ | |||
ZSTD_ldm_limitTableUpdate(ctx, anchor); | |||
ZSTD_ldm_fillFastTables(ctx, anchor); | |||
size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore, | |||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], | |||
void const* src, size_t srcSize) | |||
{ | |||
const ZSTD_compressionParameters* const cParams = &ms->cParams; | |||
unsigned const minMatch = cParams->minMatch; | |||
ZSTD_blockCompressor const blockCompressor = | |||
ZSTD_selectBlockCompressor(cParams->strategy, ZSTD_matchState_dictMode(ms)); | |||
/* Input bounds */ | |||
BYTE const* const istart = (BYTE const*)src; | |||
BYTE const* const iend = istart + srcSize; | |||
/* Input positions */ | |||
BYTE const* ip = istart; | |||
DEBUGLOG(5, "ZSTD_ldm_blockCompress: srcSize=%zu", srcSize); | |||
assert(rawSeqStore->pos <= rawSeqStore->size); | |||
assert(rawSeqStore->size <= rawSeqStore->capacity); | |||
/* Loop through each sequence and apply the block compressor to the lits */ | |||
while (rawSeqStore->pos < rawSeqStore->size && ip < iend) { | |||
/* maybeSplitSequence updates rawSeqStore->pos */ | |||
rawSeq const sequence = maybeSplitSequence(rawSeqStore, | |||
(U32)(iend - ip), minMatch); | |||
int i; | |||
/* End signal */ | |||
if (sequence.offset == 0) | |||
break; | |||
/* Call block compressor and get remaining literals */ | |||
lastLiterals = blockCompressor(ctx, anchor, ip - anchor); | |||
ctx->nextToUpdate = (U32)(ip - base); | |||
assert(ip + sequence.litLength + sequence.matchLength <= iend); | |||
/* Update repToConfirm with the new offset */ | |||
/* Fill tables for block compressor */ | |||
ZSTD_ldm_limitTableUpdate(ms, ip); | |||
ZSTD_ldm_fillFastTables(ms, ip); | |||
/* Run the block compressor */ | |||
DEBUGLOG(5, "pos %u : calling block compressor on segment of size %u", (unsigned)(ip-istart), sequence.litLength); | |||
{ | |||
size_t const newLitLength = | |||
blockCompressor(ms, seqStore, rep, ip, sequence.litLength); | |||
ip += sequence.litLength; | |||
/* Update the repcodes */ | |||
for (i = ZSTD_REP_NUM - 1; i > 0; i--) | |||
repToConfirm[i] = repToConfirm[i-1]; | |||
repToConfirm[0] = offset; | |||
/* Store the sequence with the leftover literals */ | |||
ZSTD_storeSeq(seqStorePtr, lastLiterals, ip - lastLiterals, | |||
offset + ZSTD_REP_MOVE, mLength - MINMATCH); | |||
} | |||
/* Insert the current entry into the hash table */ | |||
ZSTD_ldm_makeEntryAndInsertByTag(ldmState, rollingHash, hBits, | |||
(U32)(lastHashed - base), | |||
ldmParams); | |||
/* Fill the hash table from lastHashed+1 to ip+mLength */ | |||
assert(ip + backwardMatchLength == lastHashed); | |||
if (ip + mLength < ilimit) { | |||
rollingHash = ZSTD_ldm_fillLdmHashTable( | |||
ldmState, rollingHash, lastHashed, | |||
ip + mLength, base, hBits, | |||
ldmParams); | |||
lastHashed = ip + mLength - 1; | |||
} | |||
ip += mLength; | |||
anchor = ip; | |||
/* check immediate repcode */ | |||
while (ip < ilimit) { | |||
U32 const current2 = (U32)(ip-base); | |||
U32 const repIndex2 = current2 - repToConfirm[1]; | |||
const BYTE* repMatch2 = repIndex2 < dictLimit ? | |||
dictBase + repIndex2 : base + repIndex2; | |||
if ( (((U32)((dictLimit-1) - repIndex2) >= 3) & | |||
(repIndex2 > lowestIndex)) /* intentional overflow */ | |||
&& (MEM_read32(repMatch2) == MEM_read32(ip)) ) { | |||
const BYTE* const repEnd2 = repIndex2 < dictLimit ? | |||
dictEnd : iend; | |||
size_t const repLength2 = | |||
ZSTD_count_2segments(ip+4, repMatch2+4, iend, | |||
repEnd2, lowPrefixPtr) + 4; | |||
U32 tmpOffset = repToConfirm[1]; | |||
repToConfirm[1] = repToConfirm[0]; | |||
repToConfirm[0] = tmpOffset; | |||
ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, repLength2-MINMATCH); | |||
/* Fill the hash table from lastHashed+1 to ip+repLength2*/ | |||
if (ip + repLength2 < ilimit) { | |||
rollingHash = ZSTD_ldm_fillLdmHashTable( | |||
ldmState, rollingHash, lastHashed, | |||
ip + repLength2, base, hBits, | |||
ldmParams); | |||
lastHashed = ip + repLength2 - 1; | |||
} | |||
ip += repLength2; | |||
anchor = ip; | |||
continue; | |||
} | |||
break; | |||
rep[i] = rep[i-1]; | |||
rep[0] = sequence.offset; | |||
/* Store the sequence */ | |||
ZSTD_storeSeq(seqStore, newLitLength, ip - newLitLength, iend, | |||
sequence.offset + ZSTD_REP_MOVE, | |||
sequence.matchLength - MINMATCH); | |||
ip += sequence.matchLength; | |||
} | |||
} | |||
/* Overwrite rep */ | |||
for (i = 0; i < ZSTD_REP_NUM; i++) | |||
seqStorePtr->rep[i] = repToConfirm[i]; | |||
ZSTD_ldm_limitTableUpdate(ctx, anchor); | |||
ZSTD_ldm_fillFastTables(ctx, anchor); | |||
/* Call the block compressor one last time on the last literals */ | |||
lastLiterals = blockCompressor(ctx, anchor, iend - anchor); | |||
ctx->nextToUpdate = (U32)(iend - base); | |||
/* Restore seqStorePtr->rep */ | |||
for (i = 0; i < ZSTD_REP_NUM; i++) | |||
seqStorePtr->rep[i] = savedRep[i]; | |||
/* Return the last literals size */ | |||
return lastLiterals; | |||
} | |||
size_t ZSTD_compressBlock_ldm_extDict(ZSTD_CCtx* ctx, | |||
const void* src, size_t srcSize) | |||
{ | |||
return ZSTD_compressBlock_ldm_extDict_generic(ctx, src, srcSize); | |||
/* Fill the tables for the block compressor */ | |||
ZSTD_ldm_limitTableUpdate(ms, ip); | |||
ZSTD_ldm_fillFastTables(ms, ip); | |||
/* Compress the last literals */ | |||
return blockCompressor(ms, seqStore, rep, ip, iend - ip); | |||
} |
@@ -1,64 +1,107 @@ | |||
/* | |||
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc. | |||
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. | |||
* All rights reserved. | |||
* | |||
* This source code is licensed under both the BSD-style license (found in the | |||
* LICENSE file in the root directory of this source tree) and the GPLv2 (found | |||
* in the COPYING file in the root directory of this source tree). | |||
* You may select, at your option, one of the above-listed licenses. | |||
*/ | |||
#ifndef ZSTD_LDM_H | |||
#define ZSTD_LDM_H | |||
#include "zstd_compress.h" | |||
#if defined (__cplusplus) | |||
extern "C" { | |||
#endif | |||
#include "zstd_compress_internal.h" /* ldmParams_t, U32 */ | |||
#include "../zstd.h" /* ZSTD_CCtx, size_t */ | |||
/*-************************************* | |||
* Long distance matching | |||
***************************************/ | |||
#define ZSTD_LDM_WINDOW_LOG 27 | |||
#define ZSTD_LDM_HASHEVERYLOG_NOTSET 9999 | |||
#define ZSTD_LDM_DEFAULT_WINDOW_LOG ZSTD_WINDOWLOG_LIMIT_DEFAULT | |||
/** ZSTD_compressBlock_ldm_generic() : | |||
void ZSTD_ldm_fillHashTable( | |||
ldmState_t* state, const BYTE* ip, | |||
const BYTE* iend, ldmParams_t const* params); | |||
/** | |||
* ZSTD_ldm_generateSequences(): | |||
* | |||
* Generates the sequences using the long distance match finder. | |||
* Generates long range matching sequences in `sequences`, which parse a prefix | |||
* of the source. `sequences` must be large enough to store every sequence, | |||
* which can be checked with `ZSTD_ldm_getMaxNbSeq()`. | |||
* @returns 0 or an error code. | |||
* | |||
* This is a block compressor intended for long distance matching. | |||
* NOTE: The user must have called ZSTD_window_update() for all of the input | |||
* they have, even if they pass it to ZSTD_ldm_generateSequences() in chunks. | |||
* NOTE: This function returns an error if it runs out of space to store | |||
* sequences. | |||
*/ | |||
size_t ZSTD_ldm_generateSequences( | |||
ldmState_t* ldms, rawSeqStore_t* sequences, | |||
ldmParams_t const* params, void const* src, size_t srcSize); | |||
/** | |||
* ZSTD_ldm_blockCompress(): | |||
* | |||
* The function searches for matches of length at least | |||
* ldmParams.minMatchLength using a hash table in cctx->ldmState. | |||
* Matches can be at a distance of up to cParams.windowLog. | |||
* Compresses a block using the predefined sequences, along with a secondary | |||
* block compressor. The literals section of every sequence is passed to the | |||
* secondary block compressor, and those sequences are interspersed with the | |||
* predefined sequences. Returns the length of the last literals. | |||
* Updates `rawSeqStore.pos` to indicate how many sequences have been consumed. | |||
* `rawSeqStore.seq` may also be updated to split the last sequence between two | |||
* blocks. | |||
* @return The length of the last literals. | |||
* | |||
* Upon finding a match, the unmatched literals are compressed using a | |||
* ZSTD_blockCompressor (depending on the strategy in the compression | |||
* parameters), which stores the matched sequences. The "long distance" | |||
* match is then stored with the remaining literals from the | |||
* ZSTD_blockCompressor. */ | |||
size_t ZSTD_compressBlock_ldm(ZSTD_CCtx* cctx, const void* src, size_t srcSize); | |||
size_t ZSTD_compressBlock_ldm_extDict(ZSTD_CCtx* ctx, | |||
const void* src, size_t srcSize); | |||
* NOTE: The source must be at most the maximum block size, but the predefined | |||
* sequences can be any size, and may be longer than the block. In the case that | |||
* they are longer than the block, the last sequences may need to be split into | |||
* two. We handle that case correctly, and update `rawSeqStore` appropriately. | |||
* NOTE: This function does not return any errors. | |||
*/ | |||
size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore, | |||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], | |||
void const* src, size_t srcSize); | |||
/** ZSTD_ldm_initializeParameters() : | |||
* Initialize the long distance matching parameters to their default values. */ | |||
size_t ZSTD_ldm_initializeParameters(ldmParams_t* params, U32 enableLdm); | |||
/** | |||
* ZSTD_ldm_skipSequences(): | |||
* | |||
* Skip past `srcSize` bytes worth of sequences in `rawSeqStore`. | |||
* Avoids emitting matches less than `minMatch` bytes. | |||
* Must be called for data with is not passed to ZSTD_ldm_blockCompress(). | |||
*/ | |||
void ZSTD_ldm_skipSequences(rawSeqStore_t* rawSeqStore, size_t srcSize, | |||
U32 const minMatch); | |||
/** ZSTD_ldm_getTableSize() : | |||
* Estimate the space needed for long distance matching tables. */ | |||
size_t ZSTD_ldm_getTableSize(U32 hashLog, U32 bucketSizeLog); | |||
/** ZSTD_ldm_getTableSize() : | |||
* Return prime8bytes^(minMatchLength-1) */ | |||
U64 ZSTD_ldm_getHashPower(U32 minMatchLength); | |||
* Estimate the space needed for long distance matching tables or 0 if LDM is | |||
* disabled. | |||
*/ | |||
size_t ZSTD_ldm_getTableSize(ldmParams_t params); | |||
/** ZSTD_ldm_getSeqSpace() : | |||
* Return an upper bound on the number of sequences that can be produced by | |||
* the long distance matcher, or 0 if LDM is disabled. | |||
*/ | |||
size_t ZSTD_ldm_getMaxNbSeq(ldmParams_t params, size_t maxChunkSize); | |||
/** ZSTD_ldm_adjustParameters() : | |||
* If the params->hashEveryLog is not set, set it to its default value based on | |||
* If the params->hashRateLog is not set, set it to its default value based on | |||
* windowLog and params->hashLog. | |||
* | |||
* Ensures that params->bucketSizeLog is <= params->hashLog (setting it to | |||
* params->hashLog if it is not). */ | |||
void ZSTD_ldm_adjustParameters(ldmParams_t* params, U32 windowLog); | |||
* params->hashLog if it is not). | |||
* | |||
* Ensures that the minMatchLength >= targetLength during optimal parsing. | |||
*/ | |||
void ZSTD_ldm_adjustParameters(ldmParams_t* params, | |||
ZSTD_compressionParameters const* cParams); | |||
#if defined (__cplusplus) | |||
} |
@@ -1,5 +1,5 @@ | |||
/* | |||
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc. | |||
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. | |||
* All rights reserved. | |||
* | |||
* This source code is licensed under both the BSD-style license (found in the | |||
@@ -11,17 +11,43 @@ | |||
#ifndef ZSTD_OPT_H | |||
#define ZSTD_OPT_H | |||
#include "zstd_compress.h" | |||
#if defined (__cplusplus) | |||
extern "C" { | |||
#endif | |||
size_t ZSTD_compressBlock_btopt(ZSTD_CCtx* ctx, const void* src, size_t srcSize); | |||
size_t ZSTD_compressBlock_btultra(ZSTD_CCtx* ctx, const void* src, size_t srcSize); | |||
#include "zstd_compress_internal.h" | |||
/* used in ZSTD_loadDictionaryContent() */ | |||
void ZSTD_updateTree(ZSTD_matchState_t* ms, const BYTE* ip, const BYTE* iend); | |||
size_t ZSTD_compressBlock_btopt( | |||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], | |||
void const* src, size_t srcSize); | |||
size_t ZSTD_compressBlock_btultra( | |||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], | |||
void const* src, size_t srcSize); | |||
size_t ZSTD_compressBlock_btultra2( | |||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], | |||
void const* src, size_t srcSize); | |||
size_t ZSTD_compressBlock_btopt_dictMatchState( | |||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], | |||
void const* src, size_t srcSize); | |||
size_t ZSTD_compressBlock_btultra_dictMatchState( | |||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], | |||
void const* src, size_t srcSize); | |||
size_t ZSTD_compressBlock_btopt_extDict( | |||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], | |||
void const* src, size_t srcSize); | |||
size_t ZSTD_compressBlock_btultra_extDict( | |||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], | |||
void const* src, size_t srcSize); | |||
size_t ZSTD_compressBlock_btopt_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize); | |||
size_t ZSTD_compressBlock_btultra_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize); | |||
/* note : no btultra2 variant for extDict nor dictMatchState, | |||
* because btultra2 is not meant to work with dictionaries | |||
* and is only specific for the first block (no prefix) */ | |||
#if defined (__cplusplus) | |||
} |
@@ -1,134 +0,0 @@ | |||
/* | |||
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc. | |||
* All rights reserved. | |||
* | |||
* This source code is licensed under both the BSD-style license (found in the | |||
* LICENSE file in the root directory of this source tree) and the GPLv2 (found | |||
* in the COPYING file in the root directory of this source tree). | |||
* You may select, at your option, one of the above-listed licenses. | |||
*/ | |||
#ifndef ZSTDMT_COMPRESS_H | |||
#define ZSTDMT_COMPRESS_H | |||
#if defined (__cplusplus) | |||
extern "C" { | |||
#endif | |||
/* Note : This is an internal API. | |||
* Some methods are still exposed (ZSTDLIB_API), | |||
* because it used to be the only way to invoke MT compression. | |||
* Now, it's recommended to use ZSTD_compress_generic() instead. | |||
* These methods will stop being exposed in a future version */ | |||
/* === Dependencies === */ | |||
#include <stddef.h> /* size_t */ | |||
#ifndef ZSTD_STATIC_LINKING_ONLY | |||
#define ZSTD_STATIC_LINKING_ONLY /* ZSTD_parameters */ | |||
#endif | |||
#include "zstd.h" /* ZSTD_inBuffer, ZSTD_outBuffer, ZSTDLIB_API */ | |||
/* === Memory management === */ | |||
typedef struct ZSTDMT_CCtx_s ZSTDMT_CCtx; | |||
ZSTDLIB_API ZSTDMT_CCtx* ZSTDMT_createCCtx(unsigned nbThreads); | |||
ZSTDLIB_API ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced(unsigned nbThreads, | |||
ZSTD_customMem cMem); | |||
ZSTDLIB_API size_t ZSTDMT_freeCCtx(ZSTDMT_CCtx* mtctx); | |||
ZSTDLIB_API size_t ZSTDMT_sizeof_CCtx(ZSTDMT_CCtx* mtctx); | |||
/* === Simple buffer-to-buffer one-pass function === */ | |||
ZSTDLIB_API size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* mtctx, | |||
void* dst, size_t dstCapacity, | |||
const void* src, size_t srcSize, | |||
int compressionLevel); | |||
/* === Streaming functions === */ | |||
ZSTDLIB_API size_t ZSTDMT_initCStream(ZSTDMT_CCtx* mtctx, int compressionLevel); | |||
ZSTDLIB_API size_t ZSTDMT_resetCStream(ZSTDMT_CCtx* mtctx, unsigned long long pledgedSrcSize); /**< pledgedSrcSize is optional and can be zero == unknown */ | |||
ZSTDLIB_API size_t ZSTDMT_compressStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output, ZSTD_inBuffer* input); | |||
ZSTDLIB_API size_t ZSTDMT_flushStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output); /**< @return : 0 == all flushed; >0 : still some data to be flushed; or an error code (ZSTD_isError()) */ | |||
ZSTDLIB_API size_t ZSTDMT_endStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output); /**< @return : 0 == all flushed; >0 : still some data to be flushed; or an error code (ZSTD_isError()) */ | |||
/* === Advanced functions and parameters === */ | |||
#ifndef ZSTDMT_SECTION_SIZE_MIN | |||
# define ZSTDMT_SECTION_SIZE_MIN (1U << 20) /* 1 MB - Minimum size of each compression job */ | |||
#endif | |||
ZSTDLIB_API size_t ZSTDMT_compress_advanced(ZSTDMT_CCtx* mtctx, | |||
void* dst, size_t dstCapacity, | |||
const void* src, size_t srcSize, | |||
const ZSTD_CDict* cdict, | |||
ZSTD_parameters const params, | |||
unsigned overlapLog); | |||
ZSTDLIB_API size_t ZSTDMT_initCStream_advanced(ZSTDMT_CCtx* mtctx, | |||
const void* dict, size_t dictSize, /* dict can be released after init, a local copy is preserved within zcs */ | |||
ZSTD_parameters params, | |||
unsigned long long pledgedSrcSize); /* pledgedSrcSize is optional and can be zero == unknown */ | |||
ZSTDLIB_API size_t ZSTDMT_initCStream_usingCDict(ZSTDMT_CCtx* mtctx, | |||
const ZSTD_CDict* cdict, | |||
ZSTD_frameParameters fparams, | |||
unsigned long long pledgedSrcSize); /* note : zero means empty */ | |||
/* ZSTDMT_parameter : | |||
* List of parameters that can be set using ZSTDMT_setMTCtxParameter() */ | |||
typedef enum { | |||
ZSTDMT_p_sectionSize, /* size of input "section". Each section is compressed in parallel. 0 means default, which is dynamically determined within compression functions */ | |||
ZSTDMT_p_overlapSectionLog /* Log of overlapped section; 0 == no overlap, 6(default) == use 1/8th of window, >=9 == use full window */ | |||
} ZSTDMT_parameter; | |||
/* ZSTDMT_setMTCtxParameter() : | |||
* allow setting individual parameters, one at a time, among a list of enums defined in ZSTDMT_parameter. | |||
* The function must be called typically after ZSTD_createCCtx(). | |||
* Parameters not explicitly reset by ZSTDMT_init*() remain the same in consecutive compression sessions. | |||
* @return : 0, or an error code (which can be tested using ZSTD_isError()) */ | |||
ZSTDLIB_API size_t ZSTDMT_setMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter, unsigned value); | |||
/*! ZSTDMT_compressStream_generic() : | |||
* Combines ZSTDMT_compressStream() with ZSTDMT_flushStream() or ZSTDMT_endStream() | |||
* depending on flush directive. | |||
* @return : minimum amount of data still to be flushed | |||
* 0 if fully flushed | |||
* or an error code */ | |||
ZSTDLIB_API size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx, | |||
ZSTD_outBuffer* output, | |||
ZSTD_inBuffer* input, | |||
ZSTD_EndDirective endOp); | |||
/* === Private definitions; never ever use directly === */ | |||
size_t ZSTDMT_CCtxParam_setMTCtxParameter(ZSTD_CCtx_params* params, ZSTDMT_parameter parameter, unsigned value); | |||
size_t ZSTDMT_initializeCCtxParameters(ZSTD_CCtx_params* params, unsigned nbThreads); | |||
/*! ZSTDMT_initCStream_internal() : | |||
* Private use only. Init streaming operation. | |||
* expects params to be valid. | |||
* must receive dict, or cdict, or none, but not both. | |||
* @return : 0, or an error code */ | |||
size_t ZSTDMT_initCStream_internal(ZSTDMT_CCtx* zcs, | |||
const void* dict, size_t dictSize, ZSTD_dictMode_e dictMode, | |||
const ZSTD_CDict* cdict, | |||
ZSTD_CCtx_params params, unsigned long long pledgedSrcSize); | |||
#if defined (__cplusplus) | |||
} | |||
#endif | |||
#endif /* ZSTDMT_COMPRESS_H */ |
@@ -20,7 +20,6 @@ | |||
#include "libserver/protocol_internal.h" | |||
#include "unix-std.h" | |||
#include "contrib/zstd/zstd.h" | |||
#include "contrib/zstd/zdict.h" | |||
#ifdef HAVE_FETCH_H | |||
#include <fetch.h> | |||
@@ -391,7 +390,7 @@ rspamd_client_command (struct rspamd_client_connection *conn, | |||
return FALSE; | |||
} | |||
dict_id = ZDICT_getDictID (comp_dictionary, dict_len); | |||
dict_id = -1; | |||
if (dict_id == 0) { | |||
g_set_error (err, RCLIENT_ERROR, errno, |
@@ -38,7 +38,6 @@ | |||
#define ZSTD_STATIC_LINKING_ONLY | |||
#include "contrib/zstd/zstd.h" | |||
#include "contrib/zstd/zdict.h" | |||
#ifdef HAVE_OPENSSL | |||
#include <openssl/rand.h> | |||
@@ -2764,7 +2763,7 @@ rspamd_open_zstd_dictionary (const char *path) | |||
return NULL; | |||
} | |||
dict->id = ZDICT_getDictID (dict->dict, dict->size); | |||
dict->id = -1; | |||
if (dict->id == 0) { | |||
g_free (dict); |