6 years ago · 0050cedd31
--- a/contrib/zstd/CMakeLists.txt
+++ b/contrib/zstd/CMakeLists.txt
@@ -1,20 +1,27 @@
 SET(ZSTDSRC
 			cover.c
 			divsufsort.c
 			entropy_common.c
 			error_private.c
 			fse_compress.c
 			fse_decompress.c
 			huf_compress.c
 			huf_decompress.c
 			pool.c
 			threading.c
 			zdict.c
 			zstd_common.c
 			zstd_compress.c
 			zstd_decompress.c)
 		cover.c
 		divsufsort.c
 		entropy_common.c
 		error_private.c
 		fse_compress.c
 		fse_decompress.c
 		huf_compress.c
 		huf_decompress.c
 		pool.c
 		threading.c
 		zdict.c
 		zstd_common.c
 		zstd_compress.c
 		zstd_decompress.c
 		zstd_double_fast.c
 		zstd_fast.c
 		zstd_lazy.c
 		zstd_ldm.c
 		zstdmt_compress.c
 		zstd_opt.c)

 ADD_LIBRARY(rspamd-zstd STATIC ${ZSTDSRC})
 ADD_DEFINITIONS(-DZSTD_STATIC_LINKING_ONLY)

 IF(ENABLE_FULL_DEBUG MATCHES "OFF")
 if ("${CMAKE_C_COMPILER_ID}" STREQUAL "Clang" OR "${CMAKE_C_COMPILER_ID}" STREQUAL "GNU")
--- a/contrib/zstd/bitstream.h
+++ b/contrib/zstd/bitstream.h
@@ -58,7 +58,9 @@ extern "C" {
 #if defined(BIT_DEBUG) && (BIT_DEBUG>=1)
 #  include <assert.h>
 #else
 #  define assert(condition) ((void)0)
 #  ifndef assert
 #    define assert(condition) ((void)0)
 #  endif
 #endif


@@ -73,13 +75,14 @@ extern "C" {
 #define STREAM_ACCUMULATOR_MIN_64  57
 #define STREAM_ACCUMULATOR_MIN    ((U32)(MEM_32bits() ? STREAM_ACCUMULATOR_MIN_32 : STREAM_ACCUMULATOR_MIN_64))


 /*-******************************************
 *  bitStream encoding API (write forward)
 ********************************************/
 /* bitStream can mix input from multiple sources.
 *  A critical property of these streams is that they encode and decode in **reverse** direction.
 *  So the first bit sequence you add will be the last to be read, like a LIFO stack.
 */
 * A critical property of these streams is that they encode and decode in **reverse** direction.
 * So the first bit sequence you add will be the last to be read, like a LIFO stack.
 */
 typedef struct
 {
    size_t bitContainer;
@@ -166,33 +169,39 @@ MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits);
 ****************************************************************/
 MEM_STATIC unsigned BIT_highbit32 (register U32 val)
 {
    assert(val != 0);
    {
 #   if defined(_MSC_VER)   /* Visual */
    unsigned long r=0;
    _BitScanReverse ( &r, val );
    return (unsigned) r;
        unsigned long r=0;
        _BitScanReverse ( &r, val );
        return (unsigned) r;
 #   elif defined(__GNUC__) && (__GNUC__ >= 3)   /* Use GCC Intrinsic */
    return 31 - __builtin_clz (val);
        return 31 - __builtin_clz (val);
 #   else   /* Software version */
    static const unsigned DeBruijnClz[32] = { 0,  9,  1, 10, 13, 21,  2, 29,
                                             11, 14, 16, 18, 22, 25,  3, 30,
                                              8, 12, 20, 28, 15, 17, 24,  7,
                                             19, 27, 23,  6, 26,  5,  4, 31 };
    U32 v = val;
    v |= v >> 1;
    v |= v >> 2;
    v |= v >> 4;
    v |= v >> 8;
    v |= v >> 16;
    return DeBruijnClz[ (U32) (v * 0x07C4ACDDU) >> 27];
        static const unsigned DeBruijnClz[32] = { 0,  9,  1, 10, 13, 21,  2, 29,
                                                 11, 14, 16, 18, 22, 25,  3, 30,
                                                  8, 12, 20, 28, 15, 17, 24,  7,
                                                 19, 27, 23,  6, 26,  5,  4, 31 };
        U32 v = val;
        v |= v >> 1;
        v |= v >> 2;
        v |= v >> 4;
        v |= v >> 8;
        v |= v >> 16;
        return DeBruijnClz[ (U32) (v * 0x07C4ACDDU) >> 27];
 #   endif
    }
 }

 /*=====    Local Constants   =====*/
 static const unsigned BIT_mask[] = { 0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F,
                                    0xFF, 0x1FF, 0x3FF, 0x7FF, 0xFFF, 0x1FFF, 0x3FFF, 0x7FFF,
                                    0xFFFF, 0x1FFFF, 0x3FFFF, 0x7FFFF, 0xFFFFF, 0x1FFFFF, 0x3FFFFF, 0x7FFFFF,
                                    0xFFFFFF, 0x1FFFFFF, 0x3FFFFFF };   /* up to 26 bits */

 static const unsigned BIT_mask[] = {
    0,          1,         3,         7,         0xF,       0x1F,
    0x3F,       0x7F,      0xFF,      0x1FF,     0x3FF,     0x7FF,
    0xFFF,      0x1FFF,    0x3FFF,    0x7FFF,    0xFFFF,    0x1FFFF,
    0x3FFFF,    0x7FFFF,   0xFFFFF,   0x1FFFFF,  0x3FFFFF,  0x7FFFFF,
    0xFFFFFF,   0x1FFFFFF, 0x3FFFFFF, 0x7FFFFFF, 0xFFFFFFF, 0x1FFFFFFF,
    0x3FFFFFFF, 0x7FFFFFFF}; /* up to 31 bits */
 #define BIT_MASK_SIZE (sizeof(BIT_mask) / sizeof(BIT_mask[0]))

 /*-**************************************************************
 *  bitStream encoding
@@ -200,7 +209,7 @@ static const unsigned BIT_mask[] = { 0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F,
 /*! BIT_initCStream() :
 *  `dstCapacity` must be > sizeof(size_t)
 *  @return : 0 if success,
              otherwise an error code (can be tested using ERR_isError() ) */
 *            otherwise an error code (can be tested using ERR_isError()) */
 MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC,
                                  void* startPtr, size_t dstCapacity)
 {
@@ -214,11 +223,14 @@ MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC,
 }

 /*! BIT_addBits() :
    can add up to 26 bits into `bitC`.
    Does not check for register overflow ! */
 *  can add up to 31 bits into `bitC`.
 *  Note : does not check for register overflow ! */
 MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC,
                            size_t value, unsigned nbBits)
 {
    MEM_STATIC_ASSERT(BIT_MASK_SIZE == 32);
    assert(nbBits < BIT_MASK_SIZE);
    assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8);
    bitC->bitContainer |= (value & BIT_mask[nbBits]) << bitC->bitPos;
    bitC->bitPos += nbBits;
 }
@@ -229,6 +241,7 @@ MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC,
                                size_t value, unsigned nbBits)
 {
    assert((value>>nbBits) == 0);
    assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8);
    bitC->bitContainer |= value << bitC->bitPos;
    bitC->bitPos += nbBits;
 }
@@ -239,7 +252,7 @@ MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC,
 MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC)
 {
    size_t const nbBytes = bitC->bitPos >> 3;
    assert( bitC->bitPos <= (sizeof(bitC->bitContainer)*8) );
    assert(bitC->bitPos < sizeof(bitC->bitContainer) * 8);
    MEM_writeLEST(bitC->ptr, bitC->bitContainer);
    bitC->ptr += nbBytes;
    assert(bitC->ptr <= bitC->endPtr);
@@ -255,7 +268,7 @@ MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC)
 MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC)
 {
    size_t const nbBytes = bitC->bitPos >> 3;
    assert( bitC->bitPos <= (sizeof(bitC->bitContainer)*8) );
    assert(bitC->bitPos < sizeof(bitC->bitContainer) * 8);
    MEM_writeLEST(bitC->ptr, bitC->bitContainer);
    bitC->ptr += nbBytes;
    if (bitC->ptr > bitC->endPtr) bitC->ptr = bitC->endPtr;
@@ -265,7 +278,7 @@ MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC)

 /*! BIT_closeCStream() :
 *  @return : size of CStream, in bytes,
              or 0 if it could not fit into dstBuffer */
 *            or 0 if it could not fit into dstBuffer */
 MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC)
 {
    BIT_addBitsFast(bitC, 1, 1);   /* endMark */
@@ -276,14 +289,14 @@ MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC)


 /*-********************************************************
 * bitStream decoding
 *  bitStream decoding
 **********************************************************/
 /*! BIT_initDStream() :
 *   Initialize a BIT_DStream_t.
 *   `bitD` : a pointer to an already allocated BIT_DStream_t structure.
 *   `srcSize` must be the *exact* size of the bitStream, in bytes.
 *   @return : size of stream (== srcSize) or an errorCode if a problem is detected
 */
 *  Initialize a BIT_DStream_t.
 * `bitD` : a pointer to an already allocated BIT_DStream_t structure.
 * `srcSize` must be the *exact* size of the bitStream, in bytes.
 * @return : size of stream (== srcSize), or an errorCode if a problem is detected
 */
 MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize)
 {
    if (srcSize < 1) { memset(bitD, 0, sizeof(*bitD)); return ERROR(srcSize_wrong); }
@@ -302,29 +315,30 @@ MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, si
        bitD->bitContainer = *(const BYTE*)(bitD->start);
        switch(srcSize)
        {
 	    case 7: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[6]) << (sizeof(bitD->bitContainer)*8 - 16);
 	            /* fall-through */
        case 7: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[6]) << (sizeof(bitD->bitContainer)*8 - 16);
                /* fall-through */

 	    case 6: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[5]) << (sizeof(bitD->bitContainer)*8 - 24);
 	            /* fall-through */
        case 6: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[5]) << (sizeof(bitD->bitContainer)*8 - 24);
                /* fall-through */

 	    case 5: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[4]) << (sizeof(bitD->bitContainer)*8 - 32);
 	            /* fall-through */
        case 5: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[4]) << (sizeof(bitD->bitContainer)*8 - 32);
                /* fall-through */

 	    case 4: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[3]) << 24;
 	            /* fall-through */
        case 4: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[3]) << 24;
                /* fall-through */

 	    case 3: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[2]) << 16;
 	            /* fall-through */
        case 3: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[2]) << 16;
                /* fall-through */

 	    case 2: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[1]) <<  8;
 	            /* fall-through */
        case 2: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[1]) <<  8;
                /* fall-through */

            default: break;
        default: break;
        }
        {   BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1];
            bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0;
            if (lastByte == 0) return ERROR(corruption_detected);  /* endMark not present */
        }
        { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1];
          bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0;
          if (lastByte == 0) return ERROR(GENERIC); /* endMark not present */ }
        bitD->bitsConsumed += (U32)(sizeof(bitD->bitContainer) - srcSize)*8;
    }

@@ -346,12 +360,14 @@ MEM_STATIC size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 co
 #  endif
        return _bextr_u32(bitContainer, start, nbBits);
 #else
    assert(nbBits < BIT_MASK_SIZE);
    return (bitContainer >> start) & BIT_mask[nbBits];
 #endif
 }

 MEM_STATIC size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
 {
    assert(nbBits < BIT_MASK_SIZE);
    return bitContainer & BIT_mask[nbBits];
 }

@@ -360,9 +376,8 @@ MEM_STATIC size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
 *  local register is not modified.
 *  On 32-bits, maxNbBits==24.
 *  On 64-bits, maxNbBits==56.
 *  @return : value extracted
 */
 MEM_STATIC size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits)
 * @return : value extracted */
 MEM_STATIC size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits)
 {
 #if defined(__BMI__) && defined(__GNUC__)   /* experimental; fails if bitD->bitsConsumed + nbBits > sizeof(bitD->bitContainer)*8 */
    return BIT_getMiddleBits(bitD->bitContainer, (sizeof(bitD->bitContainer)*8) - bitD->bitsConsumed - nbBits, nbBits);
@@ -389,8 +404,7 @@ MEM_STATIC void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)
 /*! BIT_readBits() :
 *  Read (consume) next n bits from local register and update.
 *  Pay attention to not read more than nbBits contained into local register.
 *  @return : extracted value.
 */
 * @return : extracted value. */
 MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, U32 nbBits)
 {
    size_t const value = BIT_lookBits(bitD, nbBits);
@@ -399,7 +413,7 @@ MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, U32 nbBits)
 }

 /*! BIT_readBitsFast() :
 *   unsafe version; only works only if nbBits >= 1 */
 *  unsafe version; only works only if nbBits >= 1 */
 MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, U32 nbBits)
 {
    size_t const value = BIT_lookBitsFast(bitD, nbBits);
@@ -409,10 +423,10 @@ MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, U32 nbBits)
 }

 /*! BIT_reloadDStream() :
 *   Refill `bitD` from buffer previously set in BIT_initDStream() .
 *   This function is safe, it guarantees it will not read beyond src buffer.
 *   @return : status of `BIT_DStream_t` internal register.
              if status == BIT_DStream_unfinished, internal register is filled with >= (sizeof(bitD->bitContainer)*8 - 7) bits */
 *  Refill `bitD` from buffer previously set in BIT_initDStream() .
 *  This function is safe, it guarantees it will not read beyond src buffer.
 * @return : status of `BIT_DStream_t` internal register.
 *           when status == BIT_DStream_unfinished, internal register is filled with at least 25 or 57 bits */
 MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD)
 {
    if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8))  /* overflow detected, like end of stream */
@@ -443,8 +457,8 @@ MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD)
 }

 /*! BIT_endOfDStream() :
 *   @return Tells if DStream has exactly reached its end (all bits consumed).
 */
 * @return : 1 if DStream has _exactly_ reached its end (all bits consumed).
 */
 MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* DStream)
 {
    return ((DStream->ptr == DStream->start) && (DStream->bitsConsumed == sizeof(DStream->bitContainer)*8));
--- a/contrib/zstd/compiler.h
+++ b/contrib/zstd/compiler.h
@@ -0,0 +1,86 @@
 /*
 * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
 * All rights reserved.
 *
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 * You may select, at your option, one of the above-listed licenses.
 */

 #ifndef ZSTD_COMPILER_H
 #define ZSTD_COMPILER_H

 /*-*******************************************************
 *  Compiler specifics
 *********************************************************/
 /* force inlining */
 #if defined (__GNUC__) || defined(__cplusplus) || defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* C99 */
 #  define INLINE_KEYWORD inline
 #else
 #  define INLINE_KEYWORD
 #endif

 #if defined(__GNUC__)
 #  define FORCE_INLINE_ATTR __attribute__((always_inline))
 #elif defined(_MSC_VER)
 #  define FORCE_INLINE_ATTR __forceinline
 #else
 #  define FORCE_INLINE_ATTR
 #endif

 /**
 * FORCE_INLINE_TEMPLATE is used to define C "templates", which take constant
 * parameters. They must be inlined for the compiler to elimininate the constant
 * branches.
 */
 #define FORCE_INLINE_TEMPLATE static INLINE_KEYWORD FORCE_INLINE_ATTR
 /**
 * HINT_INLINE is used to help the compiler generate better code. It is *not*
 * used for "templates", so it can be tweaked based on the compilers
 * performance.
 *
 * gcc-4.8 and gcc-4.9 have been shown to benefit from leaving off the
 * always_inline attribute.
 *
 * clang up to 5.0.0 (trunk) benefit tremendously from the always_inline
 * attribute.
 */
 #if !defined(__clang__) && defined(__GNUC__) && __GNUC__ >= 4 && __GNUC_MINOR__ >= 8 && __GNUC__ < 5
 #  define HINT_INLINE static INLINE_KEYWORD
 #else
 #  define HINT_INLINE static INLINE_KEYWORD FORCE_INLINE_ATTR
 #endif

 /* force no inlining */
 #ifdef _MSC_VER
 #  define FORCE_NOINLINE static __declspec(noinline)
 #else
 #  ifdef __GNUC__
 #    define FORCE_NOINLINE static __attribute__((__noinline__))
 #  else
 #    define FORCE_NOINLINE static
 #  endif
 #endif

 /* prefetch */
 #if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86))  /* _mm_prefetch() is not defined outside of x86/x64 */
 #  include <mmintrin.h>   /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */
 #  define PREFETCH(ptr)   _mm_prefetch((const char*)ptr, _MM_HINT_T0)
 #elif defined(__GNUC__)
 #  define PREFETCH(ptr)   __builtin_prefetch(ptr, 0, 0)
 #else
 #  define PREFETCH(ptr)   /* disabled */
 #endif

 /* disable warnings */
 #ifdef _MSC_VER    /* Visual Studio */
 #  include <intrin.h>                    /* For Visual 2005 */
 #  pragma warning(disable : 4100)        /* disable: C4100: unreferenced formal parameter */
 #  pragma warning(disable : 4127)        /* disable: C4127: conditional expression is constant */
 #  pragma warning(disable : 4204)        /* disable: C4204: non-constant aggregate initializer */
 #  pragma warning(disable : 4214)        /* disable: C4214: non-int bitfields */
 #  pragma warning(disable : 4324)        /* disable: C4324: padded structure */
 #endif

 #endif /* ZSTD_COMPILER_H */
--- a/contrib/zstd/cover.c
+++ b/contrib/zstd/cover.c
@@ -1,10 +1,11 @@
 /**
 /*
 * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
 * All rights reserved.
 *
 * This source code is licensed under the BSD-style license found in the
 * LICENSE file in the root directory of this source tree. An additional grant
 * of patent rights can be found in the PATENTS file in the same directory.
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 * You may select, at your option, one of the above-listed licenses.
 */

 /* *****************************************************************************
@@ -382,7 +383,7 @@ static void COVER_group(COVER_ctx_t *ctx, const void *group,
 typedef struct {
  U32 begin;
  U32 end;
  double score;
  U32 score;
 } COVER_segment_t;

 /**
@@ -398,7 +399,8 @@ typedef struct {
 */
 static COVER_segment_t COVER_selectSegment(const COVER_ctx_t *ctx, U32 *freqs,
                                           COVER_map_t *activeDmers, U32 begin,
                                           U32 end, COVER_params_t parameters) {
                                           U32 end,
                                           ZDICT_cover_params_t parameters) {
  /* Constants */
  const U32 k = parameters.k;
  const U32 d = parameters.d;
@@ -437,7 +439,7 @@ static COVER_segment_t COVER_selectSegment(const COVER_ctx_t *ctx, U32 *freqs,
      U32 *delDmerOcc = COVER_map_at(activeDmers, delDmer);
      activeSegment.begin += 1;
      *delDmerOcc -= 1;
      /* If this is the last occurrence of the dmer, subtract its score */
      /* If this is the last occurence of the dmer, subtract its score */
      if (*delDmerOcc == 0) {
        COVER_map_remove(activeDmers, delDmer);
        activeSegment.score -= freqs[delDmer];
@@ -478,11 +480,16 @@ static COVER_segment_t COVER_selectSegment(const COVER_ctx_t *ctx, U32 *freqs,
 * Check the validity of the parameters.
 * Returns non-zero if the parameters are valid and 0 otherwise.
 */
 static int COVER_checkParameters(COVER_params_t parameters) {
 static int COVER_checkParameters(ZDICT_cover_params_t parameters,
                                 size_t maxDictSize) {
  /* k and d are required parameters */
  if (parameters.d == 0 || parameters.k == 0) {
    return 0;
  }
  /* k <= maxDictSize */
  if (parameters.k > maxDictSize) {
    return 0;
  }
  /* d <= k */
  if (parameters.d > parameters.k) {
    return 0;
@@ -600,7 +607,7 @@ static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
 static size_t COVER_buildDictionary(const COVER_ctx_t *ctx, U32 *freqs,
                                    COVER_map_t *activeDmers, void *dictBuffer,
                                    size_t dictBufferCapacity,
                                    COVER_params_t parameters) {
                                    ZDICT_cover_params_t parameters) {
  BYTE *const dict = (BYTE *)dictBuffer;
  size_t tail = dictBufferCapacity;
  /* Divide the data up into epochs of equal size.
@@ -621,9 +628,13 @@ static size_t COVER_buildDictionary(const COVER_ctx_t *ctx, U32 *freqs,
    /* Select a segment */
    COVER_segment_t segment = COVER_selectSegment(
        ctx, freqs, activeDmers, epochBegin, epochEnd, parameters);
    /* Trim the segment if necessary and if it is empty then we are done */
    /* If the segment covers no dmers, then we are out of content */
    if (segment.score == 0) {
      break;
    }
    /* Trim the segment if necessary and if it is too small then we are done */
    segmentSize = MIN(segment.end - segment.begin + parameters.d - 1, tail);
    if (segmentSize == 0) {
    if (segmentSize < parameters.d) {
      break;
    }
    /* We fill the dictionary from the back to allow the best segments to be
@@ -639,27 +650,15 @@ static size_t COVER_buildDictionary(const COVER_ctx_t *ctx, U32 *freqs,
  return tail;
 }

 /**
 * Translate from COVER_params_t to ZDICT_params_t required for finalizing the
 * dictionary.
 */
 static ZDICT_params_t COVER_translateParams(COVER_params_t parameters) {
  ZDICT_params_t zdictParams;
  memset(&zdictParams, 0, sizeof(zdictParams));
  zdictParams.notificationLevel = 1;
  zdictParams.dictID = parameters.dictID;
  zdictParams.compressionLevel = parameters.compressionLevel;
  return zdictParams;
 }

 ZDICTLIB_API size_t COVER_trainFromBuffer(
 ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover(
    void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer,
    const size_t *samplesSizes, unsigned nbSamples, COVER_params_t parameters) {
    const size_t *samplesSizes, unsigned nbSamples,
    ZDICT_cover_params_t parameters) {
  BYTE *const dict = (BYTE *)dictBuffer;
  COVER_ctx_t ctx;
  COVER_map_t activeDmers;
  /* Checks */
  if (!COVER_checkParameters(parameters)) {
  if (!COVER_checkParameters(parameters, dictBufferCapacity)) {
    DISPLAYLEVEL(1, "Cover parameters incorrect\n");
    return ERROR(GENERIC);
  }
@@ -673,7 +672,7 @@ ZDICTLIB_API size_t COVER_trainFromBuffer(
    return ERROR(dstSize_tooSmall);
  }
  /* Initialize global data */
  g_displayLevel = parameters.notificationLevel;
  g_displayLevel = parameters.zParams.notificationLevel;
  /* Initialize context and activeDmers */
  if (!COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples,
                      parameters.d)) {
@@ -690,10 +689,9 @@ ZDICTLIB_API size_t COVER_trainFromBuffer(
    const size_t tail =
        COVER_buildDictionary(&ctx, ctx.freqs, &activeDmers, dictBuffer,
                              dictBufferCapacity, parameters);
    ZDICT_params_t zdictParams = COVER_translateParams(parameters);
    const size_t dictionarySize = ZDICT_finalizeDictionary(
        dict, dictBufferCapacity, dict + tail, dictBufferCapacity - tail,
        samplesBuffer, samplesSizes, nbSamples, zdictParams);
        samplesBuffer, samplesSizes, nbSamples, parameters.zParams);
    if (!ZSTD_isError(dictionarySize)) {
      DISPLAYLEVEL(2, "Constructed dictionary of size %u\n",
                   (U32)dictionarySize);
@@ -718,7 +716,7 @@ typedef struct COVER_best_s {
  size_t liveJobs;
  void *dict;
  size_t dictSize;
  COVER_params_t parameters;
  ZDICT_cover_params_t parameters;
  size_t compressedSize;
 } COVER_best_t;

@@ -726,11 +724,9 @@ typedef struct COVER_best_s {
 * Initialize the `COVER_best_t`.
 */
 static void COVER_best_init(COVER_best_t *best) {
  if (!best) {
    return;
  }
  pthread_mutex_init(&best->mutex, NULL);
  pthread_cond_init(&best->cond, NULL);
  if (best==NULL) return; /* compatible with init on NULL */
  (void)pthread_mutex_init(&best->mutex, NULL);
  (void)pthread_cond_init(&best->cond, NULL);
  best->liveJobs = 0;
  best->dict = NULL;
  best->dictSize = 0;
@@ -786,7 +782,7 @@ static void COVER_best_start(COVER_best_t *best) {
 * If this dictionary is the best so far save it and its parameters.
 */
 static void COVER_best_finish(COVER_best_t *best, size_t compressedSize,
                              COVER_params_t parameters, void *dict,
                              ZDICT_cover_params_t parameters, void *dict,
                              size_t dictSize) {
  if (!best) {
    return;
@@ -830,11 +826,11 @@ typedef struct COVER_tryParameters_data_s {
  const COVER_ctx_t *ctx;
  COVER_best_t *best;
  size_t dictBufferCapacity;
  COVER_params_t parameters;
  ZDICT_cover_params_t parameters;
 } COVER_tryParameters_data_t;

 /**
 * Tries a set of parameters and updates the COVER_best_t with the results.
 * Tries a set of parameters and upates the COVER_best_t with the results.
 * This function is thread safe if zstd is compiled with multithreaded support.
 * It takes its parameters as an *OWNING* opaque pointer to support threading.
 */
@@ -842,7 +838,7 @@ static void COVER_tryParameters(void *opaque) {
  /* Save parameters as local variables */
  COVER_tryParameters_data_t *const data = (COVER_tryParameters_data_t *)opaque;
  const COVER_ctx_t *const ctx = data->ctx;
  const COVER_params_t parameters = data->parameters;
  const ZDICT_cover_params_t parameters = data->parameters;
  size_t dictBufferCapacity = data->dictBufferCapacity;
  size_t totalCompressedSize = ERROR(GENERIC);
  /* Allocate space for hash table, dict, and freqs */
@@ -863,10 +859,10 @@ static void COVER_tryParameters(void *opaque) {
  {
    const size_t tail = COVER_buildDictionary(ctx, freqs, &activeDmers, dict,
                                              dictBufferCapacity, parameters);
    const ZDICT_params_t zdictParams = COVER_translateParams(parameters);
    dictBufferCapacity = ZDICT_finalizeDictionary(
        dict, dictBufferCapacity, dict + tail, dictBufferCapacity - tail,
        ctx->samples, ctx->samplesSizes, (unsigned)ctx->nbSamples, zdictParams);
        ctx->samples, ctx->samplesSizes, (unsigned)ctx->nbSamples,
        parameters.zParams);
    if (ZDICT_isError(dictBufferCapacity)) {
      DISPLAYLEVEL(1, "Failed to finalize dictionary\n");
      goto _cleanup;
@@ -892,13 +888,13 @@ static void COVER_tryParameters(void *opaque) {
    }
    /* Create the cctx and cdict */
    cctx = ZSTD_createCCtx();
    cdict =
        ZSTD_createCDict(dict, dictBufferCapacity, parameters.compressionLevel);
    cdict = ZSTD_createCDict(dict, dictBufferCapacity,
                             parameters.zParams.compressionLevel);
    if (!dst || !cctx || !cdict) {
      goto _compressCleanup;
    }
    /* Compress each sample and sum their sizes (or error) */
    totalCompressedSize = 0;
    totalCompressedSize = dictBufferCapacity;
    for (i = 0; i < ctx->nbSamples; ++i) {
      const size_t size = ZSTD_compress_usingCDict(
          cctx, dst, dstCapacity, ctx->samples + ctx->offsets[i],
@@ -930,12 +926,10 @@ _cleanup:
  }
 }

 ZDICTLIB_API size_t COVER_optimizeTrainFromBuffer(void *dictBuffer,
                                                  size_t dictBufferCapacity,
                                                  const void *samplesBuffer,
                                                  const size_t *samplesSizes,
                                                  unsigned nbSamples,
                                                  COVER_params_t *parameters) {
 ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
    void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer,
    const size_t *samplesSizes, unsigned nbSamples,
    ZDICT_cover_params_t *parameters) {
  /* constants */
  const unsigned nbThreads = parameters->nbThreads;
  const unsigned kMinD = parameters->d == 0 ? 6 : parameters->d;
@@ -947,7 +941,7 @@ ZDICTLIB_API size_t COVER_optimizeTrainFromBuffer(void *dictBuffer,
  const unsigned kIterations =
      (1 + (kMaxD - kMinD) / 2) * (1 + (kMaxK - kMinK) / kStepSize);
  /* Local variables */
  const int displayLevel = parameters->notificationLevel;
  const int displayLevel = parameters->zParams.notificationLevel;
  unsigned iteration = 1;
  unsigned d;
  unsigned k;
@@ -976,7 +970,7 @@ ZDICTLIB_API size_t COVER_optimizeTrainFromBuffer(void *dictBuffer,
  /* Initialization */
  COVER_best_init(&best);
  /* Turn down global display level to clean up display at level 2 and below */
  g_displayLevel = parameters->notificationLevel - 1;
  g_displayLevel = displayLevel == 0 ? 0 : displayLevel - 1;
  /* Loop through d first because each new value needs a new context */
  LOCALDISPLAYLEVEL(displayLevel, 2, "Trying %u different sets of parameters\n",
                    kIterations);
@@ -1010,8 +1004,9 @@ ZDICTLIB_API size_t COVER_optimizeTrainFromBuffer(void *dictBuffer,
      data->parameters.k = k;
      data->parameters.d = d;
      data->parameters.steps = kSteps;
      data->parameters.zParams.notificationLevel = g_displayLevel;
      /* Check the parameters */
      if (!COVER_checkParameters(data->parameters)) {
      if (!COVER_checkParameters(data->parameters, dictBufferCapacity)) {
        DISPLAYLEVEL(1, "Cover parameters incorrect\n");
        free(data);
        continue;
--- a/contrib/zstd/error_private.c
+++ b/contrib/zstd/error_private.c
@@ -1,10 +1,11 @@
 /**
 /*
 * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
 * All rights reserved.
 *
 * This source code is licensed under the BSD-style license found in the
 * LICENSE file in the root directory of this source tree. An additional grant
 * of patent rights can be found in the PATENTS file in the same directory.
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 * You may select, at your option, one of the above-listed licenses.
 */

 /* The purpose of this file is to have a single list of error strings embedded in binary */
@@ -20,18 +21,17 @@ const char* ERR_getErrorString(ERR_enum code)
    case PREFIX(GENERIC):  return "Error (generic)";
    case PREFIX(prefix_unknown): return "Unknown frame descriptor";
    case PREFIX(version_unsupported): return "Version not supported";
    case PREFIX(parameter_unknown): return "Unknown parameter type";
    case PREFIX(frameParameter_unsupported): return "Unsupported frame parameter";
    case PREFIX(frameParameter_unsupportedBy32bits): return "Frame parameter unsupported in 32-bits mode";
    case PREFIX(frameParameter_windowTooLarge): return "Frame requires too much memory for decoding";
    case PREFIX(compressionParameter_unsupported): return "Compression parameter is out of bound";
    case PREFIX(corruption_detected): return "Corrupted block detected";
    case PREFIX(checksum_wrong): return "Restored data doesn't match checksum";
    case PREFIX(parameter_unsupported): return "Unsupported parameter";
    case PREFIX(parameter_outOfBound): return "Parameter is out of bound";
    case PREFIX(init_missing): return "Context should be init first";
    case PREFIX(memory_allocation): return "Allocation error : not enough memory";
    case PREFIX(stage_wrong): return "Operation not authorized at current processing stage";
    case PREFIX(dstSize_tooSmall): return "Destination buffer is too small";
    case PREFIX(srcSize_wrong): return "Src size is incorrect";
    case PREFIX(corruption_detected): return "Corrupted block detected";
    case PREFIX(checksum_wrong): return "Restored data doesn't match checksum";
    case PREFIX(tableLog_tooLarge): return "tableLog requires too much memory : unsupported";
    case PREFIX(maxSymbolValue_tooLarge): return "Unsupported max Symbol Value : too large";
    case PREFIX(maxSymbolValue_tooSmall): return "Specified maxSymbolValue is too small";
--- a/contrib/zstd/error_private.h
+++ b/contrib/zstd/error_private.h
@@ -1,10 +1,11 @@
 /**
 /*
 * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
 * All rights reserved.
 *
 * This source code is licensed under the BSD-style license found in the
 * LICENSE file in the root directory of this source tree. An additional grant
 * of patent rights can be found in the PATENTS file in the same directory.
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 * You may select, at your option, one of the above-listed licenses.
 */

 /* Note : this module is expected to remain private, do not expose it */
@@ -48,10 +49,9 @@ typedef ZSTD_ErrorCode ERR_enum;
 /*-****************************************
 *  Error codes handling
 ******************************************/
 #ifdef ERROR
 #  undef ERROR   /* reported already defined on VS 2015 (Rich Geldreich) */
 #endif
 #define ERROR(name) ((size_t)-PREFIX(name))
 #undef ERROR   /* reported already defined on VS 2015 (Rich Geldreich) */
 #define ERROR(name) ZSTD_ERROR(name)
 #define ZSTD_ERROR(name) ((size_t)-PREFIX(name))

 ERR_STATIC unsigned ERR_isError(size_t code) { return (code > ERROR(maxCode)); }

--- a/contrib/zstd/fse.h
+++ b/contrib/zstd/fse.h
@@ -31,13 +31,14 @@
   You can contact the author at :
   - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
 ****************************************************************** */
 #ifndef FSE_H
 #define FSE_H

 #if defined (__cplusplus)
 extern "C" {
 #endif

 #ifndef FSE_H
 #define FSE_H


 /*-*****************************************
 *  Dependencies
@@ -297,8 +298,10 @@ FSE_decompress_usingDTable() result will tell how many bytes were regenerated (<
 If there is an error, the function will return an error code, which can be tested using FSE_isError(). (ex: dst buffer too small)
 */

 #endif  /* FSE_H */

 #ifdef FSE_STATIC_LINKING_ONLY
 #if defined(FSE_STATIC_LINKING_ONLY) && !defined(FSE_H_FSE_STATIC_LINKING_ONLY)
 #define FSE_H_FSE_STATIC_LINKING_ONLY

 /* *** Dependency *** */
 #include "bitstream.h"
@@ -381,6 +384,11 @@ size_t FSE_buildDTable_rle (FSE_DTable* dt, unsigned char symbolValue);
 size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, FSE_DTable* workSpace, unsigned maxLog);
 /**< same as FSE_decompress(), using an externally allocated `workSpace` produced with `FSE_DTABLE_SIZE_U32(maxLog)` */

 typedef enum {
   FSE_repeat_none,  /**< Cannot use the previous table */
   FSE_repeat_check, /**< Can use the previous table but it must be checked */
   FSE_repeat_valid  /**< Can use the previous table and it is asumed to be valid */
 } FSE_repeat;

 /* *****************************************
 *  FSE symbol compression API
@@ -694,5 +702,3 @@ MEM_STATIC unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr)
 #if defined (__cplusplus)
 }
 #endif

 #endif  /* FSE_H */
--- a/contrib/zstd/fse_compress.c
+++ b/contrib/zstd/fse_compress.c
@@ -32,27 +32,6 @@
    - Public forum : https://groups.google.com/forum/#!forum/lz4c
 ****************************************************************** */

 /* **************************************************************
 *  Compiler specifics
 ****************************************************************/
 #ifdef _MSC_VER    /* Visual Studio */
 #  define FORCE_INLINE static __forceinline
 #  include <intrin.h>                    /* For Visual 2005 */
 #  pragma warning(disable : 4127)        /* disable: C4127: conditional expression is constant */
 #  pragma warning(disable : 4214)        /* disable: C4214: non-int bitfields */
 #else
 #  if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* C99 */
 #    ifdef __GNUC__
 #      define FORCE_INLINE static inline __attribute__((always_inline))
 #    else
 #      define FORCE_INLINE static inline
 #    endif
 #  else
 #    define FORCE_INLINE static
 #  endif /* __STDC_VERSION__ */
 #endif


 /* **************************************************************
 *  Includes
 ****************************************************************/
@@ -60,13 +39,16 @@
 #include <string.h>     /* memcpy, memset */
 #include <stdio.h>      /* printf (debug) */
 #include "bitstream.h"
 #include "compiler.h"
 #define FSE_STATIC_LINKING_ONLY
 #include "fse.h"
 #include "error_private.h"


 /* **************************************************************
 *  Error Management
 ****************************************************************/
 #define FSE_isError ERR_isError
 #define FSE_STATIC_ASSERT(c) { enum { FSE_static_assert = 1/(int)(!!(c)) }; }   /* use only *after* variable declarations */


@@ -479,6 +461,7 @@ static unsigned FSE_minTableLog(size_t srcSize, unsigned maxSymbolValue)
    U32 minBitsSrc = BIT_highbit32((U32)(srcSize - 1)) + 1;
    U32 minBitsSymbols = BIT_highbit32(maxSymbolValue) + 2;
    U32 minBits = minBitsSrc < minBitsSymbols ? minBitsSrc : minBitsSymbols;
    assert(srcSize > 1); /* Not supported, RLE should be used instead */
    return minBits;
 }

@@ -487,6 +470,7 @@ unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsi
    U32 maxBitsSrc = BIT_highbit32((U32)(srcSize - 1)) - minus;
    U32 tableLog = maxTableLog;
    U32 minBits = FSE_minTableLog(srcSize, maxSymbolValue);
    assert(srcSize > 1); /* Not supported, RLE should be used instead */
    if (tableLog==0) tableLog = FSE_DEFAULT_TABLELOG;
    if (maxBitsSrc < tableLog) tableLog = maxBitsSrc;   /* Accuracy can be reduced */
    if (minBits > tableLog) tableLog = minBits;   /* Need a minimum to safely represent all symbol values */
@@ -781,7 +765,7 @@ size_t FSE_compress_usingCTable (void* dst, size_t dstSize,

 size_t FSE_compressBound(size_t size) { return FSE_COMPRESSBOUND(size); }

 #define CHECK_V_F(e, f) size_t const e = f; if (ERR_isError(e)) return f
 #define CHECK_V_F(e, f) size_t const e = f; if (ERR_isError(e)) return e
 #define CHECK_F(f)   { CHECK_V_F(_var_err__, f); }

 /* FSE_compress_wksp() :
--- a/contrib/zstd/fse_decompress.c
+++ b/contrib/zstd/fse_decompress.c
@@ -33,35 +33,16 @@
 ****************************************************************** */


 /* **************************************************************
 *  Compiler specifics
 ****************************************************************/
 #ifdef _MSC_VER    /* Visual Studio */
 #  define FORCE_INLINE static __forceinline
 #  include <intrin.h>                    /* For Visual 2005 */
 #  pragma warning(disable : 4127)        /* disable: C4127: conditional expression is constant */
 #  pragma warning(disable : 4214)        /* disable: C4214: non-int bitfields */
 #else
 #  if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* C99 */
 #    ifdef __GNUC__
 #      define FORCE_INLINE static inline __attribute__((always_inline))
 #    else
 #      define FORCE_INLINE static inline
 #    endif
 #  else
 #    define FORCE_INLINE static
 #  endif /* __STDC_VERSION__ */
 #endif


 /* **************************************************************
 *  Includes
 ****************************************************************/
 #include <stdlib.h>     /* malloc, free, qsort */
 #include <string.h>     /* memcpy, memset */
 #include "bitstream.h"
 #include "compiler.h"
 #define FSE_STATIC_LINKING_ONLY
 #include "fse.h"
 #include "error_private.h"


 /* **************************************************************
@@ -216,7 +197,7 @@ size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits)
    return 0;
 }

 FORCE_INLINE size_t FSE_decompress_usingDTable_generic(
 FORCE_INLINE_TEMPLATE size_t FSE_decompress_usingDTable_generic(
          void* dst, size_t maxDstSize,
    const void* cSrc, size_t cSrcSize,
    const FSE_DTable* dt, const unsigned fast)
--- a/contrib/zstd/huf.h
+++ b/contrib/zstd/huf.h
@@ -31,13 +31,13 @@
   You can contact the author at :
   - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
 ****************************************************************** */
 #ifndef HUF_H_298734234
 #define HUF_H_298734234

 #if defined (__cplusplus)
 extern "C" {
 #endif

 #ifndef HUF_H_298734234
 #define HUF_H_298734234

 /* *** Dependencies *** */
 #include <stddef.h>    /* size_t */
@@ -111,7 +111,20 @@ HUF_PUBLIC_API size_t HUF_compress2 (void* dst, size_t dstCapacity, const void*
 #define HUF_WORKSPACE_SIZE_U32 (HUF_WORKSPACE_SIZE / sizeof(U32))
 HUF_PUBLIC_API size_t HUF_compress4X_wksp (void* dst, size_t dstCapacity, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize);

 /**
 *  The minimum workspace size for the `workSpace` used in
 *  HUF_readDTableX2_wksp() and HUF_readDTableX4_wksp().
 *
 *  The space used depends on HUF_TABLELOG_MAX, ranging from ~1500 bytes when
 *  HUF_TABLE_LOG_MAX=12 to ~1850 bytes when HUF_TABLE_LOG_MAX=15.
 *  Buffer overflow errors may potentially occur if code modifications result in
 *  a required workspace size greater than that specified in the following
 *  macro.
 */
 #define HUF_DECOMPRESS_WORKSPACE_SIZE (2 << 10)
 #define HUF_DECOMPRESS_WORKSPACE_SIZE_U32 (HUF_DECOMPRESS_WORKSPACE_SIZE / sizeof(U32))

 #endif   /* HUF_H_298734234 */

 /* ******************************************************************
 *  WARNING !!
@@ -120,7 +133,8 @@ HUF_PUBLIC_API size_t HUF_compress4X_wksp (void* dst, size_t dstCapacity, const
 *  because they are not guaranteed to remain stable in the future.
 *  Only consider them in association with static linking.
 *******************************************************************/
 #ifdef HUF_STATIC_LINKING_ONLY
 #if defined(HUF_STATIC_LINKING_ONLY) && !defined(HUF_H_HUF_STATIC_LINKING_ONLY)
 #define HUF_H_HUF_STATIC_LINKING_ONLY

 /* *** Dependencies *** */
 #include "mem.h"   /* U32 */
@@ -170,8 +184,11 @@ size_t HUF_decompress4X4 (void* dst, size_t dstSize, const void* cSrc, size_t cS

 size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /**< decodes RLE and uncompressed */
 size_t HUF_decompress4X_hufOnly(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< considers RLE and uncompressed as errors */
 size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< considers RLE and uncompressed as errors */
 size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /**< single-symbol decoder */
 size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize);   /**< single-symbol decoder */
 size_t HUF_decompress4X4_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /**< double-symbols decoder */
 size_t HUF_decompress4X4_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize);   /**< double-symbols decoder */


 /* ****************************************
@@ -243,7 +260,9 @@ HUF_decompress() does the following:
 U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize);

 size_t HUF_readDTableX2 (HUF_DTable* DTable, const void* src, size_t srcSize);
 size_t HUF_readDTableX2_wksp (HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize);
 size_t HUF_readDTableX4 (HUF_DTable* DTable, const void* src, size_t srcSize);
 size_t HUF_readDTableX4_wksp (HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize);

 size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
 size_t HUF_decompress4X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
@@ -266,8 +285,11 @@ size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cS
 size_t HUF_decompress1X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /* double-symbol decoder */

 size_t HUF_decompress1X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);
 size_t HUF_decompress1X_DCtx_wksp (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize);
 size_t HUF_decompress1X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /**< single-symbol decoder */
 size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize);   /**< single-symbol decoder */
 size_t HUF_decompress1X4_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /**< double-symbols decoder */
 size_t HUF_decompress1X4_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize);   /**< double-symbols decoder */

 size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);   /**< automatic selection of sing or double symbol decoder, based on DTable */
 size_t HUF_decompress1X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
@@ -275,9 +297,6 @@ size_t HUF_decompress1X4_usingDTable(void* dst, size_t maxDstSize, const void* c

 #endif /* HUF_STATIC_LINKING_ONLY */


 #if defined (__cplusplus)
 }
 #endif

 #endif   /* HUF_H_298734234 */
--- a/contrib/zstd/huf_compress.c
+++ b/contrib/zstd/huf_compress.c
@@ -50,13 +50,15 @@
 #include "fse.h"        /* header compression */
 #define HUF_STATIC_LINKING_ONLY
 #include "huf.h"
 #include "error_private.h"


 /* **************************************************************
 *  Error Management
 ****************************************************************/
 #define HUF_isError ERR_isError
 #define HUF_STATIC_ASSERT(c) { enum { HUF_static_assert = 1/(int)(!!(c)) }; }   /* use only *after* variable declarations */
 #define CHECK_V_F(e, f) size_t const e = f; if (ERR_isError(e)) return f
 #define CHECK_V_F(e, f) size_t const e = f; if (ERR_isError(e)) return e
 #define CHECK_F(f)   { CHECK_V_F(_var_err__, f); }


@@ -266,7 +268,8 @@ static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits)
                        if (highTotal <= lowTotal) break;
                }   }
                /* only triggered when no more rank 1 symbol left => find closest one (note : there is necessarily at least one !) */
                while ((nBitsToDecrease<=HUF_TABLELOG_MAX) && (rankLast[nBitsToDecrease] == noSymbol))  /* HUF_MAX_TABLELOG test just to please gcc 5+; but it should not be necessary */
                /* HUF_MAX_TABLELOG test just to please gcc 5+; but it should not be necessary */
                while ((nBitsToDecrease<=HUF_TABLELOG_MAX) && (rankLast[nBitsToDecrease] == noSymbol))
                    nBitsToDecrease ++;
                totalCost -= 1 << (nBitsToDecrease-1);
                if (rankLast[nBitsToDecrease-1] == noSymbol)
@@ -435,7 +438,7 @@ static void HUF_encodeSymbol(BIT_CStream_t* bitCPtr, U32 symbol, const HUF_CElt*

 size_t HUF_compressBound(size_t size) { return HUF_COMPRESSBOUND(size); }

 #define HUF_FLUSHBITS(s)  (fast ? BIT_flushBitsFast(s) : BIT_flushBits(s))
 #define HUF_FLUSHBITS(s)  BIT_flushBits(s)

 #define HUF_FLUSHBITS_1(stream) \
    if (sizeof((stream)->bitContainer)*8 < HUF_TABLELOG_MAX*2+7) HUF_FLUSHBITS(stream)
@@ -450,7 +453,6 @@ size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, si
    BYTE* const oend = ostart + dstSize;
    BYTE* op = ostart;
    size_t n;
    const unsigned fast = (dstSize >= HUF_BLOCKBOUND(srcSize));
    BIT_CStream_t bitC;

    /* init */
--- a/contrib/zstd/huf_decompress.c
+++ b/contrib/zstd/huf_decompress.c
@@ -32,41 +32,31 @@
    - Public forum : https://groups.google.com/forum/#!forum/lz4c
 ****************************************************************** */

 /* **************************************************************
 *  Compiler specifics
 ****************************************************************/
 #ifdef _MSC_VER    /* Visual Studio */
 #  define FORCE_INLINE static __forceinline
 #  pragma warning(disable : 4127)        /* disable: C4127: conditional expression is constant */
 #else
 #  if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* C99 */
 #    ifdef __GNUC__
 #      define FORCE_INLINE static inline __attribute__((always_inline))
 #    else
 #      define FORCE_INLINE static inline
 #    endif
 #  else
 #    define FORCE_INLINE static
 #  endif /* __STDC_VERSION__ */
 #endif


 /* **************************************************************
 *  Dependencies
 ****************************************************************/
 #include <string.h>     /* memcpy, memset */
 #include "bitstream.h"  /* BIT_* */
 #include "compiler.h"
 #include "fse.h"        /* header compression */
 #define HUF_STATIC_LINKING_ONLY
 #include "huf.h"
 #include "error_private.h"


 /* **************************************************************
 *  Error Management
 ****************************************************************/
 #define HUF_isError ERR_isError
 #define HUF_STATIC_ASSERT(c) { enum { HUF_static_assert = 1/(int)(!!(c)) }; }   /* use only *after* variable declarations */


 /* **************************************************************
 *  Byte alignment for workSpace management
 ****************************************************************/
 #define HUF_ALIGN(x, a) HUF_ALIGN_MASK((x), (a) - 1)
 #define HUF_ALIGN_MASK(x, mask) (((x) + (mask)) & ~(mask))

 /*-***************************/
 /*  generic DTableDesc       */
 /*-***************************/
@@ -87,16 +77,28 @@ static DTableDesc HUF_getDTableDesc(const HUF_DTable* table)

 typedef struct { BYTE byte; BYTE nbBits; } HUF_DEltX2;   /* single-symbol decoding */

 size_t HUF_readDTableX2 (HUF_DTable* DTable, const void* src, size_t srcSize)
 size_t HUF_readDTableX2_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize)
 {
    BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1];
    U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1];   /* large enough for values from 0 to 16 */
    U32 tableLog = 0;
    U32 nbSymbols = 0;
    size_t iSize;
    void* const dtPtr = DTable + 1;
    HUF_DEltX2* const dt = (HUF_DEltX2*)dtPtr;

    U32* rankVal;
    BYTE* huffWeight;
    size_t spaceUsed32 = 0;

    rankVal = (U32 *)workSpace + spaceUsed32;
    spaceUsed32 += HUF_TABLELOG_ABSOLUTEMAX + 1;
    huffWeight = (BYTE *)((U32 *)workSpace + spaceUsed32);
    spaceUsed32 += HUF_ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2;

    if ((spaceUsed32 << 2) > wkspSize)
        return ERROR(tableLog_tooLarge);
    workSpace = (U32 *)workSpace + spaceUsed32;
    wkspSize -= (spaceUsed32 << 2);

    HUF_STATIC_ASSERT(sizeof(DTableDesc) == sizeof(HUF_DTable));
    /* memset(huffWeight, 0, sizeof(huffWeight)); */   /* is not necessary, even though some analyzer complain ... */

@@ -135,6 +137,13 @@ size_t HUF_readDTableX2 (HUF_DTable* DTable, const void* src, size_t srcSize)
    return iSize;
 }

 size_t HUF_readDTableX2(HUF_DTable* DTable, const void* src, size_t srcSize)
 {
    U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
    return HUF_readDTableX2_wksp(DTable, src, srcSize,
                                 workSpace, sizeof(workSpace));
 }


 static BYTE HUF_decodeSymbolX2(BIT_DStream_t* Dstream, const HUF_DEltX2* dt, const U32 dtLog)
 {
@@ -155,7 +164,7 @@ static BYTE HUF_decodeSymbolX2(BIT_DStream_t* Dstream, const HUF_DEltX2* dt, con
    if (MEM_64bits()) \
        HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr)

 FORCE_INLINE size_t HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, const HUF_DEltX2* const dt, const U32 dtLog)
 HINT_INLINE size_t HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, const HUF_DEltX2* const dt, const U32 dtLog)
 {
    BYTE* const pStart = p;

@@ -212,11 +221,13 @@ size_t HUF_decompress1X2_usingDTable(
    return HUF_decompress1X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable);
 }

 size_t HUF_decompress1X2_DCtx (HUF_DTable* DCtx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
 size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
                                   const void* cSrc, size_t cSrcSize,
                                   void* workSpace, size_t wkspSize)
 {
    const BYTE* ip = (const BYTE*) cSrc;

    size_t const hSize = HUF_readDTableX2 (DCtx, cSrc, cSrcSize);
    size_t const hSize = HUF_readDTableX2_wksp(DCtx, cSrc, cSrcSize, workSpace, wkspSize);
    if (HUF_isError(hSize)) return hSize;
    if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
    ip += hSize; cSrcSize -= hSize;
@@ -224,6 +235,15 @@ size_t HUF_decompress1X2_DCtx (HUF_DTable* DCtx, void* dst, size_t dstSize, cons
    return HUF_decompress1X2_usingDTable_internal (dst, dstSize, ip, cSrcSize, DCtx);
 }


 size_t HUF_decompress1X2_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize,
                              const void* cSrc, size_t cSrcSize)
 {
    U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
    return HUF_decompress1X2_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize,
                                       workSpace, sizeof(workSpace));
 }

 size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
 {
    HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX);
@@ -335,11 +355,14 @@ size_t HUF_decompress4X2_usingDTable(
 }


 size_t HUF_decompress4X2_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
 size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
                                   const void* cSrc, size_t cSrcSize,
                                   void* workSpace, size_t wkspSize)
 {
    const BYTE* ip = (const BYTE*) cSrc;

    size_t const hSize = HUF_readDTableX2 (dctx, cSrc, cSrcSize);
    size_t const hSize = HUF_readDTableX2_wksp (dctx, cSrc, cSrcSize,
                                                workSpace, wkspSize);
    if (HUF_isError(hSize)) return hSize;
    if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
    ip += hSize; cSrcSize -= hSize;
@@ -347,6 +370,13 @@ size_t HUF_decompress4X2_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, cons
    return HUF_decompress4X2_usingDTable_internal (dst, dstSize, ip, cSrcSize, dctx);
 }


 size_t HUF_decompress4X2_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
 {
    U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
    return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
                                       workSpace, sizeof(workSpace));
 }
 size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
 {
    HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX);
@@ -403,7 +433,8 @@ static void HUF_fillDTableX4Level2(HUF_DEltX4* DTable, U32 sizeLog, const U32 co
    }   }
 }

 typedef U32 rankVal_t[HUF_TABLELOG_MAX][HUF_TABLELOG_MAX + 1];
 typedef U32 rankValCol_t[HUF_TABLELOG_MAX + 1];
 typedef rankValCol_t rankVal_t[HUF_TABLELOG_MAX];

 static void HUF_fillDTableX4(HUF_DEltX4* DTable, const U32 targetLog,
                           const sortedSymbol_t* sortedList, const U32 sortedListSize,
@@ -447,20 +478,43 @@ static void HUF_fillDTableX4(HUF_DEltX4* DTable, const U32 targetLog,
    }
 }

 size_t HUF_readDTableX4 (HUF_DTable* DTable, const void* src, size_t srcSize)
 size_t HUF_readDTableX4_wksp(HUF_DTable* DTable, const void* src,
                             size_t srcSize, void* workSpace,
                             size_t wkspSize)
 {
    BYTE weightList[HUF_SYMBOLVALUE_MAX + 1];
    sortedSymbol_t sortedSymbol[HUF_SYMBOLVALUE_MAX + 1];
    U32 rankStats[HUF_TABLELOG_MAX + 1] = { 0 };
    U32 rankStart0[HUF_TABLELOG_MAX + 2] = { 0 };
    U32* const rankStart = rankStart0+1;
    rankVal_t rankVal;
    U32 tableLog, maxW, sizeOfSort, nbSymbols;
    DTableDesc dtd = HUF_getDTableDesc(DTable);
    U32 const maxTableLog = dtd.maxTableLog;
    size_t iSize;
    void* dtPtr = DTable+1;   /* force compiler to avoid strict-aliasing */
    HUF_DEltX4* const dt = (HUF_DEltX4*)dtPtr;
    U32 *rankStart;

    rankValCol_t* rankVal;
    U32* rankStats;
    U32* rankStart0;
    sortedSymbol_t* sortedSymbol;
    BYTE* weightList;
    size_t spaceUsed32 = 0;

    rankVal = (rankValCol_t *)((U32 *)workSpace + spaceUsed32);
    spaceUsed32 += (sizeof(rankValCol_t) * HUF_TABLELOG_MAX) >> 2;
    rankStats = (U32 *)workSpace + spaceUsed32;
    spaceUsed32 += HUF_TABLELOG_MAX + 1;
    rankStart0 = (U32 *)workSpace + spaceUsed32;
    spaceUsed32 += HUF_TABLELOG_MAX + 2;
    sortedSymbol = (sortedSymbol_t *)workSpace + (spaceUsed32 * sizeof(U32)) / sizeof(sortedSymbol_t);
    spaceUsed32 += HUF_ALIGN(sizeof(sortedSymbol_t) * (HUF_SYMBOLVALUE_MAX + 1), sizeof(U32)) >> 2;
    weightList = (BYTE *)((U32 *)workSpace + spaceUsed32);
    spaceUsed32 += HUF_ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2;

    if ((spaceUsed32 << 2) > wkspSize)
        return ERROR(tableLog_tooLarge);
    workSpace = (U32 *)workSpace + spaceUsed32;
    wkspSize -= (spaceUsed32 << 2);

    rankStart = rankStart0 + 1;
    memset(rankStats, 0, sizeof(U32) * (2 * HUF_TABLELOG_MAX + 2 + 1));

    HUF_STATIC_ASSERT(sizeof(HUF_DEltX4) == sizeof(HUF_DTable));   /* if compiler fails here, assertion is wrong */
    if (maxTableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
@@ -527,6 +581,12 @@ size_t HUF_readDTableX4 (HUF_DTable* DTable, const void* src, size_t srcSize)
    return iSize;
 }

 size_t HUF_readDTableX4(HUF_DTable* DTable, const void* src, size_t srcSize)
 {
  U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
  return HUF_readDTableX4_wksp(DTable, src, srcSize,
                               workSpace, sizeof(workSpace));
 }

 static U32 HUF_decodeSymbolX4(void* op, BIT_DStream_t* DStream, const HUF_DEltX4* dt, const U32 dtLog)
 {
@@ -545,7 +605,8 @@ static U32 HUF_decodeLastSymbolX4(void* op, BIT_DStream_t* DStream, const HUF_DE
        if (DStream->bitsConsumed < (sizeof(DStream->bitContainer)*8)) {
            BIT_skipBits(DStream, dt[val].nbBits);
            if (DStream->bitsConsumed > (sizeof(DStream->bitContainer)*8))
                DStream->bitsConsumed = (sizeof(DStream->bitContainer)*8);   /* ugly hack; works only because it's the last symbol. Note : can't easily extract nbBits from just this symbol */
                /* ugly hack; works only because it's the last symbol. Note : can't easily extract nbBits from just this symbol */
                DStream->bitsConsumed = (sizeof(DStream->bitContainer)*8);
    }   }
    return 1;
 }
@@ -562,7 +623,7 @@ static U32 HUF_decodeLastSymbolX4(void* op, BIT_DStream_t* DStream, const HUF_DE
    if (MEM_64bits()) \
        ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)

 FORCE_INLINE size_t HUF_decodeStreamX4(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd, const HUF_DEltX4* const dt, const U32 dtLog)
 HINT_INLINE size_t HUF_decodeStreamX4(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd, const HUF_DEltX4* const dt, const U32 dtLog)
 {
    BYTE* const pStart = p;

@@ -626,11 +687,14 @@ size_t HUF_decompress1X4_usingDTable(
    return HUF_decompress1X4_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable);
 }

 size_t HUF_decompress1X4_DCtx (HUF_DTable* DCtx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
 size_t HUF_decompress1X4_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
                                   const void* cSrc, size_t cSrcSize,
                                   void* workSpace, size_t wkspSize)
 {
    const BYTE* ip = (const BYTE*) cSrc;

    size_t const hSize = HUF_readDTableX4 (DCtx, cSrc, cSrcSize);
    size_t const hSize = HUF_readDTableX4_wksp(DCtx, cSrc, cSrcSize,
                                               workSpace, wkspSize);
    if (HUF_isError(hSize)) return hSize;
    if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
    ip += hSize; cSrcSize -= hSize;
@@ -638,6 +702,15 @@ size_t HUF_decompress1X4_DCtx (HUF_DTable* DCtx, void* dst, size_t dstSize, cons
    return HUF_decompress1X4_usingDTable_internal (dst, dstSize, ip, cSrcSize, DCtx);
 }


 size_t HUF_decompress1X4_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize,
                              const void* cSrc, size_t cSrcSize)
 {
    U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
    return HUF_decompress1X4_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize,
                                       workSpace, sizeof(workSpace));
 }

 size_t HUF_decompress1X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
 {
    HUF_CREATE_STATIC_DTABLEX4(DTable, HUF_TABLELOG_MAX);
@@ -748,11 +821,14 @@ size_t HUF_decompress4X4_usingDTable(
 }


 size_t HUF_decompress4X4_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
 size_t HUF_decompress4X4_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
                                   const void* cSrc, size_t cSrcSize,
                                   void* workSpace, size_t wkspSize)
 {
    const BYTE* ip = (const BYTE*) cSrc;

    size_t hSize = HUF_readDTableX4 (dctx, cSrc, cSrcSize);
    size_t hSize = HUF_readDTableX4_wksp(dctx, cSrc, cSrcSize,
                                         workSpace, wkspSize);
    if (HUF_isError(hSize)) return hSize;
    if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
    ip += hSize; cSrcSize -= hSize;
@@ -760,6 +836,15 @@ size_t HUF_decompress4X4_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, cons
    return HUF_decompress4X4_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx);
 }


 size_t HUF_decompress4X4_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize,
                              const void* cSrc, size_t cSrcSize)
 {
    U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
    return HUF_decompress4X4_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
                                       workSpace, sizeof(workSpace));
 }

 size_t HUF_decompress4X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
 {
    HUF_CREATE_STATIC_DTABLEX4(DTable, HUF_TABLELOG_MAX);
@@ -816,11 +901,11 @@ static const algo_time_t algoTime[16 /* Quantization */][3 /* single, double, qu
 *   Tells which decoder is likely to decode faster,
 *   based on a set of pre-determined metrics.
 *   @return : 0==HUF_decompress4X2, 1==HUF_decompress4X4 .
 *   Assumption : 0 < cSrcSize < dstSize <= 128 KB */
 *   Assumption : 0 < cSrcSize, dstSize <= 128 KB */
 U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize)
 {
    /* decoder timing evaluation */
    U32 const Q = (U32)(cSrcSize * 16 / dstSize);   /* Q < 16 since dstSize > cSrcSize */
    U32 const Q = cSrcSize >= dstSize ? 15 : (U32)(cSrcSize * 16 / dstSize);   /* Q < 16 */
    U32 const D256 = (U32)(dstSize >> 8);
    U32 const DTime0 = algoTime[Q][0].tableTime + (algoTime[Q][0].decode256Time * D256);
    U32 DTime1 = algoTime[Q][1].tableTime + (algoTime[Q][1].decode256Time * D256);
@@ -861,19 +946,32 @@ size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const
    }
 }

 size_t HUF_decompress4X_hufOnly (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
 size_t HUF_decompress4X_hufOnly(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
 {
    U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
    return HUF_decompress4X_hufOnly_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
                                         workSpace, sizeof(workSpace));
 }


 size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst,
                                     size_t dstSize, const void* cSrc,
                                     size_t cSrcSize, void* workSpace,
                                     size_t wkspSize)
 {
    /* validation checks */
    if (dstSize == 0) return ERROR(dstSize_tooSmall);
    if ((cSrcSize >= dstSize) || (cSrcSize <= 1)) return ERROR(corruption_detected);   /* invalid */
    if (cSrcSize == 0) return ERROR(corruption_detected);

    {   U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
        return algoNb ? HUF_decompress4X4_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) :
                        HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) ;
        return algoNb ? HUF_decompress4X4_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize):
                        HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);
    }
 }

 size_t HUF_decompress1X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
 size_t HUF_decompress1X_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
                                  const void* cSrc, size_t cSrcSize,
                                  void* workSpace, size_t wkspSize)
 {
    /* validation checks */
    if (dstSize == 0) return ERROR(dstSize_tooSmall);
@@ -882,7 +980,17 @@ size_t HUF_decompress1X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const
    if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; }   /* RLE */

    {   U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
        return algoNb ? HUF_decompress1X4_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) :
                        HUF_decompress1X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) ;
        return algoNb ? HUF_decompress1X4_DCtx_wksp(dctx, dst, dstSize, cSrc,
                                cSrcSize, workSpace, wkspSize):
                        HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc,
                                cSrcSize, workSpace, wkspSize);
    }
 }

 size_t HUF_decompress1X_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize,
                             const void* cSrc, size_t cSrcSize)
 {
    U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
    return HUF_decompress1X_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
                                      workSpace, sizeof(workSpace));
 }
--- a/contrib/zstd/mem.h
+++ b/contrib/zstd/mem.h
@@ -1,10 +1,11 @@
 /**
 /*
 * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
 * All rights reserved.
 *
 * This source code is licensed under the BSD-style license found in the
 * LICENSE file in the root directory of this source tree. An additional grant
 * of patent rights can be found in the PATENTS file in the same directory.
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 * You may select, at your option, one of the above-listed licenses.
 */

 #ifndef MEM_H_MODULE
@@ -110,7 +111,7 @@ Only use if no other choice to achieve best performance on target platform */
 MEM_STATIC U16 MEM_read16(const void* memPtr) { return *(const U16*) memPtr; }
 MEM_STATIC U32 MEM_read32(const void* memPtr) { return *(const U32*) memPtr; }
 MEM_STATIC U64 MEM_read64(const void* memPtr) { return *(const U64*) memPtr; }
 MEM_STATIC U64 MEM_readST(const void* memPtr) { return *(const size_t*) memPtr; }
 MEM_STATIC size_t MEM_readST(const void* memPtr) { return *(const size_t*) memPtr; }

 MEM_STATIC void MEM_write16(void* memPtr, U16 value) { *(U16*)memPtr = value; }
 MEM_STATIC void MEM_write32(void* memPtr, U32 value) { *(U32*)memPtr = value; }
@@ -131,7 +132,7 @@ MEM_STATIC void MEM_write64(void* memPtr, U64 value) { *(U64*)memPtr = value; }
 MEM_STATIC U16 MEM_read16(const void* ptr) { return ((const unalign*)ptr)->u16; }
 MEM_STATIC U32 MEM_read32(const void* ptr) { return ((const unalign*)ptr)->u32; }
 MEM_STATIC U64 MEM_read64(const void* ptr) { return ((const unalign*)ptr)->u64; }
 MEM_STATIC U64 MEM_readST(const void* ptr) { return ((const unalign*)ptr)->st; }
 MEM_STATIC size_t MEM_readST(const void* ptr) { return ((const unalign*)ptr)->st; }

 MEM_STATIC void MEM_write16(void* memPtr, U16 value) { ((unalign*)memPtr)->u16 = value; }
 MEM_STATIC void MEM_write32(void* memPtr, U32 value) { ((unalign*)memPtr)->u32 = value; }
@@ -352,20 +353,6 @@ MEM_STATIC void MEM_writeBEST(void* memPtr, size_t val)
 }


 /* function safe only for comparisons */
 MEM_STATIC U32 MEM_readMINMATCH(const void* memPtr, U32 length)
 {
    switch (length)
    {
    default :
    case 4 : return MEM_read32(memPtr);
    case 3 : if (MEM_isLittleEndian())
                return MEM_read32(memPtr)<<8;
             else
                return MEM_read32(memPtr)>>8;
    }
 }

 #if defined (__cplusplus)
 }
 #endif
--- a/contrib/zstd/pool.c
+++ b/contrib/zstd/pool.c
@@ -1,10 +1,11 @@
 /**
 * Copyright (c) 2016-present, Facebook, Inc.
 /*
 * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
 * All rights reserved.
 *
 * This source code is licensed under the BSD-style license found in the
 * LICENSE file in the root directory of this source tree. An additional grant
 * of patent rights can be found in the PATENTS file in the same directory.
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 * You may select, at your option, one of the above-listed licenses.
 */


@@ -25,11 +26,12 @@

 /* A job is a function and an opaque argument */
 typedef struct POOL_job_s {
  POOL_function function;
  void *opaque;
    POOL_function function;
    void *opaque;
 } POOL_job;

 struct POOL_ctx_s {
    ZSTD_customMem customMem;
    /* Keep track of the threads */
    pthread_t *threads;
    size_t numThreads;
@@ -39,6 +41,12 @@ struct POOL_ctx_s {
    size_t queueHead;
    size_t queueTail;
    size_t queueSize;

    /* The number of threads working on jobs */
    size_t numThreadsBusy;
    /* Indicates if the queue is empty */
    int queueEmpty;

    /* The mutex protects the queue */
    pthread_mutex_t queueMutex;
    /* Condition variable for pushers to wait on when the queue is full */
@@ -60,48 +68,66 @@ static void* POOL_thread(void* opaque) {
    for (;;) {
        /* Lock the mutex and wait for a non-empty queue or until shutdown */
        pthread_mutex_lock(&ctx->queueMutex);
        while (ctx->queueHead == ctx->queueTail && !ctx->shutdown) {

        while (ctx->queueEmpty && !ctx->shutdown) {
            pthread_cond_wait(&ctx->queuePopCond, &ctx->queueMutex);
        }
        /* empty => shutting down: so stop */
        if (ctx->queueHead == ctx->queueTail) {
        if (ctx->queueEmpty) {
            pthread_mutex_unlock(&ctx->queueMutex);
            return opaque;
        }
        /* Pop a job off the queue */
        {   POOL_job const job = ctx->queue[ctx->queueHead];
            ctx->queueHead = (ctx->queueHead + 1) % ctx->queueSize;
            ctx->numThreadsBusy++;
            ctx->queueEmpty = ctx->queueHead == ctx->queueTail;
            /* Unlock the mutex, signal a pusher, and run the job */
            pthread_mutex_unlock(&ctx->queueMutex);
            pthread_cond_signal(&ctx->queuePushCond);

            job.function(job.opaque);
        }
    }

            /* If the intended queue size was 0, signal after finishing job */
            if (ctx->queueSize == 1) {
                pthread_mutex_lock(&ctx->queueMutex);
                ctx->numThreadsBusy--;
                pthread_mutex_unlock(&ctx->queueMutex);
                pthread_cond_signal(&ctx->queuePushCond);
        }   }
    }  /* for (;;) */
    /* Unreachable */
 }

 POOL_ctx *POOL_create(size_t numThreads, size_t queueSize) {
    return POOL_create_advanced(numThreads, queueSize, ZSTD_defaultCMem);
 }

 POOL_ctx *POOL_create_advanced(size_t numThreads, size_t queueSize, ZSTD_customMem customMem) {
    POOL_ctx *ctx;
    /* Check the parameters */
    if (!numThreads || !queueSize) { return NULL; }
    if (!numThreads) { return NULL; }
    /* Allocate the context and zero initialize */
    ctx = (POOL_ctx *)calloc(1, sizeof(POOL_ctx));
    ctx = (POOL_ctx *)ZSTD_calloc(sizeof(POOL_ctx), customMem);
    if (!ctx) { return NULL; }
    /* Initialize the job queue.
     * It needs one extra space since one space is wasted to differentiate empty
     * and full queues.
     */
    ctx->queueSize = queueSize + 1;
    ctx->queue = (POOL_job *)malloc(ctx->queueSize * sizeof(POOL_job));
    ctx->queue = (POOL_job*) malloc(ctx->queueSize * sizeof(POOL_job));
    ctx->queueHead = 0;
    ctx->queueTail = 0;
    pthread_mutex_init(&ctx->queueMutex, NULL);
    pthread_cond_init(&ctx->queuePushCond, NULL);
    pthread_cond_init(&ctx->queuePopCond, NULL);
    ctx->numThreadsBusy = 0;
    ctx->queueEmpty = 1;
    (void)pthread_mutex_init(&ctx->queueMutex, NULL);
    (void)pthread_cond_init(&ctx->queuePushCond, NULL);
    (void)pthread_cond_init(&ctx->queuePopCond, NULL);
    ctx->shutdown = 0;
    /* Allocate space for the thread handles */
    ctx->threads = (pthread_t *)malloc(numThreads * sizeof(pthread_t));
    ctx->threads = (pthread_t*)ZSTD_malloc(numThreads * sizeof(pthread_t), customMem);
    ctx->numThreads = 0;
    ctx->customMem = customMem;
    /* Check for errors */
    if (!ctx->threads || !ctx->queue) { POOL_free(ctx); return NULL; }
    /* Initialize the threads */
@@ -141,27 +167,49 @@ void POOL_free(POOL_ctx *ctx) {
    pthread_mutex_destroy(&ctx->queueMutex);
    pthread_cond_destroy(&ctx->queuePushCond);
    pthread_cond_destroy(&ctx->queuePopCond);
    if (ctx->queue) free(ctx->queue);
    if (ctx->threads) free(ctx->threads);
    free(ctx);
    ZSTD_free(ctx->queue, ctx->customMem);
    ZSTD_free(ctx->threads, ctx->customMem);
    ZSTD_free(ctx, ctx->customMem);
 }

 size_t POOL_sizeof(POOL_ctx *ctx) {
    if (ctx==NULL) return 0;  /* supports sizeof NULL */
    return sizeof(*ctx)
        + ctx->queueSize * sizeof(POOL_job)
        + ctx->numThreads * sizeof(pthread_t);
 }

 void POOL_add(void *ctxVoid, POOL_function function, void *opaque) {
    POOL_ctx *ctx = (POOL_ctx *)ctxVoid;
 /**
 * Returns 1 if the queue is full and 0 otherwise.
 *
 * If the queueSize is 1 (the pool was created with an intended queueSize of 0),
 * then a queue is empty if there is a thread free and no job is waiting.
 */
 static int isQueueFull(POOL_ctx const* ctx) {
    if (ctx->queueSize > 1) {
        return ctx->queueHead == ((ctx->queueTail + 1) % ctx->queueSize);
    } else {
        return ctx->numThreadsBusy == ctx->numThreads ||
               !ctx->queueEmpty;
    }
 }

 void POOL_add(void* ctxVoid, POOL_function function, void *opaque) {
    POOL_ctx* const ctx = (POOL_ctx*)ctxVoid;
    if (!ctx) { return; }

    pthread_mutex_lock(&ctx->queueMutex);
    {   POOL_job const job = {function, opaque};

        /* Wait until there is space in the queue for the new job */
        size_t newTail = (ctx->queueTail + 1) % ctx->queueSize;
        while (ctx->queueHead == newTail && !ctx->shutdown) {
        while (isQueueFull(ctx) && !ctx->shutdown) {
          pthread_cond_wait(&ctx->queuePushCond, &ctx->queueMutex);
          newTail = (ctx->queueTail + 1) % ctx->queueSize;
        }
        /* The queue is still going => there is space */
        if (!ctx->shutdown) {
            ctx->queueEmpty = 0;
            ctx->queue[ctx->queueTail] = job;
            ctx->queueTail = newTail;
            ctx->queueTail = (ctx->queueTail + 1) % ctx->queueSize;
        }
    }
    pthread_mutex_unlock(&ctx->queueMutex);
@@ -173,22 +221,35 @@ void POOL_add(void *ctxVoid, POOL_function function, void *opaque) {

 /* We don't need any data, but if it is empty malloc() might return NULL. */
 struct POOL_ctx_s {
  int data;
    int dummy;
 };
 static POOL_ctx g_ctx;

 POOL_ctx *POOL_create(size_t numThreads, size_t queueSize) {
  (void)numThreads;
  (void)queueSize;
  return (POOL_ctx *)malloc(sizeof(POOL_ctx));
 POOL_ctx* POOL_create(size_t numThreads, size_t queueSize) {
    return POOL_create_advanced(numThreads, queueSize, ZSTD_defaultCMem);
 }

 void POOL_free(POOL_ctx *ctx) {
  if (ctx) free(ctx);
 POOL_ctx* POOL_create_advanced(size_t numThreads, size_t queueSize, ZSTD_customMem customMem) {
    (void)numThreads;
    (void)queueSize;
    (void)customMem;
    return &g_ctx;
 }

 void POOL_free(POOL_ctx* ctx) {
    assert(!ctx || ctx == &g_ctx);
    (void)ctx;
 }

 void POOL_add(void* ctx, POOL_function function, void* opaque) {
    (void)ctx;
    function(opaque);
 }

 void POOL_add(void *ctx, POOL_function function, void *opaque) {
  (void)ctx;
  function(opaque);
 size_t POOL_sizeof(POOL_ctx* ctx) {
    if (ctx==NULL) return 0;  /* supports sizeof NULL */
    assert(ctx == &g_ctx);
    return sizeof(*ctx);
 }

 #endif  /* ZSTD_MULTITHREAD */
--- a/contrib/zstd/pool.h
+++ b/contrib/zstd/pool.h
@@ -1,11 +1,13 @@
 /**
 * Copyright (c) 2016-present, Facebook, Inc.
 /*
 * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
 * All rights reserved.
 *
 * This source code is licensed under the BSD-style license found in the
 * LICENSE file in the root directory of this source tree. An additional grant
 * of patent rights can be found in the PATENTS file in the same directory.
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 * You may select, at your option, one of the above-listed licenses.
 */

 #ifndef POOL_H
 #define POOL_H

@@ -15,23 +17,30 @@ extern "C" {


 #include <stddef.h>   /* size_t */
 #include "zstd_internal.h"   /* ZSTD_customMem */

 typedef struct POOL_ctx_s POOL_ctx;

 /*! POOL_create() :
    Create a thread pool with at most `numThreads` threads.
    `numThreads` must be at least 1.
    The maximum number of queued jobs before blocking is `queueSize`.
    `queueSize` must be at least 1.
    @return : The POOL_ctx pointer on success else NULL.
 *  Create a thread pool with at most `numThreads` threads.
 * `numThreads` must be at least 1.
 *  The maximum number of queued jobs before blocking is `queueSize`.
 * @return : POOL_ctx pointer on success, else NULL.
 */
 POOL_ctx *POOL_create(size_t numThreads, size_t queueSize);

 POOL_ctx *POOL_create_advanced(size_t numThreads, size_t queueSize, ZSTD_customMem customMem);

 /*! POOL_free() :
    Free a thread pool returned by POOL_create().
 */
 void POOL_free(POOL_ctx *ctx);

 /*! POOL_sizeof() :
    return memory usage of pool returned by POOL_create().
 */
 size_t POOL_sizeof(POOL_ctx *ctx);

 /*! POOL_function :
    The function type that can be added to a thread pool.
 */
--- a/contrib/zstd/threading.c
+++ b/contrib/zstd/threading.c
@@ -2,9 +2,9 @@
 * Copyright (c) 2016 Tino Reichardt
 * All rights reserved.
 *
 * This source code is licensed under the BSD-style license found in the
 * LICENSE file in the root directory of this source tree. An additional grant
 * of patent rights can be found in the PATENTS file in the same directory.
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 *
 * You can contact the author at:
 * - zstdmt source repository: https://github.com/mcmilk/zstdmt
@@ -14,12 +14,8 @@
 * This file will hold wrapper for systems, which do not support pthreads
 */

 /* When ZSTD_MULTITHREAD is not defined, this file would become an empty translation unit.
 * Include some ISO C header code to prevent this and portably avoid related warnings.
 * (Visual C++: C4206 / GCC: -Wpedantic / Clang: -Wempty-translation-unit)
 */
 #include <stddef.h>

 /* create fake symbol to avoid empty trnaslation unit warning */
 int g_ZSTD_threading_useles_symbol;

 #if defined(ZSTD_MULTITHREAD) && defined(_WIN32)

--- a/contrib/zstd/threading.h
+++ b/contrib/zstd/threading.h
@@ -1,11 +1,10 @@

 /**
 * Copyright (c) 2016 Tino Reichardt
 * All rights reserved.
 *
 * This source code is licensed under the BSD-style license found in the
 * LICENSE file in the root directory of this source tree. An additional grant
 * of patent rights can be found in the PATENTS file in the same directory.
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 *
 * You can contact the author at:
 * - zstdmt source repository: https://github.com/mcmilk/zstdmt
@@ -38,18 +37,22 @@ extern "C" {
 #  define WIN32_LEAN_AND_MEAN
 #endif

 #undef ERROR   /* reported already defined on VS 2015 (Rich Geldreich) */
 #include <windows.h>
 #undef ERROR
 #define ERROR(name) ZSTD_ERROR(name)


 /* mutex */
 #define pthread_mutex_t           CRITICAL_SECTION
 #define pthread_mutex_init(a,b)   InitializeCriticalSection((a))
 #define pthread_mutex_init(a,b)   (InitializeCriticalSection((a)), 0)
 #define pthread_mutex_destroy(a)  DeleteCriticalSection((a))
 #define pthread_mutex_lock(a)     EnterCriticalSection((a))
 #define pthread_mutex_unlock(a)   LeaveCriticalSection((a))

 /* condition variable */
 #define pthread_cond_t             CONDITION_VARIABLE
 #define pthread_cond_init(a, b)    InitializeConditionVariable((a))
 #define pthread_cond_init(a, b)    (InitializeConditionVariable((a)), 0)
 #define pthread_cond_destroy(a)    /* No delete */
 #define pthread_cond_wait(a, b)    SleepConditionVariableCS((a), (b), INFINITE)
 #define pthread_cond_signal(a)     WakeConditionVariable((a))
@@ -80,14 +83,14 @@ int _pthread_join(pthread_t* thread, void** value_ptr);
 #else  /* ZSTD_MULTITHREAD not defined */
 /* No multithreading support */

 #define pthread_mutex_t int   /* #define rather than typedef, as sometimes pthread support is implicit, resulting in duplicated symbols */
 #define pthread_mutex_init(a,b)
 #define pthread_mutex_t int   /* #define rather than typedef, because sometimes pthread support is implicit, resulting in duplicated symbols */
 #define pthread_mutex_init(a,b)    ((void)a, 0)
 #define pthread_mutex_destroy(a)
 #define pthread_mutex_lock(a)
 #define pthread_mutex_unlock(a)

 #define pthread_cond_t int
 #define pthread_cond_init(a,b)
 #define pthread_cond_init(a,b)     ((void)a, 0)
 #define pthread_cond_destroy(a)
 #define pthread_cond_wait(a,b)
 #define pthread_cond_signal(a)
--- a/contrib/zstd/zdict.c
+++ b/contrib/zstd/zdict.c
@@ -1,10 +1,11 @@
 /**
 /*
 * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
 * All rights reserved.
 *
 * This source code is licensed under the BSD-style license found in the
 * LICENSE file in the root directory of this source tree. An additional grant
 * of patent rights can be found in the PATENTS file in the same directory.
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 * You may select, at your option, one of the above-listed licenses.
 */


@@ -60,7 +61,7 @@

 #define NOISELENGTH 32

 static const int g_compressionLevel_default = 6;
 static const int g_compressionLevel_default = 3;
 static const U32 g_selectivity_default = 9;


@@ -94,7 +95,7 @@ const char* ZDICT_getErrorName(size_t errorCode) { return ERR_getErrorName(error
 unsigned ZDICT_getDictID(const void* dictBuffer, size_t dictSize)
 {
    if (dictSize < 8) return 0;
    if (MEM_readLE32(dictBuffer) != ZSTD_DICT_MAGIC) return 0;
    if (MEM_readLE32(dictBuffer) != ZSTD_MAGIC_DICTIONARY) return 0;
    return MEM_readLE32((const char*)dictBuffer + 4);
 }

@@ -374,7 +375,7 @@ static int isIncluded(const void* in, const void* container, size_t length)
    return u==length;
 }

 /*! ZDICT_checkMerge
 /*! ZDICT_tryMerge() :
    check if dictItem can be merged, do it if possible
    @return : id of destination elt, 0 if not merged
 */
@@ -439,8 +440,8 @@ static U32 ZDICT_tryMerge(dictItem* table, dictItem elt, U32 eltNbToSkip, const

 static void ZDICT_removeDictItem(dictItem* table, U32 id)
 {
    /* convention : first element is nb of elts */
    U32 const max = table->pos;
    /* convention : table[0].pos stores nb of elts */
    U32 const max = table[0].pos;
    U32 u;
    if (!id) return;   /* protection, should never happen */
    for (u=id; u<max-1; u++)
@@ -487,7 +488,7 @@ static U32 ZDICT_dictSize(const dictItem* dictList)
 }


 static size_t ZDICT_trainBuffer(dictItem* dictList, U32 dictListSize,
 static size_t ZDICT_trainBuffer_legacy(dictItem* dictList, U32 dictListSize,
                            const void* const buffer, size_t bufferSize,   /* buffer must end with noisy guard band */
                            const size_t* fileSizes, unsigned nbFiles,
                            U32 minRatio, U32 notificationLevel)
@@ -576,7 +577,7 @@ typedef struct
 {
    ZSTD_CCtx* ref;
    ZSTD_CCtx* zc;
    void* workPlace;   /* must be ZSTD_BLOCKSIZE_ABSOLUTEMAX allocated */
    void* workPlace;   /* must be ZSTD_BLOCKSIZE_MAX allocated */
 } EStats_ress_t;

 #define MAXREPOFFSET 1024
@@ -585,14 +586,14 @@ static void ZDICT_countEStats(EStats_ress_t esr, ZSTD_parameters params,
                            U32* countLit, U32* offsetcodeCount, U32* matchlengthCount, U32* litlengthCount, U32* repOffsets,
                            const void* src, size_t srcSize, U32 notificationLevel)
 {
    size_t const blockSizeMax = MIN (ZSTD_BLOCKSIZE_ABSOLUTEMAX, 1 << params.cParams.windowLog);
    size_t const blockSizeMax = MIN (ZSTD_BLOCKSIZE_MAX, 1 << params.cParams.windowLog);
    size_t cSize;

    if (srcSize > blockSizeMax) srcSize = blockSizeMax;   /* protection vs large samples */
    {  size_t const errorCode = ZSTD_copyCCtx(esr.zc, esr.ref, 0);
            if (ZSTD_isError(errorCode)) { DISPLAYLEVEL(1, "warning : ZSTD_copyCCtx failed \n"); return; }
    }
    cSize = ZSTD_compressBlock(esr.zc, esr.workPlace, ZSTD_BLOCKSIZE_ABSOLUTEMAX, src, srcSize);
    cSize = ZSTD_compressBlock(esr.zc, esr.workPlace, ZSTD_BLOCKSIZE_MAX, src, srcSize);
    if (ZSTD_isError(cSize)) { DISPLAYLEVEL(3, "warning : could not compress sample size %u \n", (U32)srcSize); return; }

    if (cSize) {  /* if == 0; block is not compressible */
@@ -634,17 +635,6 @@ static void ZDICT_countEStats(EStats_ress_t esr, ZSTD_parameters params,
    }   }   }
 }

 /*
 static size_t ZDICT_maxSampleSize(const size_t* fileSizes, unsigned nbFiles)
 {
    unsigned u;
    size_t max=0;
    for (u=0; u<nbFiles; u++)
        if (max < fileSizes[u]) max = fileSizes[u];
    return max;
 }
 */

 static size_t ZDICT_totalSampleSize(const size_t* fileSizes, unsigned nbFiles)
 {
    size_t total=0;
@@ -700,13 +690,13 @@ static size_t ZDICT_analyzeEntropy(void*  dstBuffer, size_t maxDstSize,
    /* init */
    esr.ref = ZSTD_createCCtx();
    esr.zc = ZSTD_createCCtx();
    esr.workPlace = malloc(ZSTD_BLOCKSIZE_ABSOLUTEMAX);
    esr.workPlace = malloc(ZSTD_BLOCKSIZE_MAX);
    if (!esr.ref || !esr.zc || !esr.workPlace) {
        eSize = ERROR(memory_allocation);
        DISPLAYLEVEL(1, "Not enough memory \n");
        goto _cleanup;
    }
    if (offcodeMax>OFFCODE_MAX) { eSize = ERROR(dictionary_wrong); goto _cleanup; }   /* too large dictionary */
    if (offcodeMax>OFFCODE_MAX) { eSize = ERROR(dictionaryCreation_failed); goto _cleanup; }   /* too large dictionary */
    for (u=0; u<256; u++) countLit[u] = 1;   /* any character must be described */
    for (u=0; u<=offcodeMax; u++) offcodeCount[u] = 1;
    for (u=0; u<=MaxML; u++) matchLengthCount[u] = 1;
@@ -714,7 +704,7 @@ static size_t ZDICT_analyzeEntropy(void*  dstBuffer, size_t maxDstSize,
    memset(repOffset, 0, sizeof(repOffset));
    repOffset[1] = repOffset[4] = repOffset[8] = 1;
    memset(bestRepOffset, 0, sizeof(bestRepOffset));
    if (compressionLevel==0) compressionLevel = g_compressionLevel_default;
    if (compressionLevel<=0) compressionLevel = g_compressionLevel_default;
    params = ZSTD_getParams(compressionLevel, averageSampleSize, dictBufferSize);
    {   size_t const beginResult = ZSTD_compressBegin_advanced(esr.ref, dictBuffer, dictBufferSize, params, 0);
        if (ZSTD_isError(beginResult)) {
@@ -865,7 +855,7 @@ size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBufferCapacity,
    if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) return ERROR(dstSize_tooSmall);

    /* dictionary header */
    MEM_writeLE32(header, ZSTD_DICT_MAGIC);
    MEM_writeLE32(header, ZSTD_MAGIC_DICTIONARY);
    {   U64 const randomID = XXH64(customDictContent, dictContentSize, 0);
        U32 const compliantID = (randomID % ((1U<<31)-32768)) + 32768;
        U32 const dictID = params.dictID ? params.dictID : compliantID;
@@ -917,7 +907,7 @@ size_t ZDICT_addEntropyTablesFromBuffer_advanced(void* dictBuffer, size_t dictCo
    }

    /* add dictionary header (after entropy tables) */
    MEM_writeLE32(dictBuffer, ZSTD_DICT_MAGIC);
    MEM_writeLE32(dictBuffer, ZSTD_MAGIC_DICTIONARY);
    {   U64 const randomID = XXH64((char*)dictBuffer + dictBufferCapacity - dictContentSize, dictContentSize, 0);
        U32 const compliantID = (randomID % ((1U<<31)-32768)) + 32768;
        U32 const dictID = params.dictID ? params.dictID : compliantID;
@@ -930,14 +920,14 @@ size_t ZDICT_addEntropyTablesFromBuffer_advanced(void* dictBuffer, size_t dictCo
 }


 /*! ZDICT_trainFromBuffer_unsafe() :
 /*! ZDICT_trainFromBuffer_unsafe_legacy() :
 *   Warning : `samplesBuffer` must be followed by noisy guard band.
 *   @return : size of dictionary, or an error code which can be tested with ZDICT_isError()
 */
 size_t ZDICT_trainFromBuffer_unsafe(
 size_t ZDICT_trainFromBuffer_unsafe_legacy(
                            void* dictBuffer, size_t maxDictSize,
                            const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
                            ZDICT_params_t params)
                            ZDICT_legacy_params_t params)
 {
    U32 const dictListSize = MAX(MAX(DICTLISTSIZE_DEFAULT, nbSamples), (U32)(maxDictSize/16));
    dictItem* const dictList = (dictItem*)malloc(dictListSize * sizeof(*dictList));
@@ -946,7 +936,7 @@ size_t ZDICT_trainFromBuffer_unsafe(
    size_t const targetDictSize = maxDictSize;
    size_t const samplesBuffSize = ZDICT_totalSampleSize(samplesSizes, nbSamples);
    size_t dictSize = 0;
    U32 const notificationLevel = params.notificationLevel;
    U32 const notificationLevel = params.zParams.notificationLevel;

    /* checks */
    if (!dictList) return ERROR(memory_allocation);
@@ -957,13 +947,13 @@ size_t ZDICT_trainFromBuffer_unsafe(
    ZDICT_initDictItem(dictList);

    /* build dictionary */
    ZDICT_trainBuffer(dictList, dictListSize,
                    samplesBuffer, samplesBuffSize,
                    samplesSizes, nbSamples,
                    minRep, notificationLevel);
    ZDICT_trainBuffer_legacy(dictList, dictListSize,
                       samplesBuffer, samplesBuffSize,
                       samplesSizes, nbSamples,
                       minRep, notificationLevel);

    /* display best matches */
    if (params.notificationLevel>= 3) {
    if (params.zParams.notificationLevel>= 3) {
        U32 const nb = MIN(25, dictList[0].pos);
        U32 const dictContentSize = ZDICT_dictSize(dictList);
        U32 u;
@@ -1026,7 +1016,7 @@ size_t ZDICT_trainFromBuffer_unsafe(

        dictSize = ZDICT_addEntropyTablesFromBuffer_advanced(dictBuffer, dictContentSize, maxDictSize,
                                                             samplesBuffer, samplesSizes, nbSamples,
                                                             params);
                                                             params.zParams);
    }

    /* clean up */
@@ -1037,9 +1027,9 @@ size_t ZDICT_trainFromBuffer_unsafe(

 /* issue : samplesBuffer need to be followed by a noisy guard band.
 *  work around : duplicate the buffer, and add the noise */
 size_t ZDICT_trainFromBuffer_advanced(void* dictBuffer, size_t dictBufferCapacity,
                                      const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
                                      ZDICT_params_t params)
 size_t ZDICT_trainFromBuffer_legacy(void* dictBuffer, size_t dictBufferCapacity,
                              const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
                              ZDICT_legacy_params_t params)
 {
    size_t result;
    void* newBuff;
@@ -1052,10 +1042,9 @@ size_t ZDICT_trainFromBuffer_advanced(void* dictBuffer, size_t dictBufferCapacit
    memcpy(newBuff, samplesBuffer, sBuffSize);
    ZDICT_fillNoise((char*)newBuff + sBuffSize, NOISELENGTH);   /* guard band, for end of buffer condition */

    result = ZDICT_trainFromBuffer_unsafe(
                                        dictBuffer, dictBufferCapacity,
                                        newBuff, samplesSizes, nbSamples,
                                        params);
    result =
        ZDICT_trainFromBuffer_unsafe_legacy(dictBuffer, dictBufferCapacity, newBuff,
                                            samplesSizes, nbSamples, params);
    free(newBuff);
    return result;
 }
@@ -1064,11 +1053,15 @@ size_t ZDICT_trainFromBuffer_advanced(void* dictBuffer, size_t dictBufferCapacit
 size_t ZDICT_trainFromBuffer(void* dictBuffer, size_t dictBufferCapacity,
                             const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples)
 {
    ZDICT_params_t params;
    ZDICT_cover_params_t params;
    memset(&params, 0, sizeof(params));
    return ZDICT_trainFromBuffer_advanced(dictBuffer, dictBufferCapacity,
                                          samplesBuffer, samplesSizes, nbSamples,
                                          params);
    params.d = 8;
    params.steps = 4;
    /* Default to level 6 since no compression level information is avaialble */
    params.zParams.compressionLevel = 6;
    return ZDICT_optimizeTrainFromBuffer_cover(dictBuffer, dictBufferCapacity,
                                               samplesBuffer, samplesSizes,
                                               nbSamples, &params);
 }

 size_t ZDICT_addEntropyTablesFromBuffer(void* dictBuffer, size_t dictContentSize, size_t dictBufferCapacity,
--- a/contrib/zstd/zdict.h
+++ b/contrib/zstd/zdict.h
@@ -1,10 +1,11 @@
 /**
 /*
 * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
 * All rights reserved.
 *
 * This source code is licensed under the BSD-style license found in the
 * LICENSE file in the root directory of this source tree. An additional grant
 * of patent rights can be found in the PATENTS file in the same directory.
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 * You may select, at your option, one of the above-listed licenses.
 */

 #ifndef DICTBUILDER_H_001
@@ -20,10 +21,12 @@ extern "C" {


 /* =====   ZDICTLIB_API : control library symbols visibility   ===== */
 #if defined(__GNUC__) && (__GNUC__ >= 4)
 #  define ZDICTLIB_VISIBILITY __attribute__ ((visibility ("default")))
 #else
 #  define ZDICTLIB_VISIBILITY
 #ifndef ZDICTLIB_VISIBILITY
 #  if defined(__GNUC__) && (__GNUC__ >= 4)
 #    define ZDICTLIB_VISIBILITY __attribute__ ((visibility ("default")))
 #  else
 #    define ZDICTLIB_VISIBILITY
 #  endif
 #endif
 #if defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1)
 #  define ZDICTLIB_API __declspec(dllexport) ZDICTLIB_VISIBILITY
@@ -34,18 +37,20 @@ extern "C" {
 #endif


 /*! ZDICT_trainFromBuffer() :
    Train a dictionary from an array of samples.
    Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
    supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
    The resulting dictionary will be saved into `dictBuffer`.
    @return : size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
              or an error code, which can be tested with ZDICT_isError().
    Tips : In general, a reasonable dictionary has a size of ~ 100 KB.
           It's obviously possible to target smaller or larger ones, just by specifying different `dictBufferCapacity`.
           In general, it's recommended to provide a few thousands samples, but this can vary a lot.
           It's recommended that total size of all samples be about ~x100 times the target size of dictionary.
 */
 /*! ZDICT_trainFromBuffer():
 * Train a dictionary from an array of samples.
 * Uses ZDICT_optimizeTrainFromBuffer_cover() single-threaded, with d=8 and steps=4.
 * Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
 * supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
 * The resulting dictionary will be saved into `dictBuffer`.
 * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
 *           or an error code, which can be tested with ZDICT_isError().
 * Note: ZDICT_trainFromBuffer() requires about 9 bytes of memory for each input byte.
 * Tips: In general, a reasonable dictionary has a size of ~ 100 KB.
 *        It's obviously possible to target smaller or larger ones, just by specifying different `dictBufferCapacity`.
 *        In general, it's recommended to provide a few thousands samples, but this can vary a lot.
 *        It's recommended that total size of all samples be about ~x100 times the target size of dictionary.
 */
 ZDICTLIB_API size_t ZDICT_trainFromBuffer(void* dictBuffer, size_t dictBufferCapacity,
                       const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples);

@@ -67,94 +72,78 @@ ZDICTLIB_API const char* ZDICT_getErrorName(size_t errorCode);
 * ==================================================================================== */

 typedef struct {
    unsigned selectivityLevel;   /* 0 means default; larger => select more => larger dictionary */
    int      compressionLevel;   /* 0 means default; target a specific zstd compression level */
    unsigned notificationLevel;  /* Write to stderr; 0 = none (default); 1 = errors; 2 = progression; 3 = details; 4 = debug; */
    unsigned dictID;             /* 0 means auto mode (32-bits random value); other : force dictID value */
    unsigned reserved[2];        /* reserved space for future parameters */
 } ZDICT_params_t;


 /*! ZDICT_trainFromBuffer_advanced() :
    Same as ZDICT_trainFromBuffer() with control over more parameters.
    `parameters` is optional and can be provided with values set to 0 to mean "default".
    @return : size of dictionary stored into `dictBuffer` (<= `dictBufferSize`),
              or an error code, which can be tested by ZDICT_isError().
    note : ZDICT_trainFromBuffer_advanced() will send notifications into stderr if instructed to, using notificationLevel>0.
 */
 ZDICTLIB_API size_t ZDICT_trainFromBuffer_advanced(void* dictBuffer, size_t dictBufferCapacity,
                                const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
                                ZDICT_params_t parameters);

 /*! COVER_params_t :
    For all values 0 means default.
    k and d are the only required parameters.
 */
 /*! ZDICT_cover_params_t:
 *  For all values 0 means default.
 *  k and d are the only required parameters.
 */
 typedef struct {
    unsigned k;                  /* Segment size : constraint: 0 < k : Reasonable range [16, 2048+] */
    unsigned d;                  /* dmer size : constraint: 0 < d <= k : Reasonable range [6, 16] */
    unsigned steps;              /* Number of steps : Only used for optimization : 0 means default (32) : Higher means more parameters checked */

    unsigned nbThreads;          /* Number of threads : constraint: 0 < nbThreads : 1 means single-threaded : Only used for optimization : Ignored if ZSTD_MULTITHREAD is not defined */
    unsigned notificationLevel;  /* Write to stderr; 0 = none (default); 1 = errors; 2 = progression; 3 = details; 4 = debug; */
    unsigned dictID;             /* 0 means auto mode (32-bits random value); other : force dictID value */
    int      compressionLevel;   /* 0 means default; target a specific zstd compression level */
 } COVER_params_t;


 /*! COVER_trainFromBuffer() :
    Train a dictionary from an array of samples using the COVER algorithm.
    Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
    supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
    The resulting dictionary will be saved into `dictBuffer`.
    @return : size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
              or an error code, which can be tested with ZDICT_isError().
    Note : COVER_trainFromBuffer() requires about 9 bytes of memory for each input byte.
    Tips : In general, a reasonable dictionary has a size of ~ 100 KB.
           It's obviously possible to target smaller or larger ones, just by specifying different `dictBufferCapacity`.
           In general, it's recommended to provide a few thousands samples, but this can vary a lot.
           It's recommended that total size of all samples be about ~x100 times the target size of dictionary.
 */
 ZDICTLIB_API size_t COVER_trainFromBuffer(void* dictBuffer, size_t dictBufferCapacity,
                              const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
                              COVER_params_t parameters);

 /*! COVER_optimizeTrainFromBuffer() :
    The same requirements as above hold for all the parameters except `parameters`.
    This function tries many parameter combinations and picks the best parameters.
    `*parameters` is filled with the best parameters found, and the dictionary
    constructed with those parameters is stored in `dictBuffer`.

    All of the parameters d, k, steps are optional.
    If d is non-zero then we don't check multiple values of d, otherwise we check d = {6, 8, 10, 12, 14, 16}.
    if steps is zero it defaults to its default value.
    If k is non-zero then we don't check multiple values of k, otherwise we check steps values in [16, 2048].

    @return : size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
              or an error code, which can be tested with ZDICT_isError().
              On success `*parameters` contains the parameters selected.
    Note : COVER_optimizeTrainFromBuffer() requires about 8 bytes of memory for each input byte and additionally another 5 bytes of memory for each byte of memory for each thread.
 */
 ZDICTLIB_API size_t COVER_optimizeTrainFromBuffer(void* dictBuffer, size_t dictBufferCapacity,
                                     const void* samplesBuffer, const size_t *samplesSizes, unsigned nbSamples,
                                     COVER_params_t *parameters);

 /*! ZDICT_finalizeDictionary() :

    Given a custom content as a basis for dictionary, and a set of samples,
    finalize dictionary by adding headers and statistics.

    Samples must be stored concatenated in a flat buffer `samplesBuffer`,
    supplied with an array of sizes `samplesSizes`, providing the size of each sample in order.

    dictContentSize must be >= ZDICT_CONTENTSIZE_MIN bytes.
    maxDictSize must be >= dictContentSize, and must be >= ZDICT_DICTSIZE_MIN bytes.

    @return : size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`),
              or an error code, which can be tested by ZDICT_isError().
    note : ZDICT_finalizeDictionary() will push notifications into stderr if instructed to, using notificationLevel>0.
    note 2 : dictBuffer and dictContent can overlap
 */
    ZDICT_params_t zParams;
 } ZDICT_cover_params_t;


 /*! ZDICT_trainFromBuffer_cover():
 * Train a dictionary from an array of samples using the COVER algorithm.
 * Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
 * supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
 * The resulting dictionary will be saved into `dictBuffer`.
 * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
 *           or an error code, which can be tested with ZDICT_isError().
 * Note: ZDICT_trainFromBuffer_cover() requires about 9 bytes of memory for each input byte.
 * Tips: In general, a reasonable dictionary has a size of ~ 100 KB.
 *        It's obviously possible to target smaller or larger ones, just by specifying different `dictBufferCapacity`.
 *        In general, it's recommended to provide a few thousands samples, but this can vary a lot.
 *        It's recommended that total size of all samples be about ~x100 times the target size of dictionary.
 */
 ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover(
    void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer,
    const size_t *samplesSizes, unsigned nbSamples,
    ZDICT_cover_params_t parameters);

 /*! ZDICT_optimizeTrainFromBuffer_cover():
 * The same requirements as above hold for all the parameters except `parameters`.
 * This function tries many parameter combinations and picks the best parameters.
 * `*parameters` is filled with the best parameters found, and the dictionary
 * constructed with those parameters is stored in `dictBuffer`.
 *
 * All of the parameters d, k, steps are optional.
 * If d is non-zero then we don't check multiple values of d, otherwise we check d = {6, 8, 10, 12, 14, 16}.
 * if steps is zero it defaults to its default value.
 * If k is non-zero then we don't check multiple values of k, otherwise we check steps values in [16, 2048].
 *
 * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
 *           or an error code, which can be tested with ZDICT_isError().
 *           On success `*parameters` contains the parameters selected.
 * Note: ZDICT_optimizeTrainFromBuffer_cover() requires about 8 bytes of memory for each input byte and additionally another 5 bytes of memory for each byte of memory for each thread.
 */
 ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
    void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer,
    const size_t *samplesSizes, unsigned nbSamples,
    ZDICT_cover_params_t *parameters);

 /*! ZDICT_finalizeDictionary():
 * Given a custom content as a basis for dictionary, and a set of samples,
 * finalize dictionary by adding headers and statistics.
 *
 * Samples must be stored concatenated in a flat buffer `samplesBuffer`,
 * supplied with an array of sizes `samplesSizes`, providing the size of each sample in order.
 *
 * dictContentSize must be >= ZDICT_CONTENTSIZE_MIN bytes.
 * maxDictSize must be >= dictContentSize, and must be >= ZDICT_DICTSIZE_MIN bytes.
 *
 * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`),
 *           or an error code, which can be tested by ZDICT_isError().
 * Note: ZDICT_finalizeDictionary() will push notifications into stderr if instructed to, using notificationLevel>0.
 * Note 2: dictBuffer and dictContent can overlap
 */
 #define ZDICT_CONTENTSIZE_MIN 128
 #define ZDICT_DICTSIZE_MIN    256
 ZDICTLIB_API size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBufferCapacity,
@@ -162,7 +151,28 @@ ZDICTLIB_API size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBuffer
                                const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
                                ZDICT_params_t parameters);


 typedef struct {
    unsigned selectivityLevel;   /* 0 means default; larger => select more => larger dictionary */
    ZDICT_params_t zParams;
 } ZDICT_legacy_params_t;

 /*! ZDICT_trainFromBuffer_legacy():
 * Train a dictionary from an array of samples.
 * Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
 * supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
 * The resulting dictionary will be saved into `dictBuffer`.
 * `parameters` is optional and can be provided with values set to 0 to mean "default".
 * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
 *           or an error code, which can be tested with ZDICT_isError().
 * Tips: In general, a reasonable dictionary has a size of ~ 100 KB.
 *        It's obviously possible to target smaller or larger ones, just by specifying different `dictBufferCapacity`.
 *        In general, it's recommended to provide a few thousands samples, but this can vary a lot.
 *        It's recommended that total size of all samples be about ~x100 times the target size of dictionary.
 * Note: ZDICT_trainFromBuffer_legacy() will send notifications into stderr if instructed to, using notificationLevel>0.
 */
 ZDICTLIB_API size_t ZDICT_trainFromBuffer_legacy(
    void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer,
    const size_t *samplesSizes, unsigned nbSamples, ZDICT_legacy_params_t parameters);

 /* Deprecation warnings */
 /* It is generally possible to disable deprecation warnings from compiler,
--- a/contrib/zstd/zstd.h
+++ b/contrib/zstd/zstd.h
--- a/contrib/zstd/zstd_common.c
+++ b/contrib/zstd/zstd_common.c
@@ -1,10 +1,11 @@
 /**
 /*
 * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
 * All rights reserved.
 *
 * This source code is licensed under the BSD-style license found in the
 * LICENSE file in the root directory of this source tree. An additional grant
 * of patent rights can be found in the PATENTS file in the same directory.
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 * You may select, at your option, one of the above-listed licenses.
 */


@@ -12,16 +13,18 @@
 /*-*************************************
 *  Dependencies
 ***************************************/
 #include <stdlib.h>         /* malloc */
 #include <stdlib.h>      /* malloc, calloc, free */
 #include <string.h>      /* memset */
 #include "error_private.h"
 #define ZSTD_STATIC_LINKING_ONLY
 #include "zstd.h"           /* declaration of ZSTD_isError, ZSTD_getErrorName, ZSTD_getErrorCode, ZSTD_getErrorString, ZSTD_versionNumber */
 #include "zstd_internal.h"


 /*-****************************************
 *  Version
 ******************************************/
 unsigned ZSTD_versionNumber (void) { return ZSTD_VERSION_NUMBER; }
 unsigned ZSTD_versionNumber(void) { return ZSTD_VERSION_NUMBER; }

 const char* ZSTD_versionString(void) { return ZSTD_VERSION_STRING; }


 /*-****************************************
@@ -47,27 +50,31 @@ const char* ZSTD_getErrorString(ZSTD_ErrorCode code) { return ERR_getErrorString
 /*=**************************************************************
 *  Custom allocator
 ****************************************************************/
 /* default uses stdlib */
 void* ZSTD_defaultAllocFunction(void* opaque, size_t size)
 {
    void* address = malloc(size);
    (void)opaque;
    return address;
 }

 void ZSTD_defaultFreeFunction(void* opaque, void* address)
 void* ZSTD_malloc(size_t size, ZSTD_customMem customMem)
 {
    (void)opaque;
    free(address);
    if (customMem.customAlloc)
        return customMem.customAlloc(customMem.opaque, size);
    return malloc(size);
 }

 void* ZSTD_malloc(size_t size, ZSTD_customMem customMem)
 void* ZSTD_calloc(size_t size, ZSTD_customMem customMem)
 {
    return customMem.customAlloc(customMem.opaque, size);
    if (customMem.customAlloc) {
        /* calloc implemented as malloc+memset;
         * not as efficient as calloc, but next best guess for custom malloc */
        void* const ptr = customMem.customAlloc(customMem.opaque, size);
        memset(ptr, 0, size);
        return ptr;
    }
    return calloc(1, size);
 }

 void ZSTD_free(void* ptr, ZSTD_customMem customMem)
 {
    if (ptr!=NULL)
        customMem.customFree(customMem.opaque, ptr);
    if (ptr!=NULL) {
        if (customMem.customFree)
            customMem.customFree(customMem.opaque, ptr);
        else
            free(ptr);
    }
 }
--- a/contrib/zstd/zstd_compress.c
+++ b/contrib/zstd/zstd_compress.c
--- a/contrib/zstd/zstd_compress.h
+++ b/contrib/zstd/zstd_compress.h
@@ -0,0 +1,307 @@
 /*
 * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
 * All rights reserved.
 *
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 * You may select, at your option, one of the above-listed licenses.
 */


 #ifndef ZSTD_COMPRESS_H
 #define ZSTD_COMPRESS_H

 /*-*************************************
 *  Dependencies
 ***************************************/
 #include "zstd_internal.h"
 #ifdef ZSTD_MULTITHREAD
 #  include "zstdmt_compress.h"
 #endif

 #if defined (__cplusplus)
 extern "C" {
 #endif

 /*-*************************************
 *  Constants
 ***************************************/
 static const U32 g_searchStrength = 8;
 #define HASH_READ_SIZE 8


 /*-*************************************
 *  Context memory management
 ***************************************/
 typedef enum { ZSTDcs_created=0, ZSTDcs_init, ZSTDcs_ongoing, ZSTDcs_ending } ZSTD_compressionStage_e;
 typedef enum { zcss_init=0, zcss_load, zcss_flush } ZSTD_cStreamStage;

 typedef struct ZSTD_prefixDict_s {
    const void* dict;
    size_t dictSize;
    ZSTD_dictMode_e dictMode;
 } ZSTD_prefixDict;

 struct ZSTD_CCtx_s {
    const BYTE* nextSrc;    /* next block here to continue on current prefix */
    const BYTE* base;       /* All regular indexes relative to this position */
    const BYTE* dictBase;   /* extDict indexes relative to this position */
    U32   dictLimit;        /* below that point, need extDict */
    U32   lowLimit;         /* below that point, no more data */
    U32   nextToUpdate;     /* index from which to continue dictionary update */
    U32   nextToUpdate3;    /* index from which to continue dictionary update */
    U32   hashLog3;         /* dispatch table : larger == faster, more memory */
    U32   loadedDictEnd;    /* index of end of dictionary */
    ZSTD_compressionStage_e stage;
    U32   dictID;
    ZSTD_CCtx_params requestedParams;
    ZSTD_CCtx_params appliedParams;
    void* workSpace;
    size_t workSpaceSize;
    size_t blockSize;
    U64 pledgedSrcSizePlusOne;  /* this way, 0 (default) == unknown */
    U64 consumedSrcSize;
    XXH64_state_t xxhState;
    ZSTD_customMem customMem;
    size_t staticSize;

    seqStore_t seqStore;    /* sequences storage ptrs */
    optState_t optState;
    ldmState_t ldmState;    /* long distance matching state */
    U32* hashTable;
    U32* hashTable3;
    U32* chainTable;
    ZSTD_entropyCTables_t* entropy;

    /* streaming */
    char*  inBuff;
    size_t inBuffSize;
    size_t inToCompress;
    size_t inBuffPos;
    size_t inBuffTarget;
    char*  outBuff;
    size_t outBuffSize;
    size_t outBuffContentSize;
    size_t outBuffFlushedSize;
    ZSTD_cStreamStage streamStage;
    U32    frameEnded;

    /* Dictionary */
    ZSTD_CDict* cdictLocal;
    const ZSTD_CDict* cdict;
    ZSTD_prefixDict prefixDict;   /* single-usage dictionary */

    /* Multi-threading */
 #ifdef ZSTD_MULTITHREAD
    ZSTDMT_CCtx* mtctx;
 #endif
 };


 static const BYTE LL_Code[64] = {  0,  1,  2,  3,  4,  5,  6,  7,
                                   8,  9, 10, 11, 12, 13, 14, 15,
                                  16, 16, 17, 17, 18, 18, 19, 19,
                                  20, 20, 20, 20, 21, 21, 21, 21,
                                  22, 22, 22, 22, 22, 22, 22, 22,
                                  23, 23, 23, 23, 23, 23, 23, 23,
                                  24, 24, 24, 24, 24, 24, 24, 24,
                                  24, 24, 24, 24, 24, 24, 24, 24 };

 static const BYTE ML_Code[128] = { 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
                                  16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
                                  32, 32, 33, 33, 34, 34, 35, 35, 36, 36, 36, 36, 37, 37, 37, 37,
                                  38, 38, 38, 38, 38, 38, 38, 38, 39, 39, 39, 39, 39, 39, 39, 39,
                                  40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
                                  41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41,
                                  42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
                                  42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42 };

 /*! ZSTD_storeSeq() :
    Store a sequence (literal length, literals, offset code and match length code) into seqStore_t.
    `offsetCode` : distance to match, or 0 == repCode.
    `matchCode` : matchLength - MINMATCH
 */
 MEM_STATIC void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const void* literals, U32 offsetCode, size_t matchCode)
 {
 #if defined(ZSTD_DEBUG) && (ZSTD_DEBUG >= 6)
    static const BYTE* g_start = NULL;
    U32 const pos = (U32)((const BYTE*)literals - g_start);
    if (g_start==NULL) g_start = (const BYTE*)literals;
    if ((pos > 0) && (pos < 1000000000))
        DEBUGLOG(6, "Cpos %6u :%5u literals & match %3u bytes at distance %6u",
               pos, (U32)litLength, (U32)matchCode+MINMATCH, (U32)offsetCode);
 #endif
    /* copy Literals */
    assert(seqStorePtr->lit + litLength <= seqStorePtr->litStart + 128 KB);
    ZSTD_wildcopy(seqStorePtr->lit, literals, litLength);
    seqStorePtr->lit += litLength;

    /* literal Length */
    if (litLength>0xFFFF) {
        seqStorePtr->longLengthID = 1;
        seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
    }
    seqStorePtr->sequences[0].litLength = (U16)litLength;

    /* match offset */
    seqStorePtr->sequences[0].offset = offsetCode + 1;

    /* match Length */
    if (matchCode>0xFFFF) {
        seqStorePtr->longLengthID = 2;
        seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
    }
    seqStorePtr->sequences[0].matchLength = (U16)matchCode;

    seqStorePtr->sequences++;
 }


 /*-*************************************
 *  Match length counter
 ***************************************/
 static unsigned ZSTD_NbCommonBytes (register size_t val)
 {
    if (MEM_isLittleEndian()) {
        if (MEM_64bits()) {
 #       if defined(_MSC_VER) && defined(_WIN64)
            unsigned long r = 0;
            _BitScanForward64( &r, (U64)val );
            return (unsigned)(r>>3);
 #       elif defined(__GNUC__) && (__GNUC__ >= 4)
            return (__builtin_ctzll((U64)val) >> 3);
 #       else
            static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2,
                                                     0, 3, 1, 3, 1, 4, 2, 7,
                                                     0, 2, 3, 6, 1, 5, 3, 5,
                                                     1, 3, 4, 4, 2, 5, 6, 7,
                                                     7, 0, 1, 2, 3, 3, 4, 6,
                                                     2, 6, 5, 5, 3, 4, 5, 6,
                                                     7, 1, 2, 4, 6, 4, 4, 5,
                                                     7, 2, 6, 5, 7, 6, 7, 7 };
            return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58];
 #       endif
        } else { /* 32 bits */
 #       if defined(_MSC_VER)
            unsigned long r=0;
            _BitScanForward( &r, (U32)val );
            return (unsigned)(r>>3);
 #       elif defined(__GNUC__) && (__GNUC__ >= 3)
            return (__builtin_ctz((U32)val) >> 3);
 #       else
            static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0,
                                                     3, 2, 2, 1, 3, 2, 0, 1,
                                                     3, 3, 1, 2, 2, 2, 2, 0,
                                                     3, 1, 2, 0, 1, 0, 1, 1 };
            return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27];
 #       endif
        }
    } else {  /* Big Endian CPU */
        if (MEM_64bits()) {
 #       if defined(_MSC_VER) && defined(_WIN64)
            unsigned long r = 0;
            _BitScanReverse64( &r, val );
            return (unsigned)(r>>3);
 #       elif defined(__GNUC__) && (__GNUC__ >= 4)
            return (__builtin_clzll(val) >> 3);
 #       else
            unsigned r;
            const unsigned n32 = sizeof(size_t)*4;   /* calculate this way due to compiler complaining in 32-bits mode */
            if (!(val>>n32)) { r=4; } else { r=0; val>>=n32; }
            if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; }
            r += (!val);
            return r;
 #       endif
        } else { /* 32 bits */
 #       if defined(_MSC_VER)
            unsigned long r = 0;
            _BitScanReverse( &r, (unsigned long)val );
            return (unsigned)(r>>3);
 #       elif defined(__GNUC__) && (__GNUC__ >= 3)
            return (__builtin_clz((U32)val) >> 3);
 #       else
            unsigned r;
            if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; }
            r += (!val);
            return r;
 #       endif
    }   }
 }


 MEM_STATIC size_t ZSTD_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* const pInLimit)
 {
    const BYTE* const pStart = pIn;
    const BYTE* const pInLoopLimit = pInLimit - (sizeof(size_t)-1);

    while (pIn < pInLoopLimit) {
        size_t const diff = MEM_readST(pMatch) ^ MEM_readST(pIn);
        if (!diff) { pIn+=sizeof(size_t); pMatch+=sizeof(size_t); continue; }
        pIn += ZSTD_NbCommonBytes(diff);
        return (size_t)(pIn - pStart);
    }
    if (MEM_64bits()) if ((pIn<(pInLimit-3)) && (MEM_read32(pMatch) == MEM_read32(pIn))) { pIn+=4; pMatch+=4; }
    if ((pIn<(pInLimit-1)) && (MEM_read16(pMatch) == MEM_read16(pIn))) { pIn+=2; pMatch+=2; }
    if ((pIn<pInLimit) && (*pMatch == *pIn)) pIn++;
    return (size_t)(pIn - pStart);
 }

 /** ZSTD_count_2segments() :
 *   can count match length with `ip` & `match` in 2 different segments.
 *   convention : on reaching mEnd, match count continue starting from iStart
 */
 MEM_STATIC size_t ZSTD_count_2segments(const BYTE* ip, const BYTE* match, const BYTE* iEnd, const BYTE* mEnd, const BYTE* iStart)
 {
    const BYTE* const vEnd = MIN( ip + (mEnd - match), iEnd);
    size_t const matchLength = ZSTD_count(ip, match, vEnd);
    if (match + matchLength != mEnd) return matchLength;
    return matchLength + ZSTD_count(ip+matchLength, iStart, iEnd);
 }


 /*-*************************************
 *  Hashes
 ***************************************/
 static const U32 prime3bytes = 506832829U;
 static U32    ZSTD_hash3(U32 u, U32 h) { return ((u << (32-24)) * prime3bytes)  >> (32-h) ; }
 MEM_STATIC size_t ZSTD_hash3Ptr(const void* ptr, U32 h) { return ZSTD_hash3(MEM_readLE32(ptr), h); } /* only in zstd_opt.h */

 static const U32 prime4bytes = 2654435761U;
 static U32    ZSTD_hash4(U32 u, U32 h) { return (u * prime4bytes) >> (32-h) ; }
 static size_t ZSTD_hash4Ptr(const void* ptr, U32 h) { return ZSTD_hash4(MEM_read32(ptr), h); }

 static const U64 prime5bytes = 889523592379ULL;
 static size_t ZSTD_hash5(U64 u, U32 h) { return (size_t)(((u  << (64-40)) * prime5bytes) >> (64-h)) ; }
 static size_t ZSTD_hash5Ptr(const void* p, U32 h) { return ZSTD_hash5(MEM_readLE64(p), h); }

 static const U64 prime6bytes = 227718039650203ULL;
 static size_t ZSTD_hash6(U64 u, U32 h) { return (size_t)(((u  << (64-48)) * prime6bytes) >> (64-h)) ; }
 static size_t ZSTD_hash6Ptr(const void* p, U32 h) { return ZSTD_hash6(MEM_readLE64(p), h); }

 static const U64 prime7bytes = 58295818150454627ULL;
 static size_t ZSTD_hash7(U64 u, U32 h) { return (size_t)(((u  << (64-56)) * prime7bytes) >> (64-h)) ; }
 static size_t ZSTD_hash7Ptr(const void* p, U32 h) { return ZSTD_hash7(MEM_readLE64(p), h); }

 static const U64 prime8bytes = 0xCF1BBCDCB7A56463ULL;
 static size_t ZSTD_hash8(U64 u, U32 h) { return (size_t)(((u) * prime8bytes) >> (64-h)) ; }
 static size_t ZSTD_hash8Ptr(const void* p, U32 h) { return ZSTD_hash8(MEM_readLE64(p), h); }

 MEM_STATIC size_t ZSTD_hashPtr(const void* p, U32 hBits, U32 mls)
 {
    switch(mls)
    {
    default:
    case 4: return ZSTD_hash4Ptr(p, hBits);
    case 5: return ZSTD_hash5Ptr(p, hBits);
    case 6: return ZSTD_hash6Ptr(p, hBits);
    case 7: return ZSTD_hash7Ptr(p, hBits);
    case 8: return ZSTD_hash8Ptr(p, hBits);
    }
 }

 #if defined (__cplusplus)
 }
 #endif

 #endif /* ZSTD_COMPRESS_H */
--- a/contrib/zstd/zstd_decompress.c
+++ b/contrib/zstd/zstd_decompress.c
--- a/contrib/zstd/zstd_double_fast.c
+++ b/contrib/zstd/zstd_double_fast.c
@@ -0,0 +1,308 @@
 /*
 * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
 * All rights reserved.
 *
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 * You may select, at your option, one of the above-listed licenses.
 */

 #include "zstd_double_fast.h"


 void ZSTD_fillDoubleHashTable(ZSTD_CCtx* cctx, const void* end, const U32 mls)
 {
    U32* const hashLarge = cctx->hashTable;
    U32  const hBitsL = cctx->appliedParams.cParams.hashLog;
    U32* const hashSmall = cctx->chainTable;
    U32  const hBitsS = cctx->appliedParams.cParams.chainLog;
    const BYTE* const base = cctx->base;
    const BYTE* ip = base + cctx->nextToUpdate;
    const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE;
    const size_t fastHashFillStep = 3;

    while(ip <= iend) {
        hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip - base);
        hashLarge[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip - base);
        ip += fastHashFillStep;
    }
 }


 FORCE_INLINE_TEMPLATE
 size_t ZSTD_compressBlock_doubleFast_generic(ZSTD_CCtx* cctx,
                                 const void* src, size_t srcSize,
                                 const U32 mls)
 {
    U32* const hashLong = cctx->hashTable;
    const U32 hBitsL = cctx->appliedParams.cParams.hashLog;
    U32* const hashSmall = cctx->chainTable;
    const U32 hBitsS = cctx->appliedParams.cParams.chainLog;
    seqStore_t* seqStorePtr = &(cctx->seqStore);
    const BYTE* const base = cctx->base;
    const BYTE* const istart = (const BYTE*)src;
    const BYTE* ip = istart;
    const BYTE* anchor = istart;
    const U32 lowestIndex = cctx->dictLimit;
    const BYTE* const lowest = base + lowestIndex;
    const BYTE* const iend = istart + srcSize;
    const BYTE* const ilimit = iend - HASH_READ_SIZE;
    U32 offset_1=seqStorePtr->rep[0], offset_2=seqStorePtr->rep[1];
    U32 offsetSaved = 0;

    /* init */
    ip += (ip==lowest);
    {   U32 const maxRep = (U32)(ip-lowest);
        if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0;
        if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0;
    }

    /* Main Search Loop */
    while (ip < ilimit) {   /* < instead of <=, because repcode check at (ip+1) */
        size_t mLength;
        size_t const h2 = ZSTD_hashPtr(ip, hBitsL, 8);
        size_t const h = ZSTD_hashPtr(ip, hBitsS, mls);
        U32 const current = (U32)(ip-base);
        U32 const matchIndexL = hashLong[h2];
        U32 const matchIndexS = hashSmall[h];
        const BYTE* matchLong = base + matchIndexL;
        const BYTE* match = base + matchIndexS;
        hashLong[h2] = hashSmall[h] = current;   /* update hash tables */

        assert(offset_1 <= current);   /* supposed guaranteed by construction */
        if ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1))) {
            /* favor repcode */
            mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
            ip++;
            ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, 0, mLength-MINMATCH);
        } else {
            U32 offset;
            if ( (matchIndexL > lowestIndex) && (MEM_read64(matchLong) == MEM_read64(ip)) ) {
                mLength = ZSTD_count(ip+8, matchLong+8, iend) + 8;
                offset = (U32)(ip-matchLong);
                while (((ip>anchor) & (matchLong>lowest)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */
            } else if ( (matchIndexS > lowestIndex) && (MEM_read32(match) == MEM_read32(ip)) ) {
                size_t const hl3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
                U32 const matchIndexL3 = hashLong[hl3];
                const BYTE* matchL3 = base + matchIndexL3;
                hashLong[hl3] = current + 1;
                if ( (matchIndexL3 > lowestIndex) && (MEM_read64(matchL3) == MEM_read64(ip+1)) ) {
                    mLength = ZSTD_count(ip+9, matchL3+8, iend) + 8;
                    ip++;
                    offset = (U32)(ip-matchL3);
                    while (((ip>anchor) & (matchL3>lowest)) && (ip[-1] == matchL3[-1])) { ip--; matchL3--; mLength++; } /* catch up */
                } else {
                    mLength = ZSTD_count(ip+4, match+4, iend) + 4;
                    offset = (U32)(ip-match);
                    while (((ip>anchor) & (match>lowest)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
                }
            } else {
                ip += ((ip-anchor) >> g_searchStrength) + 1;
                continue;
            }

            offset_2 = offset_1;
            offset_1 = offset;

            ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
        }

        /* match found */
        ip += mLength;
        anchor = ip;

        if (ip <= ilimit) {
            /* Fill Table */
            hashLong[ZSTD_hashPtr(base+current+2, hBitsL, 8)] =
                hashSmall[ZSTD_hashPtr(base+current+2, hBitsS, mls)] = current+2;  /* here because current+2 could be > iend-8 */
            hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] =
                hashSmall[ZSTD_hashPtr(ip-2, hBitsS, mls)] = (U32)(ip-2-base);

            /* check immediate repcode */
            while ( (ip <= ilimit)
                 && ( (offset_2>0)
                 & (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) {
                /* store sequence */
                size_t const rLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
                { U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; } /* swap offset_2 <=> offset_1 */
                hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip-base);
                hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip-base);
                ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, rLength-MINMATCH);
                ip += rLength;
                anchor = ip;
                continue;   /* faster when present ... (?) */
    }   }   }

    /* save reps for next block */
    seqStorePtr->repToConfirm[0] = offset_1 ? offset_1 : offsetSaved;
    seqStorePtr->repToConfirm[1] = offset_2 ? offset_2 : offsetSaved;

    /* Return the last literals size */
    return iend - anchor;
 }


 size_t ZSTD_compressBlock_doubleFast(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
 {
    const U32 mls = ctx->appliedParams.cParams.searchLength;
    switch(mls)
    {
    default: /* includes case 3 */
    case 4 :
        return ZSTD_compressBlock_doubleFast_generic(ctx, src, srcSize, 4);
    case 5 :
        return ZSTD_compressBlock_doubleFast_generic(ctx, src, srcSize, 5);
    case 6 :
        return ZSTD_compressBlock_doubleFast_generic(ctx, src, srcSize, 6);
    case 7 :
        return ZSTD_compressBlock_doubleFast_generic(ctx, src, srcSize, 7);
    }
 }


 static size_t ZSTD_compressBlock_doubleFast_extDict_generic(ZSTD_CCtx* ctx,
                                 const void* src, size_t srcSize,
                                 const U32 mls)
 {
    U32* const hashLong = ctx->hashTable;
    U32  const hBitsL = ctx->appliedParams.cParams.hashLog;
    U32* const hashSmall = ctx->chainTable;
    U32  const hBitsS = ctx->appliedParams.cParams.chainLog;
    seqStore_t* seqStorePtr = &(ctx->seqStore);
    const BYTE* const base = ctx->base;
    const BYTE* const dictBase = ctx->dictBase;
    const BYTE* const istart = (const BYTE*)src;
    const BYTE* ip = istart;
    const BYTE* anchor = istart;
    const U32   lowestIndex = ctx->lowLimit;
    const BYTE* const dictStart = dictBase + lowestIndex;
    const U32   dictLimit = ctx->dictLimit;
    const BYTE* const lowPrefixPtr = base + dictLimit;
    const BYTE* const dictEnd = dictBase + dictLimit;
    const BYTE* const iend = istart + srcSize;
    const BYTE* const ilimit = iend - 8;
    U32 offset_1=seqStorePtr->rep[0], offset_2=seqStorePtr->rep[1];

    /* Search Loop */
    while (ip < ilimit) {  /* < instead of <=, because (ip+1) */
        const size_t hSmall = ZSTD_hashPtr(ip, hBitsS, mls);
        const U32 matchIndex = hashSmall[hSmall];
        const BYTE* matchBase = matchIndex < dictLimit ? dictBase : base;
        const BYTE* match = matchBase + matchIndex;

        const size_t hLong = ZSTD_hashPtr(ip, hBitsL, 8);
        const U32 matchLongIndex = hashLong[hLong];
        const BYTE* matchLongBase = matchLongIndex < dictLimit ? dictBase : base;
        const BYTE* matchLong = matchLongBase + matchLongIndex;

        const U32 current = (U32)(ip-base);
        const U32 repIndex = current + 1 - offset_1;   /* offset_1 expected <= current +1 */
        const BYTE* repBase = repIndex < dictLimit ? dictBase : base;
        const BYTE* repMatch = repBase + repIndex;
        size_t mLength;
        hashSmall[hSmall] = hashLong[hLong] = current;   /* update hash table */

        if ( (((U32)((dictLimit-1) - repIndex) >= 3) /* intentional underflow */ & (repIndex > lowestIndex))
           && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
            const BYTE* repMatchEnd = repIndex < dictLimit ? dictEnd : iend;
            mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, lowPrefixPtr) + 4;
            ip++;
            ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, 0, mLength-MINMATCH);
        } else {
            if ((matchLongIndex > lowestIndex) && (MEM_read64(matchLong) == MEM_read64(ip))) {
                const BYTE* matchEnd = matchLongIndex < dictLimit ? dictEnd : iend;
                const BYTE* lowMatchPtr = matchLongIndex < dictLimit ? dictStart : lowPrefixPtr;
                U32 offset;
                mLength = ZSTD_count_2segments(ip+8, matchLong+8, iend, matchEnd, lowPrefixPtr) + 8;
                offset = current - matchLongIndex;
                while (((ip>anchor) & (matchLong>lowMatchPtr)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; }   /* catch up */
                offset_2 = offset_1;
                offset_1 = offset;
                ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);

            } else if ((matchIndex > lowestIndex) && (MEM_read32(match) == MEM_read32(ip))) {
                size_t const h3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
                U32 const matchIndex3 = hashLong[h3];
                const BYTE* const match3Base = matchIndex3 < dictLimit ? dictBase : base;
                const BYTE* match3 = match3Base + matchIndex3;
                U32 offset;
                hashLong[h3] = current + 1;
                if ( (matchIndex3 > lowestIndex) && (MEM_read64(match3) == MEM_read64(ip+1)) ) {
                    const BYTE* matchEnd = matchIndex3 < dictLimit ? dictEnd : iend;
                    const BYTE* lowMatchPtr = matchIndex3 < dictLimit ? dictStart : lowPrefixPtr;
                    mLength = ZSTD_count_2segments(ip+9, match3+8, iend, matchEnd, lowPrefixPtr) + 8;
                    ip++;
                    offset = current+1 - matchIndex3;
                    while (((ip>anchor) & (match3>lowMatchPtr)) && (ip[-1] == match3[-1])) { ip--; match3--; mLength++; } /* catch up */
                } else {
                    const BYTE* matchEnd = matchIndex < dictLimit ? dictEnd : iend;
                    const BYTE* lowMatchPtr = matchIndex < dictLimit ? dictStart : lowPrefixPtr;
                    mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, lowPrefixPtr) + 4;
                    offset = current - matchIndex;
                    while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; }   /* catch up */
                }
                offset_2 = offset_1;
                offset_1 = offset;
                ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);

            } else {
                ip += ((ip-anchor) >> g_searchStrength) + 1;
                continue;
        }   }

        /* found a match : store it */
        ip += mLength;
        anchor = ip;

        if (ip <= ilimit) {
            /* Fill Table */
            hashSmall[ZSTD_hashPtr(base+current+2, hBitsS, mls)] = current+2;
            hashLong[ZSTD_hashPtr(base+current+2, hBitsL, 8)] = current+2;
            hashSmall[ZSTD_hashPtr(ip-2, hBitsS, mls)] = (U32)(ip-2-base);
            hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base);
            /* check immediate repcode */
            while (ip <= ilimit) {
                U32 const current2 = (U32)(ip-base);
                U32 const repIndex2 = current2 - offset_2;
                const BYTE* repMatch2 = repIndex2 < dictLimit ? dictBase + repIndex2 : base + repIndex2;
                if ( (((U32)((dictLimit-1) - repIndex2) >= 3) & (repIndex2 > lowestIndex))  /* intentional overflow */
                   && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
                    const BYTE* const repEnd2 = repIndex2 < dictLimit ? dictEnd : iend;
                    size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, lowPrefixPtr) + 4;
                    U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset;   /* swap offset_2 <=> offset_1 */
                    ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, repLength2-MINMATCH);
                    hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
                    hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
                    ip += repLength2;
                    anchor = ip;
                    continue;
                }
                break;
    }   }   }

    /* save reps for next block */
    seqStorePtr->repToConfirm[0] = offset_1; seqStorePtr->repToConfirm[1] = offset_2;

    /* Return the last literals size */
    return iend - anchor;
 }


 size_t ZSTD_compressBlock_doubleFast_extDict(ZSTD_CCtx* ctx,
                         const void* src, size_t srcSize)
 {
    U32 const mls = ctx->appliedParams.cParams.searchLength;
    switch(mls)
    {
    default: /* includes case 3 */
    case 4 :
        return ZSTD_compressBlock_doubleFast_extDict_generic(ctx, src, srcSize, 4);
    case 5 :
        return ZSTD_compressBlock_doubleFast_extDict_generic(ctx, src, srcSize, 5);
    case 6 :
        return ZSTD_compressBlock_doubleFast_extDict_generic(ctx, src, srcSize, 6);
    case 7 :
        return ZSTD_compressBlock_doubleFast_extDict_generic(ctx, src, srcSize, 7);
    }
 }
--- a/contrib/zstd/zstd_double_fast.h
+++ b/contrib/zstd/zstd_double_fast.h
@@ -0,0 +1,28 @@
 /*
 * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
 * All rights reserved.
 *
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 * You may select, at your option, one of the above-listed licenses.
 */

 #ifndef ZSTD_DOUBLE_FAST_H
 #define ZSTD_DOUBLE_FAST_H

 #include "zstd_compress.h"

 #if defined (__cplusplus)
 extern "C" {
 #endif

 void ZSTD_fillDoubleHashTable(ZSTD_CCtx* cctx, const void* end, const U32 mls);
 size_t ZSTD_compressBlock_doubleFast(ZSTD_CCtx* ctx, const void* src, size_t srcSize);
 size_t ZSTD_compressBlock_doubleFast_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize);

 #if defined (__cplusplus)
 }
 #endif

 #endif /* ZSTD_DOUBLE_FAST_H */
--- a/contrib/zstd/zstd_errors.h
+++ b/contrib/zstd/zstd_errors.h
@@ -1,10 +1,11 @@
 /**
 /*
 * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
 * All rights reserved.
 *
 * This source code is licensed under the BSD-style license found in the
 * LICENSE file in the root directory of this source tree. An additional grant
 * of patent rights can be found in the PATENTS file in the same directory.
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 * You may select, at your option, one of the above-listed licenses.
 */

 #ifndef ZSTD_ERRORS_H_398273423
@@ -19,10 +20,12 @@ extern "C" {


 /* =====   ZSTDERRORLIB_API : control library symbols visibility   ===== */
 #if defined(__GNUC__) && (__GNUC__ >= 4)
 #  define ZSTDERRORLIB_VISIBILITY __attribute__ ((visibility ("default")))
 #else
 #  define ZSTDERRORLIB_VISIBILITY
 #ifndef ZSTDERRORLIB_VISIBILITY
 #  if defined(__GNUC__) && (__GNUC__ >= 4)
 #    define ZSTDERRORLIB_VISIBILITY __attribute__ ((visibility ("default")))
 #  else
 #    define ZSTDERRORLIB_VISIBILITY
 #  endif
 #endif
 #if defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1)
 #  define ZSTDERRORLIB_API __declspec(dllexport) ZSTDERRORLIB_VISIBILITY
@@ -33,41 +36,43 @@ extern "C" {
 #endif

 /*-****************************************
 *  error codes list
 ******************************************/
 *  error codes list
 *  note : this API is still considered unstable
 *         and shall not be used with a dynamic library.
 *         only static linking is allowed
 ******************************************/
 typedef enum {
  ZSTD_error_no_error,
  ZSTD_error_GENERIC,
  ZSTD_error_prefix_unknown,
  ZSTD_error_version_unsupported,
  ZSTD_error_parameter_unknown,
  ZSTD_error_frameParameter_unsupported,
  ZSTD_error_frameParameter_unsupportedBy32bits,
  ZSTD_error_frameParameter_windowTooLarge,
  ZSTD_error_compressionParameter_unsupported,
  ZSTD_error_init_missing,
  ZSTD_error_memory_allocation,
  ZSTD_error_stage_wrong,
  ZSTD_error_dstSize_tooSmall,
  ZSTD_error_srcSize_wrong,
  ZSTD_error_corruption_detected,
  ZSTD_error_checksum_wrong,
  ZSTD_error_tableLog_tooLarge,
  ZSTD_error_maxSymbolValue_tooLarge,
  ZSTD_error_maxSymbolValue_tooSmall,
  ZSTD_error_dictionary_corrupted,
  ZSTD_error_dictionary_wrong,
  ZSTD_error_dictionaryCreation_failed,
  ZSTD_error_frameIndex_tooLarge,
  ZSTD_error_seekableIO,
  ZSTD_error_maxCode
  ZSTD_error_no_error = 0,
  ZSTD_error_GENERIC  = 1,
  ZSTD_error_prefix_unknown                = 10,
  ZSTD_error_version_unsupported           = 12,
  ZSTD_error_frameParameter_unsupported    = 14,
  ZSTD_error_frameParameter_windowTooLarge = 16,
  ZSTD_error_corruption_detected = 20,
  ZSTD_error_checksum_wrong      = 22,
  ZSTD_error_dictionary_corrupted      = 30,
  ZSTD_error_dictionary_wrong          = 32,
  ZSTD_error_dictionaryCreation_failed = 34,
  ZSTD_error_parameter_unsupported   = 40,
  ZSTD_error_parameter_outOfBound    = 42,
  ZSTD_error_tableLog_tooLarge       = 44,
  ZSTD_error_maxSymbolValue_tooLarge = 46,
  ZSTD_error_maxSymbolValue_tooSmall = 48,
  ZSTD_error_stage_wrong       = 60,
  ZSTD_error_init_missing      = 62,
  ZSTD_error_memory_allocation = 64,
  ZSTD_error_dstSize_tooSmall = 70,
  ZSTD_error_srcSize_wrong    = 72,
  ZSTD_error_frameIndex_tooLarge = 100,
  ZSTD_error_seekableIO          = 102,
  ZSTD_error_maxCode = 120  /* never EVER use this value directly, it may change in future versions! Use ZSTD_isError() instead */
 } ZSTD_ErrorCode;

 /*! ZSTD_getErrorCode() :
    convert a `size_t` function result into a `ZSTD_ErrorCode` enum type,
    which can be used to compare directly with enum list published into "error_public.h" */
    which can be used to compare with enum list published above */
 ZSTDERRORLIB_API ZSTD_ErrorCode ZSTD_getErrorCode(size_t functionResult);
 ZSTDERRORLIB_API const char* ZSTD_getErrorString(ZSTD_ErrorCode code);
 ZSTDERRORLIB_API const char* ZSTD_getErrorString(ZSTD_ErrorCode code);   /**< Same as ZSTD_getErrorName, but using a `ZSTD_ErrorCode` enum argument */


 #if defined (__cplusplus)
--- a/contrib/zstd/zstd_fast.c
+++ b/contrib/zstd/zstd_fast.c
@@ -0,0 +1,242 @@
 /*
 * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
 * All rights reserved.
 *
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 * You may select, at your option, one of the above-listed licenses.
 */

 #include "zstd_fast.h"


 void ZSTD_fillHashTable (ZSTD_CCtx* zc, const void* end, const U32 mls)
 {
    U32* const hashTable = zc->hashTable;
    U32  const hBits = zc->appliedParams.cParams.hashLog;
    const BYTE* const base = zc->base;
    const BYTE* ip = base + zc->nextToUpdate;
    const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE;
    const size_t fastHashFillStep = 3;

    while(ip <= iend) {
        hashTable[ZSTD_hashPtr(ip, hBits, mls)] = (U32)(ip - base);
        ip += fastHashFillStep;
    }
 }


 FORCE_INLINE_TEMPLATE
 size_t ZSTD_compressBlock_fast_generic(ZSTD_CCtx* cctx,
                                 const void* src, size_t srcSize,
                                 const U32 mls)
 {
    U32* const hashTable = cctx->hashTable;
    U32  const hBits = cctx->appliedParams.cParams.hashLog;
    seqStore_t* seqStorePtr = &(cctx->seqStore);
    const BYTE* const base = cctx->base;
    const BYTE* const istart = (const BYTE*)src;
    const BYTE* ip = istart;
    const BYTE* anchor = istart;
    const U32   lowestIndex = cctx->dictLimit;
    const BYTE* const lowest = base + lowestIndex;
    const BYTE* const iend = istart + srcSize;
    const BYTE* const ilimit = iend - HASH_READ_SIZE;
    U32 offset_1=seqStorePtr->rep[0], offset_2=seqStorePtr->rep[1];
    U32 offsetSaved = 0;

    /* init */
    ip += (ip==lowest);
    {   U32 const maxRep = (U32)(ip-lowest);
        if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0;
        if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0;
    }

    /* Main Search Loop */
    while (ip < ilimit) {   /* < instead of <=, because repcode check at (ip+1) */
        size_t mLength;
        size_t const h = ZSTD_hashPtr(ip, hBits, mls);
        U32 const current = (U32)(ip-base);
        U32 const matchIndex = hashTable[h];
        const BYTE* match = base + matchIndex;
        hashTable[h] = current;   /* update hash table */

        if ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1))) {
            mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
            ip++;
            ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, 0, mLength-MINMATCH);
        } else {
            U32 offset;
            if ( (matchIndex <= lowestIndex) || (MEM_read32(match) != MEM_read32(ip)) ) {
                ip += ((ip-anchor) >> g_searchStrength) + 1;
                continue;
            }
            mLength = ZSTD_count(ip+4, match+4, iend) + 4;
            offset = (U32)(ip-match);
            while (((ip>anchor) & (match>lowest)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
            offset_2 = offset_1;
            offset_1 = offset;

            ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
        }

        /* match found */
        ip += mLength;
        anchor = ip;

        if (ip <= ilimit) {
            /* Fill Table */
            hashTable[ZSTD_hashPtr(base+current+2, hBits, mls)] = current+2;  /* here because current+2 could be > iend-8 */
            hashTable[ZSTD_hashPtr(ip-2, hBits, mls)] = (U32)(ip-2-base);
            /* check immediate repcode */
            while ( (ip <= ilimit)
                 && ( (offset_2>0)
                 & (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) {
                /* store sequence */
                size_t const rLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
                { U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; }  /* swap offset_2 <=> offset_1 */
                hashTable[ZSTD_hashPtr(ip, hBits, mls)] = (U32)(ip-base);
                ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, rLength-MINMATCH);
                ip += rLength;
                anchor = ip;
                continue;   /* faster when present ... (?) */
    }   }   }

    /* save reps for next block */
    seqStorePtr->repToConfirm[0] = offset_1 ? offset_1 : offsetSaved;
    seqStorePtr->repToConfirm[1] = offset_2 ? offset_2 : offsetSaved;

    /* Return the last literals size */
    return iend - anchor;
 }


 size_t ZSTD_compressBlock_fast(ZSTD_CCtx* ctx,
                       const void* src, size_t srcSize)
 {
    const U32 mls = ctx->appliedParams.cParams.searchLength;
    switch(mls)
    {
    default: /* includes case 3 */
    case 4 :
        return ZSTD_compressBlock_fast_generic(ctx, src, srcSize, 4);
    case 5 :
        return ZSTD_compressBlock_fast_generic(ctx, src, srcSize, 5);
    case 6 :
        return ZSTD_compressBlock_fast_generic(ctx, src, srcSize, 6);
    case 7 :
        return ZSTD_compressBlock_fast_generic(ctx, src, srcSize, 7);
    }
 }


 static size_t ZSTD_compressBlock_fast_extDict_generic(ZSTD_CCtx* ctx,
                                 const void* src, size_t srcSize,
                                 const U32 mls)
 {
    U32* hashTable = ctx->hashTable;
    const U32 hBits = ctx->appliedParams.cParams.hashLog;
    seqStore_t* seqStorePtr = &(ctx->seqStore);
    const BYTE* const base = ctx->base;
    const BYTE* const dictBase = ctx->dictBase;
    const BYTE* const istart = (const BYTE*)src;
    const BYTE* ip = istart;
    const BYTE* anchor = istart;
    const U32   lowestIndex = ctx->lowLimit;
    const BYTE* const dictStart = dictBase + lowestIndex;
    const U32   dictLimit = ctx->dictLimit;
    const BYTE* const lowPrefixPtr = base + dictLimit;
    const BYTE* const dictEnd = dictBase + dictLimit;
    const BYTE* const iend = istart + srcSize;
    const BYTE* const ilimit = iend - 8;
    U32 offset_1=seqStorePtr->rep[0], offset_2=seqStorePtr->rep[1];

    /* Search Loop */
    while (ip < ilimit) {  /* < instead of <=, because (ip+1) */
        const size_t h = ZSTD_hashPtr(ip, hBits, mls);
        const U32 matchIndex = hashTable[h];
        const BYTE* matchBase = matchIndex < dictLimit ? dictBase : base;
        const BYTE* match = matchBase + matchIndex;
        const U32 current = (U32)(ip-base);
        const U32 repIndex = current + 1 - offset_1;   /* offset_1 expected <= current +1 */
        const BYTE* repBase = repIndex < dictLimit ? dictBase : base;
        const BYTE* repMatch = repBase + repIndex;
        size_t mLength;
        hashTable[h] = current;   /* update hash table */

        if ( (((U32)((dictLimit-1) - repIndex) >= 3) /* intentional underflow */ & (repIndex > lowestIndex))
           && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
            const BYTE* repMatchEnd = repIndex < dictLimit ? dictEnd : iend;
            mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, lowPrefixPtr) + 4;
            ip++;
            ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, 0, mLength-MINMATCH);
        } else {
            if ( (matchIndex < lowestIndex) ||
                 (MEM_read32(match) != MEM_read32(ip)) ) {
                ip += ((ip-anchor) >> g_searchStrength) + 1;
                continue;
            }
            {   const BYTE* matchEnd = matchIndex < dictLimit ? dictEnd : iend;
                const BYTE* lowMatchPtr = matchIndex < dictLimit ? dictStart : lowPrefixPtr;
                U32 offset;
                mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, lowPrefixPtr) + 4;
                while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; }   /* catch up */
                offset = current - matchIndex;
                offset_2 = offset_1;
                offset_1 = offset;
                ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
        }   }

        /* found a match : store it */
        ip += mLength;
        anchor = ip;

        if (ip <= ilimit) {
            /* Fill Table */
            hashTable[ZSTD_hashPtr(base+current+2, hBits, mls)] = current+2;
            hashTable[ZSTD_hashPtr(ip-2, hBits, mls)] = (U32)(ip-2-base);
            /* check immediate repcode */
            while (ip <= ilimit) {
                U32 const current2 = (U32)(ip-base);
                U32 const repIndex2 = current2 - offset_2;
                const BYTE* repMatch2 = repIndex2 < dictLimit ? dictBase + repIndex2 : base + repIndex2;
                if ( (((U32)((dictLimit-1) - repIndex2) >= 3) & (repIndex2 > lowestIndex))  /* intentional overflow */
                   && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
                    const BYTE* const repEnd2 = repIndex2 < dictLimit ? dictEnd : iend;
                    size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, lowPrefixPtr) + 4;
                    U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset;   /* swap offset_2 <=> offset_1 */
                    ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, repLength2-MINMATCH);
                    hashTable[ZSTD_hashPtr(ip, hBits, mls)] = current2;
                    ip += repLength2;
                    anchor = ip;
                    continue;
                }
                break;
    }   }   }

    /* save reps for next block */
    seqStorePtr->repToConfirm[0] = offset_1; seqStorePtr->repToConfirm[1] = offset_2;

    /* Return the last literals size */
    return iend - anchor;
 }


 size_t ZSTD_compressBlock_fast_extDict(ZSTD_CCtx* ctx,
                         const void* src, size_t srcSize)
 {
    U32 const mls = ctx->appliedParams.cParams.searchLength;
    switch(mls)
    {
    default: /* includes case 3 */
    case 4 :
        return ZSTD_compressBlock_fast_extDict_generic(ctx, src, srcSize, 4);
    case 5 :
        return ZSTD_compressBlock_fast_extDict_generic(ctx, src, srcSize, 5);
    case 6 :
        return ZSTD_compressBlock_fast_extDict_generic(ctx, src, srcSize, 6);
    case 7 :
        return ZSTD_compressBlock_fast_extDict_generic(ctx, src, srcSize, 7);
    }
 }
--- a/contrib/zstd/zstd_fast.h
+++ b/contrib/zstd/zstd_fast.h
@@ -0,0 +1,30 @@
 /*
 * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
 * All rights reserved.
 *
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 * You may select, at your option, one of the above-listed licenses.
 */

 #ifndef ZSTD_FAST_H
 #define ZSTD_FAST_H

 #include "zstd_compress.h"

 #if defined (__cplusplus)
 extern "C" {
 #endif

 void ZSTD_fillHashTable(ZSTD_CCtx* zc, const void* end, const U32 mls);
 size_t ZSTD_compressBlock_fast(ZSTD_CCtx* ctx,
                         const void* src, size_t srcSize);
 size_t ZSTD_compressBlock_fast_extDict(ZSTD_CCtx* ctx,
                         const void* src, size_t srcSize);

 #if defined (__cplusplus)
 }
 #endif

 #endif /* ZSTD_FAST_H */
--- a/contrib/zstd/zstd_internal.h
+++ b/contrib/zstd/zstd_internal.h
@@ -1,58 +1,73 @@
 /**
 /*
 * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
 * All rights reserved.
 *
 * This source code is licensed under the BSD-style license found in the
 * LICENSE file in the root directory of this source tree. An additional grant
 * of patent rights can be found in the PATENTS file in the same directory.
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 * You may select, at your option, one of the above-listed licenses.
 */

 #ifndef ZSTD_CCOMMON_H_MODULE
 #define ZSTD_CCOMMON_H_MODULE

 /*-*******************************************************
 *  Compiler specifics
 *********************************************************/
 #ifdef _MSC_VER    /* Visual Studio */
 #  define FORCE_INLINE static __forceinline
 #  include <intrin.h>                    /* For Visual 2005 */
 #  pragma warning(disable : 4100)        /* disable: C4100: unreferenced formal parameter */
 #  pragma warning(disable : 4127)        /* disable: C4127: conditional expression is constant */
 #  pragma warning(disable : 4324)        /* disable: C4324: padded structure */
 #else
 #  if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* C99 */
 #    ifdef __GNUC__
 #      define FORCE_INLINE static inline __attribute__((always_inline))
 #    else
 #      define FORCE_INLINE static inline
 #    endif
 #  else
 #    define FORCE_INLINE static
 #  endif /* __STDC_VERSION__ */
 #endif

 #ifdef _MSC_VER
 #  define FORCE_NOINLINE static __declspec(noinline)
 #else
 #  ifdef __GNUC__
 #    define FORCE_NOINLINE static __attribute__((__noinline__))
 #  else
 #    define FORCE_NOINLINE static
 #  endif
 #endif


 /*-*************************************
 *  Dependencies
 ***************************************/
 #include "compiler.h"
 #include "mem.h"
 #include "error_private.h"
 #ifndef ZSTD_STATIC_LINKING_ONLY
 #define ZSTD_STATIC_LINKING_ONLY
 #endif
 #include "zstd.h"
 #define FSE_STATIC_LINKING_ONLY
 #include "fse.h"
 #define HUF_STATIC_LINKING_ONLY
 #include "huf.h"
 #ifndef XXH_STATIC_LINKING_ONLY
 #  define XXH_STATIC_LINKING_ONLY   /* XXH64_state_t */
 #  define XXH_STATIC_LINKING_ONLY  /* XXH64_state_t */
 #endif
 #include "xxhash.h"                /* XXH_reset, update, digest */


 #if defined (__cplusplus)
 extern "C" {
 #endif


 /*-*************************************
 *  Debug
 ***************************************/
 #if defined(ZSTD_DEBUG) && (ZSTD_DEBUG>=1)
 #  include <assert.h>
 #else
 #  ifndef assert
 #    define assert(condition) ((void)0)
 #  endif
 #endif

 #define ZSTD_STATIC_ASSERT(c) { enum { ZSTD_static_assert = 1/(int)(!!(c)) }; }

 #if defined(ZSTD_DEBUG) && (ZSTD_DEBUG>=2)
 #  include <stdio.h>
 /* recommended values for ZSTD_DEBUG display levels :
 * 1 : no display, enables assert() only
 * 2 : reserved for currently active debugging path
 * 3 : events once per object lifetime (CCtx, CDict)
 * 4 : events once per frame
 * 5 : events once per block
 * 6 : events once per sequence (*very* verbose) */
 #  define DEBUGLOG(l, ...) {                         \
                if (l<=ZSTD_DEBUG) {                 \
                    fprintf(stderr, __FILE__ ": ");  \
                    fprintf(stderr, __VA_ARGS__);    \
                    fprintf(stderr, " \n");          \
            }   }
 #else
 #  define DEBUGLOG(l, ...)      {}    /* disabled */
 #endif
 #include "xxhash.h"               /* XXH_reset, update, digest */


 /*-*************************************
@@ -70,7 +85,6 @@
 *  Common constants
 ***************************************/
 #define ZSTD_OPT_NUM    (1<<12)
 #define ZSTD_DICT_MAGIC  0xEC30A437   /* v0.7+ */

 #define ZSTD_REP_NUM      3                 /* number of repcodes */
 #define ZSTD_REP_CHECK    (ZSTD_REP_NUM)    /* number of repcodes to check by the optimal parser */
@@ -178,20 +192,6 @@ MEM_STATIC void ZSTD_wildcopy_e(void* dst, const void* src, void* dstEnd)   /* s
 *********************************************/
 typedef struct ZSTD_stats_s ZSTD_stats_t;

 typedef struct {
    U32 off;
    U32 len;
 } ZSTD_match_t;

 typedef struct {
    U32 price;
    U32 off;
    U32 mlen;
    U32 litlen;
    U32 rep[ZSTD_REP_NUM];
 } ZSTD_optimal_t;


 typedef struct seqDef_s {
    U32 offset;
    U16 litLength;
@@ -209,13 +209,31 @@ typedef struct {
    BYTE* ofCode;
    U32   longLengthID;   /* 0 == no longLength; 1 == Lit.longLength; 2 == Match.longLength; */
    U32   longLengthPos;
    /* opt */
    ZSTD_optimal_t* priceTable;
    ZSTD_match_t* matchTable;
    U32* matchLengthFreq;
    U32* litLengthFreq;
    U32   rep[ZSTD_REP_NUM];
    U32   repToConfirm[ZSTD_REP_NUM];
 } seqStore_t;

 typedef struct {
    U32 off;
    U32 len;
 } ZSTD_match_t;

 typedef struct {
    U32 price;
    U32 off;
    U32 mlen;
    U32 litlen;
    U32 rep[ZSTD_REP_NUM];
 } ZSTD_optimal_t;

 typedef struct {
    U32* litFreq;
    U32* litLengthFreq;
    U32* matchLengthFreq;
    U32* offCodeFreq;
    ZSTD_match_t* matchTable;
    ZSTD_optimal_t* priceTable;

    U32  matchLengthSum;
    U32  matchSum;
    U32  litLengthSum;
@@ -231,19 +249,67 @@ typedef struct {
    U32  cachedPrice;
    U32  cachedLitLength;
    const BYTE* cachedLiterals;
 } seqStore_t;
 } optState_t;

 typedef struct {
    U32 offset;
    U32 checksum;
 } ldmEntry_t;

 typedef struct {
    ldmEntry_t* hashTable;
    BYTE* bucketOffsets;    /* Next position in bucket to insert entry */
    U64 hashPower;          /* Used to compute the rolling hash.
                             * Depends on ldmParams.minMatchLength */
 } ldmState_t;

 typedef struct {
    U32 enableLdm;          /* 1 if enable long distance matching */
    U32 hashLog;            /* Log size of hashTable */
    U32 bucketSizeLog;      /* Log bucket size for collision resolution, at most 8 */
    U32 minMatchLength;     /* Minimum match length */
    U32 hashEveryLog;       /* Log number of entries to skip */
 } ldmParams_t;

 typedef struct {
    U32 hufCTable[HUF_CTABLE_SIZE_U32(255)];
    FSE_CTable offcodeCTable[FSE_CTABLE_SIZE_U32(OffFSELog, MaxOff)];
    FSE_CTable matchlengthCTable[FSE_CTABLE_SIZE_U32(MLFSELog, MaxML)];
    FSE_CTable litlengthCTable[FSE_CTABLE_SIZE_U32(LLFSELog, MaxLL)];
    U32 workspace[HUF_WORKSPACE_SIZE_U32];
    HUF_repeat hufCTable_repeatMode;
    FSE_repeat offcode_repeatMode;
    FSE_repeat matchlength_repeatMode;
    FSE_repeat litlength_repeatMode;
 } ZSTD_entropyCTables_t;

 struct ZSTD_CCtx_params_s {
    ZSTD_compressionParameters cParams;
    ZSTD_frameParameters fParams;

    int compressionLevel;
    U32 forceWindow;           /* force back-references to respect limit of
                                * 1<<wLog, even for dictionary */

    /* Multithreading: used to pass parameters to mtctx */
    U32 nbThreads;
    unsigned jobSize;
    unsigned overlapSizeLog;

    /* Long distance matching parameters */
    ldmParams_t ldmParams;

    /* For use with createCCtxParams() and freeCCtxParams() only */
    ZSTD_customMem customMem;

 };  /* typedef'd to ZSTD_CCtx_params within "zstd.h" */

 const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx);
 void ZSTD_seqToCodes(const seqStore_t* seqStorePtr);
 int ZSTD_isSkipFrame(ZSTD_DCtx* dctx);

 /* custom memory allocation functions */
 void* ZSTD_defaultAllocFunction(void* opaque, size_t size);
 void ZSTD_defaultFreeFunction(void* opaque, void* address);
 #ifndef ZSTD_DLL_IMPORT
 static const ZSTD_customMem defaultCustomMem = { ZSTD_defaultAllocFunction, ZSTD_defaultFreeFunction, NULL };
 #endif
 void* ZSTD_malloc(size_t size, ZSTD_customMem customMem);
 void* ZSTD_calloc(size_t size, ZSTD_customMem customMem);
 void ZSTD_free(void* ptr, ZSTD_customMem customMem);


@@ -251,24 +317,27 @@ void ZSTD_free(void* ptr, ZSTD_customMem customMem);

 MEM_STATIC U32 ZSTD_highbit32(U32 val)
 {
    assert(val != 0);
    {
 #   if defined(_MSC_VER)   /* Visual */
    unsigned long r=0;
    _BitScanReverse(&r, val);
    return (unsigned)r;
        unsigned long r=0;
        _BitScanReverse(&r, val);
        return (unsigned)r;
 #   elif defined(__GNUC__) && (__GNUC__ >= 3)   /* GCC Intrinsic */
    return 31 - __builtin_clz(val);
        return 31 - __builtin_clz(val);
 #   else   /* Software version */
    static const int DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
    U32 v = val;
    int r;
    v |= v >> 1;
    v |= v >> 2;
    v |= v >> 4;
    v |= v >> 8;
    v |= v >> 16;
    r = DeBruijnClz[(U32)(v * 0x07C4ACDDU) >> 27];
    return r;
        static const int DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
        U32 v = val;
        int r;
        v |= v >> 1;
        v |= v >> 2;
        v |= v >> 4;
        v |= v >> 8;
        v |= v >> 16;
        r = DeBruijnClz[(U32)(v * 0x07C4ACDDU) >> 27];
        return r;
 #   endif
    }
 }


@@ -281,4 +350,56 @@ MEM_STATIC U32 ZSTD_highbit32(U32 val)
 void ZSTD_invalidateRepCodes(ZSTD_CCtx* cctx);


 /*! ZSTD_initCStream_internal() :
 *  Private use only. Init streaming operation.
 *  expects params to be valid.
 *  must receive dict, or cdict, or none, but not both.
 *  @return : 0, or an error code */
 size_t ZSTD_initCStream_internal(ZSTD_CStream* zcs,
                     const void* dict, size_t dictSize,
                     const ZSTD_CDict* cdict,
                     ZSTD_CCtx_params  params, unsigned long long pledgedSrcSize);

 /*! ZSTD_compressStream_generic() :
 *  Private use only. To be called from zstdmt_compress.c in single-thread mode. */
 size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs,
                                   ZSTD_outBuffer* output,
                                   ZSTD_inBuffer* input,
                                   ZSTD_EndDirective const flushMode);

 /*! ZSTD_getCParamsFromCDict() :
 *  as the name implies */
 ZSTD_compressionParameters ZSTD_getCParamsFromCDict(const ZSTD_CDict* cdict);

 /* ZSTD_compressBegin_advanced_internal() :
 * Private use only. To be called from zstdmt_compress.c. */
 size_t ZSTD_compressBegin_advanced_internal(ZSTD_CCtx* cctx,
                                    const void* dict, size_t dictSize,
                                    ZSTD_dictMode_e dictMode,
                                    ZSTD_CCtx_params params,
                                    unsigned long long pledgedSrcSize);

 /* ZSTD_compress_advanced_internal() :
 * Private use only. To be called from zstdmt_compress.c. */
 size_t ZSTD_compress_advanced_internal(ZSTD_CCtx* cctx,
                                       void* dst, size_t dstCapacity,
                                 const void* src, size_t srcSize,
                                 const void* dict,size_t dictSize,
                                 ZSTD_CCtx_params params);

 typedef struct {
    blockType_e blockType;
    U32 lastBlock;
    U32 origSize;
 } blockProperties_t;

 /*! ZSTD_getcBlockSize() :
 *   Provides the size of compressed block from block header `src` */
 size_t ZSTD_getcBlockSize(const void* src, size_t srcSize,
                          blockProperties_t* bpPtr);

 #if defined (__cplusplus)
 }
 #endif

 #endif   /* ZSTD_CCOMMON_H_MODULE */
--- a/contrib/zstd/zstd_lazy.c
+++ b/contrib/zstd/zstd_lazy.c
@@ -0,0 +1,749 @@
 /*
 * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
 * All rights reserved.
 *
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 * You may select, at your option, one of the above-listed licenses.
 */

 #include "zstd_lazy.h"


 /*-*************************************
 *  Binary Tree search
 ***************************************/
 /** ZSTD_insertBt1() : add one or multiple positions to tree.
 *   ip : assumed <= iend-8 .
 *   @return : nb of positions added */
 static U32 ZSTD_insertBt1(ZSTD_CCtx* zc, const BYTE* const ip, const U32 mls, const BYTE* const iend, U32 nbCompares,
                          U32 extDict)
 {
    U32*   const hashTable = zc->hashTable;
    U32    const hashLog = zc->appliedParams.cParams.hashLog;
    size_t const h  = ZSTD_hashPtr(ip, hashLog, mls);
    U32*   const bt = zc->chainTable;
    U32    const btLog  = zc->appliedParams.cParams.chainLog - 1;
    U32    const btMask = (1 << btLog) - 1;
    U32 matchIndex = hashTable[h];
    size_t commonLengthSmaller=0, commonLengthLarger=0;
    const BYTE* const base = zc->base;
    const BYTE* const dictBase = zc->dictBase;
    const U32 dictLimit = zc->dictLimit;
    const BYTE* const dictEnd = dictBase + dictLimit;
    const BYTE* const prefixStart = base + dictLimit;
    const BYTE* match;
    const U32 current = (U32)(ip-base);
    const U32 btLow = btMask >= current ? 0 : current - btMask;
    U32* smallerPtr = bt + 2*(current&btMask);
    U32* largerPtr  = smallerPtr + 1;
    U32 dummy32;   /* to be nullified at the end */
    U32 const windowLow = zc->lowLimit;
    U32 matchEndIdx = current+8;
    size_t bestLength = 8;
 #ifdef ZSTD_C_PREDICT
    U32 predictedSmall = *(bt + 2*((current-1)&btMask) + 0);
    U32 predictedLarge = *(bt + 2*((current-1)&btMask) + 1);
    predictedSmall += (predictedSmall>0);
    predictedLarge += (predictedLarge>0);
 #endif /* ZSTD_C_PREDICT */

    assert(ip <= iend-8);   /* required for h calculation */
    hashTable[h] = current;   /* Update Hash Table */

    while (nbCompares-- && (matchIndex > windowLow)) {
        U32* const nextPtr = bt + 2*(matchIndex & btMask);
        size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */

 #ifdef ZSTD_C_PREDICT   /* note : can create issues when hlog small <= 11 */
        const U32* predictPtr = bt + 2*((matchIndex-1) & btMask);   /* written this way, as bt is a roll buffer */
        if (matchIndex == predictedSmall) {
            /* no need to check length, result known */
            *smallerPtr = matchIndex;
            if (matchIndex <= btLow) { smallerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
            smallerPtr = nextPtr+1;               /* new "smaller" => larger of match */
            matchIndex = nextPtr[1];              /* new matchIndex larger than previous (closer to current) */
            predictedSmall = predictPtr[1] + (predictPtr[1]>0);
            continue;
        }
        if (matchIndex == predictedLarge) {
            *largerPtr = matchIndex;
            if (matchIndex <= btLow) { largerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
            largerPtr = nextPtr;
            matchIndex = nextPtr[0];
            predictedLarge = predictPtr[0] + (predictPtr[0]>0);
            continue;
        }
 #endif
        if ((!extDict) || (matchIndex+matchLength >= dictLimit)) {
            match = base + matchIndex;
            if (match[matchLength] == ip[matchLength])
                matchLength += ZSTD_count(ip+matchLength+1, match+matchLength+1, iend) +1;
        } else {
            match = dictBase + matchIndex;
            matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
            if (matchIndex+matchLength >= dictLimit)
                match = base + matchIndex;   /* to prepare for next usage of match[matchLength] */
        }

        if (matchLength > bestLength) {
            bestLength = matchLength;
            if (matchLength > matchEndIdx - matchIndex)
                matchEndIdx = matchIndex + (U32)matchLength;
        }

        if (ip+matchLength == iend)   /* equal : no way to know if inf or sup */
            break;   /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt tree */

        if (match[matchLength] < ip[matchLength]) {  /* necessarily within buffer */
            /* match+1 is smaller than current */
            *smallerPtr = matchIndex;             /* update smaller idx */
            commonLengthSmaller = matchLength;    /* all smaller will now have at least this guaranteed common length */
            if (matchIndex <= btLow) { smallerPtr=&dummy32; break; }   /* beyond tree size, stop searching */
            smallerPtr = nextPtr+1;               /* new "smaller" => larger of match */
            matchIndex = nextPtr[1];              /* new matchIndex larger than previous (closer to current) */
        } else {
            /* match is larger than current */
            *largerPtr = matchIndex;
            commonLengthLarger = matchLength;
            if (matchIndex <= btLow) { largerPtr=&dummy32; break; }   /* beyond tree size, stop searching */
            largerPtr = nextPtr;
            matchIndex = nextPtr[0];
    }   }

    *smallerPtr = *largerPtr = 0;
    if (bestLength > 384) return MIN(192, (U32)(bestLength - 384));   /* speed optimization */
    if (matchEndIdx > current + 8) return matchEndIdx - (current + 8);
    return 1;
 }


 static size_t ZSTD_insertBtAndFindBestMatch (
                        ZSTD_CCtx* zc,
                        const BYTE* const ip, const BYTE* const iend,
                        size_t* offsetPtr,
                        U32 nbCompares, const U32 mls,
                        U32 extDict)
 {
    U32*   const hashTable = zc->hashTable;
    U32    const hashLog = zc->appliedParams.cParams.hashLog;
    size_t const h  = ZSTD_hashPtr(ip, hashLog, mls);
    U32*   const bt = zc->chainTable;
    U32    const btLog  = zc->appliedParams.cParams.chainLog - 1;
    U32    const btMask = (1 << btLog) - 1;
    U32 matchIndex  = hashTable[h];
    size_t commonLengthSmaller=0, commonLengthLarger=0;
    const BYTE* const base = zc->base;
    const BYTE* const dictBase = zc->dictBase;
    const U32 dictLimit = zc->dictLimit;
    const BYTE* const dictEnd = dictBase + dictLimit;
    const BYTE* const prefixStart = base + dictLimit;
    const U32 current = (U32)(ip-base);
    const U32 btLow = btMask >= current ? 0 : current - btMask;
    const U32 windowLow = zc->lowLimit;
    U32* smallerPtr = bt + 2*(current&btMask);
    U32* largerPtr  = bt + 2*(current&btMask) + 1;
    U32 matchEndIdx = current+8;
    U32 dummy32;   /* to be nullified at the end */
    size_t bestLength = 0;

    assert(ip <= iend-8);   /* required for h calculation */
    hashTable[h] = current;   /* Update Hash Table */

    while (nbCompares-- && (matchIndex > windowLow)) {
        U32* const nextPtr = bt + 2*(matchIndex & btMask);
        size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
        const BYTE* match;

        if ((!extDict) || (matchIndex+matchLength >= dictLimit)) {
            match = base + matchIndex;
            if (match[matchLength] == ip[matchLength])
                matchLength += ZSTD_count(ip+matchLength+1, match+matchLength+1, iend) +1;
        } else {
            match = dictBase + matchIndex;
            matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
            if (matchIndex+matchLength >= dictLimit)
                match = base + matchIndex;   /* to prepare for next usage of match[matchLength] */
        }

        if (matchLength > bestLength) {
            if (matchLength > matchEndIdx - matchIndex)
                matchEndIdx = matchIndex + (U32)matchLength;
            if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(current-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) )
                bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + current - matchIndex;
            if (ip+matchLength == iend)   /* equal : no way to know if inf or sup */
                break;   /* drop, to guarantee consistency (miss a little bit of compression) */
        }

        if (match[matchLength] < ip[matchLength]) {
            /* match is smaller than current */
            *smallerPtr = matchIndex;             /* update smaller idx */
            commonLengthSmaller = matchLength;    /* all smaller will now have at least this guaranteed common length */
            if (matchIndex <= btLow) { smallerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
            smallerPtr = nextPtr+1;               /* new "smaller" => larger of match */
            matchIndex = nextPtr[1];              /* new matchIndex larger than previous (closer to current) */
        } else {
            /* match is larger than current */
            *largerPtr = matchIndex;
            commonLengthLarger = matchLength;
            if (matchIndex <= btLow) { largerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
            largerPtr = nextPtr;
            matchIndex = nextPtr[0];
    }   }

    *smallerPtr = *largerPtr = 0;

    zc->nextToUpdate = (matchEndIdx > current + 8) ? matchEndIdx - 8 : current+1;
    return bestLength;
 }


 void ZSTD_updateTree(ZSTD_CCtx* zc, const BYTE* const ip, const BYTE* const iend, const U32 nbCompares, const U32 mls)
 {
    const BYTE* const base = zc->base;
    const U32 target = (U32)(ip - base);
    U32 idx = zc->nextToUpdate;

    while(idx < target)
        idx += ZSTD_insertBt1(zc, base+idx, mls, iend, nbCompares, 0);
 }

 /** ZSTD_BtFindBestMatch() : Tree updater, providing best match */
 static size_t ZSTD_BtFindBestMatch (
                        ZSTD_CCtx* zc,
                        const BYTE* const ip, const BYTE* const iLimit,
                        size_t* offsetPtr,
                        const U32 maxNbAttempts, const U32 mls)
 {
    if (ip < zc->base + zc->nextToUpdate) return 0;   /* skipped area */
    ZSTD_updateTree(zc, ip, iLimit, maxNbAttempts, mls);
    return ZSTD_insertBtAndFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, mls, 0);
 }


 static size_t ZSTD_BtFindBestMatch_selectMLS (
                        ZSTD_CCtx* zc,   /* Index table will be updated */
                        const BYTE* ip, const BYTE* const iLimit,
                        size_t* offsetPtr,
                        const U32 maxNbAttempts, const U32 matchLengthSearch)
 {
    switch(matchLengthSearch)
    {
    default : /* includes case 3 */
    case 4 : return ZSTD_BtFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4);
    case 5 : return ZSTD_BtFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5);
    case 7 :
    case 6 : return ZSTD_BtFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6);
    }
 }


 void ZSTD_updateTree_extDict(ZSTD_CCtx* zc, const BYTE* const ip, const BYTE* const iend, const U32 nbCompares, const U32 mls)
 {
    const BYTE* const base = zc->base;
    const U32 target = (U32)(ip - base);
    U32 idx = zc->nextToUpdate;

    while (idx < target) idx += ZSTD_insertBt1(zc, base+idx, mls, iend, nbCompares, 1);
 }


 /** Tree updater, providing best match */
 static size_t ZSTD_BtFindBestMatch_extDict (
                        ZSTD_CCtx* zc,
                        const BYTE* const ip, const BYTE* const iLimit,
                        size_t* offsetPtr,
                        const U32 maxNbAttempts, const U32 mls)
 {
    if (ip < zc->base + zc->nextToUpdate) return 0;   /* skipped area */
    ZSTD_updateTree_extDict(zc, ip, iLimit, maxNbAttempts, mls);
    return ZSTD_insertBtAndFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, mls, 1);
 }


 static size_t ZSTD_BtFindBestMatch_selectMLS_extDict (
                        ZSTD_CCtx* zc,   /* Index table will be updated */
                        const BYTE* ip, const BYTE* const iLimit,
                        size_t* offsetPtr,
                        const U32 maxNbAttempts, const U32 matchLengthSearch)
 {
    switch(matchLengthSearch)
    {
    default : /* includes case 3 */
    case 4 : return ZSTD_BtFindBestMatch_extDict(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4);
    case 5 : return ZSTD_BtFindBestMatch_extDict(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5);
    case 7 :
    case 6 : return ZSTD_BtFindBestMatch_extDict(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6);
    }
 }



 /* *********************************
 *  Hash Chain
 ***********************************/
 #define NEXT_IN_CHAIN(d, mask)   chainTable[(d) & mask]

 /* Update chains up to ip (excluded)
   Assumption : always within prefix (i.e. not within extDict) */
 U32 ZSTD_insertAndFindFirstIndex (ZSTD_CCtx* zc, const BYTE* ip, U32 mls)
 {
    U32* const hashTable  = zc->hashTable;
    const U32 hashLog = zc->appliedParams.cParams.hashLog;
    U32* const chainTable = zc->chainTable;
    const U32 chainMask = (1 << zc->appliedParams.cParams.chainLog) - 1;
    const BYTE* const base = zc->base;
    const U32 target = (U32)(ip - base);
    U32 idx = zc->nextToUpdate;

    while(idx < target) { /* catch up */
        size_t const h = ZSTD_hashPtr(base+idx, hashLog, mls);
        NEXT_IN_CHAIN(idx, chainMask) = hashTable[h];
        hashTable[h] = idx;
        idx++;
    }

    zc->nextToUpdate = target;
    return hashTable[ZSTD_hashPtr(ip, hashLog, mls)];
 }


 /* inlining is important to hardwire a hot branch (template emulation) */
 FORCE_INLINE_TEMPLATE
 size_t ZSTD_HcFindBestMatch_generic (
                        ZSTD_CCtx* zc,   /* Index table will be updated */
                        const BYTE* const ip, const BYTE* const iLimit,
                        size_t* offsetPtr,
                        const U32 maxNbAttempts, const U32 mls, const U32 extDict)
 {
    U32* const chainTable = zc->chainTable;
    const U32 chainSize = (1 << zc->appliedParams.cParams.chainLog);
    const U32 chainMask = chainSize-1;
    const BYTE* const base = zc->base;
    const BYTE* const dictBase = zc->dictBase;
    const U32 dictLimit = zc->dictLimit;
    const BYTE* const prefixStart = base + dictLimit;
    const BYTE* const dictEnd = dictBase + dictLimit;
    const U32 lowLimit = zc->lowLimit;
    const U32 current = (U32)(ip-base);
    const U32 minChain = current > chainSize ? current - chainSize : 0;
    int nbAttempts=maxNbAttempts;
    size_t ml=4-1;

    /* HC4 match finder */
    U32 matchIndex = ZSTD_insertAndFindFirstIndex (zc, ip, mls);

    for ( ; (matchIndex>lowLimit) & (nbAttempts>0) ; nbAttempts--) {
        const BYTE* match;
        size_t currentMl=0;
        if ((!extDict) || matchIndex >= dictLimit) {
            match = base + matchIndex;
            if (match[ml] == ip[ml])   /* potentially better */
                currentMl = ZSTD_count(ip, match, iLimit);
        } else {
            match = dictBase + matchIndex;
            if (MEM_read32(match) == MEM_read32(ip))   /* assumption : matchIndex <= dictLimit-4 (by table construction) */
                currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, dictEnd, prefixStart) + 4;
        }

        /* save best solution */
        if (currentMl > ml) {
            ml = currentMl;
            *offsetPtr = current - matchIndex + ZSTD_REP_MOVE;
            if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */
        }

        if (matchIndex <= minChain) break;
        matchIndex = NEXT_IN_CHAIN(matchIndex, chainMask);
    }

    return ml;
 }


 FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_selectMLS (
                        ZSTD_CCtx* zc,
                        const BYTE* ip, const BYTE* const iLimit,
                        size_t* offsetPtr,
                        const U32 maxNbAttempts, const U32 matchLengthSearch)
 {
    switch(matchLengthSearch)
    {
    default : /* includes case 3 */
    case 4 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4, 0);
    case 5 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5, 0);
    case 7 :
    case 6 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6, 0);
    }
 }


 FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_extDict_selectMLS (
                        ZSTD_CCtx* zc,
                        const BYTE* ip, const BYTE* const iLimit,
                        size_t* offsetPtr,
                        const U32 maxNbAttempts, const U32 matchLengthSearch)
 {
    switch(matchLengthSearch)
    {
    default : /* includes case 3 */
    case 4 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4, 1);
    case 5 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5, 1);
    case 7 :
    case 6 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6, 1);
    }
 }


 /* *******************************
 *  Common parser - lazy strategy
 *********************************/
 FORCE_INLINE_TEMPLATE
 size_t ZSTD_compressBlock_lazy_generic(ZSTD_CCtx* ctx,
                                       const void* src, size_t srcSize,
                                       const U32 searchMethod, const U32 depth)
 {
    seqStore_t* seqStorePtr = &(ctx->seqStore);
    const BYTE* const istart = (const BYTE*)src;
    const BYTE* ip = istart;
    const BYTE* anchor = istart;
    const BYTE* const iend = istart + srcSize;
    const BYTE* const ilimit = iend - 8;
    const BYTE* const base = ctx->base + ctx->dictLimit;

    U32 const maxSearches = 1 << ctx->appliedParams.cParams.searchLog;
    U32 const mls = ctx->appliedParams.cParams.searchLength;

    typedef size_t (*searchMax_f)(ZSTD_CCtx* zc, const BYTE* ip, const BYTE* iLimit,
                        size_t* offsetPtr,
                        U32 maxNbAttempts, U32 matchLengthSearch);
    searchMax_f const searchMax = searchMethod ? ZSTD_BtFindBestMatch_selectMLS : ZSTD_HcFindBestMatch_selectMLS;
    U32 offset_1 = seqStorePtr->rep[0], offset_2 = seqStorePtr->rep[1], savedOffset=0;

    /* init */
    ip += (ip==base);
    ctx->nextToUpdate3 = ctx->nextToUpdate;
    {   U32 const maxRep = (U32)(ip-base);
        if (offset_2 > maxRep) savedOffset = offset_2, offset_2 = 0;
        if (offset_1 > maxRep) savedOffset = offset_1, offset_1 = 0;
    }

    /* Match Loop */
    while (ip < ilimit) {
        size_t matchLength=0;
        size_t offset=0;
        const BYTE* start=ip+1;

        /* check repCode */
        if ((offset_1>0) & (MEM_read32(ip+1) == MEM_read32(ip+1 - offset_1))) {
            /* repcode : we take it */
            matchLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
            if (depth==0) goto _storeSequence;
        }

        /* first search (depth 0) */
        {   size_t offsetFound = 99999999;
            size_t const ml2 = searchMax(ctx, ip, iend, &offsetFound, maxSearches, mls);
            if (ml2 > matchLength)
                matchLength = ml2, start = ip, offset=offsetFound;
        }

        if (matchLength < 4) {
            ip += ((ip-anchor) >> g_searchStrength) + 1;   /* jump faster over incompressible sections */
            continue;
        }

        /* let's try to find a better solution */
        if (depth>=1)
        while (ip<ilimit) {
            ip ++;
            if ((offset) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) {
                size_t const mlRep = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4;
                int const gain2 = (int)(mlRep * 3);
                int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1);
                if ((mlRep >= 4) && (gain2 > gain1))
                    matchLength = mlRep, offset = 0, start = ip;
            }
            {   size_t offset2=99999999;
                size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls);
                int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1));   /* raw approx */
                int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 4);
                if ((ml2 >= 4) && (gain2 > gain1)) {
                    matchLength = ml2, offset = offset2, start = ip;
                    continue;   /* search a better one */
            }   }

            /* let's find an even better one */
            if ((depth==2) && (ip<ilimit)) {
                ip ++;
                if ((offset) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) {
                    size_t const ml2 = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4;
                    int const gain2 = (int)(ml2 * 4);
                    int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1);
                    if ((ml2 >= 4) && (gain2 > gain1))
                        matchLength = ml2, offset = 0, start = ip;
                }
                {   size_t offset2=99999999;
                    size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls);
                    int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1));   /* raw approx */
                    int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 7);
                    if ((ml2 >= 4) && (gain2 > gain1)) {
                        matchLength = ml2, offset = offset2, start = ip;
                        continue;
            }   }   }
            break;  /* nothing found : store previous solution */
        }

        /* NOTE:
         * start[-offset+ZSTD_REP_MOVE-1] is undefined behavior.
         * (-offset+ZSTD_REP_MOVE-1) is unsigned, and is added to start, which
         * overflows the pointer, which is undefined behavior.
         */
        /* catch up */
        if (offset) {
            while ( (start > anchor)
                 && (start > base+offset-ZSTD_REP_MOVE)
                 && (start[-1] == (start-offset+ZSTD_REP_MOVE)[-1]) )  /* only search for offset within prefix */
                { start--; matchLength++; }
            offset_2 = offset_1; offset_1 = (U32)(offset - ZSTD_REP_MOVE);
        }
        /* store sequence */
 _storeSequence:
        {   size_t const litLength = start - anchor;
            ZSTD_storeSeq(seqStorePtr, litLength, anchor, (U32)offset, matchLength-MINMATCH);
            anchor = ip = start + matchLength;
        }

        /* check immediate repcode */
        while ( (ip <= ilimit)
             && ((offset_2>0)
             & (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) {
            /* store sequence */
            matchLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
            offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap repcodes */
            ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, matchLength-MINMATCH);
            ip += matchLength;
            anchor = ip;
            continue;   /* faster when present ... (?) */
    }   }

    /* Save reps for next block */
    seqStorePtr->repToConfirm[0] = offset_1 ? offset_1 : savedOffset;
    seqStorePtr->repToConfirm[1] = offset_2 ? offset_2 : savedOffset;

    /* Return the last literals size */
    return iend - anchor;
 }


 size_t ZSTD_compressBlock_btlazy2(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
 {
    return ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 1, 2);
 }

 size_t ZSTD_compressBlock_lazy2(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
 {
    return ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 0, 2);
 }

 size_t ZSTD_compressBlock_lazy(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
 {
    return ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 0, 1);
 }

 size_t ZSTD_compressBlock_greedy(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
 {
    return ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 0, 0);
 }


 FORCE_INLINE_TEMPLATE
 size_t ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx* ctx,
                                     const void* src, size_t srcSize,
                                     const U32 searchMethod, const U32 depth)
 {
    seqStore_t* seqStorePtr = &(ctx->seqStore);
    const BYTE* const istart = (const BYTE*)src;
    const BYTE* ip = istart;
    const BYTE* anchor = istart;
    const BYTE* const iend = istart + srcSize;
    const BYTE* const ilimit = iend - 8;
    const BYTE* const base = ctx->base;
    const U32 dictLimit = ctx->dictLimit;
    const U32 lowestIndex = ctx->lowLimit;
    const BYTE* const prefixStart = base + dictLimit;
    const BYTE* const dictBase = ctx->dictBase;
    const BYTE* const dictEnd  = dictBase + dictLimit;
    const BYTE* const dictStart  = dictBase + ctx->lowLimit;

    const U32 maxSearches = 1 << ctx->appliedParams.cParams.searchLog;
    const U32 mls = ctx->appliedParams.cParams.searchLength;

    typedef size_t (*searchMax_f)(ZSTD_CCtx* zc, const BYTE* ip, const BYTE* iLimit,
                        size_t* offsetPtr,
                        U32 maxNbAttempts, U32 matchLengthSearch);
    searchMax_f searchMax = searchMethod ? ZSTD_BtFindBestMatch_selectMLS_extDict : ZSTD_HcFindBestMatch_extDict_selectMLS;

    U32 offset_1 = seqStorePtr->rep[0], offset_2 = seqStorePtr->rep[1];

    /* init */
    ctx->nextToUpdate3 = ctx->nextToUpdate;
    ip += (ip == prefixStart);

    /* Match Loop */
    while (ip < ilimit) {
        size_t matchLength=0;
        size_t offset=0;
        const BYTE* start=ip+1;
        U32 current = (U32)(ip-base);

        /* check repCode */
        {   const U32 repIndex = (U32)(current+1 - offset_1);
            const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
            const BYTE* const repMatch = repBase + repIndex;
            if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex))   /* intentional overflow */
            if (MEM_read32(ip+1) == MEM_read32(repMatch)) {
                /* repcode detected we should take it */
                const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
                matchLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repEnd, prefixStart) + 4;
                if (depth==0) goto _storeSequence;
        }   }

        /* first search (depth 0) */
        {   size_t offsetFound = 99999999;
            size_t const ml2 = searchMax(ctx, ip, iend, &offsetFound, maxSearches, mls);
            if (ml2 > matchLength)
                matchLength = ml2, start = ip, offset=offsetFound;
        }

         if (matchLength < 4) {
            ip += ((ip-anchor) >> g_searchStrength) + 1;   /* jump faster over incompressible sections */
            continue;
        }

        /* let's try to find a better solution */
        if (depth>=1)
        while (ip<ilimit) {
            ip ++;
            current++;
            /* check repCode */
            if (offset) {
                const U32 repIndex = (U32)(current - offset_1);
                const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
                const BYTE* const repMatch = repBase + repIndex;
                if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex))  /* intentional overflow */
                if (MEM_read32(ip) == MEM_read32(repMatch)) {
                    /* repcode detected */
                    const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
                    size_t const repLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;
                    int const gain2 = (int)(repLength * 3);
                    int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1);
                    if ((repLength >= 4) && (gain2 > gain1))
                        matchLength = repLength, offset = 0, start = ip;
            }   }

            /* search match, depth 1 */
            {   size_t offset2=99999999;
                size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls);
                int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1));   /* raw approx */
                int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 4);
                if ((ml2 >= 4) && (gain2 > gain1)) {
                    matchLength = ml2, offset = offset2, start = ip;
                    continue;   /* search a better one */
            }   }

            /* let's find an even better one */
            if ((depth==2) && (ip<ilimit)) {
                ip ++;
                current++;
                /* check repCode */
                if (offset) {
                    const U32 repIndex = (U32)(current - offset_1);
                    const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
                    const BYTE* const repMatch = repBase + repIndex;
                    if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex))  /* intentional overflow */
                    if (MEM_read32(ip) == MEM_read32(repMatch)) {
                        /* repcode detected */
                        const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
                        size_t const repLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;
                        int const gain2 = (int)(repLength * 4);
                        int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1);
                        if ((repLength >= 4) && (gain2 > gain1))
                            matchLength = repLength, offset = 0, start = ip;
                }   }

                /* search match, depth 2 */
                {   size_t offset2=99999999;
                    size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls);
                    int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1));   /* raw approx */
                    int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 7);
                    if ((ml2 >= 4) && (gain2 > gain1)) {
                        matchLength = ml2, offset = offset2, start = ip;
                        continue;
            }   }   }
            break;  /* nothing found : store previous solution */
        }

        /* catch up */
        if (offset) {
            U32 const matchIndex = (U32)((start-base) - (offset - ZSTD_REP_MOVE));
            const BYTE* match = (matchIndex < dictLimit) ? dictBase + matchIndex : base + matchIndex;
            const BYTE* const mStart = (matchIndex < dictLimit) ? dictStart : prefixStart;
            while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; }  /* catch up */
            offset_2 = offset_1; offset_1 = (U32)(offset - ZSTD_REP_MOVE);
        }

        /* store sequence */
 _storeSequence:
        {   size_t const litLength = start - anchor;
            ZSTD_storeSeq(seqStorePtr, litLength, anchor, (U32)offset, matchLength-MINMATCH);
            anchor = ip = start + matchLength;
        }

        /* check immediate repcode */
        while (ip <= ilimit) {
            const U32 repIndex = (U32)((ip-base) - offset_2);
            const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
            const BYTE* const repMatch = repBase + repIndex;
            if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex))  /* intentional overflow */
            if (MEM_read32(ip) == MEM_read32(repMatch)) {
                /* repcode detected we should take it */
                const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
                matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;
                offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset;   /* swap offset history */
                ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, matchLength-MINMATCH);
                ip += matchLength;
                anchor = ip;
                continue;   /* faster when present ... (?) */
            }
            break;
    }   }

    /* Save reps for next block */
    seqStorePtr->repToConfirm[0] = offset_1; seqStorePtr->repToConfirm[1] = offset_2;

    /* Return the last literals size */
    return iend - anchor;
 }


 size_t ZSTD_compressBlock_greedy_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
 {
    return ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 0, 0);
 }

 size_t ZSTD_compressBlock_lazy_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
 {
    return ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 0, 1);
 }

 size_t ZSTD_compressBlock_lazy2_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
 {
    return ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 0, 2);
 }

 size_t ZSTD_compressBlock_btlazy2_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
 {
    return ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 1, 2);
 }
--- a/contrib/zstd/zstd_lazy.h
+++ b/contrib/zstd/zstd_lazy.h
@@ -0,0 +1,38 @@
 /*
 * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
 * All rights reserved.
 *
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 * You may select, at your option, one of the above-listed licenses.
 */

 #ifndef ZSTD_LAZY_H
 #define ZSTD_LAZY_H

 #include "zstd_compress.h"

 #if defined (__cplusplus)
 extern "C" {
 #endif

 U32 ZSTD_insertAndFindFirstIndex (ZSTD_CCtx* zc, const BYTE* ip, U32 mls);
 void ZSTD_updateTree(ZSTD_CCtx* zc, const BYTE* const ip, const BYTE* const iend, const U32 nbCompares, const U32 mls);
 void ZSTD_updateTree_extDict(ZSTD_CCtx* zc, const BYTE* const ip, const BYTE* const iend, const U32 nbCompares, const U32 mls);

 size_t ZSTD_compressBlock_btlazy2(ZSTD_CCtx* ctx, const void* src, size_t srcSize);
 size_t ZSTD_compressBlock_lazy2(ZSTD_CCtx* ctx, const void* src, size_t srcSize);
 size_t ZSTD_compressBlock_lazy(ZSTD_CCtx* ctx, const void* src, size_t srcSize);
 size_t ZSTD_compressBlock_greedy(ZSTD_CCtx* ctx, const void* src, size_t srcSize);

 size_t ZSTD_compressBlock_greedy_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize);
 size_t ZSTD_compressBlock_lazy_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize);
 size_t ZSTD_compressBlock_lazy2_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize);
 size_t ZSTD_compressBlock_btlazy2_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize);

 #if defined (__cplusplus)
 }
 #endif

 #endif /* ZSTD_LAZY_H */
--- a/contrib/zstd/zstd_ldm.c
+++ b/contrib/zstd/zstd_ldm.c
@@ -0,0 +1,703 @@
 /*
 * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
 * All rights reserved.
 *
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 */

 #include "zstd_ldm.h"

 #include "zstd_fast.h"          /* ZSTD_fillHashTable() */
 #include "zstd_double_fast.h"   /* ZSTD_fillDoubleHashTable() */

 #define LDM_BUCKET_SIZE_LOG 3
 #define LDM_MIN_MATCH_LENGTH 64
 #define LDM_HASH_LOG 20
 #define LDM_HASH_CHAR_OFFSET 10

 size_t ZSTD_ldm_initializeParameters(ldmParams_t* params, U32 enableLdm)
 {
    ZSTD_STATIC_ASSERT(LDM_BUCKET_SIZE_LOG <= ZSTD_LDM_BUCKETSIZELOG_MAX);
    params->enableLdm = enableLdm>0;
    params->hashLog = LDM_HASH_LOG;
    params->bucketSizeLog = LDM_BUCKET_SIZE_LOG;
    params->minMatchLength = LDM_MIN_MATCH_LENGTH;
    params->hashEveryLog = ZSTD_LDM_HASHEVERYLOG_NOTSET;
    return 0;
 }

 void ZSTD_ldm_adjustParameters(ldmParams_t* params, U32 windowLog)
 {
    if (params->hashEveryLog == ZSTD_LDM_HASHEVERYLOG_NOTSET) {
        params->hashEveryLog =
                windowLog < params->hashLog ? 0 : windowLog - params->hashLog;
    }
    params->bucketSizeLog = MIN(params->bucketSizeLog, params->hashLog);
 }

 size_t ZSTD_ldm_getTableSize(U32 hashLog, U32 bucketSizeLog) {
    size_t const ldmHSize = ((size_t)1) << hashLog;
    size_t const ldmBucketSizeLog = MIN(bucketSizeLog, hashLog);
    size_t const ldmBucketSize =
        ((size_t)1) << (hashLog - ldmBucketSizeLog);
    return ldmBucketSize + (ldmHSize * (sizeof(ldmEntry_t)));
 }

 /** ZSTD_ldm_getSmallHash() :
 *  numBits should be <= 32
 *  If numBits==0, returns 0.
 *  @return : the most significant numBits of value. */
 static U32 ZSTD_ldm_getSmallHash(U64 value, U32 numBits)
 {
    assert(numBits <= 32);
    return numBits == 0 ? 0 : (U32)(value >> (64 - numBits));
 }

 /** ZSTD_ldm_getChecksum() :
 *  numBitsToDiscard should be <= 32
 *  @return : the next most significant 32 bits after numBitsToDiscard */
 static U32 ZSTD_ldm_getChecksum(U64 hash, U32 numBitsToDiscard)
 {
    assert(numBitsToDiscard <= 32);
    return (hash >> (64 - 32 - numBitsToDiscard)) & 0xFFFFFFFF;
 }

 /** ZSTD_ldm_getTag() ;
 *  Given the hash, returns the most significant numTagBits bits
 *  after (32 + hbits) bits.
 *
 *  If there are not enough bits remaining, return the last
 *  numTagBits bits. */
 static U32 ZSTD_ldm_getTag(U64 hash, U32 hbits, U32 numTagBits)
 {
    assert(numTagBits < 32 && hbits <= 32);
    if (32 - hbits < numTagBits) {
        return hash & (((U32)1 << numTagBits) - 1);
    } else {
        return (hash >> (32 - hbits - numTagBits)) & (((U32)1 << numTagBits) - 1);
    }
 }

 /** ZSTD_ldm_getBucket() :
 *  Returns a pointer to the start of the bucket associated with hash. */
 static ldmEntry_t* ZSTD_ldm_getBucket(
        ldmState_t* ldmState, size_t hash, ldmParams_t const ldmParams)
 {
    return ldmState->hashTable + (hash << ldmParams.bucketSizeLog);
 }

 /** ZSTD_ldm_insertEntry() :
 *  Insert the entry with corresponding hash into the hash table */
 static void ZSTD_ldm_insertEntry(ldmState_t* ldmState,
                                 size_t const hash, const ldmEntry_t entry,
                                 ldmParams_t const ldmParams)
 {
    BYTE* const bucketOffsets = ldmState->bucketOffsets;
    *(ZSTD_ldm_getBucket(ldmState, hash, ldmParams) + bucketOffsets[hash]) = entry;
    bucketOffsets[hash]++;
    bucketOffsets[hash] &= ((U32)1 << ldmParams.bucketSizeLog) - 1;
 }

 /** ZSTD_ldm_makeEntryAndInsertByTag() :
 *
 *  Gets the small hash, checksum, and tag from the rollingHash.
 *
 *  If the tag matches (1 << ldmParams.hashEveryLog)-1, then
 *  creates an ldmEntry from the offset, and inserts it into the hash table.
 *
 *  hBits is the length of the small hash, which is the most significant hBits
 *  of rollingHash. The checksum is the next 32 most significant bits, followed
 *  by ldmParams.hashEveryLog bits that make up the tag. */
 static void ZSTD_ldm_makeEntryAndInsertByTag(ldmState_t* ldmState,
                                             U64 const rollingHash,
                                             U32 const hBits,
                                             U32 const offset,
                                             ldmParams_t const ldmParams)
 {
    U32 const tag = ZSTD_ldm_getTag(rollingHash, hBits, ldmParams.hashEveryLog);
    U32 const tagMask = ((U32)1 << ldmParams.hashEveryLog) - 1;
    if (tag == tagMask) {
        U32 const hash = ZSTD_ldm_getSmallHash(rollingHash, hBits);
        U32 const checksum = ZSTD_ldm_getChecksum(rollingHash, hBits);
        ldmEntry_t entry;
        entry.offset = offset;
        entry.checksum = checksum;
        ZSTD_ldm_insertEntry(ldmState, hash, entry, ldmParams);
    }
 }

 /** ZSTD_ldm_getRollingHash() :
 *  Get a 64-bit hash using the first len bytes from buf.
 *
 *  Giving bytes s = s_1, s_2, ... s_k, the hash is defined to be
 *  H(s) = s_1*(a^(k-1)) + s_2*(a^(k-2)) + ... + s_k*(a^0)
 *
 *  where the constant a is defined to be prime8bytes.
 *
 *  The implementation adds an offset to each byte, so
 *  H(s) = (s_1 + HASH_CHAR_OFFSET)*(a^(k-1)) + ... */
 static U64 ZSTD_ldm_getRollingHash(const BYTE* buf, U32 len)
 {
    U64 ret = 0;
    U32 i;
    for (i = 0; i < len; i++) {
        ret *= prime8bytes;
        ret += buf[i] + LDM_HASH_CHAR_OFFSET;
    }
    return ret;
 }

 /** ZSTD_ldm_ipow() :
 *  Return base^exp. */
 static U64 ZSTD_ldm_ipow(U64 base, U64 exp)
 {
    U64 ret = 1;
    while (exp) {
        if (exp & 1) { ret *= base; }
        exp >>= 1;
        base *= base;
    }
    return ret;
 }

 U64 ZSTD_ldm_getHashPower(U32 minMatchLength) {
    assert(minMatchLength >= ZSTD_LDM_MINMATCH_MIN);
    return ZSTD_ldm_ipow(prime8bytes, minMatchLength - 1);
 }

 /** ZSTD_ldm_updateHash() :
 *  Updates hash by removing toRemove and adding toAdd. */
 static U64 ZSTD_ldm_updateHash(U64 hash, BYTE toRemove, BYTE toAdd, U64 hashPower)
 {
    hash -= ((toRemove + LDM_HASH_CHAR_OFFSET) * hashPower);
    hash *= prime8bytes;
    hash += toAdd + LDM_HASH_CHAR_OFFSET;
    return hash;
 }

 /** ZSTD_ldm_countBackwardsMatch() :
 *  Returns the number of bytes that match backwards before pIn and pMatch.
 *
 *  We count only bytes where pMatch >= pBase and pIn >= pAnchor. */
 static size_t ZSTD_ldm_countBackwardsMatch(
            const BYTE* pIn, const BYTE* pAnchor,
            const BYTE* pMatch, const BYTE* pBase)
 {
    size_t matchLength = 0;
    while (pIn > pAnchor && pMatch > pBase && pIn[-1] == pMatch[-1]) {
        pIn--;
        pMatch--;
        matchLength++;
    }
    return matchLength;
 }

 /** ZSTD_ldm_fillFastTables() :
 *
 *  Fills the relevant tables for the ZSTD_fast and ZSTD_dfast strategies.
 *  This is similar to ZSTD_loadDictionaryContent.
 *
 *  The tables for the other strategies are filled within their
 *  block compressors. */
 static size_t ZSTD_ldm_fillFastTables(ZSTD_CCtx* zc, const void* end)
 {
    const BYTE* const iend = (const BYTE*)end;
    const U32 mls = zc->appliedParams.cParams.searchLength;

    switch(zc->appliedParams.cParams.strategy)
    {
    case ZSTD_fast:
        ZSTD_fillHashTable(zc, iend, mls);
        zc->nextToUpdate = (U32)(iend - zc->base);
        break;

    case ZSTD_dfast:
        ZSTD_fillDoubleHashTable(zc, iend, mls);
        zc->nextToUpdate = (U32)(iend - zc->base);
        break;

    case ZSTD_greedy:
    case ZSTD_lazy:
    case ZSTD_lazy2:
    case ZSTD_btlazy2:
    case ZSTD_btopt:
    case ZSTD_btultra:
        break;
    default:
        assert(0);  /* not possible : not a valid strategy id */
    }

    return 0;
 }

 /** ZSTD_ldm_fillLdmHashTable() :
 *
 *  Fills hashTable from (lastHashed + 1) to iend (non-inclusive).
 *  lastHash is the rolling hash that corresponds to lastHashed.
 *
 *  Returns the rolling hash corresponding to position iend-1. */
 static U64 ZSTD_ldm_fillLdmHashTable(ldmState_t* state,
                                     U64 lastHash, const BYTE* lastHashed,
                                     const BYTE* iend, const BYTE* base,
                                     U32 hBits, ldmParams_t const ldmParams)
 {
    U64 rollingHash = lastHash;
    const BYTE* cur = lastHashed + 1;

    while (cur < iend) {
        rollingHash = ZSTD_ldm_updateHash(rollingHash, cur[-1],
                                          cur[ldmParams.minMatchLength-1],
                                          state->hashPower);
        ZSTD_ldm_makeEntryAndInsertByTag(state,
                                         rollingHash, hBits,
                                         (U32)(cur - base), ldmParams);
        ++cur;
    }
    return rollingHash;
 }


 /** ZSTD_ldm_limitTableUpdate() :
 *
 *  Sets cctx->nextToUpdate to a position corresponding closer to anchor
 *  if it is far way
 *  (after a long match, only update tables a limited amount). */
 static void ZSTD_ldm_limitTableUpdate(ZSTD_CCtx* cctx, const BYTE* anchor)
 {
    U32 const current = (U32)(anchor - cctx->base);
    if (current > cctx->nextToUpdate + 1024) {
        cctx->nextToUpdate =
            current - MIN(512, current - cctx->nextToUpdate - 1024);
    }
 }

 typedef size_t (*ZSTD_blockCompressor) (ZSTD_CCtx* ctx, const void* src, size_t srcSize);
 /* defined in zstd_compress.c */
 ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, int extDict);

 FORCE_INLINE_TEMPLATE
 size_t ZSTD_compressBlock_ldm_generic(ZSTD_CCtx* cctx,
                                      const void* src, size_t srcSize)
 {
    ldmState_t* const ldmState = &(cctx->ldmState);
    const ldmParams_t ldmParams = cctx->appliedParams.ldmParams;
    const U64 hashPower = ldmState->hashPower;
    const U32 hBits = ldmParams.hashLog - ldmParams.bucketSizeLog;
    const U32 ldmBucketSize = ((U32)1 << ldmParams.bucketSizeLog);
    const U32 ldmTagMask = ((U32)1 << ldmParams.hashEveryLog) - 1;
    seqStore_t* const seqStorePtr = &(cctx->seqStore);
    const BYTE* const base = cctx->base;
    const BYTE* const istart = (const BYTE*)src;
    const BYTE* ip = istart;
    const BYTE* anchor = istart;
    const U32   lowestIndex = cctx->dictLimit;
    const BYTE* const lowest = base + lowestIndex;
    const BYTE* const iend = istart + srcSize;
    const BYTE* const ilimit = iend - MAX(ldmParams.minMatchLength, HASH_READ_SIZE);

    const ZSTD_blockCompressor blockCompressor =
        ZSTD_selectBlockCompressor(cctx->appliedParams.cParams.strategy, 0);
    U32* const repToConfirm = seqStorePtr->repToConfirm;
    U32 savedRep[ZSTD_REP_NUM];
    U64 rollingHash = 0;
    const BYTE* lastHashed = NULL;
    size_t i, lastLiterals;

    /* Save seqStorePtr->rep and copy repToConfirm */
    for (i = 0; i < ZSTD_REP_NUM; i++)
        savedRep[i] = repToConfirm[i] = seqStorePtr->rep[i];

    /* Main Search Loop */
    while (ip < ilimit) {   /* < instead of <=, because repcode check at (ip+1) */
        size_t mLength;
        U32 const current = (U32)(ip - base);
        size_t forwardMatchLength = 0, backwardMatchLength = 0;
        ldmEntry_t* bestEntry = NULL;
        if (ip != istart) {
            rollingHash = ZSTD_ldm_updateHash(rollingHash, lastHashed[0],
                                              lastHashed[ldmParams.minMatchLength],
                                              hashPower);
        } else {
            rollingHash = ZSTD_ldm_getRollingHash(ip, ldmParams.minMatchLength);
        }
        lastHashed = ip;

        /* Do not insert and do not look for a match */
        if (ZSTD_ldm_getTag(rollingHash, hBits, ldmParams.hashEveryLog) !=
                ldmTagMask) {
           ip++;
           continue;
        }

        /* Get the best entry and compute the match lengths */
        {
            ldmEntry_t* const bucket =
                ZSTD_ldm_getBucket(ldmState,
                                   ZSTD_ldm_getSmallHash(rollingHash, hBits),
                                   ldmParams);
            ldmEntry_t* cur;
            size_t bestMatchLength = 0;
            U32 const checksum = ZSTD_ldm_getChecksum(rollingHash, hBits);

            for (cur = bucket; cur < bucket + ldmBucketSize; ++cur) {
                const BYTE* const pMatch = cur->offset + base;
                size_t curForwardMatchLength, curBackwardMatchLength,
                       curTotalMatchLength;
                if (cur->checksum != checksum || cur->offset <= lowestIndex) {
                    continue;
                }

                curForwardMatchLength = ZSTD_count(ip, pMatch, iend);
                if (curForwardMatchLength < ldmParams.minMatchLength) {
                    continue;
                }
                curBackwardMatchLength = ZSTD_ldm_countBackwardsMatch(
                                             ip, anchor, pMatch, lowest);
                curTotalMatchLength = curForwardMatchLength +
                                      curBackwardMatchLength;

                if (curTotalMatchLength > bestMatchLength) {
                    bestMatchLength = curTotalMatchLength;
                    forwardMatchLength = curForwardMatchLength;
                    backwardMatchLength = curBackwardMatchLength;
                    bestEntry = cur;
                }
            }
        }

        /* No match found -- continue searching */
        if (bestEntry == NULL) {
            ZSTD_ldm_makeEntryAndInsertByTag(ldmState, rollingHash,
                                             hBits, current,
                                             ldmParams);
            ip++;
            continue;
        }

        /* Match found */
        mLength = forwardMatchLength + backwardMatchLength;
        ip -= backwardMatchLength;

        /* Call the block compressor on the remaining literals */
        {
            U32 const matchIndex = bestEntry->offset;
            const BYTE* const match = base + matchIndex - backwardMatchLength;
            U32 const offset = (U32)(ip - match);

            /* Overwrite rep codes */
            for (i = 0; i < ZSTD_REP_NUM; i++)
                seqStorePtr->rep[i] = repToConfirm[i];

            /* Fill tables for block compressor */
            ZSTD_ldm_limitTableUpdate(cctx, anchor);
            ZSTD_ldm_fillFastTables(cctx, anchor);

            /* Call block compressor and get remaining literals */
            lastLiterals = blockCompressor(cctx, anchor, ip - anchor);
            cctx->nextToUpdate = (U32)(ip - base);

            /* Update repToConfirm with the new offset */
            for (i = ZSTD_REP_NUM - 1; i > 0; i--)
                repToConfirm[i] = repToConfirm[i-1];
            repToConfirm[0] = offset;

            /* Store the sequence with the leftover literals */
            ZSTD_storeSeq(seqStorePtr, lastLiterals, ip - lastLiterals,
                          offset + ZSTD_REP_MOVE, mLength - MINMATCH);
        }

        /* Insert the current entry into the hash table */
        ZSTD_ldm_makeEntryAndInsertByTag(ldmState, rollingHash, hBits,
                                         (U32)(lastHashed - base),
                                         ldmParams);

        assert(ip + backwardMatchLength == lastHashed);

        /* Fill the hash table from lastHashed+1 to ip+mLength*/
        /* Heuristic: don't need to fill the entire table at end of block */
        if (ip + mLength < ilimit) {
            rollingHash = ZSTD_ldm_fillLdmHashTable(
                              ldmState, rollingHash, lastHashed,
                              ip + mLength, base, hBits, ldmParams);
            lastHashed = ip + mLength - 1;
        }
        ip += mLength;
        anchor = ip;
        /* Check immediate repcode */
        while ( (ip < ilimit)
             && ( (repToConfirm[1] > 0) && (repToConfirm[1] <= (U32)(ip-lowest))
             && (MEM_read32(ip) == MEM_read32(ip - repToConfirm[1])) )) {

            size_t const rLength = ZSTD_count(ip+4, ip+4-repToConfirm[1],
                                              iend) + 4;
            /* Swap repToConfirm[1] <=> repToConfirm[0] */
            {
                U32 const tmpOff = repToConfirm[1];
                repToConfirm[1] = repToConfirm[0];
                repToConfirm[0] = tmpOff;
            }

            ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, rLength-MINMATCH);

            /* Fill the  hash table from lastHashed+1 to ip+rLength*/
            if (ip + rLength < ilimit) {
                rollingHash = ZSTD_ldm_fillLdmHashTable(
                                ldmState, rollingHash, lastHashed,
                                ip + rLength, base, hBits, ldmParams);
                lastHashed = ip + rLength - 1;
            }
            ip += rLength;
            anchor = ip;
        }
    }

    /* Overwrite rep */
    for (i = 0; i < ZSTD_REP_NUM; i++)
        seqStorePtr->rep[i] = repToConfirm[i];

    ZSTD_ldm_limitTableUpdate(cctx, anchor);
    ZSTD_ldm_fillFastTables(cctx, anchor);

    lastLiterals = blockCompressor(cctx, anchor, iend - anchor);
    cctx->nextToUpdate = (U32)(iend - base);

    /* Restore seqStorePtr->rep */
    for (i = 0; i < ZSTD_REP_NUM; i++)
        seqStorePtr->rep[i] = savedRep[i];

    /* Return the last literals size */
    return lastLiterals;
 }

 size_t ZSTD_compressBlock_ldm(ZSTD_CCtx* ctx,
                              const void* src, size_t srcSize)
 {
    return ZSTD_compressBlock_ldm_generic(ctx, src, srcSize);
 }

 static size_t ZSTD_compressBlock_ldm_extDict_generic(
                                 ZSTD_CCtx* ctx,
                                 const void* src, size_t srcSize)
 {
    ldmState_t* const ldmState = &(ctx->ldmState);
    const ldmParams_t ldmParams = ctx->appliedParams.ldmParams;
    const U64 hashPower = ldmState->hashPower;
    const U32 hBits = ldmParams.hashLog - ldmParams.bucketSizeLog;
    const U32 ldmBucketSize = ((U32)1 << ldmParams.bucketSizeLog);
    const U32 ldmTagMask = ((U32)1 << ldmParams.hashEveryLog) - 1;
    seqStore_t* const seqStorePtr = &(ctx->seqStore);
    const BYTE* const base = ctx->base;
    const BYTE* const dictBase = ctx->dictBase;
    const BYTE* const istart = (const BYTE*)src;
    const BYTE* ip = istart;
    const BYTE* anchor = istart;
    const U32   lowestIndex = ctx->lowLimit;
    const BYTE* const dictStart = dictBase + lowestIndex;
    const U32   dictLimit = ctx->dictLimit;
    const BYTE* const lowPrefixPtr = base + dictLimit;
    const BYTE* const dictEnd = dictBase + dictLimit;
    const BYTE* const iend = istart + srcSize;
    const BYTE* const ilimit = iend - MAX(ldmParams.minMatchLength, HASH_READ_SIZE);

    const ZSTD_blockCompressor blockCompressor =
        ZSTD_selectBlockCompressor(ctx->appliedParams.cParams.strategy, 1);
    U32* const repToConfirm = seqStorePtr->repToConfirm;
    U32 savedRep[ZSTD_REP_NUM];
    U64 rollingHash = 0;
    const BYTE* lastHashed = NULL;
    size_t i, lastLiterals;

    /* Save seqStorePtr->rep and copy repToConfirm */
    for (i = 0; i < ZSTD_REP_NUM; i++) {
        savedRep[i] = repToConfirm[i] = seqStorePtr->rep[i];
    }

    /* Search Loop */
    while (ip < ilimit) {  /* < instead of <=, because (ip+1) */
        size_t mLength;
        const U32 current = (U32)(ip-base);
        size_t forwardMatchLength = 0, backwardMatchLength = 0;
        ldmEntry_t* bestEntry = NULL;
        if (ip != istart) {
          rollingHash = ZSTD_ldm_updateHash(rollingHash, lastHashed[0],
                                       lastHashed[ldmParams.minMatchLength],
                                       hashPower);
        } else {
            rollingHash = ZSTD_ldm_getRollingHash(ip, ldmParams.minMatchLength);
        }
        lastHashed = ip;

        if (ZSTD_ldm_getTag(rollingHash, hBits, ldmParams.hashEveryLog) !=
                ldmTagMask) {
            /* Don't insert and don't look for a match */
           ip++;
           continue;
        }

        /* Get the best entry and compute the match lengths */
        {
            ldmEntry_t* const bucket =
                ZSTD_ldm_getBucket(ldmState,
                                   ZSTD_ldm_getSmallHash(rollingHash, hBits),
                                   ldmParams);
            ldmEntry_t* cur;
            size_t bestMatchLength = 0;
            U32 const checksum = ZSTD_ldm_getChecksum(rollingHash, hBits);

            for (cur = bucket; cur < bucket + ldmBucketSize; ++cur) {
                const BYTE* const curMatchBase =
                    cur->offset < dictLimit ? dictBase : base;
                const BYTE* const pMatch = curMatchBase + cur->offset;
                const BYTE* const matchEnd =
                    cur->offset < dictLimit ? dictEnd : iend;
                const BYTE* const lowMatchPtr =
                    cur->offset < dictLimit ? dictStart : lowPrefixPtr;
                size_t curForwardMatchLength, curBackwardMatchLength,
                       curTotalMatchLength;

                if (cur->checksum != checksum || cur->offset <= lowestIndex) {
                    continue;
                }

                curForwardMatchLength = ZSTD_count_2segments(
                                            ip, pMatch, iend,
                                            matchEnd, lowPrefixPtr);
                if (curForwardMatchLength < ldmParams.minMatchLength) {
                    continue;
                }
                curBackwardMatchLength = ZSTD_ldm_countBackwardsMatch(
                                             ip, anchor, pMatch, lowMatchPtr);
                curTotalMatchLength = curForwardMatchLength +
                                      curBackwardMatchLength;

                if (curTotalMatchLength > bestMatchLength) {
                    bestMatchLength = curTotalMatchLength;
                    forwardMatchLength = curForwardMatchLength;
                    backwardMatchLength = curBackwardMatchLength;
                    bestEntry = cur;
                }
            }
        }

        /* No match found -- continue searching */
        if (bestEntry == NULL) {
            ZSTD_ldm_makeEntryAndInsertByTag(ldmState, rollingHash, hBits,
                                             (U32)(lastHashed - base),
                                             ldmParams);
            ip++;
            continue;
        }

        /* Match found */
        mLength = forwardMatchLength + backwardMatchLength;
        ip -= backwardMatchLength;

        /* Call the block compressor on the remaining literals */
        {
            /* ip = current - backwardMatchLength
             * The match is at (bestEntry->offset - backwardMatchLength) */
            U32 const matchIndex = bestEntry->offset;
            U32 const offset = current - matchIndex;

            /* Overwrite rep codes */
            for (i = 0; i < ZSTD_REP_NUM; i++)
                seqStorePtr->rep[i] = repToConfirm[i];

            /* Fill the hash table for the block compressor */
            ZSTD_ldm_limitTableUpdate(ctx, anchor);
            ZSTD_ldm_fillFastTables(ctx, anchor);

            /* Call block compressor and get remaining literals  */
            lastLiterals = blockCompressor(ctx, anchor, ip - anchor);
            ctx->nextToUpdate = (U32)(ip - base);

            /* Update repToConfirm with the new offset */
            for (i = ZSTD_REP_NUM - 1; i > 0; i--)
                repToConfirm[i] = repToConfirm[i-1];
            repToConfirm[0] = offset;

            /* Store the sequence with the leftover literals */
            ZSTD_storeSeq(seqStorePtr, lastLiterals, ip - lastLiterals,
                          offset + ZSTD_REP_MOVE, mLength - MINMATCH);
        }

        /* Insert the current entry into the hash table */
        ZSTD_ldm_makeEntryAndInsertByTag(ldmState, rollingHash, hBits,
                                         (U32)(lastHashed - base),
                                         ldmParams);

        /* Fill the hash table from lastHashed+1 to ip+mLength */
        assert(ip + backwardMatchLength == lastHashed);
        if (ip + mLength < ilimit) {
            rollingHash = ZSTD_ldm_fillLdmHashTable(
                              ldmState, rollingHash, lastHashed,
                              ip + mLength, base, hBits,
                              ldmParams);
            lastHashed = ip + mLength - 1;
        }
        ip += mLength;
        anchor = ip;

        /* check immediate repcode */
        while (ip < ilimit) {
            U32 const current2 = (U32)(ip-base);
            U32 const repIndex2 = current2 - repToConfirm[1];
            const BYTE* repMatch2 = repIndex2 < dictLimit ?
                                    dictBase + repIndex2 : base + repIndex2;
            if ( (((U32)((dictLimit-1) - repIndex2) >= 3) &
                        (repIndex2 > lowestIndex))  /* intentional overflow */
               && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
                const BYTE* const repEnd2 = repIndex2 < dictLimit ?
                                            dictEnd : iend;
                size_t const repLength2 =
                        ZSTD_count_2segments(ip+4, repMatch2+4, iend,
                                             repEnd2, lowPrefixPtr) + 4;

                U32 tmpOffset = repToConfirm[1];
                repToConfirm[1] = repToConfirm[0];
                repToConfirm[0] = tmpOffset;

                ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, repLength2-MINMATCH);

                /* Fill the  hash table from lastHashed+1 to ip+repLength2*/
                if (ip + repLength2 < ilimit) {
                    rollingHash = ZSTD_ldm_fillLdmHashTable(
                                      ldmState, rollingHash, lastHashed,
                                      ip + repLength2, base, hBits,
                                      ldmParams);
                    lastHashed = ip + repLength2 - 1;
                }
                ip += repLength2;
                anchor = ip;
                continue;
            }
            break;
        }
    }

    /* Overwrite rep */
    for (i = 0; i < ZSTD_REP_NUM; i++)
        seqStorePtr->rep[i] = repToConfirm[i];

    ZSTD_ldm_limitTableUpdate(ctx, anchor);
    ZSTD_ldm_fillFastTables(ctx, anchor);

    /* Call the block compressor one last time on the last literals */
    lastLiterals = blockCompressor(ctx, anchor, iend - anchor);
    ctx->nextToUpdate = (U32)(iend - base);

    /* Restore seqStorePtr->rep */
    for (i = 0; i < ZSTD_REP_NUM; i++)
        seqStorePtr->rep[i] = savedRep[i];

    /* Return the last literals size */
    return lastLiterals;
 }

 size_t ZSTD_compressBlock_ldm_extDict(ZSTD_CCtx* ctx,
                                      const void* src, size_t srcSize)
 {
    return ZSTD_compressBlock_ldm_extDict_generic(ctx, src, srcSize);
 }
--- a/contrib/zstd/zstd_ldm.h
+++ b/contrib/zstd/zstd_ldm.h
@@ -0,0 +1,67 @@
 /*
 * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
 * All rights reserved.
 *
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 */

 #ifndef ZSTD_LDM_H
 #define ZSTD_LDM_H

 #include "zstd_compress.h"

 #if defined (__cplusplus)
 extern "C" {
 #endif

 /*-*************************************
 *  Long distance matching
 ***************************************/

 #define ZSTD_LDM_WINDOW_LOG 27
 #define ZSTD_LDM_HASHEVERYLOG_NOTSET 9999

 /** ZSTD_compressBlock_ldm_generic() :
 *
 *  This is a block compressor intended for long distance matching.
 *
 *  The function searches for matches of length at least
 *  ldmParams.minMatchLength using a hash table in cctx->ldmState.
 *  Matches can be at a distance of up to cParams.windowLog.
 *
 *  Upon finding a match, the unmatched literals are compressed using a
 *  ZSTD_blockCompressor (depending on the strategy in the compression
 *  parameters), which stores the matched sequences. The "long distance"
 *  match is then stored with the remaining literals from the
 *  ZSTD_blockCompressor. */
 size_t ZSTD_compressBlock_ldm(ZSTD_CCtx* cctx, const void* src, size_t srcSize);
 size_t ZSTD_compressBlock_ldm_extDict(ZSTD_CCtx* ctx,
                                      const void* src, size_t srcSize);

 /** ZSTD_ldm_initializeParameters() :
 *  Initialize the long distance matching parameters to their default values. */
 size_t ZSTD_ldm_initializeParameters(ldmParams_t* params, U32 enableLdm);

 /** ZSTD_ldm_getTableSize() :
 *  Estimate the space needed for long distance matching tables. */
 size_t ZSTD_ldm_getTableSize(U32 hashLog, U32 bucketSizeLog);

 /** ZSTD_ldm_getTableSize() :
 *  Return prime8bytes^(minMatchLength-1) */
 U64 ZSTD_ldm_getHashPower(U32 minMatchLength);

 /** ZSTD_ldm_adjustParameters() :
 *  If the params->hashEveryLog is not set, set it to its default value based on
 *  windowLog and params->hashLog.
 *
 *  Ensures that params->bucketSizeLog is <= params->hashLog (setting it to
 *  params->hashLog if it is not). */
 void ZSTD_ldm_adjustParameters(ldmParams_t* params, U32 windowLog);

 #if defined (__cplusplus)
 }
 #endif

 #endif /* ZSTD_FAST_H */
--- a/contrib/zstd/zstd_opt.c
+++ b/contrib/zstd/zstd_opt.c
@@ -0,0 +1,949 @@
 /*
 * Copyright (c) 2016-present, Przemyslaw Skibinski, Yann Collet, Facebook, Inc.
 * All rights reserved.
 *
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 * You may select, at your option, one of the above-listed licenses.
 */

 #include "zstd_opt.h"
 #include "zstd_lazy.h"


 #define ZSTD_LITFREQ_ADD    2
 #define ZSTD_FREQ_DIV       4
 #define ZSTD_MAX_PRICE      (1<<30)

 /*-*************************************
 *  Price functions for optimal parser
 ***************************************/
 static void ZSTD_setLog2Prices(optState_t* optPtr)
 {
    optPtr->log2matchLengthSum = ZSTD_highbit32(optPtr->matchLengthSum+1);
    optPtr->log2litLengthSum = ZSTD_highbit32(optPtr->litLengthSum+1);
    optPtr->log2litSum = ZSTD_highbit32(optPtr->litSum+1);
    optPtr->log2offCodeSum = ZSTD_highbit32(optPtr->offCodeSum+1);
    optPtr->factor = 1 + ((optPtr->litSum>>5) / optPtr->litLengthSum) + ((optPtr->litSum<<1) / (optPtr->litSum + optPtr->matchSum));
 }


 static void ZSTD_rescaleFreqs(optState_t* optPtr, const BYTE* src, size_t srcSize)
 {
    unsigned u;

    optPtr->cachedLiterals = NULL;
    optPtr->cachedPrice = optPtr->cachedLitLength = 0;
    optPtr->staticPrices = 0;

    if (optPtr->litLengthSum == 0) {
        if (srcSize <= 1024) optPtr->staticPrices = 1;

        assert(optPtr->litFreq!=NULL);
        for (u=0; u<=MaxLit; u++)
            optPtr->litFreq[u] = 0;
        for (u=0; u<srcSize; u++)
            optPtr->litFreq[src[u]]++;

        optPtr->litSum = 0;
        optPtr->litLengthSum = MaxLL+1;
        optPtr->matchLengthSum = MaxML+1;
        optPtr->offCodeSum = (MaxOff+1);
        optPtr->matchSum = (ZSTD_LITFREQ_ADD<<Litbits);

        for (u=0; u<=MaxLit; u++) {
            optPtr->litFreq[u] = 1 + (optPtr->litFreq[u]>>ZSTD_FREQ_DIV);
            optPtr->litSum += optPtr->litFreq[u];
        }
        for (u=0; u<=MaxLL; u++)
            optPtr->litLengthFreq[u] = 1;
        for (u=0; u<=MaxML; u++)
            optPtr->matchLengthFreq[u] = 1;
        for (u=0; u<=MaxOff; u++)
            optPtr->offCodeFreq[u] = 1;
    } else {
        optPtr->matchLengthSum = 0;
        optPtr->litLengthSum = 0;
        optPtr->offCodeSum = 0;
        optPtr->matchSum = 0;
        optPtr->litSum = 0;

        for (u=0; u<=MaxLit; u++) {
            optPtr->litFreq[u] = 1 + (optPtr->litFreq[u]>>(ZSTD_FREQ_DIV+1));
            optPtr->litSum += optPtr->litFreq[u];
        }
        for (u=0; u<=MaxLL; u++) {
            optPtr->litLengthFreq[u] = 1 + (optPtr->litLengthFreq[u]>>(ZSTD_FREQ_DIV+1));
            optPtr->litLengthSum += optPtr->litLengthFreq[u];
        }
        for (u=0; u<=MaxML; u++) {
            optPtr->matchLengthFreq[u] = 1 + (optPtr->matchLengthFreq[u]>>ZSTD_FREQ_DIV);
            optPtr->matchLengthSum += optPtr->matchLengthFreq[u];
            optPtr->matchSum += optPtr->matchLengthFreq[u] * (u + 3);
        }
        optPtr->matchSum *= ZSTD_LITFREQ_ADD;
        for (u=0; u<=MaxOff; u++) {
            optPtr->offCodeFreq[u] = 1 + (optPtr->offCodeFreq[u]>>ZSTD_FREQ_DIV);
            optPtr->offCodeSum += optPtr->offCodeFreq[u];
        }
    }

    ZSTD_setLog2Prices(optPtr);
 }


 static U32 ZSTD_getLiteralPrice(optState_t* optPtr, U32 litLength, const BYTE* literals)
 {
    U32 price, u;

    if (optPtr->staticPrices)
        return ZSTD_highbit32((U32)litLength+1) + (litLength*6);

    if (litLength == 0)
        return optPtr->log2litLengthSum - ZSTD_highbit32(optPtr->litLengthFreq[0]+1);

    /* literals */
    if (optPtr->cachedLiterals == literals) {
        U32 const additional = litLength - optPtr->cachedLitLength;
        const BYTE* literals2 = optPtr->cachedLiterals + optPtr->cachedLitLength;
        price = optPtr->cachedPrice + additional * optPtr->log2litSum;
        for (u=0; u < additional; u++)
            price -= ZSTD_highbit32(optPtr->litFreq[literals2[u]]+1);
        optPtr->cachedPrice = price;
        optPtr->cachedLitLength = litLength;
    } else {
        price = litLength * optPtr->log2litSum;
        for (u=0; u < litLength; u++)
            price -= ZSTD_highbit32(optPtr->litFreq[literals[u]]+1);

        if (litLength >= 12) {
            optPtr->cachedLiterals = literals;
            optPtr->cachedPrice = price;
            optPtr->cachedLitLength = litLength;
        }
    }

    /* literal Length */
    {   const BYTE LL_deltaCode = 19;
        const BYTE llCode = (litLength>63) ? (BYTE)ZSTD_highbit32(litLength) + LL_deltaCode : LL_Code[litLength];
        price += LL_bits[llCode] + optPtr->log2litLengthSum - ZSTD_highbit32(optPtr->litLengthFreq[llCode]+1);
    }

    return price;
 }


 FORCE_INLINE_TEMPLATE U32 ZSTD_getPrice(optState_t* optPtr, U32 litLength, const BYTE* literals, U32 offset, U32 matchLength, const int ultra)
 {
    /* offset */
    U32 price;
    BYTE const offCode = (BYTE)ZSTD_highbit32(offset+1);

    if (optPtr->staticPrices)
        return ZSTD_getLiteralPrice(optPtr, litLength, literals) + ZSTD_highbit32((U32)matchLength+1) + 16 + offCode;

    price = offCode + optPtr->log2offCodeSum - ZSTD_highbit32(optPtr->offCodeFreq[offCode]+1);
    if (!ultra && offCode >= 20) price += (offCode-19)*2;

    /* match Length */
    {   const BYTE ML_deltaCode = 36;
        const BYTE mlCode = (matchLength>127) ? (BYTE)ZSTD_highbit32(matchLength) + ML_deltaCode : ML_Code[matchLength];
        price += ML_bits[mlCode] + optPtr->log2matchLengthSum - ZSTD_highbit32(optPtr->matchLengthFreq[mlCode]+1);
    }

    return price + ZSTD_getLiteralPrice(optPtr, litLength, literals) + optPtr->factor;
 }


 static void ZSTD_updatePrice(optState_t* optPtr, U32 litLength, const BYTE* literals, U32 offset, U32 matchLength)
 {
    U32 u;

    /* literals */
    optPtr->litSum += litLength*ZSTD_LITFREQ_ADD;
    for (u=0; u < litLength; u++)
        optPtr->litFreq[literals[u]] += ZSTD_LITFREQ_ADD;

    /* literal Length */
    {   const BYTE LL_deltaCode = 19;
        const BYTE llCode = (litLength>63) ? (BYTE)ZSTD_highbit32(litLength) + LL_deltaCode : LL_Code[litLength];
        optPtr->litLengthFreq[llCode]++;
        optPtr->litLengthSum++;
    }

    /* match offset */
    {   BYTE const offCode = (BYTE)ZSTD_highbit32(offset+1);
        optPtr->offCodeSum++;
        optPtr->offCodeFreq[offCode]++;
    }

    /* match Length */
    {   const BYTE ML_deltaCode = 36;
        const BYTE mlCode = (matchLength>127) ? (BYTE)ZSTD_highbit32(matchLength) + ML_deltaCode : ML_Code[matchLength];
        optPtr->matchLengthFreq[mlCode]++;
        optPtr->matchLengthSum++;
    }

    ZSTD_setLog2Prices(optPtr);
 }


 #define SET_PRICE(pos, mlen_, offset_, litlen_, price_)   \
    {                                                 \
        while (last_pos < pos)  { opt[last_pos+1].price = ZSTD_MAX_PRICE; last_pos++; } \
        opt[pos].mlen = mlen_;                         \
        opt[pos].off = offset_;                        \
        opt[pos].litlen = litlen_;                     \
        opt[pos].price = price_;                       \
    }


 /* function safe only for comparisons */
 static U32 ZSTD_readMINMATCH(const void* memPtr, U32 length)
 {
    switch (length)
    {
    default :
    case 4 : return MEM_read32(memPtr);
    case 3 : if (MEM_isLittleEndian())
                return MEM_read32(memPtr)<<8;
             else
                return MEM_read32(memPtr)>>8;
    }
 }


 /* Update hashTable3 up to ip (excluded)
   Assumption : always within prefix (i.e. not within extDict) */
 static
 U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_CCtx* zc, const BYTE* ip)
 {
    U32* const hashTable3  = zc->hashTable3;
    U32 const hashLog3  = zc->hashLog3;
    const BYTE* const base = zc->base;
    U32 idx = zc->nextToUpdate3;
    const U32 target = zc->nextToUpdate3 = (U32)(ip - base);
    const size_t hash3 = ZSTD_hash3Ptr(ip, hashLog3);

    while(idx < target) {
        hashTable3[ZSTD_hash3Ptr(base+idx, hashLog3)] = idx;
        idx++;
    }

    return hashTable3[hash3];
 }


 /*-*************************************
 *  Binary Tree search
 ***************************************/
 static U32 ZSTD_insertBtAndGetAllMatches (
                        ZSTD_CCtx* zc,
                        const BYTE* const ip, const BYTE* const iLimit,
                        U32 nbCompares, const U32 mls,
                        U32 extDict, ZSTD_match_t* matches, const U32 minMatchLen)
 {
    const BYTE* const base = zc->base;
    const U32 current = (U32)(ip-base);
    const U32 hashLog = zc->appliedParams.cParams.hashLog;
    const size_t h  = ZSTD_hashPtr(ip, hashLog, mls);
    U32* const hashTable = zc->hashTable;
    U32 matchIndex  = hashTable[h];
    U32* const bt   = zc->chainTable;
    const U32 btLog = zc->appliedParams.cParams.chainLog - 1;
    const U32 btMask= (1U << btLog) - 1;
    size_t commonLengthSmaller=0, commonLengthLarger=0;
    const BYTE* const dictBase = zc->dictBase;
    const U32 dictLimit = zc->dictLimit;
    const BYTE* const dictEnd = dictBase + dictLimit;
    const BYTE* const prefixStart = base + dictLimit;
    const U32 btLow = btMask >= current ? 0 : current - btMask;
    const U32 windowLow = zc->lowLimit;
    U32* smallerPtr = bt + 2*(current&btMask);
    U32* largerPtr  = bt + 2*(current&btMask) + 1;
    U32 matchEndIdx = current+8;
    U32 dummy32;   /* to be nullified at the end */
    U32 mnum = 0;

    const U32 minMatch = (mls == 3) ? 3 : 4;
    size_t bestLength = minMatchLen-1;

    if (minMatch == 3) { /* HC3 match finder */
        U32 const matchIndex3 = ZSTD_insertAndFindFirstIndexHash3 (zc, ip);
        if (matchIndex3>windowLow && (current - matchIndex3 < (1<<18))) {
            const BYTE* match;
            size_t currentMl=0;
            if ((!extDict) || matchIndex3 >= dictLimit) {
                match = base + matchIndex3;
                if (match[bestLength] == ip[bestLength]) currentMl = ZSTD_count(ip, match, iLimit);
            } else {
                match = dictBase + matchIndex3;
                if (ZSTD_readMINMATCH(match, MINMATCH) == ZSTD_readMINMATCH(ip, MINMATCH))    /* assumption : matchIndex3 <= dictLimit-4 (by table construction) */
                    currentMl = ZSTD_count_2segments(ip+MINMATCH, match+MINMATCH, iLimit, dictEnd, prefixStart) + MINMATCH;
            }

            /* save best solution */
            if (currentMl > bestLength) {
                bestLength = currentMl;
                matches[mnum].off = ZSTD_REP_MOVE_OPT + current - matchIndex3;
                matches[mnum].len = (U32)currentMl;
                mnum++;
                if (currentMl > ZSTD_OPT_NUM) goto update;
                if (ip+currentMl == iLimit) goto update; /* best possible, and avoid read overflow*/
            }
        }
    }

    hashTable[h] = current;   /* Update Hash Table */

    while (nbCompares-- && (matchIndex > windowLow)) {
        U32* nextPtr = bt + 2*(matchIndex & btMask);
        size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
        const BYTE* match;

        if ((!extDict) || (matchIndex+matchLength >= dictLimit)) {
            match = base + matchIndex;
            if (match[matchLength] == ip[matchLength]) {
                matchLength += ZSTD_count(ip+matchLength+1, match+matchLength+1, iLimit) +1;
            }
        } else {
            match = dictBase + matchIndex;
            matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, dictEnd, prefixStart);
            if (matchIndex+matchLength >= dictLimit)
                match = base + matchIndex;   /* to prepare for next usage of match[matchLength] */
        }

        if (matchLength > bestLength) {
            if (matchLength > matchEndIdx - matchIndex) matchEndIdx = matchIndex + (U32)matchLength;
            bestLength = matchLength;
            matches[mnum].off = ZSTD_REP_MOVE_OPT + current - matchIndex;
            matches[mnum].len = (U32)matchLength;
            mnum++;
            if (matchLength > ZSTD_OPT_NUM) break;
            if (ip+matchLength == iLimit)   /* equal : no way to know if inf or sup */
                break;   /* drop, to guarantee consistency (miss a little bit of compression) */
        }

        if (match[matchLength] < ip[matchLength]) {
            /* match is smaller than current */
            *smallerPtr = matchIndex;             /* update smaller idx */
            commonLengthSmaller = matchLength;    /* all smaller will now have at least this guaranteed common length */
            if (matchIndex <= btLow) { smallerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
            smallerPtr = nextPtr+1;               /* new "smaller" => larger of match */
            matchIndex = nextPtr[1];              /* new matchIndex larger than previous (closer to current) */
        } else {
            /* match is larger than current */
            *largerPtr = matchIndex;
            commonLengthLarger = matchLength;
            if (matchIndex <= btLow) { largerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
            largerPtr = nextPtr;
            matchIndex = nextPtr[0];
    }   }

    *smallerPtr = *largerPtr = 0;

 update:
    zc->nextToUpdate = (matchEndIdx > current + 8) ? matchEndIdx - 8 : current+1;
    return mnum;
 }


 /** Tree updater, providing best match */
 static U32 ZSTD_BtGetAllMatches (
                        ZSTD_CCtx* zc,
                        const BYTE* const ip, const BYTE* const iLimit,
                        const U32 maxNbAttempts, const U32 mls, ZSTD_match_t* matches, const U32 minMatchLen)
 {
    if (ip < zc->base + zc->nextToUpdate) return 0;   /* skipped area */
    ZSTD_updateTree(zc, ip, iLimit, maxNbAttempts, mls);
    return ZSTD_insertBtAndGetAllMatches(zc, ip, iLimit, maxNbAttempts, mls, 0, matches, minMatchLen);
 }


 static U32 ZSTD_BtGetAllMatches_selectMLS (
                        ZSTD_CCtx* zc,   /* Index table will be updated */
                        const BYTE* ip, const BYTE* const iHighLimit,
                        const U32 maxNbAttempts, const U32 matchLengthSearch, ZSTD_match_t* matches, const U32 minMatchLen)
 {
    switch(matchLengthSearch)
    {
    case 3 : return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 3, matches, minMatchLen);
    default :
    case 4 : return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 4, matches, minMatchLen);
    case 5 : return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 5, matches, minMatchLen);
    case 7 :
    case 6 : return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 6, matches, minMatchLen);
    }
 }

 /** Tree updater, providing best match */
 static U32 ZSTD_BtGetAllMatches_extDict (
                        ZSTD_CCtx* zc,
                        const BYTE* const ip, const BYTE* const iLimit,
                        const U32 maxNbAttempts, const U32 mls, ZSTD_match_t* matches, const U32 minMatchLen)
 {
    if (ip < zc->base + zc->nextToUpdate) return 0;   /* skipped area */
    ZSTD_updateTree_extDict(zc, ip, iLimit, maxNbAttempts, mls);
    return ZSTD_insertBtAndGetAllMatches(zc, ip, iLimit, maxNbAttempts, mls, 1, matches, minMatchLen);
 }


 static U32 ZSTD_BtGetAllMatches_selectMLS_extDict (
                        ZSTD_CCtx* zc,   /* Index table will be updated */
                        const BYTE* ip, const BYTE* const iHighLimit,
                        const U32 maxNbAttempts, const U32 matchLengthSearch, ZSTD_match_t* matches, const U32 minMatchLen)
 {
    switch(matchLengthSearch)
    {
    case 3 : return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 3, matches, minMatchLen);
    default :
    case 4 : return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 4, matches, minMatchLen);
    case 5 : return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 5, matches, minMatchLen);
    case 7 :
    case 6 : return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 6, matches, minMatchLen);
    }
 }


 /*-*******************************
 *  Optimal parser
 *********************************/
 FORCE_INLINE_TEMPLATE
 size_t ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx,
                                      const void* src, size_t srcSize, const int ultra)
 {
    seqStore_t* seqStorePtr = &(ctx->seqStore);
    optState_t* optStatePtr = &(ctx->optState);
    const BYTE* const istart = (const BYTE*)src;
    const BYTE* ip = istart;
    const BYTE* anchor = istart;
    const BYTE* const iend = istart + srcSize;
    const BYTE* const ilimit = iend - 8;
    const BYTE* const base = ctx->base;
    const BYTE* const prefixStart = base + ctx->dictLimit;

    const U32 maxSearches = 1U << ctx->appliedParams.cParams.searchLog;
    const U32 sufficient_len = ctx->appliedParams.cParams.targetLength;
    const U32 mls = ctx->appliedParams.cParams.searchLength;
    const U32 minMatch = (ctx->appliedParams.cParams.searchLength == 3) ? 3 : 4;

    ZSTD_optimal_t* opt = optStatePtr->priceTable;
    ZSTD_match_t* matches = optStatePtr->matchTable;
    const BYTE* inr;
    U32 offset, rep[ZSTD_REP_NUM];

    /* init */
    ctx->nextToUpdate3 = ctx->nextToUpdate;
    ZSTD_rescaleFreqs(optStatePtr, (const BYTE*)src, srcSize);
    ip += (ip==prefixStart);
    { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) rep[i]=seqStorePtr->rep[i]; }

    /* Match Loop */
    while (ip < ilimit) {
        U32 cur, match_num, last_pos, litlen, price;
        U32 u, mlen, best_mlen, best_off, litLength;
        memset(opt, 0, sizeof(ZSTD_optimal_t));
        last_pos = 0;
        litlen = (U32)(ip - anchor);

        /* check repCode */
        {   U32 i, last_i = ZSTD_REP_CHECK + (ip==anchor);
            for (i=(ip == anchor); i<last_i; i++) {
                const S32 repCur = (i==ZSTD_REP_MOVE_OPT) ? (rep[0] - 1) : rep[i];
                if ( (repCur > 0) && (repCur < (S32)(ip-prefixStart))
                    && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(ip - repCur, minMatch))) {
                    mlen = (U32)ZSTD_count(ip+minMatch, ip+minMatch-repCur, iend) + minMatch;
                    if (mlen > sufficient_len || mlen >= ZSTD_OPT_NUM) {
                        best_mlen = mlen; best_off = i; cur = 0; last_pos = 1;
                        goto _storeSequence;
                    }
                    best_off = i - (ip == anchor);
                    do {
                        price = ZSTD_getPrice(optStatePtr, litlen, anchor, best_off, mlen - MINMATCH, ultra);
                        if (mlen > last_pos || price < opt[mlen].price)
                            SET_PRICE(mlen, mlen, i, litlen, price);   /* note : macro modifies last_pos */
                        mlen--;
                    } while (mlen >= minMatch);
        }   }   }

        match_num = ZSTD_BtGetAllMatches_selectMLS(ctx, ip, iend, maxSearches, mls, matches, minMatch);

        if (!last_pos && !match_num) { ip++; continue; }

        if (match_num && (matches[match_num-1].len > sufficient_len || matches[match_num-1].len >= ZSTD_OPT_NUM)) {
            best_mlen = matches[match_num-1].len;
            best_off = matches[match_num-1].off;
            cur = 0;
            last_pos = 1;
            goto _storeSequence;
        }

        /* set prices using matches at position = 0 */
        best_mlen = (last_pos) ? last_pos : minMatch;
        for (u = 0; u < match_num; u++) {
            mlen = (u>0) ? matches[u-1].len+1 : best_mlen;
            best_mlen = matches[u].len;
            while (mlen <= best_mlen) {
                price = ZSTD_getPrice(optStatePtr, litlen, anchor, matches[u].off-1, mlen - MINMATCH, ultra);
                if (mlen > last_pos || price < opt[mlen].price)
                    SET_PRICE(mlen, mlen, matches[u].off, litlen, price);   /* note : macro modifies last_pos */
                mlen++;
        }   }

        if (last_pos < minMatch) { ip++; continue; }

        /* initialize opt[0] */
        { U32 i ; for (i=0; i<ZSTD_REP_NUM; i++) opt[0].rep[i] = rep[i]; }
        opt[0].mlen = 1;
        opt[0].litlen = litlen;

         /* check further positions */
        for (cur = 1; cur <= last_pos; cur++) {
           inr = ip + cur;

           if (opt[cur-1].mlen == 1) {
                litlen = opt[cur-1].litlen + 1;
                if (cur > litlen) {
                    price = opt[cur - litlen].price + ZSTD_getLiteralPrice(optStatePtr, litlen, inr-litlen);
                } else
                    price = ZSTD_getLiteralPrice(optStatePtr, litlen, anchor);
           } else {
                litlen = 1;
                price = opt[cur - 1].price + ZSTD_getLiteralPrice(optStatePtr, litlen, inr-1);
           }

           if (cur > last_pos || price <= opt[cur].price)
                SET_PRICE(cur, 1, 0, litlen, price);

           if (cur == last_pos) break;

           if (inr > ilimit)  /* last match must start at a minimum distance of 8 from oend */
               continue;

           mlen = opt[cur].mlen;
           if (opt[cur].off > ZSTD_REP_MOVE_OPT) {
                opt[cur].rep[2] = opt[cur-mlen].rep[1];
                opt[cur].rep[1] = opt[cur-mlen].rep[0];
                opt[cur].rep[0] = opt[cur].off - ZSTD_REP_MOVE_OPT;
           } else {
                opt[cur].rep[2] = (opt[cur].off > 1) ? opt[cur-mlen].rep[1] : opt[cur-mlen].rep[2];
                opt[cur].rep[1] = (opt[cur].off > 0) ? opt[cur-mlen].rep[0] : opt[cur-mlen].rep[1];
                opt[cur].rep[0] = ((opt[cur].off==ZSTD_REP_MOVE_OPT) && (mlen != 1)) ? (opt[cur-mlen].rep[0] - 1) : (opt[cur-mlen].rep[opt[cur].off]);
           }

            best_mlen = minMatch;
            {   U32 i, last_i = ZSTD_REP_CHECK + (mlen != 1);
                for (i=(opt[cur].mlen != 1); i<last_i; i++) {  /* check rep */
                    const S32 repCur = (i==ZSTD_REP_MOVE_OPT) ? (opt[cur].rep[0] - 1) : opt[cur].rep[i];
                    if ( (repCur > 0) && (repCur < (S32)(inr-prefixStart))
                       && (ZSTD_readMINMATCH(inr, minMatch) == ZSTD_readMINMATCH(inr - repCur, minMatch))) {
                       mlen = (U32)ZSTD_count(inr+minMatch, inr+minMatch - repCur, iend) + minMatch;

                       if (mlen > sufficient_len || cur + mlen >= ZSTD_OPT_NUM) {
                            best_mlen = mlen; best_off = i; last_pos = cur + 1;
                            goto _storeSequence;
                       }

                       best_off = i - (opt[cur].mlen != 1);
                       if (mlen > best_mlen) best_mlen = mlen;

                       do {
                           if (opt[cur].mlen == 1) {
                                litlen = opt[cur].litlen;
                                if (cur > litlen) {
                                    price = opt[cur - litlen].price + ZSTD_getPrice(optStatePtr, litlen, inr-litlen, best_off, mlen - MINMATCH, ultra);
                                } else
                                    price = ZSTD_getPrice(optStatePtr, litlen, anchor, best_off, mlen - MINMATCH, ultra);
                            } else {
                                litlen = 0;
                                price = opt[cur].price + ZSTD_getPrice(optStatePtr, 0, NULL, best_off, mlen - MINMATCH, ultra);
                            }

                            if (cur + mlen > last_pos || price <= opt[cur + mlen].price)
                                SET_PRICE(cur + mlen, mlen, i, litlen, price);
                            mlen--;
                        } while (mlen >= minMatch);
            }   }   }

            match_num = ZSTD_BtGetAllMatches_selectMLS(ctx, inr, iend, maxSearches, mls, matches, best_mlen);

            if (match_num > 0 && (matches[match_num-1].len > sufficient_len || cur + matches[match_num-1].len >= ZSTD_OPT_NUM)) {
                best_mlen = matches[match_num-1].len;
                best_off = matches[match_num-1].off;
                last_pos = cur + 1;
                goto _storeSequence;
            }

            /* set prices using matches at position = cur */
            for (u = 0; u < match_num; u++) {
                mlen = (u>0) ? matches[u-1].len+1 : best_mlen;
                best_mlen = matches[u].len;

                while (mlen <= best_mlen) {
                    if (opt[cur].mlen == 1) {
                        litlen = opt[cur].litlen;
                        if (cur > litlen)
                            price = opt[cur - litlen].price + ZSTD_getPrice(optStatePtr, litlen, ip+cur-litlen, matches[u].off-1, mlen - MINMATCH, ultra);
                        else
                            price = ZSTD_getPrice(optStatePtr, litlen, anchor, matches[u].off-1, mlen - MINMATCH, ultra);
                    } else {
                        litlen = 0;
                        price = opt[cur].price + ZSTD_getPrice(optStatePtr, 0, NULL, matches[u].off-1, mlen - MINMATCH, ultra);
                    }

                    if (cur + mlen > last_pos || (price < opt[cur + mlen].price))
                        SET_PRICE(cur + mlen, mlen, matches[u].off, litlen, price);

                    mlen++;
        }   }   }

        best_mlen = opt[last_pos].mlen;
        best_off = opt[last_pos].off;
        cur = last_pos - best_mlen;

        /* store sequence */
 _storeSequence:   /* cur, last_pos, best_mlen, best_off have to be set */
        opt[0].mlen = 1;

        while (1) {
            mlen = opt[cur].mlen;
            offset = opt[cur].off;
            opt[cur].mlen = best_mlen;
            opt[cur].off = best_off;
            best_mlen = mlen;
            best_off = offset;
            if (mlen > cur) break;
            cur -= mlen;
        }

        for (u = 0; u <= last_pos;) {
            u += opt[u].mlen;
        }

        for (cur=0; cur < last_pos; ) {
            mlen = opt[cur].mlen;
            if (mlen == 1) { ip++; cur++; continue; }
            offset = opt[cur].off;
            cur += mlen;
            litLength = (U32)(ip - anchor);

            if (offset > ZSTD_REP_MOVE_OPT) {
                rep[2] = rep[1];
                rep[1] = rep[0];
                rep[0] = offset - ZSTD_REP_MOVE_OPT;
                offset--;
            } else {
                if (offset != 0) {
                    best_off = (offset==ZSTD_REP_MOVE_OPT) ? (rep[0] - 1) : (rep[offset]);
                    if (offset != 1) rep[2] = rep[1];
                    rep[1] = rep[0];
                    rep[0] = best_off;
                }
                if (litLength==0) offset--;
            }

            ZSTD_updatePrice(optStatePtr, litLength, anchor, offset, mlen-MINMATCH);
            ZSTD_storeSeq(seqStorePtr, litLength, anchor, offset, mlen-MINMATCH);
            anchor = ip = ip + mlen;
    }    }   /* for (cur=0; cur < last_pos; ) */

    /* Save reps for next block */
    { int i; for (i=0; i<ZSTD_REP_NUM; i++) seqStorePtr->repToConfirm[i] = rep[i]; }

    /* Return the last literals size */
    return iend - anchor;
 }


 size_t ZSTD_compressBlock_btopt(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
 {
    return ZSTD_compressBlock_opt_generic(ctx, src, srcSize, 0);
 }

 size_t ZSTD_compressBlock_btultra(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
 {
    return ZSTD_compressBlock_opt_generic(ctx, src, srcSize, 1);
 }


 FORCE_INLINE_TEMPLATE
 size_t ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx,
                                     const void* src, size_t srcSize, const int ultra)
 {
    seqStore_t* seqStorePtr = &(ctx->seqStore);
    optState_t* optStatePtr = &(ctx->optState);
    const BYTE* const istart = (const BYTE*)src;
    const BYTE* ip = istart;
    const BYTE* anchor = istart;
    const BYTE* const iend = istart + srcSize;
    const BYTE* const ilimit = iend - 8;
    const BYTE* const base = ctx->base;
    const U32 lowestIndex = ctx->lowLimit;
    const U32 dictLimit = ctx->dictLimit;
    const BYTE* const prefixStart = base + dictLimit;
    const BYTE* const dictBase = ctx->dictBase;
    const BYTE* const dictEnd  = dictBase + dictLimit;

    const U32 maxSearches = 1U << ctx->appliedParams.cParams.searchLog;
    const U32 sufficient_len = ctx->appliedParams.cParams.targetLength;
    const U32 mls = ctx->appliedParams.cParams.searchLength;
    const U32 minMatch = (ctx->appliedParams.cParams.searchLength == 3) ? 3 : 4;

    ZSTD_optimal_t* opt = optStatePtr->priceTable;
    ZSTD_match_t* matches = optStatePtr->matchTable;
    const BYTE* inr;

    /* init */
    U32 offset, rep[ZSTD_REP_NUM];
    { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) rep[i]=seqStorePtr->rep[i]; }

    ctx->nextToUpdate3 = ctx->nextToUpdate;
    ZSTD_rescaleFreqs(optStatePtr, (const BYTE*)src, srcSize);
    ip += (ip==prefixStart);

    /* Match Loop */
    while (ip < ilimit) {
        U32 cur, match_num, last_pos, litlen, price;
        U32 u, mlen, best_mlen, best_off, litLength;
        U32 current = (U32)(ip-base);
        memset(opt, 0, sizeof(ZSTD_optimal_t));
        last_pos = 0;
        opt[0].litlen = (U32)(ip - anchor);

        /* check repCode */
        {   U32 i, last_i = ZSTD_REP_CHECK + (ip==anchor);
            for (i = (ip==anchor); i<last_i; i++) {
                const S32 repCur = (i==ZSTD_REP_MOVE_OPT) ? (rep[0] - 1) : rep[i];
                const U32 repIndex = (U32)(current - repCur);
                const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
                const BYTE* const repMatch = repBase + repIndex;
                if ( (repCur > 0 && repCur <= (S32)current)
                   && (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex>lowestIndex))  /* intentional overflow */
                   && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) {
                    /* repcode detected we should take it */
                    const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
                    mlen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iend, repEnd, prefixStart) + minMatch;

                    if (mlen > sufficient_len || mlen >= ZSTD_OPT_NUM) {
                        best_mlen = mlen; best_off = i; cur = 0; last_pos = 1;
                        goto _storeSequence;
                    }

                    best_off = i - (ip==anchor);
                    litlen = opt[0].litlen;
                    do {
                        price = ZSTD_getPrice(optStatePtr, litlen, anchor, best_off, mlen - MINMATCH, ultra);
                        if (mlen > last_pos || price < opt[mlen].price)
                            SET_PRICE(mlen, mlen, i, litlen, price);   /* note : macro modifies last_pos */
                        mlen--;
                    } while (mlen >= minMatch);
        }   }   }

        match_num = ZSTD_BtGetAllMatches_selectMLS_extDict(ctx, ip, iend, maxSearches, mls, matches, minMatch);  /* first search (depth 0) */

        if (!last_pos && !match_num) { ip++; continue; }

        { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) opt[0].rep[i] = rep[i]; }
        opt[0].mlen = 1;

        if (match_num && (matches[match_num-1].len > sufficient_len || matches[match_num-1].len >= ZSTD_OPT_NUM)) {
            best_mlen = matches[match_num-1].len;
            best_off = matches[match_num-1].off;
            cur = 0;
            last_pos = 1;
            goto _storeSequence;
        }

        best_mlen = (last_pos) ? last_pos : minMatch;

        /* set prices using matches at position = 0 */
        for (u = 0; u < match_num; u++) {
            mlen = (u>0) ? matches[u-1].len+1 : best_mlen;
            best_mlen = matches[u].len;
            litlen = opt[0].litlen;
            while (mlen <= best_mlen) {
                price = ZSTD_getPrice(optStatePtr, litlen, anchor, matches[u].off-1, mlen - MINMATCH, ultra);
                if (mlen > last_pos || price < opt[mlen].price)
                    SET_PRICE(mlen, mlen, matches[u].off, litlen, price);
                mlen++;
        }   }

        if (last_pos < minMatch) {
            ip++; continue;
        }

        /* check further positions */
        for (cur = 1; cur <= last_pos; cur++) {
            inr = ip + cur;

            if (opt[cur-1].mlen == 1) {
                litlen = opt[cur-1].litlen + 1;
                if (cur > litlen) {
                    price = opt[cur - litlen].price + ZSTD_getLiteralPrice(optStatePtr, litlen, inr-litlen);
                } else
                    price = ZSTD_getLiteralPrice(optStatePtr, litlen, anchor);
            } else {
                litlen = 1;
                price = opt[cur - 1].price + ZSTD_getLiteralPrice(optStatePtr, litlen, inr-1);
            }

            if (cur > last_pos || price <= opt[cur].price)
                SET_PRICE(cur, 1, 0, litlen, price);

            if (cur == last_pos) break;

            if (inr > ilimit)  /* last match must start at a minimum distance of 8 from oend */
                continue;

            mlen = opt[cur].mlen;
            if (opt[cur].off > ZSTD_REP_MOVE_OPT) {
                opt[cur].rep[2] = opt[cur-mlen].rep[1];
                opt[cur].rep[1] = opt[cur-mlen].rep[0];
                opt[cur].rep[0] = opt[cur].off - ZSTD_REP_MOVE_OPT;
            } else {
                opt[cur].rep[2] = (opt[cur].off > 1) ? opt[cur-mlen].rep[1] : opt[cur-mlen].rep[2];
                opt[cur].rep[1] = (opt[cur].off > 0) ? opt[cur-mlen].rep[0] : opt[cur-mlen].rep[1];
                opt[cur].rep[0] = ((opt[cur].off==ZSTD_REP_MOVE_OPT) && (mlen != 1)) ? (opt[cur-mlen].rep[0] - 1) : (opt[cur-mlen].rep[opt[cur].off]);
            }

            best_mlen = minMatch;
            {   U32 i, last_i = ZSTD_REP_CHECK + (mlen != 1);
                for (i = (mlen != 1); i<last_i; i++) {
                    const S32 repCur = (i==ZSTD_REP_MOVE_OPT) ? (opt[cur].rep[0] - 1) : opt[cur].rep[i];
                    const U32 repIndex = (U32)(current+cur - repCur);
                    const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
                    const BYTE* const repMatch = repBase + repIndex;
                    if ( (repCur > 0 && repCur <= (S32)(current+cur))
                      && (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex>lowestIndex))  /* intentional overflow */
                      && (ZSTD_readMINMATCH(inr, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) {
                        /* repcode detected */
                        const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
                        mlen = (U32)ZSTD_count_2segments(inr+minMatch, repMatch+minMatch, iend, repEnd, prefixStart) + minMatch;

                        if (mlen > sufficient_len || cur + mlen >= ZSTD_OPT_NUM) {
                            best_mlen = mlen; best_off = i; last_pos = cur + 1;
                            goto _storeSequence;
                        }

                        best_off = i - (opt[cur].mlen != 1);
                        if (mlen > best_mlen) best_mlen = mlen;

                        do {
                            if (opt[cur].mlen == 1) {
                                litlen = opt[cur].litlen;
                                if (cur > litlen) {
                                    price = opt[cur - litlen].price + ZSTD_getPrice(optStatePtr, litlen, inr-litlen, best_off, mlen - MINMATCH, ultra);
                                } else
                                    price = ZSTD_getPrice(optStatePtr, litlen, anchor, best_off, mlen - MINMATCH, ultra);
                            } else {
                                litlen = 0;
                                price = opt[cur].price + ZSTD_getPrice(optStatePtr, 0, NULL, best_off, mlen - MINMATCH, ultra);
                            }

                            if (cur + mlen > last_pos || price <= opt[cur + mlen].price)
                                SET_PRICE(cur + mlen, mlen, i, litlen, price);
                            mlen--;
                        } while (mlen >= minMatch);
            }   }   }

            match_num = ZSTD_BtGetAllMatches_selectMLS_extDict(ctx, inr, iend, maxSearches, mls, matches, minMatch);

            if (match_num > 0 && (matches[match_num-1].len > sufficient_len || cur + matches[match_num-1].len >= ZSTD_OPT_NUM)) {
                best_mlen = matches[match_num-1].len;
                best_off = matches[match_num-1].off;
                last_pos = cur + 1;
                goto _storeSequence;
            }

            /* set prices using matches at position = cur */
            for (u = 0; u < match_num; u++) {
                mlen = (u>0) ? matches[u-1].len+1 : best_mlen;
                best_mlen = matches[u].len;

                while (mlen <= best_mlen) {
                    if (opt[cur].mlen == 1) {
                        litlen = opt[cur].litlen;
                        if (cur > litlen)
                            price = opt[cur - litlen].price + ZSTD_getPrice(optStatePtr, litlen, ip+cur-litlen, matches[u].off-1, mlen - MINMATCH, ultra);
                        else
                            price = ZSTD_getPrice(optStatePtr, litlen, anchor, matches[u].off-1, mlen - MINMATCH, ultra);
                    } else {
                        litlen = 0;
                        price = opt[cur].price + ZSTD_getPrice(optStatePtr, 0, NULL, matches[u].off-1, mlen - MINMATCH, ultra);
                    }

                    if (cur + mlen > last_pos || (price < opt[cur + mlen].price))
                        SET_PRICE(cur + mlen, mlen, matches[u].off, litlen, price);

                    mlen++;
        }   }   }   /* for (cur = 1; cur <= last_pos; cur++) */

        best_mlen = opt[last_pos].mlen;
        best_off = opt[last_pos].off;
        cur = last_pos - best_mlen;

        /* store sequence */
 _storeSequence:   /* cur, last_pos, best_mlen, best_off have to be set */
        opt[0].mlen = 1;

        while (1) {
            mlen = opt[cur].mlen;
            offset = opt[cur].off;
            opt[cur].mlen = best_mlen;
            opt[cur].off = best_off;
            best_mlen = mlen;
            best_off = offset;
            if (mlen > cur) break;
            cur -= mlen;
        }

        for (u = 0; u <= last_pos; ) {
            u += opt[u].mlen;
        }

        for (cur=0; cur < last_pos; ) {
            mlen = opt[cur].mlen;
            if (mlen == 1) { ip++; cur++; continue; }
            offset = opt[cur].off;
            cur += mlen;
            litLength = (U32)(ip - anchor);

            if (offset > ZSTD_REP_MOVE_OPT) {
                rep[2] = rep[1];
                rep[1] = rep[0];
                rep[0] = offset - ZSTD_REP_MOVE_OPT;
                offset--;
            } else {
                if (offset != 0) {
                    best_off = (offset==ZSTD_REP_MOVE_OPT) ? (rep[0] - 1) : (rep[offset]);
                    if (offset != 1) rep[2] = rep[1];
                    rep[1] = rep[0];
                    rep[0] = best_off;
                }

                if (litLength==0) offset--;
            }

            ZSTD_updatePrice(optStatePtr, litLength, anchor, offset, mlen-MINMATCH);
            ZSTD_storeSeq(seqStorePtr, litLength, anchor, offset, mlen-MINMATCH);
            anchor = ip = ip + mlen;
    }    }   /* for (cur=0; cur < last_pos; ) */

    /* Save reps for next block */
    { int i; for (i=0; i<ZSTD_REP_NUM; i++) seqStorePtr->repToConfirm[i] = rep[i]; }

    /* Return the last literals size */
    return iend - anchor;
 }


 size_t ZSTD_compressBlock_btopt_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
 {
    return ZSTD_compressBlock_opt_extDict_generic(ctx, src, srcSize, 0);
 }

 size_t ZSTD_compressBlock_btultra_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
 {
    return ZSTD_compressBlock_opt_extDict_generic(ctx, src, srcSize, 1);
 }
--- a/contrib/zstd/zstd_opt.h
+++ b/contrib/zstd/zstd_opt.h
@@ -1,921 +1,30 @@
 /**
 * Copyright (c) 2016-present, Przemyslaw Skibinski, Yann Collet, Facebook, Inc.
 /*
 * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
 * All rights reserved.
 *
 * This source code is licensed under the BSD-style license found in the
 * LICENSE file in the root directory of this source tree. An additional grant
 * of patent rights can be found in the PATENTS file in the same directory.
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 * You may select, at your option, one of the above-listed licenses.
 */

 #ifndef ZSTD_OPT_H
 #define ZSTD_OPT_H

 /* Note : this file is intended to be included within zstd_compress.c */
 #include "zstd_compress.h"

 #if defined (__cplusplus)
 extern "C" {
 #endif

 #ifndef ZSTD_OPT_H_91842398743
 #define ZSTD_OPT_H_91842398743
 size_t ZSTD_compressBlock_btopt(ZSTD_CCtx* ctx, const void* src, size_t srcSize);
 size_t ZSTD_compressBlock_btultra(ZSTD_CCtx* ctx, const void* src, size_t srcSize);

 size_t ZSTD_compressBlock_btopt_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize);
 size_t ZSTD_compressBlock_btultra_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize);

 #define ZSTD_LITFREQ_ADD    2
 #define ZSTD_FREQ_DIV       4
 #define ZSTD_MAX_PRICE      (1<<30)

 /*-*************************************
 *  Price functions for optimal parser
 ***************************************/
 FORCE_INLINE void ZSTD_setLog2Prices(seqStore_t* ssPtr)
 {
    ssPtr->log2matchLengthSum = ZSTD_highbit32(ssPtr->matchLengthSum+1);
    ssPtr->log2litLengthSum = ZSTD_highbit32(ssPtr->litLengthSum+1);
    ssPtr->log2litSum = ZSTD_highbit32(ssPtr->litSum+1);
    ssPtr->log2offCodeSum = ZSTD_highbit32(ssPtr->offCodeSum+1);
    ssPtr->factor = 1 + ((ssPtr->litSum>>5) / ssPtr->litLengthSum) + ((ssPtr->litSum<<1) / (ssPtr->litSum + ssPtr->matchSum));
 }


 MEM_STATIC void ZSTD_rescaleFreqs(seqStore_t* ssPtr, const BYTE* src, size_t srcSize)
 {
    unsigned u;

    ssPtr->cachedLiterals = NULL;
    ssPtr->cachedPrice = ssPtr->cachedLitLength = 0;
    ssPtr->staticPrices = 0;

    if (ssPtr->litLengthSum == 0) {
        if (srcSize <= 1024) ssPtr->staticPrices = 1;

        for (u=0; u<=MaxLit; u++)
            ssPtr->litFreq[u] = 0;
        for (u=0; u<srcSize; u++)
            ssPtr->litFreq[src[u]]++;

        ssPtr->litSum = 0;
        ssPtr->litLengthSum = MaxLL+1;
        ssPtr->matchLengthSum = MaxML+1;
        ssPtr->offCodeSum = (MaxOff+1);
        ssPtr->matchSum = (ZSTD_LITFREQ_ADD<<Litbits);

        for (u=0; u<=MaxLit; u++) {
            ssPtr->litFreq[u] = 1 + (ssPtr->litFreq[u]>>ZSTD_FREQ_DIV);
            ssPtr->litSum += ssPtr->litFreq[u];
        }
        for (u=0; u<=MaxLL; u++)
            ssPtr->litLengthFreq[u] = 1;
        for (u=0; u<=MaxML; u++)
            ssPtr->matchLengthFreq[u] = 1;
        for (u=0; u<=MaxOff; u++)
            ssPtr->offCodeFreq[u] = 1;
    } else {
        ssPtr->matchLengthSum = 0;
        ssPtr->litLengthSum = 0;
        ssPtr->offCodeSum = 0;
        ssPtr->matchSum = 0;
        ssPtr->litSum = 0;

        for (u=0; u<=MaxLit; u++) {
            ssPtr->litFreq[u] = 1 + (ssPtr->litFreq[u]>>(ZSTD_FREQ_DIV+1));
            ssPtr->litSum += ssPtr->litFreq[u];
        }
        for (u=0; u<=MaxLL; u++) {
            ssPtr->litLengthFreq[u] = 1 + (ssPtr->litLengthFreq[u]>>(ZSTD_FREQ_DIV+1));
            ssPtr->litLengthSum += ssPtr->litLengthFreq[u];
        }
        for (u=0; u<=MaxML; u++) {
            ssPtr->matchLengthFreq[u] = 1 + (ssPtr->matchLengthFreq[u]>>ZSTD_FREQ_DIV);
            ssPtr->matchLengthSum += ssPtr->matchLengthFreq[u];
            ssPtr->matchSum += ssPtr->matchLengthFreq[u] * (u + 3);
        }
        ssPtr->matchSum *= ZSTD_LITFREQ_ADD;
        for (u=0; u<=MaxOff; u++) {
            ssPtr->offCodeFreq[u] = 1 + (ssPtr->offCodeFreq[u]>>ZSTD_FREQ_DIV);
            ssPtr->offCodeSum += ssPtr->offCodeFreq[u];
        }
    }

    ZSTD_setLog2Prices(ssPtr);
 }


 FORCE_INLINE U32 ZSTD_getLiteralPrice(seqStore_t* ssPtr, U32 litLength, const BYTE* literals)
 {
    U32 price, u;

    if (ssPtr->staticPrices)
        return ZSTD_highbit32((U32)litLength+1) + (litLength*6);

    if (litLength == 0)
        return ssPtr->log2litLengthSum - ZSTD_highbit32(ssPtr->litLengthFreq[0]+1);

    /* literals */
    if (ssPtr->cachedLiterals == literals) {
        U32 const additional = litLength - ssPtr->cachedLitLength;
        const BYTE* literals2 = ssPtr->cachedLiterals + ssPtr->cachedLitLength;
        price = ssPtr->cachedPrice + additional * ssPtr->log2litSum;
        for (u=0; u < additional; u++)
            price -= ZSTD_highbit32(ssPtr->litFreq[literals2[u]]+1);
        ssPtr->cachedPrice = price;
        ssPtr->cachedLitLength = litLength;
    } else {
        price = litLength * ssPtr->log2litSum;
        for (u=0; u < litLength; u++)
            price -= ZSTD_highbit32(ssPtr->litFreq[literals[u]]+1);

        if (litLength >= 12) {
            ssPtr->cachedLiterals = literals;
            ssPtr->cachedPrice = price;
            ssPtr->cachedLitLength = litLength;
        }
    }

    /* literal Length */
    {   const BYTE LL_deltaCode = 19;
        const BYTE llCode = (litLength>63) ? (BYTE)ZSTD_highbit32(litLength) + LL_deltaCode : LL_Code[litLength];
        price += LL_bits[llCode] + ssPtr->log2litLengthSum - ZSTD_highbit32(ssPtr->litLengthFreq[llCode]+1);
    }

    return price;
 }


 FORCE_INLINE U32 ZSTD_getPrice(seqStore_t* seqStorePtr, U32 litLength, const BYTE* literals, U32 offset, U32 matchLength, const int ultra)
 {
    /* offset */
    U32 price;
    BYTE const offCode = (BYTE)ZSTD_highbit32(offset+1);

    if (seqStorePtr->staticPrices)
        return ZSTD_getLiteralPrice(seqStorePtr, litLength, literals) + ZSTD_highbit32((U32)matchLength+1) + 16 + offCode;

    price = offCode + seqStorePtr->log2offCodeSum - ZSTD_highbit32(seqStorePtr->offCodeFreq[offCode]+1);
    if (!ultra && offCode >= 20) price += (offCode-19)*2;

    /* match Length */
    {   const BYTE ML_deltaCode = 36;
        const BYTE mlCode = (matchLength>127) ? (BYTE)ZSTD_highbit32(matchLength) + ML_deltaCode : ML_Code[matchLength];
        price += ML_bits[mlCode] + seqStorePtr->log2matchLengthSum - ZSTD_highbit32(seqStorePtr->matchLengthFreq[mlCode]+1);
    }

    return price + ZSTD_getLiteralPrice(seqStorePtr, litLength, literals) + seqStorePtr->factor;
 }


 MEM_STATIC void ZSTD_updatePrice(seqStore_t* seqStorePtr, U32 litLength, const BYTE* literals, U32 offset, U32 matchLength)
 {
    U32 u;

    /* literals */
    seqStorePtr->litSum += litLength*ZSTD_LITFREQ_ADD;
    for (u=0; u < litLength; u++)
        seqStorePtr->litFreq[literals[u]] += ZSTD_LITFREQ_ADD;

    /* literal Length */
    {   const BYTE LL_deltaCode = 19;
        const BYTE llCode = (litLength>63) ? (BYTE)ZSTD_highbit32(litLength) + LL_deltaCode : LL_Code[litLength];
        seqStorePtr->litLengthFreq[llCode]++;
        seqStorePtr->litLengthSum++;
    }

    /* match offset */
    {   BYTE const offCode = (BYTE)ZSTD_highbit32(offset+1);
        seqStorePtr->offCodeSum++;
        seqStorePtr->offCodeFreq[offCode]++;
    }

    /* match Length */
    {   const BYTE ML_deltaCode = 36;
        const BYTE mlCode = (matchLength>127) ? (BYTE)ZSTD_highbit32(matchLength) + ML_deltaCode : ML_Code[matchLength];
        seqStorePtr->matchLengthFreq[mlCode]++;
        seqStorePtr->matchLengthSum++;
    }

    ZSTD_setLog2Prices(seqStorePtr);
 }


 #define SET_PRICE(pos, mlen_, offset_, litlen_, price_)   \
    {                                                 \
        while (last_pos < pos)  { opt[last_pos+1].price = ZSTD_MAX_PRICE; last_pos++; } \
        opt[pos].mlen = mlen_;                         \
        opt[pos].off = offset_;                        \
        opt[pos].litlen = litlen_;                     \
        opt[pos].price = price_;                       \
    }



 /* Update hashTable3 up to ip (excluded)
   Assumption : always within prefix (i.e. not within extDict) */
 FORCE_INLINE
 U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_CCtx* zc, const BYTE* ip)
 {
    U32* const hashTable3  = zc->hashTable3;
    U32 const hashLog3  = zc->hashLog3;
    const BYTE* const base = zc->base;
    U32 idx = zc->nextToUpdate3;
    const U32 target = zc->nextToUpdate3 = (U32)(ip - base);
    const size_t hash3 = ZSTD_hash3Ptr(ip, hashLog3);

    while(idx < target) {
        hashTable3[ZSTD_hash3Ptr(base+idx, hashLog3)] = idx;
        idx++;
    }

    return hashTable3[hash3];
 }


 /*-*************************************
 *  Binary Tree search
 ***************************************/
 static U32 ZSTD_insertBtAndGetAllMatches (
                        ZSTD_CCtx* zc,
                        const BYTE* const ip, const BYTE* const iLimit,
                        U32 nbCompares, const U32 mls,
                        U32 extDict, ZSTD_match_t* matches, const U32 minMatchLen)
 {
    const BYTE* const base = zc->base;
    const U32 current = (U32)(ip-base);
    const U32 hashLog = zc->params.cParams.hashLog;
    const size_t h  = ZSTD_hashPtr(ip, hashLog, mls);
    U32* const hashTable = zc->hashTable;
    U32 matchIndex  = hashTable[h];
    U32* const bt   = zc->chainTable;
    const U32 btLog = zc->params.cParams.chainLog - 1;
    const U32 btMask= (1U << btLog) - 1;
    size_t commonLengthSmaller=0, commonLengthLarger=0;
    const BYTE* const dictBase = zc->dictBase;
    const U32 dictLimit = zc->dictLimit;
    const BYTE* const dictEnd = dictBase + dictLimit;
    const BYTE* const prefixStart = base + dictLimit;
    const U32 btLow = btMask >= current ? 0 : current - btMask;
    const U32 windowLow = zc->lowLimit;
    U32* smallerPtr = bt + 2*(current&btMask);
    U32* largerPtr  = bt + 2*(current&btMask) + 1;
    U32 matchEndIdx = current+8;
    U32 dummy32;   /* to be nullified at the end */
    U32 mnum = 0;

    const U32 minMatch = (mls == 3) ? 3 : 4;
    size_t bestLength = minMatchLen-1;

    if (minMatch == 3) { /* HC3 match finder */
        U32 const matchIndex3 = ZSTD_insertAndFindFirstIndexHash3 (zc, ip);
        if (matchIndex3>windowLow && (current - matchIndex3 < (1<<18))) {
            const BYTE* match;
            size_t currentMl=0;
            if ((!extDict) || matchIndex3 >= dictLimit) {
                match = base + matchIndex3;
                if (match[bestLength] == ip[bestLength]) currentMl = ZSTD_count(ip, match, iLimit);
            } else {
                match = dictBase + matchIndex3;
                if (MEM_readMINMATCH(match, MINMATCH) == MEM_readMINMATCH(ip, MINMATCH))    /* assumption : matchIndex3 <= dictLimit-4 (by table construction) */
                    currentMl = ZSTD_count_2segments(ip+MINMATCH, match+MINMATCH, iLimit, dictEnd, prefixStart) + MINMATCH;
            }

            /* save best solution */
            if (currentMl > bestLength) {
                bestLength = currentMl;
                matches[mnum].off = ZSTD_REP_MOVE_OPT + current - matchIndex3;
                matches[mnum].len = (U32)currentMl;
                mnum++;
                if (currentMl > ZSTD_OPT_NUM) goto update;
                if (ip+currentMl == iLimit) goto update; /* best possible, and avoid read overflow*/
            }
        }
    }

    hashTable[h] = current;   /* Update Hash Table */

    while (nbCompares-- && (matchIndex > windowLow)) {
        U32* nextPtr = bt + 2*(matchIndex & btMask);
        size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
        const BYTE* match;

        if ((!extDict) || (matchIndex+matchLength >= dictLimit)) {
            match = base + matchIndex;
            if (match[matchLength] == ip[matchLength]) {
                matchLength += ZSTD_count(ip+matchLength+1, match+matchLength+1, iLimit) +1;
            }
        } else {
            match = dictBase + matchIndex;
            matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, dictEnd, prefixStart);
            if (matchIndex+matchLength >= dictLimit)
                match = base + matchIndex;   /* to prepare for next usage of match[matchLength] */
        }

        if (matchLength > bestLength) {
            if (matchLength > matchEndIdx - matchIndex) matchEndIdx = matchIndex + (U32)matchLength;
            bestLength = matchLength;
            matches[mnum].off = ZSTD_REP_MOVE_OPT + current - matchIndex;
            matches[mnum].len = (U32)matchLength;
            mnum++;
            if (matchLength > ZSTD_OPT_NUM) break;
            if (ip+matchLength == iLimit)   /* equal : no way to know if inf or sup */
                break;   /* drop, to guarantee consistency (miss a little bit of compression) */
        }

        if (match[matchLength] < ip[matchLength]) {
            /* match is smaller than current */
            *smallerPtr = matchIndex;             /* update smaller idx */
            commonLengthSmaller = matchLength;    /* all smaller will now have at least this guaranteed common length */
            if (matchIndex <= btLow) { smallerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
            smallerPtr = nextPtr+1;               /* new "smaller" => larger of match */
            matchIndex = nextPtr[1];              /* new matchIndex larger than previous (closer to current) */
        } else {
            /* match is larger than current */
            *largerPtr = matchIndex;
            commonLengthLarger = matchLength;
            if (matchIndex <= btLow) { largerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
            largerPtr = nextPtr;
            matchIndex = nextPtr[0];
    }   }

    *smallerPtr = *largerPtr = 0;

 update:
    zc->nextToUpdate = (matchEndIdx > current + 8) ? matchEndIdx - 8 : current+1;
    return mnum;
 }


 /** Tree updater, providing best match */
 static U32 ZSTD_BtGetAllMatches (
                        ZSTD_CCtx* zc,
                        const BYTE* const ip, const BYTE* const iLimit,
                        const U32 maxNbAttempts, const U32 mls, ZSTD_match_t* matches, const U32 minMatchLen)
 {
    if (ip < zc->base + zc->nextToUpdate) return 0;   /* skipped area */
    ZSTD_updateTree(zc, ip, iLimit, maxNbAttempts, mls);
    return ZSTD_insertBtAndGetAllMatches(zc, ip, iLimit, maxNbAttempts, mls, 0, matches, minMatchLen);
 }


 static U32 ZSTD_BtGetAllMatches_selectMLS (
                        ZSTD_CCtx* zc,   /* Index table will be updated */
                        const BYTE* ip, const BYTE* const iHighLimit,
                        const U32 maxNbAttempts, const U32 matchLengthSearch, ZSTD_match_t* matches, const U32 minMatchLen)
 {
    switch(matchLengthSearch)
    {
    case 3 : return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 3, matches, minMatchLen);
    default :
    case 4 : return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 4, matches, minMatchLen);
    case 5 : return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 5, matches, minMatchLen);
    case 7 :
    case 6 : return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 6, matches, minMatchLen);
    }
 }

 /** Tree updater, providing best match */
 static U32 ZSTD_BtGetAllMatches_extDict (
                        ZSTD_CCtx* zc,
                        const BYTE* const ip, const BYTE* const iLimit,
                        const U32 maxNbAttempts, const U32 mls, ZSTD_match_t* matches, const U32 minMatchLen)
 {
    if (ip < zc->base + zc->nextToUpdate) return 0;   /* skipped area */
    ZSTD_updateTree_extDict(zc, ip, iLimit, maxNbAttempts, mls);
    return ZSTD_insertBtAndGetAllMatches(zc, ip, iLimit, maxNbAttempts, mls, 1, matches, minMatchLen);
 }


 static U32 ZSTD_BtGetAllMatches_selectMLS_extDict (
                        ZSTD_CCtx* zc,   /* Index table will be updated */
                        const BYTE* ip, const BYTE* const iHighLimit,
                        const U32 maxNbAttempts, const U32 matchLengthSearch, ZSTD_match_t* matches, const U32 minMatchLen)
 {
    switch(matchLengthSearch)
    {
    case 3 : return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 3, matches, minMatchLen);
    default :
    case 4 : return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 4, matches, minMatchLen);
    case 5 : return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 5, matches, minMatchLen);
    case 7 :
    case 6 : return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 6, matches, minMatchLen);
    }
 }


 /*-*******************************
 *  Optimal parser
 *********************************/
 FORCE_INLINE
 void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx,
                                    const void* src, size_t srcSize, const int ultra)
 {
    seqStore_t* seqStorePtr = &(ctx->seqStore);
    const BYTE* const istart = (const BYTE*)src;
    const BYTE* ip = istart;
    const BYTE* anchor = istart;
    const BYTE* const iend = istart + srcSize;
    const BYTE* const ilimit = iend - 8;
    const BYTE* const base = ctx->base;
    const BYTE* const prefixStart = base + ctx->dictLimit;

    const U32 maxSearches = 1U << ctx->params.cParams.searchLog;
    const U32 sufficient_len = ctx->params.cParams.targetLength;
    const U32 mls = ctx->params.cParams.searchLength;
    const U32 minMatch = (ctx->params.cParams.searchLength == 3) ? 3 : 4;

    ZSTD_optimal_t* opt = seqStorePtr->priceTable;
    ZSTD_match_t* matches = seqStorePtr->matchTable;
    const BYTE* inr;
    U32 offset, rep[ZSTD_REP_NUM];

    /* init */
    ctx->nextToUpdate3 = ctx->nextToUpdate;
    ZSTD_rescaleFreqs(seqStorePtr, (const BYTE*)src, srcSize);
    ip += (ip==prefixStart);
    { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) rep[i]=ctx->rep[i]; }

    /* Match Loop */
    while (ip < ilimit) {
        U32 cur, match_num, last_pos, litlen, price;
        U32 u, mlen, best_mlen, best_off, litLength;
        memset(opt, 0, sizeof(ZSTD_optimal_t));
        last_pos = 0;
        litlen = (U32)(ip - anchor);

        /* check repCode */
        {   U32 i, last_i = ZSTD_REP_CHECK + (ip==anchor);
            for (i=(ip == anchor); i<last_i; i++) {
                const S32 repCur = (i==ZSTD_REP_MOVE_OPT) ? (rep[0] - 1) : rep[i];
                if ( (repCur > 0) && (repCur < (S32)(ip-prefixStart))
                    && (MEM_readMINMATCH(ip, minMatch) == MEM_readMINMATCH(ip - repCur, minMatch))) {
                    mlen = (U32)ZSTD_count(ip+minMatch, ip+minMatch-repCur, iend) + minMatch;
                    if (mlen > sufficient_len || mlen >= ZSTD_OPT_NUM) {
                        best_mlen = mlen; best_off = i; cur = 0; last_pos = 1;
                        goto _storeSequence;
                    }
                    best_off = i - (ip == anchor);
                    do {
                        price = ZSTD_getPrice(seqStorePtr, litlen, anchor, best_off, mlen - MINMATCH, ultra);
                        if (mlen > last_pos || price < opt[mlen].price)
                            SET_PRICE(mlen, mlen, i, litlen, price);   /* note : macro modifies last_pos */
                        mlen--;
                    } while (mlen >= minMatch);
        }   }   }

        match_num = ZSTD_BtGetAllMatches_selectMLS(ctx, ip, iend, maxSearches, mls, matches, minMatch);

        if (!last_pos && !match_num) { ip++; continue; }

        if (match_num && (matches[match_num-1].len > sufficient_len || matches[match_num-1].len >= ZSTD_OPT_NUM)) {
            best_mlen = matches[match_num-1].len;
            best_off = matches[match_num-1].off;
            cur = 0;
            last_pos = 1;
            goto _storeSequence;
        }

        /* set prices using matches at position = 0 */
        best_mlen = (last_pos) ? last_pos : minMatch;
        for (u = 0; u < match_num; u++) {
            mlen = (u>0) ? matches[u-1].len+1 : best_mlen;
            best_mlen = matches[u].len;
            while (mlen <= best_mlen) {
                price = ZSTD_getPrice(seqStorePtr, litlen, anchor, matches[u].off-1, mlen - MINMATCH, ultra);
                if (mlen > last_pos || price < opt[mlen].price)
                    SET_PRICE(mlen, mlen, matches[u].off, litlen, price);   /* note : macro modifies last_pos */
                mlen++;
        }   }

        if (last_pos < minMatch) { ip++; continue; }

        /* initialize opt[0] */
        { U32 i ; for (i=0; i<ZSTD_REP_NUM; i++) opt[0].rep[i] = rep[i]; }
        opt[0].mlen = 1;
        opt[0].litlen = litlen;

         /* check further positions */
        for (cur = 1; cur <= last_pos; cur++) {
           inr = ip + cur;

           if (opt[cur-1].mlen == 1) {
                litlen = opt[cur-1].litlen + 1;
                if (cur > litlen) {
                    price = opt[cur - litlen].price + ZSTD_getLiteralPrice(seqStorePtr, litlen, inr-litlen);
                } else
                    price = ZSTD_getLiteralPrice(seqStorePtr, litlen, anchor);
           } else {
                litlen = 1;
                price = opt[cur - 1].price + ZSTD_getLiteralPrice(seqStorePtr, litlen, inr-1);
           }

           if (cur > last_pos || price <= opt[cur].price)
                SET_PRICE(cur, 1, 0, litlen, price);

           if (cur == last_pos) break;

           if (inr > ilimit)  /* last match must start at a minimum distance of 8 from oend */
               continue;

           mlen = opt[cur].mlen;
           if (opt[cur].off > ZSTD_REP_MOVE_OPT) {
                opt[cur].rep[2] = opt[cur-mlen].rep[1];
                opt[cur].rep[1] = opt[cur-mlen].rep[0];
                opt[cur].rep[0] = opt[cur].off - ZSTD_REP_MOVE_OPT;
           } else {
                opt[cur].rep[2] = (opt[cur].off > 1) ? opt[cur-mlen].rep[1] : opt[cur-mlen].rep[2];
                opt[cur].rep[1] = (opt[cur].off > 0) ? opt[cur-mlen].rep[0] : opt[cur-mlen].rep[1];
                opt[cur].rep[0] = ((opt[cur].off==ZSTD_REP_MOVE_OPT) && (mlen != 1)) ? (opt[cur-mlen].rep[0] - 1) : (opt[cur-mlen].rep[opt[cur].off]);
           }

            best_mlen = minMatch;
            {   U32 i, last_i = ZSTD_REP_CHECK + (mlen != 1);
                for (i=(opt[cur].mlen != 1); i<last_i; i++) {  /* check rep */
                    const S32 repCur = (i==ZSTD_REP_MOVE_OPT) ? (opt[cur].rep[0] - 1) : opt[cur].rep[i];
                    if ( (repCur > 0) && (repCur < (S32)(inr-prefixStart))
                       && (MEM_readMINMATCH(inr, minMatch) == MEM_readMINMATCH(inr - repCur, minMatch))) {
                       mlen = (U32)ZSTD_count(inr+minMatch, inr+minMatch - repCur, iend) + minMatch;

                       if (mlen > sufficient_len || cur + mlen >= ZSTD_OPT_NUM) {
                            best_mlen = mlen; best_off = i; last_pos = cur + 1;
                            goto _storeSequence;
                       }

                       best_off = i - (opt[cur].mlen != 1);
                       if (mlen > best_mlen) best_mlen = mlen;

                       do {
                           if (opt[cur].mlen == 1) {
                                litlen = opt[cur].litlen;
                                if (cur > litlen) {
                                    price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, inr-litlen, best_off, mlen - MINMATCH, ultra);
                                } else
                                    price = ZSTD_getPrice(seqStorePtr, litlen, anchor, best_off, mlen - MINMATCH, ultra);
                            } else {
                                litlen = 0;
                                price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, best_off, mlen - MINMATCH, ultra);
                            }

                            if (cur + mlen > last_pos || price <= opt[cur + mlen].price)
                                SET_PRICE(cur + mlen, mlen, i, litlen, price);
                            mlen--;
                        } while (mlen >= minMatch);
            }   }   }

            match_num = ZSTD_BtGetAllMatches_selectMLS(ctx, inr, iend, maxSearches, mls, matches, best_mlen);

            if (match_num > 0 && (matches[match_num-1].len > sufficient_len || cur + matches[match_num-1].len >= ZSTD_OPT_NUM)) {
                best_mlen = matches[match_num-1].len;
                best_off = matches[match_num-1].off;
                last_pos = cur + 1;
                goto _storeSequence;
            }

            /* set prices using matches at position = cur */
            for (u = 0; u < match_num; u++) {
                mlen = (u>0) ? matches[u-1].len+1 : best_mlen;
                best_mlen = matches[u].len;

                while (mlen <= best_mlen) {
                    if (opt[cur].mlen == 1) {
                        litlen = opt[cur].litlen;
                        if (cur > litlen)
                            price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, ip+cur-litlen, matches[u].off-1, mlen - MINMATCH, ultra);
                        else
                            price = ZSTD_getPrice(seqStorePtr, litlen, anchor, matches[u].off-1, mlen - MINMATCH, ultra);
                    } else {
                        litlen = 0;
                        price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, matches[u].off-1, mlen - MINMATCH, ultra);
                    }

                    if (cur + mlen > last_pos || (price < opt[cur + mlen].price))
                        SET_PRICE(cur + mlen, mlen, matches[u].off, litlen, price);

                    mlen++;
        }   }   }

        best_mlen = opt[last_pos].mlen;
        best_off = opt[last_pos].off;
        cur = last_pos - best_mlen;

        /* store sequence */
 _storeSequence:   /* cur, last_pos, best_mlen, best_off have to be set */
        opt[0].mlen = 1;

        while (1) {
            mlen = opt[cur].mlen;
            offset = opt[cur].off;
            opt[cur].mlen = best_mlen;
            opt[cur].off = best_off;
            best_mlen = mlen;
            best_off = offset;
            if (mlen > cur) break;
            cur -= mlen;
        }

        for (u = 0; u <= last_pos;) {
            u += opt[u].mlen;
        }

        for (cur=0; cur < last_pos; ) {
            mlen = opt[cur].mlen;
            if (mlen == 1) { ip++; cur++; continue; }
            offset = opt[cur].off;
            cur += mlen;
            litLength = (U32)(ip - anchor);

            if (offset > ZSTD_REP_MOVE_OPT) {
                rep[2] = rep[1];
                rep[1] = rep[0];
                rep[0] = offset - ZSTD_REP_MOVE_OPT;
                offset--;
            } else {
                if (offset != 0) {
                    best_off = (offset==ZSTD_REP_MOVE_OPT) ? (rep[0] - 1) : (rep[offset]);
                    if (offset != 1) rep[2] = rep[1];
                    rep[1] = rep[0];
                    rep[0] = best_off;
                }
                if (litLength==0) offset--;
            }

            ZSTD_updatePrice(seqStorePtr, litLength, anchor, offset, mlen-MINMATCH);
            ZSTD_storeSeq(seqStorePtr, litLength, anchor, offset, mlen-MINMATCH);
            anchor = ip = ip + mlen;
    }    }   /* for (cur=0; cur < last_pos; ) */

    /* Save reps for next block */
    { int i; for (i=0; i<ZSTD_REP_NUM; i++) ctx->repToConfirm[i] = rep[i]; }

    /* Last Literals */
    {   size_t const lastLLSize = iend - anchor;
        memcpy(seqStorePtr->lit, anchor, lastLLSize);
        seqStorePtr->lit += lastLLSize;
    }
 }


 FORCE_INLINE
 void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx,
                                     const void* src, size_t srcSize, const int ultra)
 {
    seqStore_t* seqStorePtr = &(ctx->seqStore);
    const BYTE* const istart = (const BYTE*)src;
    const BYTE* ip = istart;
    const BYTE* anchor = istart;
    const BYTE* const iend = istart + srcSize;
    const BYTE* const ilimit = iend - 8;
    const BYTE* const base = ctx->base;
    const U32 lowestIndex = ctx->lowLimit;
    const U32 dictLimit = ctx->dictLimit;
    const BYTE* const prefixStart = base + dictLimit;
    const BYTE* const dictBase = ctx->dictBase;
    const BYTE* const dictEnd  = dictBase + dictLimit;

    const U32 maxSearches = 1U << ctx->params.cParams.searchLog;
    const U32 sufficient_len = ctx->params.cParams.targetLength;
    const U32 mls = ctx->params.cParams.searchLength;
    const U32 minMatch = (ctx->params.cParams.searchLength == 3) ? 3 : 4;

    ZSTD_optimal_t* opt = seqStorePtr->priceTable;
    ZSTD_match_t* matches = seqStorePtr->matchTable;
    const BYTE* inr;

    /* init */
    U32 offset, rep[ZSTD_REP_NUM];
    { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) rep[i]=ctx->rep[i]; }

    ctx->nextToUpdate3 = ctx->nextToUpdate;
    ZSTD_rescaleFreqs(seqStorePtr, (const BYTE*)src, srcSize);
    ip += (ip==prefixStart);

    /* Match Loop */
    while (ip < ilimit) {
        U32 cur, match_num, last_pos, litlen, price;
        U32 u, mlen, best_mlen, best_off, litLength;
        U32 current = (U32)(ip-base);
        memset(opt, 0, sizeof(ZSTD_optimal_t));
        last_pos = 0;
        opt[0].litlen = (U32)(ip - anchor);

        /* check repCode */
        {   U32 i, last_i = ZSTD_REP_CHECK + (ip==anchor);
            for (i = (ip==anchor); i<last_i; i++) {
                const S32 repCur = (i==ZSTD_REP_MOVE_OPT) ? (rep[0] - 1) : rep[i];
                const U32 repIndex = (U32)(current - repCur);
                const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
                const BYTE* const repMatch = repBase + repIndex;
                if ( (repCur > 0 && repCur <= (S32)current)
                   && (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex>lowestIndex))  /* intentional overflow */
                   && (MEM_readMINMATCH(ip, minMatch) == MEM_readMINMATCH(repMatch, minMatch)) ) {
                    /* repcode detected we should take it */
                    const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
                    mlen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iend, repEnd, prefixStart) + minMatch;

                    if (mlen > sufficient_len || mlen >= ZSTD_OPT_NUM) {
                        best_mlen = mlen; best_off = i; cur = 0; last_pos = 1;
                        goto _storeSequence;
                    }

                    best_off = i - (ip==anchor);
                    litlen = opt[0].litlen;
                    do {
                        price = ZSTD_getPrice(seqStorePtr, litlen, anchor, best_off, mlen - MINMATCH, ultra);
                        if (mlen > last_pos || price < opt[mlen].price)
                            SET_PRICE(mlen, mlen, i, litlen, price);   /* note : macro modifies last_pos */
                        mlen--;
                    } while (mlen >= minMatch);
        }   }   }

        match_num = ZSTD_BtGetAllMatches_selectMLS_extDict(ctx, ip, iend, maxSearches, mls, matches, minMatch);  /* first search (depth 0) */

        if (!last_pos && !match_num) { ip++; continue; }

        { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) opt[0].rep[i] = rep[i]; }
        opt[0].mlen = 1;

        if (match_num && (matches[match_num-1].len > sufficient_len || matches[match_num-1].len >= ZSTD_OPT_NUM)) {
            best_mlen = matches[match_num-1].len;
            best_off = matches[match_num-1].off;
            cur = 0;
            last_pos = 1;
            goto _storeSequence;
        }

        best_mlen = (last_pos) ? last_pos : minMatch;

        /* set prices using matches at position = 0 */
        for (u = 0; u < match_num; u++) {
            mlen = (u>0) ? matches[u-1].len+1 : best_mlen;
            best_mlen = matches[u].len;
            litlen = opt[0].litlen;
            while (mlen <= best_mlen) {
                price = ZSTD_getPrice(seqStorePtr, litlen, anchor, matches[u].off-1, mlen - MINMATCH, ultra);
                if (mlen > last_pos || price < opt[mlen].price)
                    SET_PRICE(mlen, mlen, matches[u].off, litlen, price);
                mlen++;
        }   }

        if (last_pos < minMatch) {
            ip++; continue;
        }

        /* check further positions */
        for (cur = 1; cur <= last_pos; cur++) {
            inr = ip + cur;

            if (opt[cur-1].mlen == 1) {
                litlen = opt[cur-1].litlen + 1;
                if (cur > litlen) {
                    price = opt[cur - litlen].price + ZSTD_getLiteralPrice(seqStorePtr, litlen, inr-litlen);
                } else
                    price = ZSTD_getLiteralPrice(seqStorePtr, litlen, anchor);
            } else {
                litlen = 1;
                price = opt[cur - 1].price + ZSTD_getLiteralPrice(seqStorePtr, litlen, inr-1);
            }

            if (cur > last_pos || price <= opt[cur].price)
                SET_PRICE(cur, 1, 0, litlen, price);

            if (cur == last_pos) break;

            if (inr > ilimit)  /* last match must start at a minimum distance of 8 from oend */
                continue;

            mlen = opt[cur].mlen;
            if (opt[cur].off > ZSTD_REP_MOVE_OPT) {
                opt[cur].rep[2] = opt[cur-mlen].rep[1];
                opt[cur].rep[1] = opt[cur-mlen].rep[0];
                opt[cur].rep[0] = opt[cur].off - ZSTD_REP_MOVE_OPT;
            } else {
                opt[cur].rep[2] = (opt[cur].off > 1) ? opt[cur-mlen].rep[1] : opt[cur-mlen].rep[2];
                opt[cur].rep[1] = (opt[cur].off > 0) ? opt[cur-mlen].rep[0] : opt[cur-mlen].rep[1];
                opt[cur].rep[0] = ((opt[cur].off==ZSTD_REP_MOVE_OPT) && (mlen != 1)) ? (opt[cur-mlen].rep[0] - 1) : (opt[cur-mlen].rep[opt[cur].off]);
            }

            best_mlen = minMatch;
            {   U32 i, last_i = ZSTD_REP_CHECK + (mlen != 1);
                for (i = (mlen != 1); i<last_i; i++) {
                    const S32 repCur = (i==ZSTD_REP_MOVE_OPT) ? (opt[cur].rep[0] - 1) : opt[cur].rep[i];
                    const U32 repIndex = (U32)(current+cur - repCur);
                    const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
                    const BYTE* const repMatch = repBase + repIndex;
                    if ( (repCur > 0 && repCur <= (S32)(current+cur))
                      && (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex>lowestIndex))  /* intentional overflow */
                      && (MEM_readMINMATCH(inr, minMatch) == MEM_readMINMATCH(repMatch, minMatch)) ) {
                        /* repcode detected */
                        const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
                        mlen = (U32)ZSTD_count_2segments(inr+minMatch, repMatch+minMatch, iend, repEnd, prefixStart) + minMatch;

                        if (mlen > sufficient_len || cur + mlen >= ZSTD_OPT_NUM) {
                            best_mlen = mlen; best_off = i; last_pos = cur + 1;
                            goto _storeSequence;
                        }

                        best_off = i - (opt[cur].mlen != 1);
                        if (mlen > best_mlen) best_mlen = mlen;

                        do {
                            if (opt[cur].mlen == 1) {
                                litlen = opt[cur].litlen;
                                if (cur > litlen) {
                                    price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, inr-litlen, best_off, mlen - MINMATCH, ultra);
                                } else
                                    price = ZSTD_getPrice(seqStorePtr, litlen, anchor, best_off, mlen - MINMATCH, ultra);
                            } else {
                                litlen = 0;
                                price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, best_off, mlen - MINMATCH, ultra);
                            }

                            if (cur + mlen > last_pos || price <= opt[cur + mlen].price)
                                SET_PRICE(cur + mlen, mlen, i, litlen, price);
                            mlen--;
                        } while (mlen >= minMatch);
            }   }   }

            match_num = ZSTD_BtGetAllMatches_selectMLS_extDict(ctx, inr, iend, maxSearches, mls, matches, minMatch);

            if (match_num > 0 && (matches[match_num-1].len > sufficient_len || cur + matches[match_num-1].len >= ZSTD_OPT_NUM)) {
                best_mlen = matches[match_num-1].len;
                best_off = matches[match_num-1].off;
                last_pos = cur + 1;
                goto _storeSequence;
            }

            /* set prices using matches at position = cur */
            for (u = 0; u < match_num; u++) {
                mlen = (u>0) ? matches[u-1].len+1 : best_mlen;
                best_mlen = matches[u].len;

                while (mlen <= best_mlen) {
                    if (opt[cur].mlen == 1) {
                        litlen = opt[cur].litlen;
                        if (cur > litlen)
                            price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, ip+cur-litlen, matches[u].off-1, mlen - MINMATCH, ultra);
                        else
                            price = ZSTD_getPrice(seqStorePtr, litlen, anchor, matches[u].off-1, mlen - MINMATCH, ultra);
                    } else {
                        litlen = 0;
                        price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, matches[u].off-1, mlen - MINMATCH, ultra);
                    }

                    if (cur + mlen > last_pos || (price < opt[cur + mlen].price))
                        SET_PRICE(cur + mlen, mlen, matches[u].off, litlen, price);

                    mlen++;
        }   }   }   /* for (cur = 1; cur <= last_pos; cur++) */

        best_mlen = opt[last_pos].mlen;
        best_off = opt[last_pos].off;
        cur = last_pos - best_mlen;

        /* store sequence */
 _storeSequence:   /* cur, last_pos, best_mlen, best_off have to be set */
        opt[0].mlen = 1;

        while (1) {
            mlen = opt[cur].mlen;
            offset = opt[cur].off;
            opt[cur].mlen = best_mlen;
            opt[cur].off = best_off;
            best_mlen = mlen;
            best_off = offset;
            if (mlen > cur) break;
            cur -= mlen;
        }

        for (u = 0; u <= last_pos; ) {
            u += opt[u].mlen;
        }

        for (cur=0; cur < last_pos; ) {
            mlen = opt[cur].mlen;
            if (mlen == 1) { ip++; cur++; continue; }
            offset = opt[cur].off;
            cur += mlen;
            litLength = (U32)(ip - anchor);

            if (offset > ZSTD_REP_MOVE_OPT) {
                rep[2] = rep[1];
                rep[1] = rep[0];
                rep[0] = offset - ZSTD_REP_MOVE_OPT;
                offset--;
            } else {
                if (offset != 0) {
                    best_off = (offset==ZSTD_REP_MOVE_OPT) ? (rep[0] - 1) : (rep[offset]);
                    if (offset != 1) rep[2] = rep[1];
                    rep[1] = rep[0];
                    rep[0] = best_off;
                }

                if (litLength==0) offset--;
            }

            ZSTD_updatePrice(seqStorePtr, litLength, anchor, offset, mlen-MINMATCH);
            ZSTD_storeSeq(seqStorePtr, litLength, anchor, offset, mlen-MINMATCH);
            anchor = ip = ip + mlen;
    }    }   /* for (cur=0; cur < last_pos; ) */

    /* Save reps for next block */
    { int i; for (i=0; i<ZSTD_REP_NUM; i++) ctx->repToConfirm[i] = rep[i]; }

    /* Last Literals */
    {   size_t lastLLSize = iend - anchor;
        memcpy(seqStorePtr->lit, anchor, lastLLSize);
        seqStorePtr->lit += lastLLSize;
    }
 #if defined (__cplusplus)
 }
 #endif

 #endif  /* ZSTD_OPT_H_91842398743 */
 #endif /* ZSTD_OPT_H */
--- a/contrib/zstd/zstdmt_compress.c
+++ b/contrib/zstd/zstdmt_compress.c
--- a/contrib/zstd/zstdmt_compress.h
+++ b/contrib/zstd/zstdmt_compress.h
@@ -0,0 +1,134 @@
 /*
 * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
 * All rights reserved.
 *
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 * You may select, at your option, one of the above-listed licenses.
 */

 #ifndef ZSTDMT_COMPRESS_H
 #define ZSTDMT_COMPRESS_H

 #if defined (__cplusplus)
 extern "C" {
 #endif


 /* Note : This is an internal API.
 *        Some methods are still exposed (ZSTDLIB_API),
 *        because it used to be the only way to invoke MT compression.
 *        Now, it's recommended to use ZSTD_compress_generic() instead.
 *        These methods will stop being exposed in a future version */

 /* ===   Dependencies   === */
 #include <stddef.h>                /* size_t */
 #ifndef ZSTD_STATIC_LINKING_ONLY
 #define ZSTD_STATIC_LINKING_ONLY   /* ZSTD_parameters */
 #endif
 #include "zstd.h"            /* ZSTD_inBuffer, ZSTD_outBuffer, ZSTDLIB_API */


 /* ===   Memory management   === */
 typedef struct ZSTDMT_CCtx_s ZSTDMT_CCtx;
 ZSTDLIB_API ZSTDMT_CCtx* ZSTDMT_createCCtx(unsigned nbThreads);
 ZSTDLIB_API ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced(unsigned nbThreads,
                                                    ZSTD_customMem cMem);
 ZSTDLIB_API size_t ZSTDMT_freeCCtx(ZSTDMT_CCtx* mtctx);

 ZSTDLIB_API size_t ZSTDMT_sizeof_CCtx(ZSTDMT_CCtx* mtctx);


 /* ===   Simple buffer-to-butter one-pass function   === */

 ZSTDLIB_API size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* mtctx,
                                       void* dst, size_t dstCapacity,
                                 const void* src, size_t srcSize,
                                       int compressionLevel);



 /* ===   Streaming functions   === */

 ZSTDLIB_API size_t ZSTDMT_initCStream(ZSTDMT_CCtx* mtctx, int compressionLevel);
 ZSTDLIB_API size_t ZSTDMT_resetCStream(ZSTDMT_CCtx* mtctx, unsigned long long pledgedSrcSize);    /**< pledgedSrcSize is optional and can be zero == unknown */

 ZSTDLIB_API size_t ZSTDMT_compressStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output, ZSTD_inBuffer* input);

 ZSTDLIB_API size_t ZSTDMT_flushStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output);   /**< @return : 0 == all flushed; >0 : still some data to be flushed; or an error code (ZSTD_isError()) */
 ZSTDLIB_API size_t ZSTDMT_endStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output);     /**< @return : 0 == all flushed; >0 : still some data to be flushed; or an error code (ZSTD_isError()) */


 /* ===   Advanced functions and parameters  === */

 #ifndef ZSTDMT_SECTION_SIZE_MIN
 #  define ZSTDMT_SECTION_SIZE_MIN (1U << 20)   /* 1 MB - Minimum size of each compression job */
 #endif

 ZSTDLIB_API size_t ZSTDMT_compress_advanced(ZSTDMT_CCtx* mtctx,
                                           void* dst, size_t dstCapacity,
                                     const void* src, size_t srcSize,
                                     const ZSTD_CDict* cdict,
                                           ZSTD_parameters const params,
                                           unsigned overlapLog);

 ZSTDLIB_API size_t ZSTDMT_initCStream_advanced(ZSTDMT_CCtx* mtctx,
                                        const void* dict, size_t dictSize,   /* dict can be released after init, a local copy is preserved within zcs */
                                        ZSTD_parameters params,
                                        unsigned long long pledgedSrcSize);  /* pledgedSrcSize is optional and can be zero == unknown */

 ZSTDLIB_API size_t ZSTDMT_initCStream_usingCDict(ZSTDMT_CCtx* mtctx,
                                        const ZSTD_CDict* cdict,
                                        ZSTD_frameParameters fparams,
                                        unsigned long long pledgedSrcSize);  /* note : zero means empty */

 /* ZSTDMT_parameter :
 * List of parameters that can be set using ZSTDMT_setMTCtxParameter() */
 typedef enum {
    ZSTDMT_p_sectionSize,        /* size of input "section". Each section is compressed in parallel. 0 means default, which is dynamically determined within compression functions */
    ZSTDMT_p_overlapSectionLog   /* Log of overlapped section; 0 == no overlap, 6(default) == use 1/8th of window, >=9 == use full window */
 } ZSTDMT_parameter;

 /* ZSTDMT_setMTCtxParameter() :
 * allow setting individual parameters, one at a time, among a list of enums defined in ZSTDMT_parameter.
 * The function must be called typically after ZSTD_createCCtx().
 * Parameters not explicitly reset by ZSTDMT_init*() remain the same in consecutive compression sessions.
 * @return : 0, or an error code (which can be tested using ZSTD_isError()) */
 ZSTDLIB_API size_t ZSTDMT_setMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter, unsigned value);


 /*! ZSTDMT_compressStream_generic() :
 *  Combines ZSTDMT_compressStream() with ZSTDMT_flushStream() or ZSTDMT_endStream()
 *  depending on flush directive.
 * @return : minimum amount of data still to be flushed
 *           0 if fully flushed
 *           or an error code */
 ZSTDLIB_API size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx,
                                                ZSTD_outBuffer* output,
                                                ZSTD_inBuffer* input,
                                                ZSTD_EndDirective endOp);


 /* ===   Private definitions; never ever use directly  === */

 size_t ZSTDMT_CCtxParam_setMTCtxParameter(ZSTD_CCtx_params* params, ZSTDMT_parameter parameter, unsigned value);

 size_t ZSTDMT_initializeCCtxParameters(ZSTD_CCtx_params* params, unsigned nbThreads);

 /*! ZSTDMT_initCStream_internal() :
 *  Private use only. Init streaming operation.
 *  expects params to be valid.
 *  must receive dict, or cdict, or none, but not both.
 *  @return : 0, or an error code */
 size_t ZSTDMT_initCStream_internal(ZSTDMT_CCtx* zcs,
                    const void* dict, size_t dictSize, ZSTD_dictMode_e dictMode,
                    const ZSTD_CDict* cdict,
                    ZSTD_CCtx_params params, unsigned long long pledgedSrcSize);


 #if defined (__cplusplus)
 }
 #endif

 #endif   /* ZSTDMT_COMPRESS_H */