author    | Devtools Arcadia <arcadia-devtools@yandex-team.ru>           | 2022-02-07 18:08:42 +0300
committer | Devtools Arcadia <arcadia-devtools@mous.vla.yp-c.yandex.net> | 2022-02-07 18:08:42 +0300
commit    | 1110808a9d39d4b808aef724c861a2e1a38d2a69 (patch)
tree      | e26c9fed0de5d9873cce7e00bc214573dc2195b7 /contrib/libs/zstd06/compress
download  | ydb-1110808a9d39d4b808aef724c861a2e1a38d2a69.tar.gz
intermediate changes
ref:cde9a383711a11544ce7e107a78147fb96cc4029
Diffstat (limited to 'contrib/libs/zstd06/compress')
-rw-r--r-- | contrib/libs/zstd06/compress/fse_compress.c   |  803
-rw-r--r-- | contrib/libs/zstd06/compress/huf_compress.c   |  560
-rw-r--r-- | contrib/libs/zstd06/compress/zbuff_compress.c |  291
-rw-r--r-- | contrib/libs/zstd06/compress/zstd_compress.c  | 2565
-rw-r--r-- | contrib/libs/zstd06/compress/zstd_opt.h       | 1033
5 files changed, 5252 insertions, 0 deletions
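
The five files below vendor the compression half of zstd 0.6 into the contrib tree: the FSE (Finite State Entropy) and Huffman entropy coders, the buffered streaming wrapper (zbuff), and the block/frame compressor itself. For orientation before the raw diff, here is a minimal sketch of driving the one-shot FSE entry points defined in fse_compress.c (FSE_compressBound(), FSE_compress(), FSE_isError()); the sample buffer and error handling are illustrative only, and it assumes the vendored fse.h header is on the include path.

```c
#include <stdio.h>
#include <stdlib.h>
#include "fse.h"   /* vendored header declaring the entry points used below */

int main(void)
{
    /* deliberately repetitive sample input, so FSE has something to gain */
    const char src[] = "aaaaabbbcc aaaaabbbcc aaaaabbbcc aaaaabbbcc";
    size_t const srcSize = sizeof(src);
    size_t const dstCapacity = FSE_compressBound(srcSize);   /* worst-case output size */
    void* const dst = malloc(dstCapacity);
    if (dst == NULL) return 1;

    {   size_t const cSize = FSE_compress(dst, dstCapacity, src, srcSize);
        if (FSE_isError(cSize))
            fprintf(stderr, "FSE_compress error\n");
        else if (cSize <= 1)
            /* conventions visible in FSE_compress2() below:
             * 0 = not compressible, 1 = single-symbol (RLE) input */
            printf("input stored raw / RLE\n");
        else
            printf("compressed %u -> %u bytes\n", (unsigned)srcSize, (unsigned)cSize);
    }
    free(dst);
    return 0;
}
```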
diff --git a/contrib/libs/zstd06/compress/fse_compress.c b/contrib/libs/zstd06/compress/fse_compress.c new file mode 100644 index 0000000000..dad0be7942 --- /dev/null +++ b/contrib/libs/zstd06/compress/fse_compress.c @@ -0,0 +1,803 @@ +/* ****************************************************************** + FSE : Finite State Entropy encoder + Copyright (C) 2013-2015, Yann Collet. + + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + You can contact the author at : + - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy + - Public forum : https://groups.google.com/forum/#!forum/lz4c +****************************************************************** */ + +/* ************************************************************** +* Compiler specifics +****************************************************************/ +#ifdef _MSC_VER /* Visual Studio */ +# define FORCE_INLINE static __forceinline +# include <intrin.h> /* For Visual 2005 */ +# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ +# pragma warning(disable : 4214) /* disable: C4214: non-int bitfields */ +#else +# ifdef __GNUC__ +# define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) +# define FORCE_INLINE static inline __attribute__((always_inline)) +# else +# define FORCE_INLINE static inline +# endif +#endif + + +/* ************************************************************** +* Includes +****************************************************************/ +#include <stdlib.h> /* malloc, free, qsort */ +#include <string.h> /* memcpy, memset */ +#include <stdio.h> /* printf (debug) */ +#include "bitstream.h" +#include "fse_static.h" + + +/* ************************************************************** +* Error Management +****************************************************************/ +#define FSE_STATIC_ASSERT(c) { enum { FSE_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */ + + +/* ************************************************************** +* Complex types +****************************************************************/ +typedef U32 CTable_max_t[FSE_CTABLE_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)]; + + +/* 
************************************************************** +* Templates +****************************************************************/ +/* + designed to be included + for type-specific functions (template emulation in C) + Objective is to write these functions only once, for improved maintenance +*/ + +/* safety checks */ +#ifndef FSE_FUNCTION_EXTENSION +# error "FSE_FUNCTION_EXTENSION must be defined" +#endif +#ifndef FSE_FUNCTION_TYPE +# error "FSE_FUNCTION_TYPE must be defined" +#endif + +/* Function names */ +#define FSE_CAT(X,Y) X##Y +#define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y) +#define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y) + + +/* Function templates */ +size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog) +{ + U32 const tableSize = 1 << tableLog; + U32 const tableMask = tableSize - 1; + void* const ptr = ct; + U16* const tableU16 = ( (U16*) ptr) + 2; + void* const FSCT = ((U32*)ptr) + 1 /* header */ + (tableLog ? tableSize>>1 : 1) ; + FSE_symbolCompressionTransform* const symbolTT = (FSE_symbolCompressionTransform*) (FSCT); + U32 const step = FSE_TABLESTEP(tableSize); + U32 cumul[FSE_MAX_SYMBOL_VALUE+2]; + + FSE_FUNCTION_TYPE tableSymbol[FSE_MAX_TABLESIZE]; /* memset() is not necessary, even if static analyzer complain about it */ + U32 highThreshold = tableSize-1; + + /* CTable header */ + + + tableU16[-2] = (U16) tableLog; + tableU16[-1] = (U16) maxSymbolValue; + + /* For explanations on how to distribute symbol values over the table : + * http://fastcompression.blogspot.fr/2014/02/fse-distributing-symbol-values.html */ + + /* symbol start positions */ + { U32 u; + cumul[0] = 0; + for (u=1; u<=maxSymbolValue+1; u++) { + if (normalizedCounter[u-1]==-1) { /* Low proba symbol */ + cumul[u] = cumul[u-1] + 1; + tableSymbol[highThreshold--] = (FSE_FUNCTION_TYPE)(u-1); + } else { + cumul[u] = cumul[u-1] + normalizedCounter[u-1]; + } } + cumul[maxSymbolValue+1] = tableSize+1; + } + + /* Spread symbols */ + { U32 position = 0; + U32 symbol; + for (symbol=0; symbol<=maxSymbolValue; symbol++) { + int nbOccurences; + for (nbOccurences=0; nbOccurences<normalizedCounter[symbol]; nbOccurences++) { + tableSymbol[position] = (FSE_FUNCTION_TYPE)symbol; + position = (position + step) & tableMask; + while (position > highThreshold) position = (position + step) & tableMask; /* Low proba area */ + } } + + if (position!=0) return ERROR(GENERIC); /* Must have gone through all positions */ + } + + /* Build table */ + { U32 u; for (u=0; u<tableSize; u++) { + FSE_FUNCTION_TYPE s = tableSymbol[u]; /* note : static analyzer may not understand tableSymbol is properly initialized */ + tableU16[cumul[s]++] = (U16) (tableSize+u); /* TableU16 : sorted by symbol order; gives next state value */ + }} + + /* Build Symbol Transformation Table */ + { unsigned total = 0; + unsigned s; + for (s=0; s<=maxSymbolValue; s++) { + switch (normalizedCounter[s]) + { + case 0: break; + + case -1: + case 1: + symbolTT[s].deltaNbBits = (tableLog << 16) - (1<<tableLog); + symbolTT[s].deltaFindState = total - 1; + total ++; + break; + default : + { + U32 const maxBitsOut = tableLog - BIT_highbit32 (normalizedCounter[s]-1); + U32 const minStatePlus = normalizedCounter[s] << maxBitsOut; + symbolTT[s].deltaNbBits = (maxBitsOut << 16) - minStatePlus; + symbolTT[s].deltaFindState = total - normalizedCounter[s]; + total += normalizedCounter[s]; + } } } } + + return 0; +} + + + +#ifndef FSE_COMMONDEFS_ONLY + +/*-************************************************************** +* 
FSE NCount encoding-decoding +****************************************************************/ +size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog) +{ + size_t maxHeaderSize = (((maxSymbolValue+1) * tableLog) >> 3) + 3; + return maxSymbolValue ? maxHeaderSize : FSE_NCOUNTBOUND; /* maxSymbolValue==0 ? use default */ +} + +static short FSE_abs(short a) { return a<0 ? -a : a; } + +static size_t FSE_writeNCount_generic (void* header, size_t headerBufferSize, + const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, + unsigned writeIsSafe) +{ + BYTE* const ostart = (BYTE*) header; + BYTE* out = ostart; + BYTE* const oend = ostart + headerBufferSize; + int nbBits; + const int tableSize = 1 << tableLog; + int remaining; + int threshold; + U32 bitStream; + int bitCount; + unsigned charnum = 0; + int previous0 = 0; + + bitStream = 0; + bitCount = 0; + /* Table Size */ + bitStream += (tableLog-FSE_MIN_TABLELOG) << bitCount; + bitCount += 4; + + /* Init */ + remaining = tableSize+1; /* +1 for extra accuracy */ + threshold = tableSize; + nbBits = tableLog+1; + + while (remaining>1) { /* stops at 1 */ + if (previous0) { + unsigned start = charnum; + while (!normalizedCounter[charnum]) charnum++; + while (charnum >= start+24) { + start+=24; + bitStream += 0xFFFFU << bitCount; + if ((!writeIsSafe) && (out > oend-2)) return ERROR(dstSize_tooSmall); /* Buffer overflow */ + out[0] = (BYTE) bitStream; + out[1] = (BYTE)(bitStream>>8); + out+=2; + bitStream>>=16; + } + while (charnum >= start+3) { + start+=3; + bitStream += 3 << bitCount; + bitCount += 2; + } + bitStream += (charnum-start) << bitCount; + bitCount += 2; + if (bitCount>16) { + if ((!writeIsSafe) && (out > oend - 2)) return ERROR(dstSize_tooSmall); /* Buffer overflow */ + out[0] = (BYTE)bitStream; + out[1] = (BYTE)(bitStream>>8); + out += 2; + bitStream >>= 16; + bitCount -= 16; + } } + { short count = normalizedCounter[charnum++]; + const short max = (short)((2*threshold-1)-remaining); + remaining -= FSE_abs(count); + if (remaining<1) return ERROR(GENERIC); + count++; /* +1 for extra accuracy */ + if (count>=threshold) count += max; /* [0..max[ [max..threshold[ (...) 
[threshold+max 2*threshold[ */ + bitStream += count << bitCount; + bitCount += nbBits; + bitCount -= (count<max); + previous0 = (count==1); + while (remaining<threshold) nbBits--, threshold>>=1; + } + if (bitCount>16) { + if ((!writeIsSafe) && (out > oend - 2)) return ERROR(dstSize_tooSmall); /* Buffer overflow */ + out[0] = (BYTE)bitStream; + out[1] = (BYTE)(bitStream>>8); + out += 2; + bitStream >>= 16; + bitCount -= 16; + } } + + /* flush remaining bitStream */ + if ((!writeIsSafe) && (out > oend - 2)) return ERROR(dstSize_tooSmall); /* Buffer overflow */ + out[0] = (BYTE)bitStream; + out[1] = (BYTE)(bitStream>>8); + out+= (bitCount+7) /8; + + if (charnum > maxSymbolValue + 1) return ERROR(GENERIC); + + return (out-ostart); +} + + +size_t FSE_writeNCount (void* buffer, size_t bufferSize, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog) +{ + if (tableLog > FSE_MAX_TABLELOG) return ERROR(GENERIC); /* Unsupported */ + if (tableLog < FSE_MIN_TABLELOG) return ERROR(GENERIC); /* Unsupported */ + + if (bufferSize < FSE_NCountWriteBound(maxSymbolValue, tableLog)) + return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 0); + + return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 1); +} + + + +/*-************************************************************** +* Counting histogram +****************************************************************/ +/*! FSE_count_simple + This function just counts byte values within `src`, + and store the histogram into table `count`. + This function is unsafe : it doesn't check that all values within `src` can fit into `count`. + For this reason, prefer using a table `count` with 256 elements. + @return : count of most numerous element +*/ +static size_t FSE_count_simple(unsigned* count, unsigned* maxSymbolValuePtr, + const void* src, size_t srcSize) +{ + const BYTE* ip = (const BYTE*)src; + const BYTE* const end = ip + srcSize; + unsigned maxSymbolValue = *maxSymbolValuePtr; + unsigned max=0; + + + memset(count, 0, (maxSymbolValue+1)*sizeof(*count)); + if (srcSize==0) { *maxSymbolValuePtr = 0; return 0; } + + while (ip<end) count[*ip++]++; + + while (!count[maxSymbolValue]) maxSymbolValue--; + *maxSymbolValuePtr = maxSymbolValue; + + { U32 s; for (s=0; s<=maxSymbolValue; s++) if (count[s] > max) max = count[s]; } + + return (size_t)max; +} + + +static size_t FSE_count_parallel(unsigned* count, unsigned* maxSymbolValuePtr, + const void* source, size_t sourceSize, + unsigned checkMax) +{ + const BYTE* ip = (const BYTE*)source; + const BYTE* const iend = ip+sourceSize; + unsigned maxSymbolValue = *maxSymbolValuePtr; + unsigned max=0; + + + U32 Counting1[256] = { 0 }; + U32 Counting2[256] = { 0 }; + U32 Counting3[256] = { 0 }; + U32 Counting4[256] = { 0 }; + + /* safety checks */ + if (!sourceSize) { + memset(count, 0, maxSymbolValue + 1); + *maxSymbolValuePtr = 0; + return 0; + } + if (!maxSymbolValue) maxSymbolValue = 255; /* 0 == default */ + + /* by stripes of 16 bytes */ + { U32 cached = MEM_read32(ip); ip += 4; + while (ip < iend-15) { + U32 c = cached; cached = MEM_read32(ip); ip += 4; + Counting1[(BYTE) c ]++; + Counting2[(BYTE)(c>>8) ]++; + Counting3[(BYTE)(c>>16)]++; + Counting4[ c>>24 ]++; + c = cached; cached = MEM_read32(ip); ip += 4; + Counting1[(BYTE) c ]++; + Counting2[(BYTE)(c>>8) ]++; + Counting3[(BYTE)(c>>16)]++; + Counting4[ c>>24 ]++; + c = cached; cached = MEM_read32(ip); ip += 4; + Counting1[(BYTE) c ]++; + Counting2[(BYTE)(c>>8) ]++; 
+ Counting3[(BYTE)(c>>16)]++; + Counting4[ c>>24 ]++; + c = cached; cached = MEM_read32(ip); ip += 4; + Counting1[(BYTE) c ]++; + Counting2[(BYTE)(c>>8) ]++; + Counting3[(BYTE)(c>>16)]++; + Counting4[ c>>24 ]++; + } + ip-=4; + } + + /* finish last symbols */ + while (ip<iend) Counting1[*ip++]++; + + if (checkMax) { /* verify stats will fit into destination table */ + U32 s; for (s=255; s>maxSymbolValue; s--) { + Counting1[s] += Counting2[s] + Counting3[s] + Counting4[s]; + if (Counting1[s]) return ERROR(maxSymbolValue_tooSmall); + } } + + { U32 s; for (s=0; s<=maxSymbolValue; s++) { + count[s] = Counting1[s] + Counting2[s] + Counting3[s] + Counting4[s]; + if (count[s] > max) max = count[s]; + }} + + while (!count[maxSymbolValue]) maxSymbolValue--; + *maxSymbolValuePtr = maxSymbolValue; + return (size_t)max; +} + +/* fast variant (unsafe : won't check if src contains values beyond count[] limit) */ +size_t FSE_countFast(unsigned* count, unsigned* maxSymbolValuePtr, + const void* source, size_t sourceSize) +{ + if (sourceSize < 1500) return FSE_count_simple(count, maxSymbolValuePtr, source, sourceSize); + return FSE_count_parallel(count, maxSymbolValuePtr, source, sourceSize, 0); +} + +size_t FSE_count(unsigned* count, unsigned* maxSymbolValuePtr, + const void* source, size_t sourceSize) +{ + if (*maxSymbolValuePtr <255) + return FSE_count_parallel(count, maxSymbolValuePtr, source, sourceSize, 1); + *maxSymbolValuePtr = 255; + return FSE_countFast(count, maxSymbolValuePtr, source, sourceSize); +} + + + +/*-************************************************************** +* FSE Compression Code +****************************************************************/ +/*! FSE_sizeof_CTable() : + FSE_CTable is a variable size structure which contains : + `U16 tableLog;` + `U16 maxSymbolValue;` + `U16 nextStateNumber[1 << tableLog];` // This size is variable + `FSE_symbolCompressionTransform symbolTT[maxSymbolValue+1];` // This size is variable +Allocation is manual (C standard does not support variable-size structures). +*/ + +size_t FSE_sizeof_CTable (unsigned maxSymbolValue, unsigned tableLog) +{ + size_t size; + FSE_STATIC_ASSERT((size_t)FSE_CTABLE_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)*4 >= sizeof(CTable_max_t)); /* A compilation error here means FSE_CTABLE_SIZE_U32 is not large enough */ + if (tableLog > FSE_MAX_TABLELOG) return ERROR(GENERIC); + size = FSE_CTABLE_SIZE_U32 (tableLog, maxSymbolValue) * sizeof(U32); + return size; +} + +FSE_CTable* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog) +{ + size_t size; + if (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) tableLog = FSE_TABLELOG_ABSOLUTE_MAX; + size = FSE_CTABLE_SIZE_U32 (tableLog, maxSymbolValue) * sizeof(U32); + return (FSE_CTable*)malloc(size); +} + +void FSE_freeCTable (FSE_CTable* ct) { free(ct); } + +/* provides the minimum logSize to safely represent a distribution */ +static unsigned FSE_minTableLog(size_t srcSize, unsigned maxSymbolValue) +{ + U32 minBitsSrc = BIT_highbit32((U32)(srcSize - 1)) + 1; + U32 minBitsSymbols = BIT_highbit32(maxSymbolValue) + 2; + U32 minBits = minBitsSrc < minBitsSymbols ? 
minBitsSrc : minBitsSymbols; + return minBits; +} + +unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue) +{ + U32 maxBitsSrc = BIT_highbit32((U32)(srcSize - 1)) - 2; + U32 tableLog = maxTableLog; + U32 minBits = FSE_minTableLog(srcSize, maxSymbolValue); + if (tableLog==0) tableLog = FSE_DEFAULT_TABLELOG; + if (maxBitsSrc < tableLog) tableLog = maxBitsSrc; /* Accuracy can be reduced */ + if (minBits > tableLog) tableLog = minBits; /* Need a minimum to safely represent all symbol values */ + if (tableLog < FSE_MIN_TABLELOG) tableLog = FSE_MIN_TABLELOG; + if (tableLog > FSE_MAX_TABLELOG) tableLog = FSE_MAX_TABLELOG; + return tableLog; +} + + +/* Secondary normalization method. + To be used when primary method fails. */ + +static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count, size_t total, U32 maxSymbolValue) +{ + U32 s; + U32 distributed = 0; + U32 ToDistribute; + + /* Init */ + U32 lowThreshold = (U32)(total >> tableLog); + U32 lowOne = (U32)((total * 3) >> (tableLog + 1)); + + for (s=0; s<=maxSymbolValue; s++) { + if (count[s] == 0) { + norm[s]=0; + continue; + } + if (count[s] <= lowThreshold) { + norm[s] = -1; + distributed++; + total -= count[s]; + continue; + } + if (count[s] <= lowOne) { + norm[s] = 1; + distributed++; + total -= count[s]; + continue; + } + norm[s]=-2; + } + ToDistribute = (1 << tableLog) - distributed; + + if ((total / ToDistribute) > lowOne) { + /* risk of rounding to zero */ + lowOne = (U32)((total * 3) / (ToDistribute * 2)); + for (s=0; s<=maxSymbolValue; s++) { + if ((norm[s] == -2) && (count[s] <= lowOne)) { + norm[s] = 1; + distributed++; + total -= count[s]; + continue; + } } + ToDistribute = (1 << tableLog) - distributed; + } + + if (distributed == maxSymbolValue+1) { + /* all values are pretty poor; + probably incompressible data (should have already been detected); + find max, then give all remaining points to max */ + U32 maxV = 0, maxC = 0; + for (s=0; s<=maxSymbolValue; s++) + if (count[s] > maxC) maxV=s, maxC=count[s]; + norm[maxV] += (short)ToDistribute; + return 0; + } + + { + U64 const vStepLog = 62 - tableLog; + U64 const mid = (1ULL << (vStepLog-1)) - 1; + U64 const rStep = ((((U64)1<<vStepLog) * ToDistribute) + mid) / total; /* scale on remaining */ + U64 tmpTotal = mid; + for (s=0; s<=maxSymbolValue; s++) { + if (norm[s]==-2) { + U64 end = tmpTotal + (count[s] * rStep); + U32 sStart = (U32)(tmpTotal >> vStepLog); + U32 sEnd = (U32)(end >> vStepLog); + U32 weight = sEnd - sStart; + if (weight < 1) + return ERROR(GENERIC); + norm[s] = (short)weight; + tmpTotal = end; + } } } + + return 0; +} + + +size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog, + const unsigned* count, size_t total, + unsigned maxSymbolValue) +{ + /* Sanity checks */ + if (tableLog==0) tableLog = FSE_DEFAULT_TABLELOG; + if (tableLog < FSE_MIN_TABLELOG) return ERROR(GENERIC); /* Unsupported size */ + if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge); /* Unsupported size */ + if (tableLog < FSE_minTableLog(total, maxSymbolValue)) return ERROR(GENERIC); /* Too small tableLog, compression potentially impossible */ + + { U32 const rtbTable[] = { 0, 473195, 504333, 520860, 550000, 700000, 750000, 830000 }; + + U64 const scale = 62 - tableLog; + U64 const step = ((U64)1<<62) / total; /* <== here, one division ! 
*/ + U64 const vStep = 1ULL<<(scale-20); + int stillToDistribute = 1<<tableLog; + unsigned s; + unsigned largest=0; + short largestP=0; + U32 lowThreshold = (U32)(total >> tableLog); + + for (s=0; s<=maxSymbolValue; s++) { + if (count[s] == total) return 0; /* rle special case */ + if (count[s] == 0) { normalizedCounter[s]=0; continue; } + if (count[s] <= lowThreshold) { + normalizedCounter[s] = -1; + stillToDistribute--; + } else { + short proba = (short)((count[s]*step) >> scale); + if (proba<8) { + U64 restToBeat = vStep * rtbTable[proba]; + proba += (count[s]*step) - ((U64)proba<<scale) > restToBeat; + } + if (proba > largestP) largestP=proba, largest=s; + normalizedCounter[s] = proba; + stillToDistribute -= proba; + } } + if (-stillToDistribute >= (normalizedCounter[largest] >> 1)) { + /* corner case, need another normalization method */ + size_t errorCode = FSE_normalizeM2(normalizedCounter, tableLog, count, total, maxSymbolValue); + if (FSE_isError(errorCode)) return errorCode; + } + else normalizedCounter[largest] += (short)stillToDistribute; + } + +#if 0 + { /* Print Table (debug) */ + U32 s; + U32 nTotal = 0; + for (s=0; s<=maxSymbolValue; s++) + printf("%3i: %4i \n", s, normalizedCounter[s]); + for (s=0; s<=maxSymbolValue; s++) + nTotal += abs(normalizedCounter[s]); + if (nTotal != (1U<<tableLog)) + printf("Warning !!! Total == %u != %u !!!", nTotal, 1U<<tableLog); + getchar(); + } +#endif + + return tableLog; +} + + +/* fake FSE_CTable, for raw (uncompressed) input */ +size_t FSE_buildCTable_raw (FSE_CTable* ct, unsigned nbBits) +{ + const unsigned tableSize = 1 << nbBits; + const unsigned tableMask = tableSize - 1; + const unsigned maxSymbolValue = tableMask; + void* const ptr = ct; + U16* const tableU16 = ( (U16*) ptr) + 2; + void* const FSCT = ((U32*)ptr) + 1 /* header */ + (tableSize>>1); /* assumption : tableLog >= 1 */ + FSE_symbolCompressionTransform* const symbolTT = (FSE_symbolCompressionTransform*) (FSCT); + unsigned s; + + /* Sanity checks */ + if (nbBits < 1) return ERROR(GENERIC); /* min size */ + + /* header */ + tableU16[-2] = (U16) nbBits; + tableU16[-1] = (U16) maxSymbolValue; + + /* Build table */ + for (s=0; s<tableSize; s++) + tableU16[s] = (U16)(tableSize + s); + + /* Build Symbol Transformation Table */ + { const U32 deltaNbBits = (nbBits << 16) - (1 << nbBits); + + for (s=0; s<=maxSymbolValue; s++) { + symbolTT[s].deltaNbBits = deltaNbBits; + symbolTT[s].deltaFindState = s-1; + } } + + + return 0; +} + +/* fake FSE_CTable, for rle (100% always same symbol) input */ +size_t FSE_buildCTable_rle (FSE_CTable* ct, BYTE symbolValue) +{ + void* ptr = ct; + U16* tableU16 = ( (U16*) ptr) + 2; + void* FSCTptr = (U32*)ptr + 2; + FSE_symbolCompressionTransform* symbolTT = (FSE_symbolCompressionTransform*) FSCTptr; + + /* header */ + tableU16[-2] = (U16) 0; + tableU16[-1] = (U16) symbolValue; + + /* Build table */ + tableU16[0] = 0; + tableU16[1] = 0; /* just in case */ + + /* Build Symbol Transformation Table */ + symbolTT[symbolValue].deltaNbBits = 0; + symbolTT[symbolValue].deltaFindState = 0; + + return 0; +} + + +static size_t FSE_compress_usingCTable_generic (void* dst, size_t dstSize, + const void* src, size_t srcSize, + const FSE_CTable* ct, const unsigned fast) +{ + const BYTE* const istart = (const BYTE*) src; + const BYTE* const iend = istart + srcSize; + const BYTE* ip=iend; + + + BIT_CStream_t bitC; + FSE_CState_t CState1, CState2; + + /* init */ + if (srcSize <= 2) return 0; + { size_t const errorCode = BIT_initCStream(&bitC, dst, dstSize); + if 
(FSE_isError(errorCode)) return 0; } + +#define FSE_FLUSHBITS(s) (fast ? BIT_flushBitsFast(s) : BIT_flushBits(s)) + + if (srcSize & 1) { + FSE_initCState2(&CState1, ct, *--ip); + FSE_initCState2(&CState2, ct, *--ip); + FSE_encodeSymbol(&bitC, &CState1, *--ip); + FSE_FLUSHBITS(&bitC); + } else { + FSE_initCState2(&CState2, ct, *--ip); + FSE_initCState2(&CState1, ct, *--ip); + } + + /* join to mod 4 */ + srcSize -= 2; + if ((sizeof(bitC.bitContainer)*8 > FSE_MAX_TABLELOG*4+7 ) && (srcSize & 2)) { /* test bit 2 */ + FSE_encodeSymbol(&bitC, &CState2, *--ip); + FSE_encodeSymbol(&bitC, &CState1, *--ip); + FSE_FLUSHBITS(&bitC); + } + + /* 2 or 4 encoding per loop */ + for ( ; ip>istart ; ) { + + FSE_encodeSymbol(&bitC, &CState2, *--ip); + + if (sizeof(bitC.bitContainer)*8 < FSE_MAX_TABLELOG*2+7 ) /* this test must be static */ + FSE_FLUSHBITS(&bitC); + + FSE_encodeSymbol(&bitC, &CState1, *--ip); + + if (sizeof(bitC.bitContainer)*8 > FSE_MAX_TABLELOG*4+7 ) { /* this test must be static */ + FSE_encodeSymbol(&bitC, &CState2, *--ip); + FSE_encodeSymbol(&bitC, &CState1, *--ip); + } + + FSE_FLUSHBITS(&bitC); + } + + FSE_flushCState(&bitC, &CState2); + FSE_flushCState(&bitC, &CState1); + return BIT_closeCStream(&bitC); +} + +size_t FSE_compress_usingCTable (void* dst, size_t dstSize, + const void* src, size_t srcSize, + const FSE_CTable* ct) +{ + const unsigned fast = (dstSize >= FSE_BLOCKBOUND(srcSize)); + + if (fast) + return FSE_compress_usingCTable_generic(dst, dstSize, src, srcSize, ct, 1); + else + return FSE_compress_usingCTable_generic(dst, dstSize, src, srcSize, ct, 0); +} + + +size_t FSE_compressBound(size_t size) { return FSE_COMPRESSBOUND(size); } + +size_t FSE_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog) +{ + const BYTE* const istart = (const BYTE*) src; + const BYTE* ip = istart; + + BYTE* const ostart = (BYTE*) dst; + BYTE* op = ostart; + BYTE* const oend = ostart + dstSize; + + U32 count[FSE_MAX_SYMBOL_VALUE+1]; + S16 norm[FSE_MAX_SYMBOL_VALUE+1]; + CTable_max_t ct; + size_t errorCode; + + /* init conditions */ + if (srcSize <= 1) return 0; /* Uncompressible */ + if (!maxSymbolValue) maxSymbolValue = FSE_MAX_SYMBOL_VALUE; + if (!tableLog) tableLog = FSE_DEFAULT_TABLELOG; + + /* Scan input and build symbol stats */ + errorCode = FSE_count (count, &maxSymbolValue, ip, srcSize); + if (FSE_isError(errorCode)) return errorCode; + if (errorCode == srcSize) return 1; + if (errorCode == 1) return 0; /* each symbol only present once */ + if (errorCode < (srcSize >> 7)) return 0; /* Heuristic : not compressible enough */ + + tableLog = FSE_optimalTableLog(tableLog, srcSize, maxSymbolValue); + errorCode = FSE_normalizeCount (norm, tableLog, count, srcSize, maxSymbolValue); + if (FSE_isError(errorCode)) return errorCode; + + /* Write table description header */ + errorCode = FSE_writeNCount (op, oend-op, norm, maxSymbolValue, tableLog); + if (FSE_isError(errorCode)) return errorCode; + op += errorCode; + + /* Compress */ + errorCode = FSE_buildCTable (ct, norm, maxSymbolValue, tableLog); + if (FSE_isError(errorCode)) return errorCode; + errorCode = FSE_compress_usingCTable(op, oend - op, ip, srcSize, ct); + if (errorCode == 0) return 0; /* not enough space for compressed data */ + op += errorCode; + + /* check compressibility */ + if ( (size_t)(op-ostart) >= srcSize-1 ) + return 0; + + return op-ostart; +} + +size_t FSE_compress (void* dst, size_t dstSize, const void* src, size_t srcSize) +{ + return FSE_compress2(dst, dstSize, 
src, (U32)srcSize, FSE_MAX_SYMBOL_VALUE, FSE_DEFAULT_TABLELOG); +} + + +#endif /* FSE_COMMONDEFS_ONLY */ diff --git a/contrib/libs/zstd06/compress/huf_compress.c b/contrib/libs/zstd06/compress/huf_compress.c new file mode 100644 index 0000000000..d126305c6f --- /dev/null +++ b/contrib/libs/zstd06/compress/huf_compress.c @@ -0,0 +1,560 @@ +/* ****************************************************************** + Huffman encoder, part of New Generation Entropy library + Copyright (C) 2013-2016, Yann Collet. + + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ + You can contact the author at : + - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy + - Public forum : https://groups.google.com/forum/#!forum/lz4c +****************************************************************** */ + +/* ************************************************************** +* Compiler specifics +****************************************************************/ +#if defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) +/* inline is defined */ +#elif defined(_MSC_VER) +# define inline __inline +#else +# define inline /* disable inline */ +#endif + + +#ifdef _MSC_VER /* Visual Studio */ +# define FORCE_INLINE static __forceinline +# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ +#else +# ifdef __GNUC__ +# define FORCE_INLINE static inline __attribute__((always_inline)) +# else +# define FORCE_INLINE static inline +# endif +#endif + + +/* ************************************************************** +* Includes +****************************************************************/ +#include <stdlib.h> /* malloc, free, qsort */ +#include <string.h> /* memcpy, memset */ +#include <stdio.h> /* printf (debug) */ +#include "huf_static.h" +#include "bitstream.h" +#include "fse.h" /* header compression */ + + +/* ************************************************************** +* Error Management +****************************************************************/ +#define HUF_STATIC_ASSERT(c) { enum { HUF_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */ + + +/* ******************************************************* +* HUF : Huffman block compression +*********************************************************/ +struct HUF_CElt_s { + U16 val; + BYTE nbBits; +}; /* typedef'd to HUF_CElt within huf_static.h */ + +typedef struct nodeElt_s { + U32 count; + U16 parent; + BYTE byte; + BYTE nbBits; +} nodeElt; + +/*! HUF_writeCTable() : + `CTable` : huffman tree to save, using huf representation. 
+ @return : size of saved CTable */ +size_t HUF_writeCTable (void* dst, size_t maxDstSize, + const HUF_CElt* CTable, U32 maxSymbolValue, U32 huffLog) +{ + BYTE bitsToWeight[HUF_MAX_TABLELOG + 1]; + BYTE huffWeight[HUF_MAX_SYMBOL_VALUE + 1]; + U32 n; + BYTE* op = (BYTE*)dst; + size_t size; + + /* check conditions */ + if (maxSymbolValue > HUF_MAX_SYMBOL_VALUE + 1) + return ERROR(GENERIC); + + /* convert to weight */ + bitsToWeight[0] = 0; + for (n=1; n<=huffLog; n++) + bitsToWeight[n] = (BYTE)(huffLog + 1 - n); + for (n=0; n<maxSymbolValue; n++) + huffWeight[n] = bitsToWeight[CTable[n].nbBits]; + + size = FSE_compress(op+1, maxDstSize-1, huffWeight, maxSymbolValue); /* don't need last symbol stat : implied */ + if (HUF_isError(size)) return size; + if (size >= 128) return ERROR(GENERIC); /* should never happen, since maxSymbolValue <= 255 */ + if ((size <= 1) || (size >= maxSymbolValue/2)) { + if (size==1) { /* RLE */ + /* only possible case : serie of 1 (because there are at least 2) */ + /* can only be 2^n or (2^n-1), otherwise not an huffman tree */ + BYTE code; + switch(maxSymbolValue) + { + case 1: code = 0; break; + case 2: code = 1; break; + case 3: code = 2; break; + case 4: code = 3; break; + case 7: code = 4; break; + case 8: code = 5; break; + case 15: code = 6; break; + case 16: code = 7; break; + case 31: code = 8; break; + case 32: code = 9; break; + case 63: code = 10; break; + case 64: code = 11; break; + case 127: code = 12; break; + case 128: code = 13; break; + default : return ERROR(corruption_detected); + } + op[0] = (BYTE)(255-13 + code); + return 1; + } + /* Not compressible */ + if (maxSymbolValue > (241-128)) return ERROR(GENERIC); /* not implemented (not possible with current format) */ + if (((maxSymbolValue+1)/2) + 1 > maxDstSize) return ERROR(dstSize_tooSmall); /* not enough space within dst buffer */ + op[0] = (BYTE)(128 /*special case*/ + 0 /* Not Compressible */ + (maxSymbolValue-1)); + huffWeight[maxSymbolValue] = 0; /* to be sure it doesn't cause issue in final combination */ + for (n=0; n<maxSymbolValue; n+=2) + op[(n/2)+1] = (BYTE)((huffWeight[n] << 4) + huffWeight[n+1]); + return ((maxSymbolValue+1)/2) + 1; + } + + /* normal header case */ + op[0] = (BYTE)size; + return size+1; +} + + + +size_t HUF_readCTable (HUF_CElt* CTable, U32 maxSymbolValue, const void* src, size_t srcSize) +{ + BYTE huffWeight[HUF_MAX_SYMBOL_VALUE + 1]; + U32 rankVal[HUF_ABSOLUTEMAX_TABLELOG + 1]; /* large enough for values from 0 to 16 */ + U32 tableLog = 0; + size_t readSize; + U32 nbSymbols = 0; + //memset(huffWeight, 0, sizeof(huffWeight)); /* is not necessary, even though some analyzer complain ... 
*/ + + /* get symbol weights */ + readSize = HUF_readStats(huffWeight, HUF_MAX_SYMBOL_VALUE+1, rankVal, &nbSymbols, &tableLog, src, srcSize); + if (HUF_isError(readSize)) return readSize; + + /* check result */ + if (tableLog > HUF_MAX_TABLELOG) return ERROR(tableLog_tooLarge); + if (nbSymbols > maxSymbolValue+1) return ERROR(maxSymbolValue_tooSmall); + + /* Prepare base value per rank */ + { U32 n, nextRankStart = 0; + for (n=1; n<=tableLog; n++) { + U32 current = nextRankStart; + nextRankStart += (rankVal[n] << (n-1)); + rankVal[n] = current; + } } + + /* fill nbBits */ + { U32 n; for (n=0; n<nbSymbols; n++) { + const U32 w = huffWeight[n]; + CTable[n].nbBits = (BYTE)(tableLog + 1 - w); + }} + + /* fill val */ + { U16 nbPerRank[HUF_MAX_TABLELOG+1] = {0}; + U16 valPerRank[HUF_MAX_TABLELOG+1] = {0}; + { U32 n; for (n=0; n<nbSymbols; n++) nbPerRank[CTable[n].nbBits]++; } + /* determine stating value per rank */ + { U16 min = 0; + U32 n; for (n=HUF_MAX_TABLELOG; n>0; n--) { + valPerRank[n] = min; /* get starting value within each rank */ + min += nbPerRank[n]; + min >>= 1; + } } + /* assign value within rank, symbol order */ + { U32 n; for (n=0; n<=maxSymbolValue; n++) CTable[n].val = valPerRank[CTable[n].nbBits]++; } + } + + return readSize; +} + + +static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits) +{ + const U32 largestBits = huffNode[lastNonNull].nbBits; + if (largestBits <= maxNbBits) return largestBits; /* early exit : no elt > maxNbBits */ + + /* there are several too large elements (at least >= 2) */ + { int totalCost = 0; + const U32 baseCost = 1 << (largestBits - maxNbBits); + U32 n = lastNonNull; + + while (huffNode[n].nbBits > maxNbBits) { + totalCost += baseCost - (1 << (largestBits - huffNode[n].nbBits)); + huffNode[n].nbBits = (BYTE)maxNbBits; + n --; + } /* n stops at huffNode[n].nbBits <= maxNbBits */ + while (huffNode[n].nbBits == maxNbBits) n--; /* n end at index of smallest symbol using < maxNbBits */ + + /* renorm totalCost */ + totalCost >>= (largestBits - maxNbBits); /* note : totalCost is necessarily a multiple of baseCost */ + + /* repay normalized cost */ + { U32 const noSymbol = 0xF0F0F0F0; + U32 rankLast[HUF_MAX_TABLELOG+1]; + int pos; + + /* Get pos of last (smallest) symbol per rank */ + memset(rankLast, 0xF0, sizeof(rankLast)); + { U32 currentNbBits = maxNbBits; + for (pos=n ; pos >= 0; pos--) { + if (huffNode[pos].nbBits >= currentNbBits) continue; + currentNbBits = huffNode[pos].nbBits; /* < maxNbBits */ + rankLast[maxNbBits-currentNbBits] = pos; + } } + + while (totalCost > 0) { + U32 nBitsToDecrease = BIT_highbit32(totalCost) + 1; + for ( ; nBitsToDecrease > 1; nBitsToDecrease--) { + U32 highPos = rankLast[nBitsToDecrease]; + U32 lowPos = rankLast[nBitsToDecrease-1]; + if (highPos == noSymbol) continue; + if (lowPos == noSymbol) break; + { U32 const highTotal = huffNode[highPos].count; + U32 const lowTotal = 2 * huffNode[lowPos].count; + if (highTotal <= lowTotal) break; + } } + /* only triggered when no more rank 1 symbol left => find closest one (note : there is necessarily at least one !) 
*/ + while ((nBitsToDecrease<=HUF_MAX_TABLELOG) && (rankLast[nBitsToDecrease] == noSymbol)) /* HUF_MAX_TABLELOG test just to please gcc 5+; but it should not be necessary */ + nBitsToDecrease ++; + totalCost -= 1 << (nBitsToDecrease-1); + if (rankLast[nBitsToDecrease-1] == noSymbol) + rankLast[nBitsToDecrease-1] = rankLast[nBitsToDecrease]; /* this rank is no longer empty */ + huffNode[rankLast[nBitsToDecrease]].nbBits ++; + if (rankLast[nBitsToDecrease] == 0) /* special case, reached largest symbol */ + rankLast[nBitsToDecrease] = noSymbol; + else { + rankLast[nBitsToDecrease]--; + if (huffNode[rankLast[nBitsToDecrease]].nbBits != maxNbBits-nBitsToDecrease) + rankLast[nBitsToDecrease] = noSymbol; /* this rank is now empty */ + } } /* while (totalCost > 0) */ + + while (totalCost < 0) { /* Sometimes, cost correction overshoot */ + if (rankLast[1] == noSymbol) { /* special case : no rank 1 symbol (using maxNbBits-1); let's create one from largest rank 0 (using maxNbBits) */ + while (huffNode[n].nbBits == maxNbBits) n--; + huffNode[n+1].nbBits--; + rankLast[1] = n+1; + totalCost++; + continue; + } + huffNode[ rankLast[1] + 1 ].nbBits--; + rankLast[1]++; + totalCost ++; + } } } /* there are several too large elements (at least >= 2) */ + + return maxNbBits; +} + + +typedef struct { + U32 base; + U32 current; +} rankPos; + +static void HUF_sort(nodeElt* huffNode, const U32* count, U32 maxSymbolValue) +{ + rankPos rank[32]; + U32 n; + + memset(rank, 0, sizeof(rank)); + for (n=0; n<=maxSymbolValue; n++) { + U32 r = BIT_highbit32(count[n] + 1); + rank[r].base ++; + } + for (n=30; n>0; n--) rank[n-1].base += rank[n].base; + for (n=0; n<32; n++) rank[n].current = rank[n].base; + for (n=0; n<=maxSymbolValue; n++) { + U32 const c = count[n]; + U32 const r = BIT_highbit32(c+1) + 1; + U32 pos = rank[r].current++; + while ((pos > rank[r].base) && (c > huffNode[pos-1].count)) huffNode[pos]=huffNode[pos-1], pos--; + huffNode[pos].count = c; + huffNode[pos].byte = (BYTE)n; + } +} + + +#define STARTNODE (HUF_MAX_SYMBOL_VALUE+1) +size_t HUF_buildCTable (HUF_CElt* tree, const U32* count, U32 maxSymbolValue, U32 maxNbBits) +{ + nodeElt huffNode0[2*HUF_MAX_SYMBOL_VALUE+1 +1]; + nodeElt* huffNode = huffNode0 + 1; + U32 n, nonNullRank; + int lowS, lowN; + U16 nodeNb = STARTNODE; + U32 nodeRoot; + + /* safety checks */ + if (maxNbBits == 0) maxNbBits = HUF_DEFAULT_TABLELOG; + if (maxSymbolValue > HUF_MAX_SYMBOL_VALUE) return ERROR(GENERIC); + memset(huffNode0, 0, sizeof(huffNode0)); + + /* sort, decreasing order */ + HUF_sort(huffNode, count, maxSymbolValue); + + /* init for parents */ + nonNullRank = maxSymbolValue; + while(huffNode[nonNullRank].count == 0) nonNullRank--; + lowS = nonNullRank; nodeRoot = nodeNb + lowS - 1; lowN = nodeNb; + huffNode[nodeNb].count = huffNode[lowS].count + huffNode[lowS-1].count; + huffNode[lowS].parent = huffNode[lowS-1].parent = nodeNb; + nodeNb++; lowS-=2; + for (n=nodeNb; n<=nodeRoot; n++) huffNode[n].count = (U32)(1U<<30); + huffNode0[0].count = (U32)(1U<<31); + + /* create parents */ + while (nodeNb <= nodeRoot) { + U32 n1 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++; + U32 n2 = (huffNode[lowS].count < huffNode[lowN].count) ? 
lowS-- : lowN++; + huffNode[nodeNb].count = huffNode[n1].count + huffNode[n2].count; + huffNode[n1].parent = huffNode[n2].parent = nodeNb; + nodeNb++; + } + + /* distribute weights (unlimited tree height) */ + huffNode[nodeRoot].nbBits = 0; + for (n=nodeRoot-1; n>=STARTNODE; n--) + huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1; + for (n=0; n<=nonNullRank; n++) + huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1; + + /* enforce maxTableLog */ + maxNbBits = HUF_setMaxHeight(huffNode, nonNullRank, maxNbBits); + + /* fill result into tree (val, nbBits) */ + { U16 nbPerRank[HUF_MAX_TABLELOG+1] = {0}; + U16 valPerRank[HUF_MAX_TABLELOG+1] = {0}; + if (maxNbBits > HUF_MAX_TABLELOG) return ERROR(GENERIC); /* check fit into table */ + for (n=0; n<=nonNullRank; n++) + nbPerRank[huffNode[n].nbBits]++; + /* determine stating value per rank */ + { U16 min = 0; + for (n=maxNbBits; n>0; n--) { + valPerRank[n] = min; /* get starting value within each rank */ + min += nbPerRank[n]; + min >>= 1; + } } + for (n=0; n<=maxSymbolValue; n++) + tree[huffNode[n].byte].nbBits = huffNode[n].nbBits; /* push nbBits per symbol, symbol order */ + for (n=0; n<=maxSymbolValue; n++) + tree[n].val = valPerRank[tree[n].nbBits]++; /* assign value within rank, symbol order */ + } + + return maxNbBits; +} + +static void HUF_encodeSymbol(BIT_CStream_t* bitCPtr, U32 symbol, const HUF_CElt* CTable) +{ + BIT_addBitsFast(bitCPtr, CTable[symbol].val, CTable[symbol].nbBits); +} + +size_t HUF_compressBound(size_t size) { return HUF_COMPRESSBOUND(size); } + +#define HUF_FLUSHBITS(s) (fast ? BIT_flushBitsFast(s) : BIT_flushBits(s)) + +#define HUF_FLUSHBITS_1(stream) \ + if (sizeof((stream)->bitContainer)*8 < HUF_MAX_TABLELOG*2+7) HUF_FLUSHBITS(stream) + +#define HUF_FLUSHBITS_2(stream) \ + if (sizeof((stream)->bitContainer)*8 < HUF_MAX_TABLELOG*4+7) HUF_FLUSHBITS(stream) + +size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable) +{ + const BYTE* ip = (const BYTE*) src; + BYTE* const ostart = (BYTE*)dst; + BYTE* const oend = ostart + dstSize; + BYTE* op = ostart; + size_t n; + const unsigned fast = (dstSize >= HUF_BLOCKBOUND(srcSize)); + BIT_CStream_t bitC; + + /* init */ + if (dstSize < 8) return 0; /* not enough space to compress */ + { size_t const errorCode = BIT_initCStream(&bitC, op, oend-op); + if (HUF_isError(errorCode)) return 0; } + + n = srcSize & ~3; /* join to mod 4 */ + switch (srcSize & 3) + { + case 3 : HUF_encodeSymbol(&bitC, ip[n+ 2], CTable); + HUF_FLUSHBITS_2(&bitC); + case 2 : HUF_encodeSymbol(&bitC, ip[n+ 1], CTable); + HUF_FLUSHBITS_1(&bitC); + case 1 : HUF_encodeSymbol(&bitC, ip[n+ 0], CTable); + HUF_FLUSHBITS(&bitC); + case 0 : + default: ; + } + + for (; n>0; n-=4) { /* note : n&3==0 at this stage */ + HUF_encodeSymbol(&bitC, ip[n- 1], CTable); + HUF_FLUSHBITS_1(&bitC); + HUF_encodeSymbol(&bitC, ip[n- 2], CTable); + HUF_FLUSHBITS_2(&bitC); + HUF_encodeSymbol(&bitC, ip[n- 3], CTable); + HUF_FLUSHBITS_1(&bitC); + HUF_encodeSymbol(&bitC, ip[n- 4], CTable); + HUF_FLUSHBITS(&bitC); + } + + return BIT_closeCStream(&bitC); +} + + +size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable) +{ + size_t segmentSize = (srcSize+3)/4; /* first 3 segments */ + const BYTE* ip = (const BYTE*) src; + const BYTE* const iend = ip + srcSize; + BYTE* const ostart = (BYTE*) dst; + BYTE* const oend = ostart + dstSize; + BYTE* op = ostart; + size_t errorCode; + + if (dstSize < 6 + 1 + 1 + 
1 + 8) return 0; /* minimum space to compress successfully */ + if (srcSize < 12) return 0; /* no saving possible : too small input */ + op += 6; /* jumpTable */ + + errorCode = HUF_compress1X_usingCTable(op, oend-op, ip, segmentSize, CTable); + if (HUF_isError(errorCode)) return errorCode; + if (errorCode==0) return 0; + MEM_writeLE16(ostart, (U16)errorCode); + + ip += segmentSize; + op += errorCode; + errorCode = HUF_compress1X_usingCTable(op, oend-op, ip, segmentSize, CTable); + if (HUF_isError(errorCode)) return errorCode; + if (errorCode==0) return 0; + MEM_writeLE16(ostart+2, (U16)errorCode); + + ip += segmentSize; + op += errorCode; + errorCode = HUF_compress1X_usingCTable(op, oend-op, ip, segmentSize, CTable); + if (HUF_isError(errorCode)) return errorCode; + if (errorCode==0) return 0; + MEM_writeLE16(ostart+4, (U16)errorCode); + + ip += segmentSize; + op += errorCode; + errorCode = HUF_compress1X_usingCTable(op, oend-op, ip, iend-ip, CTable); + if (HUF_isError(errorCode)) return errorCode; + if (errorCode==0) return 0; + + op += errorCode; + return op-ostart; +} + + +static size_t HUF_compress_internal ( + void* dst, size_t dstSize, + const void* src, size_t srcSize, + unsigned maxSymbolValue, unsigned huffLog, + unsigned singleStream) +{ + BYTE* const ostart = (BYTE*)dst; + BYTE* const oend = ostart + dstSize; + BYTE* op = ostart; + + U32 count[HUF_MAX_SYMBOL_VALUE+1]; + HUF_CElt CTable[HUF_MAX_SYMBOL_VALUE+1]; + size_t errorCode; + + /* checks & inits */ + if (srcSize < 1) return 0; /* Uncompressed - note : 1 means rle, so first byte must be correct */ + if (dstSize < 1) return 0; /* not compressible within dst budget */ + if (srcSize > 128 * 1024) return ERROR(srcSize_wrong); /* current block size limit */ + if (huffLog > HUF_MAX_TABLELOG) return ERROR(tableLog_tooLarge); + if (!maxSymbolValue) maxSymbolValue = HUF_MAX_SYMBOL_VALUE; + if (!huffLog) huffLog = HUF_DEFAULT_TABLELOG; + + /* Scan input and build symbol stats */ + errorCode = FSE_count (count, &maxSymbolValue, (const BYTE*)src, srcSize); + if (HUF_isError(errorCode)) return errorCode; + if (errorCode == srcSize) { *ostart = ((const BYTE*)src)[0]; return 1; } + if (errorCode <= (srcSize >> 7)+1) return 0; /* Heuristic : not compressible enough */ + + /* Build Huffman Tree */ + errorCode = HUF_buildCTable (CTable, count, maxSymbolValue, huffLog); + if (HUF_isError(errorCode)) return errorCode; + huffLog = (U32)errorCode; + + /* Write table description header */ + errorCode = HUF_writeCTable (op, dstSize, CTable, maxSymbolValue, huffLog); + if (HUF_isError(errorCode)) return errorCode; + if (errorCode + 12 >= srcSize) return 0; /* not useful to try compression */ + op += errorCode; + + /* Compress */ + if (singleStream) + errorCode = HUF_compress1X_usingCTable(op, oend - op, src, srcSize, CTable); /* single segment */ + else + errorCode = HUF_compress4X_usingCTable(op, oend - op, src, srcSize, CTable); + if (HUF_isError(errorCode)) return errorCode; + if (errorCode==0) return 0; + op += errorCode; + + /* check compressibility */ + if ((size_t)(op-ostart) >= srcSize-1) + return 0; + + return op-ostart; +} + + +size_t HUF_compress1X (void* dst, size_t dstSize, + const void* src, size_t srcSize, + unsigned maxSymbolValue, unsigned huffLog) +{ + return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 1); +} + +size_t HUF_compress2 (void* dst, size_t dstSize, + const void* src, size_t srcSize, + unsigned maxSymbolValue, unsigned huffLog) +{ + return HUF_compress_internal(dst, dstSize, src, srcSize, 
maxSymbolValue, huffLog, 0); +} + + +size_t HUF_compress (void* dst, size_t maxDstSize, const void* src, size_t srcSize) +{ + return HUF_compress2(dst, maxDstSize, src, (U32)srcSize, 255, HUF_DEFAULT_TABLELOG); +} diff --git a/contrib/libs/zstd06/compress/zbuff_compress.c b/contrib/libs/zstd06/compress/zbuff_compress.c new file mode 100644 index 0000000000..260aca0870 --- /dev/null +++ b/contrib/libs/zstd06/compress/zbuff_compress.c @@ -0,0 +1,291 @@ +/* + Buffered version of Zstd compression library + Copyright (C) 2015-2016, Yann Collet. + + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + You can contact the author at : + - zstd homepage : http://www.zstd.net/ +*/ + + +/* ************************************* +* Dependencies +***************************************/ +#include <stdlib.h> +#include "error_private.h" +#include "zstd_internal.h" /* MIN, ZSTD_blockHeaderSize */ +#include "zstd_static.h" /* ZSTD_BLOCKSIZE_MAX */ +#include "zbuff_static.h" + + +/* ************************************* +* Constants +***************************************/ +static size_t const ZBUFF_endFrameSize = ZSTD_BLOCKHEADERSIZE; + + +/*_************************************************** +* Streaming compression +* +* A ZBUFF_CCtx object is required to track streaming operation. +* Use ZBUFF_createCCtx() and ZBUFF_freeCCtx() to create/release resources. +* Use ZBUFF_compressInit() to start a new compression operation. +* ZBUFF_CCtx objects can be reused multiple times. +* +* Use ZBUFF_compressContinue() repetitively to consume your input. +* *srcSizePtr and *dstCapacityPtr can be any size. +* The function will report how many bytes were read or written by modifying *srcSizePtr and *dstCapacityPtr. +* Note that it may not consume the entire input, in which case it's up to the caller to call again the function with remaining input. +* The content of dst will be overwritten (up to *dstCapacityPtr) at each function call, so save its content if it matters or change dst . +* @return : a hint to preferred nb of bytes to use as input for next function call (it's only a hint, to improve latency) +* or an error code, which can be tested using ZBUFF_isError(). 
+* +* ZBUFF_compressFlush() can be used to instruct ZBUFF to compress and output whatever remains within its buffer. +* Note that it will not output more than *dstCapacityPtr. +* Therefore, some content might still be left into its internal buffer if dst buffer is too small. +* @return : nb of bytes still present into internal buffer (0 if it's empty) +* or an error code, which can be tested using ZBUFF_isError(). +* +* ZBUFF_compressEnd() instructs to finish a frame. +* It will perform a flush and write frame epilogue. +* Similar to ZBUFF_compressFlush(), it may not be able to output the entire internal buffer content if *dstCapacityPtr is too small. +* @return : nb of bytes still present into internal buffer (0 if it's empty) +* or an error code, which can be tested using ZBUFF_isError(). +* +* Hint : recommended buffer sizes (not compulsory) +* input : ZSTD_BLOCKSIZE_MAX (128 KB), internal unit size, it improves latency to use this value. +* output : ZSTD_compressBound(ZSTD_BLOCKSIZE_MAX) + ZSTD_blockHeaderSize + ZBUFF_endFrameSize : ensures it's always possible to write/flush/end a full block at best speed. +* **************************************************/ + +typedef enum { ZBUFFcs_init, ZBUFFcs_load, ZBUFFcs_flush } ZBUFF_cStage; + +/* *** Ressources *** */ +struct ZBUFF_CCtx_s { + ZSTD_CCtx* zc; + char* inBuff; + size_t inBuffSize; + size_t inToCompress; + size_t inBuffPos; + size_t inBuffTarget; + size_t blockSize; + char* outBuff; + size_t outBuffSize; + size_t outBuffContentSize; + size_t outBuffFlushedSize; + ZBUFF_cStage stage; +}; /* typedef'd tp ZBUFF_CCtx within "zstd_buffered.h" */ + +ZBUFF_CCtx* ZBUFF_createCCtx(void) +{ + ZBUFF_CCtx* zbc = (ZBUFF_CCtx*)malloc(sizeof(ZBUFF_CCtx)); + if (zbc==NULL) return NULL; + memset(zbc, 0, sizeof(*zbc)); + zbc->zc = ZSTD_createCCtx(); + return zbc; +} + +size_t ZBUFF_freeCCtx(ZBUFF_CCtx* zbc) +{ + if (zbc==NULL) return 0; /* support free on NULL */ + ZSTD_freeCCtx(zbc->zc); + free(zbc->inBuff); + free(zbc->outBuff); + free(zbc); + return 0; +} + + +/* *** Initialization *** */ + +size_t ZBUFF_compressInit_advanced(ZBUFF_CCtx* zbc, + const void* dict, size_t dictSize, + ZSTD_parameters params, U64 pledgedSrcSize) +{ + /* allocate buffers */ + { size_t const neededInBuffSize = (size_t)1 << params.cParams.windowLog; + if (zbc->inBuffSize < neededInBuffSize) { + zbc->inBuffSize = neededInBuffSize; + free(zbc->inBuff); /* should not be necessary */ + zbc->inBuff = (char*)malloc(neededInBuffSize); + if (zbc->inBuff == NULL) return ERROR(memory_allocation); + } + zbc->blockSize = MIN(ZSTD_BLOCKSIZE_MAX, neededInBuffSize/2); + } + if (zbc->outBuffSize < ZSTD_compressBound(zbc->blockSize)+1) { + zbc->outBuffSize = ZSTD_compressBound(zbc->blockSize)+1; + free(zbc->outBuff); /* should not be necessary */ + zbc->outBuff = (char*)malloc(zbc->outBuffSize); + if (zbc->outBuff == NULL) return ERROR(memory_allocation); + } + + { size_t const errorCode = ZSTD_compressBegin_advanced(zbc->zc, dict, dictSize, params, pledgedSrcSize); + if (ZSTD_isError(errorCode)) return errorCode; } + + zbc->inToCompress = 0; + zbc->inBuffPos = 0; + zbc->inBuffTarget = zbc->blockSize; + zbc->outBuffFlushedSize = 0; + zbc->stage = ZBUFFcs_load; + return 0; /* ready to go */ +} + + +size_t ZBUFF_compressInitDictionary(ZBUFF_CCtx* zbc, const void* dict, size_t dictSize, int compressionLevel) +{ + ZSTD_parameters params; + params.cParams = ZSTD_getCParams(compressionLevel, 0, dictSize); + params.fParams.contentSizeFlag = 0; + ZSTD_adjustCParams(&params.cParams, 0, dictSize); 
+ return ZBUFF_compressInit_advanced(zbc, dict, dictSize, params, 0); +} + +size_t ZBUFF_compressInit(ZBUFF_CCtx* zbc, int compressionLevel) +{ + return ZBUFF_compressInitDictionary(zbc, NULL, 0, compressionLevel); +} + + +/* *** Compression *** */ + +static size_t ZBUFF_compressContinue_generic(ZBUFF_CCtx* zbc, + void* dst, size_t* dstCapacityPtr, + const void* src, size_t* srcSizePtr, + int flush) /* aggregate : wait for full block before compressing */ +{ + U32 notDone = 1; + const char* const istart = (const char*)src; + const char* const iend = istart + *srcSizePtr; + const char* ip = istart; + char* const ostart = (char*)dst; + char* const oend = ostart + *dstCapacityPtr; + char* op = ostart; + + while (notDone) { + switch(zbc->stage) + { + case ZBUFFcs_init: return ERROR(init_missing); /* call ZBUFF_compressInit() first ! */ + + case ZBUFFcs_load: + /* complete inBuffer */ + { size_t const toLoad = zbc->inBuffTarget - zbc->inBuffPos; + size_t const loaded = ZBUFF_limitCopy(zbc->inBuff + zbc->inBuffPos, toLoad, ip, iend-ip); + zbc->inBuffPos += loaded; + ip += loaded; + if ( (zbc->inBuffPos==zbc->inToCompress) || (!flush && (toLoad != loaded)) ) { + notDone = 0; break; /* not enough input to get a full block : stop there, wait for more */ + } } + /* compress current block (note : this stage cannot be stopped in the middle) */ + { void* cDst; + size_t cSize; + size_t const iSize = zbc->inBuffPos - zbc->inToCompress; + size_t oSize = oend-op; + if (oSize >= ZSTD_compressBound(iSize)) + cDst = op; /* compress directly into output buffer (avoid flush stage) */ + else + cDst = zbc->outBuff, oSize = zbc->outBuffSize; + cSize = ZSTD_compressContinue(zbc->zc, cDst, oSize, zbc->inBuff + zbc->inToCompress, iSize); + if (ZSTD_isError(cSize)) return cSize; + /* prepare next block */ + zbc->inBuffTarget = zbc->inBuffPos + zbc->blockSize; + if (zbc->inBuffTarget > zbc->inBuffSize) + zbc->inBuffPos = 0, zbc->inBuffTarget = zbc->blockSize; /* note : inBuffSize >= blockSize */ + zbc->inToCompress = zbc->inBuffPos; + if (cDst == op) { op += cSize; break; } /* no need to flush */ + zbc->outBuffContentSize = cSize; + zbc->outBuffFlushedSize = 0; + zbc->stage = ZBUFFcs_flush; /* continue to flush stage */ + } + + case ZBUFFcs_flush: + /* flush into dst */ + { size_t const toFlush = zbc->outBuffContentSize - zbc->outBuffFlushedSize; + size_t const flushed = ZBUFF_limitCopy(op, oend-op, zbc->outBuff + zbc->outBuffFlushedSize, toFlush); + op += flushed; + zbc->outBuffFlushedSize += flushed; + if (toFlush!=flushed) { notDone = 0; break; } /* not enough space within dst to store compressed block : stop there */ + zbc->outBuffContentSize = 0; + zbc->outBuffFlushedSize = 0; + zbc->stage = ZBUFFcs_load; + break; + } + default: + return ERROR(GENERIC); /* impossible */ + } + } + + *srcSizePtr = ip - istart; + *dstCapacityPtr = op - ostart; + { size_t hintInSize = zbc->inBuffTarget - zbc->inBuffPos; + if (hintInSize==0) hintInSize = zbc->blockSize; + return hintInSize; + } +} + +size_t ZBUFF_compressContinue(ZBUFF_CCtx* zbc, + void* dst, size_t* dstCapacityPtr, + const void* src, size_t* srcSizePtr) +{ + return ZBUFF_compressContinue_generic(zbc, dst, dstCapacityPtr, src, srcSizePtr, 0); +} + + + +/* *** Finalize *** */ + +size_t ZBUFF_compressFlush(ZBUFF_CCtx* zbc, void* dst, size_t* dstCapacityPtr) +{ + size_t srcSize = 0; + ZBUFF_compressContinue_generic(zbc, dst, dstCapacityPtr, &srcSize, &srcSize, 1); /* use a valid src address instead of NULL */ + return zbc->outBuffContentSize - zbc->outBuffFlushedSize; +} 
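
The entry points defined above (ZBUFF_compressInit(), ZBUFF_compressContinue(), ZBUFF_compressFlush()) and ZBUFF_compressEnd() just below are meant to be driven in the loop described by the block comment near the top of this file. A minimal caller sketch under stated assumptions: the whole input is in memory, the output buffer is generously sized (e.g. from ZBUFF_recommendedCOutSize() per block) so the flush path never stalls, and the per-call ZBUFF_isError() checks a real caller needs are omitted for brevity.

```c
#include "zstd_buffered.h"   /* public ZBUFF declarations, per the comment on ZBUFF_CCtx_s above */

/* Returns the number of compressed bytes written, or 0 on failure (sketch only). */
static size_t stream_compress(void* dst, size_t dstCapacity,
                              const void* src, size_t srcSize, int level)
{
    ZBUFF_CCtx* const zbc = ZBUFF_createCCtx();
    const char* ip = (const char*)src;
    char* op = (char*)dst;
    char* const oend = op + dstCapacity;
    if (zbc == NULL) return 0;
    (void)ZBUFF_compressInit(zbc, level);     /* error check omitted in this sketch */

    while (srcSize > 0) {
        size_t readSize  = srcSize;                /* in: bytes available ; out: bytes consumed */
        size_t writeSize = (size_t)(oend - op);    /* in: room in dst ; out: bytes written */
        (void)ZBUFF_compressContinue(zbc, op, &writeSize, ip, &readSize);
        ip += readSize;  srcSize -= readSize;
        op += writeSize;
    }
    {   size_t writeSize = (size_t)(oend - op);
        size_t const notFlushed = ZBUFF_compressEnd(zbc, op, &writeSize);  /* 0 once epilogue is out */
        op += writeSize;
        ZBUFF_freeCCtx(zbc);
        return notFlushed ? 0 : (size_t)(op - (char*)dst);
    }
}
```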
+ + +size_t ZBUFF_compressEnd(ZBUFF_CCtx* zbc, void* dst, size_t* dstCapacityPtr) +{ + BYTE* const ostart = (BYTE*)dst; + BYTE* const oend = ostart + *dstCapacityPtr; + BYTE* op = ostart; + size_t outSize = *dstCapacityPtr; + size_t epilogueSize, remaining; + ZBUFF_compressFlush(zbc, dst, &outSize); /* flush any remaining inBuff */ + op += outSize; + epilogueSize = ZSTD_compressEnd(zbc->zc, zbc->outBuff + zbc->outBuffContentSize, zbc->outBuffSize - zbc->outBuffContentSize); /* epilogue into outBuff */ + zbc->outBuffContentSize += epilogueSize; + outSize = oend-op; + zbc->stage = ZBUFFcs_flush; + remaining = ZBUFF_compressFlush(zbc, op, &outSize); /* attempt to flush epilogue into dst */ + op += outSize; + if (!remaining) zbc->stage = ZBUFFcs_init; /* close only if nothing left to flush */ + *dstCapacityPtr = op-ostart; /* tells how many bytes were written */ + return remaining; +} + + + +/* ************************************* +* Tool functions +***************************************/ +size_t ZBUFF_recommendedCInSize(void) { return ZSTD_BLOCKSIZE_MAX; } +size_t ZBUFF_recommendedCOutSize(void) { return ZSTD_compressBound(ZSTD_BLOCKSIZE_MAX) + ZSTD_blockHeaderSize + ZBUFF_endFrameSize; } diff --git a/contrib/libs/zstd06/compress/zstd_compress.c b/contrib/libs/zstd06/compress/zstd_compress.c new file mode 100644 index 0000000000..1bb75c68cc --- /dev/null +++ b/contrib/libs/zstd06/compress/zstd_compress.c @@ -0,0 +1,2565 @@ +/* + ZSTD HC - High Compression Mode of Zstandard + Copyright (C) 2015-2016, Yann Collet. + + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+
+   You can contact the author at :
+   - Zstd source repository : https://www.zstd.net
+*/
+
+
+/* *******************************************************
+*  Compiler specifics
+*********************************************************/
+#ifdef _MSC_VER    /* Visual Studio */
+#  define FORCE_INLINE static __forceinline
+#  include <intrin.h>                    /* For Visual 2005 */
+#  pragma warning(disable : 4127)        /* disable: C4127: conditional expression is constant */
+#else
+#  ifdef __GNUC__
+#    define FORCE_INLINE static inline __attribute__((always_inline))
+#  else
+#    define FORCE_INLINE static inline
+#  endif
+#endif
+
+
+/*-*************************************
+*  Dependencies
+***************************************/
+#include <stdlib.h>   /* malloc */
+#include <string.h>   /* memset */
+#include "mem.h"
+#include "fse_static.h"
+#include "huf_static.h"
+#include "zstd_internal.h"
+
+
+/*-*************************************
+*  Constants
+***************************************/
+static const U32 g_searchStrength = 8;   /* controls skipping over incompressible data */
+
+
+/*-*************************************
+*  Helper functions
+***************************************/
+size_t ZSTD_compressBound(size_t srcSize) { return FSE_compressBound(srcSize) + 12; }
+
+
+/*-*************************************
+*  Sequence storage
+***************************************/
+static void ZSTD_resetSeqStore(seqStore_t* ssPtr)
+{
+    ssPtr->offset = ssPtr->offsetStart;
+    ssPtr->lit = ssPtr->litStart;
+    ssPtr->litLength = ssPtr->litLengthStart;
+    ssPtr->matchLength = ssPtr->matchLengthStart;
+    ssPtr->longLengthID = 0;
+}
+
+
+/*-*************************************
+*  Context memory management
+***************************************/
+struct ZSTD_CCtx_s
+{
+    const BYTE* nextSrc;    /* next block here to continue on current prefix */
+    const BYTE* base;       /* All regular indexes relative to this position */
+    const BYTE* dictBase;   /* extDict indexes relative to this position */
+    U32 dictLimit;          /* below that point, need extDict */
+    U32 lowLimit;           /* below that point, no more data */
+    U32 nextToUpdate;       /* index from which to continue dictionary update */
+    U32 nextToUpdate3;      /* index from which to continue dictionary update */
+    U32 hashLog3;           /* dispatch table : larger == faster, more memory */
+    U32 loadedDictEnd;
+    U32 stage;              /* 0: created; 1: init,dictLoad; 2: started */
+    ZSTD_parameters params;
+    void* workSpace;
+    size_t workSpaceSize;
+    size_t blockSize;
+
+    seqStore_t seqStore;    /* sequences storage ptrs */
+    U32* hashTable;
+    U32* hashTable3;
+    U32* chainTable;
+    HUF_CElt* hufTable;
+    U32 flagStaticTables;
+    FSE_CTable offcodeCTable [FSE_CTABLE_SIZE_U32(OffFSELog, MaxOff)];
+    FSE_CTable matchlengthCTable [FSE_CTABLE_SIZE_U32(MLFSELog, MaxML)];
+    FSE_CTable litlengthCTable [FSE_CTABLE_SIZE_U32(LLFSELog, MaxLL)];
+};
+
+ZSTD_CCtx* ZSTD_createCCtx(void)
+{
+    return (ZSTD_CCtx*) calloc(1, sizeof(ZSTD_CCtx));
+}
+
+size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx)
+{
+    free(cctx->workSpace);
+    free(cctx);
+    return 0;   /* reserved as a potential error code in the future */
+}
+
+const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx)   /* hidden interface */
+{
+    return &(ctx->seqStore);
+}
+
+
+#define CLAMP(val,min,max) { if (val<min) val=min; else if (val>max) val=max; }
+#define CLAMPCHECK(val,min,max) { if ((val<min) || (val>max)) return ERROR(compressionParameter_unsupported); }
+
+/** ZSTD_checkCParams() :
+    ensure param values remain within authorized range.
+    @return : 0, or an error code if one value is beyond authorized range */
+size_t ZSTD_checkCParams(ZSTD_compressionParameters cParams)
+{
+    CLAMPCHECK(cParams.windowLog, ZSTD_WINDOWLOG_MIN, ZSTD_WINDOWLOG_MAX);
+    CLAMPCHECK(cParams.chainLog, ZSTD_CHAINLOG_MIN, ZSTD_CHAINLOG_MAX);
+    CLAMPCHECK(cParams.hashLog, ZSTD_HASHLOG_MIN, ZSTD_HASHLOG_MAX);
+    CLAMPCHECK(cParams.searchLog, ZSTD_SEARCHLOG_MIN, ZSTD_SEARCHLOG_MAX);
+    { U32 const searchLengthMin = (cParams.strategy == ZSTD_fast || cParams.strategy == ZSTD_greedy) ? ZSTD_SEARCHLENGTH_MIN+1 : ZSTD_SEARCHLENGTH_MIN;
+      U32 const searchLengthMax = (cParams.strategy == ZSTD_fast) ? ZSTD_SEARCHLENGTH_MAX : ZSTD_SEARCHLENGTH_MAX-1;
+      CLAMPCHECK(cParams.searchLength, searchLengthMin, searchLengthMax); }
+    CLAMPCHECK(cParams.targetLength, ZSTD_TARGETLENGTH_MIN, ZSTD_TARGETLENGTH_MAX);
+    if ((U32)(cParams.strategy) > (U32)ZSTD_btopt) return ERROR(compressionParameter_unsupported);
+    return 0;
+}
+
+
+static unsigned ZSTD_highbit(U32 val);
+
+/** ZSTD_checkCParams_advanced() :
+    temporary work-around, while the compressor compatibility remains limited regarding windowLog < 18 */
+size_t ZSTD_checkCParams_advanced(ZSTD_compressionParameters cParams, U64 srcSize)
+{
+    if (srcSize > (1ULL << ZSTD_WINDOWLOG_MIN)) return ZSTD_checkCParams(cParams);
+    if (cParams.windowLog < ZSTD_WINDOWLOG_ABSOLUTEMIN) return ERROR(compressionParameter_unsupported);
+    if (srcSize <= (1ULL << cParams.windowLog)) cParams.windowLog = ZSTD_WINDOWLOG_MIN;   /* fake value - temporary work around */
+    if (srcSize <= (1ULL << cParams.chainLog)) cParams.chainLog = ZSTD_CHAINLOG_MIN;      /* fake value - temporary work around */
+    if ((srcSize <= (1ULL << cParams.hashLog)) && ((U32)cParams.strategy < (U32)ZSTD_btlazy2)) cParams.hashLog = ZSTD_HASHLOG_MIN;   /* fake value - temporary work around */
+    return ZSTD_checkCParams(cParams);
+}
+
+
+/** ZSTD_adjustCParams() :
+    optimize params for a given input (`srcSize` and `dictSize`),
+    mostly downsizing to reduce memory consumption and initialization time.
+    Both `srcSize` and `dictSize` are optional (use 0 if unknown),
+    but if both are 0, no optimization can be done.
+    Note : params is considered validated at this stage. Use ZSTD_checkCParams() to ensure that. */
+void ZSTD_adjustCParams(ZSTD_compressionParameters* params, U64 srcSize, size_t dictSize)
+{
+    if (srcSize+dictSize == 0) return;   /* no size information available : no adjustment */
+
+    /* resize params, to use less memory when necessary */
+    { U32 const minSrcSize = (srcSize==0) ?
500 : 0;
+      U64 const rSize = srcSize + dictSize + minSrcSize;
+      if (rSize < ((U64)1<<ZSTD_WINDOWLOG_MAX)) {
+          U32 const srcLog = ZSTD_highbit((U32)(rSize)-1) + 1;
+          if (params->windowLog > srcLog) params->windowLog = srcLog;
+    } }
+    if (params->hashLog > params->windowLog) params->hashLog = params->windowLog;
+    { U32 const btPlus = (params->strategy == ZSTD_btlazy2) || (params->strategy == ZSTD_btopt);
+      U32 const maxChainLog = params->windowLog+btPlus;
+      if (params->chainLog > maxChainLog) params->chainLog = maxChainLog; }   /* <= ZSTD_CHAINLOG_MAX */
+
+    if (params->windowLog < ZSTD_WINDOWLOG_ABSOLUTEMIN) params->windowLog = ZSTD_WINDOWLOG_ABSOLUTEMIN;   /* required for frame header */
+    if ((params->hashLog < ZSTD_HASHLOG_MIN) && ((U32)params->strategy >= (U32)ZSTD_btlazy2)) params->hashLog = ZSTD_HASHLOG_MIN;   /* required to ensure collision resistance in bt */
+}
+
+
+size_t ZSTD_sizeofCCtx(ZSTD_compressionParameters cParams)   /* hidden interface, for paramgrill */
+{
+    ZSTD_CCtx* zc = ZSTD_createCCtx();
+    ZSTD_parameters params;
+    params.cParams = cParams;
+    params.fParams.contentSizeFlag = 1;
+    ZSTD_compressBegin_advanced(zc, NULL, 0, params, 0);
+    { size_t const ccsize = sizeof(*zc) + zc->workSpaceSize;
+      ZSTD_freeCCtx(zc);
+      return ccsize; }
+}
+
+/*! ZSTD_resetCCtx_advanced() :
+    note : 'params' is expected to be validated */
+static size_t ZSTD_resetCCtx_advanced (ZSTD_CCtx* zc,
+                                       ZSTD_parameters params, U32 reset)
+{   /* note : params considered validated here */
+    const size_t blockSize = MIN(ZSTD_BLOCKSIZE_MAX, (size_t)1 << params.cParams.windowLog);
+    const U32 divider = (params.cParams.searchLength==3) ? 3 : 4;
+    const size_t maxNbSeq = blockSize / divider;
+    const size_t tokenSpace = blockSize + 11*maxNbSeq;
+    const size_t chainSize = (params.cParams.strategy == ZSTD_fast) ? 0 : (1 << params.cParams.chainLog);
+    const size_t hSize = ((size_t)1) << params.cParams.hashLog;
+    const size_t h3Size = (zc->hashLog3) ? 1 << zc->hashLog3 : 0;
+    const size_t tableSpace = (chainSize + hSize + h3Size) * sizeof(U32);
+
+    /* Check if workSpace is large enough, alloc a new one if needed */
+    { size_t const optSpace = ((MaxML+1) + (MaxLL+1) + (MaxOff+1) + (1<<Litbits))*sizeof(U32)
+                            + (ZSTD_OPT_NUM+1)*(sizeof(ZSTD_match_t) + sizeof(ZSTD_optimal_t));
+      size_t const neededSpace = tableSpace + (256*sizeof(U32)) /* huffTable */ + tokenSpace
+                               + ((params.cParams.strategy == ZSTD_btopt) ?
optSpace : 0); + if (zc->workSpaceSize < neededSpace) { + free(zc->workSpace); + zc->workSpace = malloc(neededSpace); + if (zc->workSpace == NULL) return ERROR(memory_allocation); + zc->workSpaceSize = neededSpace; + } } + + if (reset) memset(zc->workSpace, 0, tableSpace ); /* reset only tables */ + zc->hashTable3 = (U32*)(zc->workSpace); + zc->hashTable = zc->hashTable3 + h3Size; + zc->chainTable = zc->hashTable + hSize; + zc->seqStore.buffer = zc->chainTable + chainSize; + zc->hufTable = (HUF_CElt*)zc->seqStore.buffer; + zc->flagStaticTables = 0; + zc->seqStore.buffer = ((U32*)(zc->seqStore.buffer)) + 256; + + zc->nextToUpdate = 1; + zc->nextSrc = NULL; + zc->base = NULL; + zc->dictBase = NULL; + zc->dictLimit = 0; + zc->lowLimit = 0; + zc->params = params; + zc->blockSize = blockSize; + + if (params.cParams.strategy == ZSTD_btopt) { + zc->seqStore.litFreq = (U32*)(zc->seqStore.buffer); + zc->seqStore.litLengthFreq = zc->seqStore.litFreq + (1<<Litbits); + zc->seqStore.matchLengthFreq = zc->seqStore.litLengthFreq + (MaxLL+1); + zc->seqStore.offCodeFreq = zc->seqStore.matchLengthFreq + (MaxML+1); + zc->seqStore.matchTable = (ZSTD_match_t*)((void*)(zc->seqStore.offCodeFreq + (MaxOff+1))); + zc->seqStore.priceTable = (ZSTD_optimal_t*)((void*)(zc->seqStore.matchTable + ZSTD_OPT_NUM+1)); + zc->seqStore.buffer = zc->seqStore.priceTable + ZSTD_OPT_NUM+1; + zc->seqStore.litLengthSum = 0; + } + zc->seqStore.offsetStart = (U32*) (zc->seqStore.buffer); + zc->seqStore.litLengthStart = (U16*) (void*)(zc->seqStore.offsetStart + maxNbSeq); + zc->seqStore.matchLengthStart = (U16*) (void*)(zc->seqStore.litLengthStart + maxNbSeq); + zc->seqStore.llCodeStart = (BYTE*) (zc->seqStore.matchLengthStart + maxNbSeq); + zc->seqStore.mlCodeStart = zc->seqStore.llCodeStart + maxNbSeq; + zc->seqStore.offCodeStart = zc->seqStore.mlCodeStart + maxNbSeq; + zc->seqStore.litStart = zc->seqStore.offCodeStart + maxNbSeq; + + zc->stage = 1; + zc->loadedDictEnd = 0; + + return 0; +} + + +/*! ZSTD_copyCCtx() : +* Duplicate an existing context `srcCCtx` into another one `dstCCtx`. +* Only works during stage 1 (i.e. after creation, but before first call to ZSTD_compressContinue()). +* @return : 0, or an error code */ +size_t ZSTD_copyCCtx(ZSTD_CCtx* dstCCtx, const ZSTD_CCtx* srcCCtx) +{ + if (srcCCtx->stage!=1) return ERROR(stage_wrong); + + dstCCtx->hashLog3 = srcCCtx->hashLog3; /* must be before ZSTD_resetCCtx_advanced */ + ZSTD_resetCCtx_advanced(dstCCtx, srcCCtx->params, 0); + dstCCtx->params.fParams.contentSizeFlag = 0; /* content size different from the one set during srcCCtx init */ + + /* copy tables */ + { const size_t chainSize = (srcCCtx->params.cParams.strategy == ZSTD_fast) ? 0 : (1 << srcCCtx->params.cParams.chainLog); + const size_t hSize = ((size_t)1) << srcCCtx->params.cParams.hashLog; + const size_t h3Size = (srcCCtx->hashLog3) ? 
1 << srcCCtx->hashLog3 : 0; + const size_t tableSpace = (chainSize + hSize + h3Size) * sizeof(U32); + memcpy(dstCCtx->workSpace, srcCCtx->workSpace, tableSpace); + } + + /* copy dictionary pointers */ + dstCCtx->nextToUpdate = srcCCtx->nextToUpdate; + dstCCtx->nextToUpdate3= srcCCtx->nextToUpdate3; + dstCCtx->nextSrc = srcCCtx->nextSrc; + dstCCtx->base = srcCCtx->base; + dstCCtx->dictBase = srcCCtx->dictBase; + dstCCtx->dictLimit = srcCCtx->dictLimit; + dstCCtx->lowLimit = srcCCtx->lowLimit; + dstCCtx->loadedDictEnd= srcCCtx->loadedDictEnd; + + /* copy entropy tables */ + dstCCtx->flagStaticTables = srcCCtx->flagStaticTables; + if (srcCCtx->flagStaticTables) { + memcpy(dstCCtx->hufTable, srcCCtx->hufTable, 256*4); + memcpy(dstCCtx->litlengthCTable, srcCCtx->litlengthCTable, sizeof(dstCCtx->litlengthCTable)); + memcpy(dstCCtx->matchlengthCTable, srcCCtx->matchlengthCTable, sizeof(dstCCtx->matchlengthCTable)); + memcpy(dstCCtx->offcodeCTable, srcCCtx->offcodeCTable, sizeof(dstCCtx->offcodeCTable)); + } + + return 0; +} + + +/*! ZSTD_reduceTable() : +* reduce table indexes by `reducerValue` */ +static void ZSTD_reduceTable (U32* const table, U32 const size, U32 const reducerValue) +{ + U32 u; + for (u=0 ; u < size ; u++) { + if (table[u] < reducerValue) table[u] = 0; + else table[u] -= reducerValue; + } +} + +/*! ZSTD_reduceIndex() : +* rescale all indexes to avoid future overflow (indexes are U32) */ +static void ZSTD_reduceIndex (ZSTD_CCtx* zc, const U32 reducerValue) +{ + { const U32 hSize = 1 << zc->params.cParams.hashLog; + ZSTD_reduceTable(zc->hashTable, hSize, reducerValue); } + + { const U32 chainSize = (zc->params.cParams.strategy == ZSTD_fast) ? 0 : (1 << zc->params.cParams.chainLog); + ZSTD_reduceTable(zc->chainTable, chainSize, reducerValue); } + + { const U32 h3Size = (zc->hashLog3) ? 1 << zc->hashLog3 : 0; + ZSTD_reduceTable(zc->hashTable3, h3Size, reducerValue); } +} + + +/*-******************************************************* +* Block entropic compression +*********************************************************/ + +/* Frame format description + Frame Header - [ Block Header - Block ] - Frame End + 1) Frame Header + - 4 bytes - Magic Number : ZSTD_MAGICNUMBER (defined within zstd_static.h) + - 1 byte - Frame Descriptor + 2) Block Header + - 3 bytes, starting with a 2-bits descriptor + Uncompressed, Compressed, Frame End, unused + 3) Block + See Block Format Description + 4) Frame End + - 3 bytes, compatible with Block Header +*/ + + +/* Frame descriptor + + 1 byte, using : + bit 0-3 : windowLog - ZSTD_WINDOWLOG_ABSOLUTEMIN (see zstd_internal.h) + bit 4 : minmatch 4(0) or 3(1) + bit 5 : reserved (must be zero) + bit 6-7 : Frame content size : unknown, 1 byte, 2 bytes, 8 bytes + + Optional : content size (0, 1, 2 or 8 bytes) + 0 : unknown + 1 : 0-255 bytes + 2 : 256 - 65535+256 + 8 : up to 16 exa +*/ + + +/* Block format description + + Block = Literal Section - Sequences Section + Prerequisite : size of (compressed) block, maximum size of regenerated data + + 1) Literal Section + + 1.1) Header : 1-5 bytes + flags: 2 bits + 00 compressed by Huff0 + 01 unused + 10 is Raw (uncompressed) + 11 is Rle + Note : using 01 => Huff0 with precomputed table ? + Note : delta map ? => compressed ? 
+
+      1.1.1) Huff0-compressed literal block : 3-5 bytes
+             srcSize < 1 KB => 3 bytes (2-2-10-10) => single stream
+             srcSize < 1 KB => 3 bytes (2-2-10-10) => 4 streams
+             srcSize < 16KB => 4 bytes (2-2-14-14)
+             else           => 5 bytes (2-2-18-18)
+             big endian convention
+
+      1.1.2) Raw (uncompressed) literal block header : 1-3 bytes
+             size :  5 bits: (IS_RAW<<6) + (0<<5) + size
+                    12 bits: (IS_RAW<<6) + (2<<4) + (size>>8) + size&255
+                    20 bits: (IS_RAW<<6) + (3<<4) + (size>>16) + size>>8&255 + size&255
+             e.g. size==1000 uses the 12-bit form : byte0 = (IS_RAW<<6) + (2<<4) + (1000>>8), byte1 = 1000&255
+
+      1.1.3) Rle (repeated single byte) literal block header : 1-3 bytes
+             size :  5 bits: (IS_RLE<<6) + (0<<5) + size
+                    12 bits: (IS_RLE<<6) + (2<<4) + (size>>8) + size&255
+                    20 bits: (IS_RLE<<6) + (3<<4) + (size>>16) + size>>8&255 + size&255
+
+      1.1.4) Huff0-compressed literal block, using precomputed CTables : 3-5 bytes
+             srcSize < 1 KB => 3 bytes (2-2-10-10) => single stream
+             srcSize < 1 KB => 3 bytes (2-2-10-10) => 4 streams
+             srcSize < 16KB => 4 bytes (2-2-14-14)
+             else           => 5 bytes (2-2-18-18)
+             big endian convention
+
+             1- CTable available (stored into workspace ?)
+             2- Small input (fast heuristic ? Full comparison ? depend on clevel ?)
+
+
+      1.2) Literal block content
+
+      1.2.1) Huff0 block, using sizes from header
+             See Huff0 format
+
+      1.2.2) Huff0 block, using prepared table
+
+      1.2.3) Raw content
+
+      1.2.4) single byte
+
+
+   2) Sequences section
+
+      - Nb Sequences : 2 bytes, little endian
+      - Control Token : 1 byte (see below)
+      - Dumps Length : 1 or 2 bytes (depending on control token)
+      - Dumps : as stated by dumps length
+      - Literal Lengths FSE table (as needed depending on encoding method)
+      - Offset Codes FSE table (as needed depending on encoding method)
+      - Match Lengths FSE table (as needed depending on encoding method)
+
+      2.1) Control Token
+           8 bits, divided as :
+           0-1 : dumpsLength
+           2-3 : MatchLength, FSE encoding method
+           4-5 : Offset Codes, FSE encoding method
+           6-7 : Literal Lengths, FSE encoding method
+
+           FSE encoding method :
+           FSE_ENCODING_RAW : uncompressed; no header
+           FSE_ENCODING_RLE : single repeated value; header 1 byte
+           FSE_ENCODING_STATIC : use prepared table; no header
+           FSE_ENCODING_DYNAMIC : read NCount
+*/
+
+size_t ZSTD_noCompressBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
+{
+    BYTE* const ostart = (BYTE* const)dst;
+
+    if (srcSize + ZSTD_blockHeaderSize > dstCapacity) return ERROR(dstSize_tooSmall);
+    memcpy(ostart + ZSTD_blockHeaderSize, src, srcSize);
+
+    /* Build header */
+    ostart[0] = (BYTE)(srcSize>>16);
+    ostart[1] = (BYTE)(srcSize>>8);
+    ostart[2] = (BYTE) srcSize;
+    ostart[0] += (BYTE)(bt_raw<<6);   /* is a raw (uncompressed) block */
+
+    return ZSTD_blockHeaderSize+srcSize;
+}
+
+
+static size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
+{
+    BYTE* const ostart = (BYTE* const)dst;
+    U32 const flSize = 1 + (srcSize>31) + (srcSize>4095);
+
+    if (srcSize + flSize > dstCapacity) return ERROR(dstSize_tooSmall);
+
+    switch(flSize)
+    {
+        case 1: /* 2 - 1 - 5 */
+            ostart[0] = (BYTE)((IS_RAW<<6) + (0<<5) + srcSize);
+            break;
+        case 2: /* 2 - 2 - 12 */
+            ostart[0] = (BYTE)((IS_RAW<<6) + (2<<4) + (srcSize >> 8));
+            ostart[1] = (BYTE)srcSize;
+            break;
+        default:   /* note : should not be necessary : flSize is within {1,2,3} */
+        case 3: /* 2 - 2 - 20 */
+            ostart[0] = (BYTE)((IS_RAW<<6) + (3<<4) + (srcSize >> 16));
+            ostart[1] = (BYTE)(srcSize>>8);
+            ostart[2] = (BYTE)srcSize;
+            break;
+    }
+
+    memcpy(ostart + flSize, src, srcSize);
+    return srcSize + flSize;
+}
+
+static size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t
dstCapacity, const void* src, size_t srcSize) +{ + BYTE* const ostart = (BYTE* const)dst; + U32 const flSize = 1 + (srcSize>31) + (srcSize>4095); + + (void)dstCapacity; /* dstCapacity guaranteed to be >=4, hence large enough */ + + switch(flSize) + { + case 1: /* 2 - 1 - 5 */ + ostart[0] = (BYTE)((IS_RLE<<6) + (0<<5) + srcSize); + break; + case 2: /* 2 - 2 - 12 */ + ostart[0] = (BYTE)((IS_RLE<<6) + (2<<4) + (srcSize >> 8)); + ostart[1] = (BYTE)srcSize; + break; + default: /*note : should not be necessary : flSize is necessarily within {1,2,3} */ + case 3: /* 2 - 2 - 20 */ + ostart[0] = (BYTE)((IS_RLE<<6) + (3<<4) + (srcSize >> 16)); + ostart[1] = (BYTE)(srcSize>>8); + ostart[2] = (BYTE)srcSize; + break; + } + + ostart[flSize] = *(const BYTE*)src; + return flSize+1; +} + + +static size_t ZSTD_minGain(size_t srcSize) { return (srcSize >> 6) + 2; } + +static size_t ZSTD_compressLiterals (ZSTD_CCtx* zc, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize) +{ + size_t const minGain = ZSTD_minGain(srcSize); + size_t const lhSize = 3 + (srcSize >= 1 KB) + (srcSize >= 16 KB); + BYTE* const ostart = (BYTE*)dst; + U32 singleStream = srcSize < 256; + U32 hType = IS_HUF; + size_t cLitSize; + + + /* small ? don't even attempt compression (speed opt) */ +# define LITERAL_NOENTROPY 63 + { size_t const minLitSize = zc->flagStaticTables ? 6 : LITERAL_NOENTROPY; + if (srcSize <= minLitSize) return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize); + } + + if (dstCapacity < lhSize+1) return ERROR(dstSize_tooSmall); /* not enough space for compression */ + if (zc->flagStaticTables && (lhSize==3)) { + hType = IS_PCH; + singleStream = 1; + cLitSize = HUF_compress1X_usingCTable(ostart+lhSize, dstCapacity-lhSize, src, srcSize, zc->hufTable); + } else { + cLitSize = singleStream ? 
HUF_compress1X(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 12) + : HUF_compress2 (ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 12); + } + + if ((cLitSize==0) || (cLitSize >= srcSize - minGain)) + return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize); + if (cLitSize==1) + return ZSTD_compressRleLiteralsBlock(dst, dstCapacity, src, srcSize); + + /* Build header */ + switch(lhSize) + { + case 3: /* 2 - 2 - 10 - 10 */ + ostart[0] = (BYTE)((srcSize>>6) + (singleStream << 4) + (hType<<6)); + ostart[1] = (BYTE)((srcSize<<2) + (cLitSize>>8)); + ostart[2] = (BYTE)(cLitSize); + break; + case 4: /* 2 - 2 - 14 - 14 */ + ostart[0] = (BYTE)((srcSize>>10) + (2<<4) + (hType<<6)); + ostart[1] = (BYTE)(srcSize>> 2); + ostart[2] = (BYTE)((srcSize<<6) + (cLitSize>>8)); + ostart[3] = (BYTE)(cLitSize); + break; + default: /* should not be necessary, lhSize is only {3,4,5} */ + case 5: /* 2 - 2 - 18 - 18 */ + ostart[0] = (BYTE)((srcSize>>14) + (3<<4) + (hType<<6)); + ostart[1] = (BYTE)(srcSize>>6); + ostart[2] = (BYTE)((srcSize<<2) + (cLitSize>>16)); + ostart[3] = (BYTE)(cLitSize>>8); + ostart[4] = (BYTE)(cLitSize); + break; + } + return lhSize+cLitSize; +} + + +void ZSTD_seqToCodes(const seqStore_t* seqStorePtr, size_t const nbSeq) +{ + /* LL codes */ + { static const BYTE LL_Code[64] = { 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 16, 17, 17, 18, 18, 19, 19, + 20, 20, 20, 20, 21, 21, 21, 21, + 22, 22, 22, 22, 22, 22, 22, 22, + 23, 23, 23, 23, 23, 23, 23, 23, + 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24 }; + const BYTE LL_deltaCode = 19; + const U16* const llTable = seqStorePtr->litLengthStart; + BYTE* const llCodeTable = seqStorePtr->llCodeStart; + size_t u; + for (u=0; u<nbSeq; u++) { + U32 const ll = llTable[u]; + llCodeTable[u] = (ll>63) ? (BYTE)ZSTD_highbit(ll) + LL_deltaCode : LL_Code[ll]; + } + if (seqStorePtr->longLengthID==1) + llCodeTable[seqStorePtr->longLengthPos] = MaxLL; + } + + /* Offset codes */ + { const U32* const offsetTable = seqStorePtr->offsetStart; + BYTE* const ofCodeTable = seqStorePtr->offCodeStart; + size_t u; + for (u=0; u<nbSeq; u++) ofCodeTable[u] = (BYTE)ZSTD_highbit(offsetTable[u]); + } + + /* ML codes */ + { static const BYTE ML_Code[128] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + 32, 32, 33, 33, 34, 34, 35, 35, 36, 36, 36, 36, 37, 37, 37, 37, + 38, 38, 38, 38, 38, 38, 38, 38, 39, 39, 39, 39, 39, 39, 39, 39, + 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, + 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, + 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, + 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42 }; + const BYTE ML_deltaCode = 36; + const U16* const mlTable = seqStorePtr->matchLengthStart; + BYTE* const mlCodeTable = seqStorePtr->mlCodeStart; + size_t u; + for (u=0; u<nbSeq; u++) { + U32 const ml = mlTable[u]; + mlCodeTable[u] = (ml>127) ? 
(BYTE)ZSTD_highbit(ml) + ML_deltaCode : ML_Code[ml]; + } + if (seqStorePtr->longLengthID==2) + mlCodeTable[seqStorePtr->longLengthPos] = MaxML; + } +} + + +size_t ZSTD_compressSequences(ZSTD_CCtx* zc, + void* dst, size_t dstCapacity, + size_t srcSize) +{ + const seqStore_t* seqStorePtr = &(zc->seqStore); + U32 count[MaxSeq+1]; + S16 norm[MaxSeq+1]; + FSE_CTable* CTable_LitLength = zc->litlengthCTable; + FSE_CTable* CTable_OffsetBits = zc->offcodeCTable; + FSE_CTable* CTable_MatchLength = zc->matchlengthCTable; + U32 LLtype, Offtype, MLtype; /* compressed, raw or rle */ + U16* const llTable = seqStorePtr->litLengthStart; + U16* const mlTable = seqStorePtr->matchLengthStart; + const U32* const offsetTable = seqStorePtr->offsetStart; + const U32* const offsetTableEnd = seqStorePtr->offset; + BYTE* const ofCodeTable = seqStorePtr->offCodeStart; + BYTE* const llCodeTable = seqStorePtr->llCodeStart; + BYTE* const mlCodeTable = seqStorePtr->mlCodeStart; + BYTE* const ostart = (BYTE*)dst; + BYTE* const oend = ostart + dstCapacity; + BYTE* op = ostart; + size_t const nbSeq = offsetTableEnd - offsetTable; + BYTE* seqHead; + + /* Compress literals */ + { const BYTE* const literals = seqStorePtr->litStart; + size_t const litSize = seqStorePtr->lit - literals; + size_t const cSize = ZSTD_compressLiterals(zc, op, dstCapacity, literals, litSize); + if (ZSTD_isError(cSize)) return cSize; + op += cSize; + } + + /* Sequences Header */ + if ((oend-op) < 3 /*max nbSeq Size*/ + 1 /*seqHead */) return ERROR(dstSize_tooSmall); + if (nbSeq < 0x7F) *op++ = (BYTE)nbSeq; + else if (nbSeq < LONGNBSEQ) op[0] = (BYTE)((nbSeq>>8) + 0x80), op[1] = (BYTE)nbSeq, op+=2; + else op[0]=0xFF, MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ)), op+=3; + if (nbSeq==0) goto _check_compressibility; + + /* seqHead : flags for FSE encoding type */ + seqHead = op++; + +#define MIN_SEQ_FOR_DYNAMIC_FSE 64 +#define MAX_SEQ_FOR_STATIC_FSE 1000 + + /* convert length/distances into codes */ + ZSTD_seqToCodes(seqStorePtr, nbSeq); + + /* CTable for Literal Lengths */ + { U32 max = MaxLL; + size_t const mostFrequent = FSE_countFast(count, &max, llCodeTable, nbSeq); + if ((mostFrequent == nbSeq) && (nbSeq > 2)) { + *op++ = llCodeTable[0]; + FSE_buildCTable_rle(CTable_LitLength, (BYTE)max); + LLtype = FSE_ENCODING_RLE; + } else if ((zc->flagStaticTables) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) { + LLtype = FSE_ENCODING_STATIC; + } else if ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (LL_defaultNormLog-1)))) { + FSE_buildCTable(CTable_LitLength, LL_defaultNorm, MaxLL, LL_defaultNormLog); + LLtype = FSE_ENCODING_RAW; + } else { + size_t nbSeq_1 = nbSeq; + const U32 tableLog = FSE_optimalTableLog(LLFSELog, nbSeq, max); + if (count[llCodeTable[nbSeq-1]]>1) { count[llCodeTable[nbSeq-1]]--; nbSeq_1--; } + FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max); + { size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */ + if (FSE_isError(NCountSize)) return ERROR(GENERIC); + op += NCountSize; } + FSE_buildCTable(CTable_LitLength, norm, max, tableLog); + LLtype = FSE_ENCODING_DYNAMIC; + } } + + /* CTable for Offsets */ + { U32 max = MaxOff; + size_t const mostFrequent = FSE_countFast(count, &max, ofCodeTable, nbSeq); + if ((mostFrequent == nbSeq) && (nbSeq > 2)) { + *op++ = ofCodeTable[0]; + FSE_buildCTable_rle(CTable_OffsetBits, (BYTE)max); + Offtype = FSE_ENCODING_RLE; + } else if ((zc->flagStaticTables) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) { + Offtype = FSE_ENCODING_STATIC; + } else if ((nbSeq 
< MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (OF_defaultNormLog-1)))) { + FSE_buildCTable(CTable_OffsetBits, OF_defaultNorm, MaxOff, OF_defaultNormLog); + Offtype = FSE_ENCODING_RAW; + } else { + size_t nbSeq_1 = nbSeq; + const U32 tableLog = FSE_optimalTableLog(OffFSELog, nbSeq, max); + if (count[ofCodeTable[nbSeq-1]]>1) { count[ofCodeTable[nbSeq-1]]--; nbSeq_1--; } + FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max); + { size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */ + if (FSE_isError(NCountSize)) return ERROR(GENERIC); + op += NCountSize; } + FSE_buildCTable(CTable_OffsetBits, norm, max, tableLog); + Offtype = FSE_ENCODING_DYNAMIC; + } } + + /* CTable for MatchLengths */ + { U32 max = MaxML; + size_t const mostFrequent = FSE_countFast(count, &max, mlCodeTable, nbSeq); + if ((mostFrequent == nbSeq) && (nbSeq > 2)) { + *op++ = *mlCodeTable; + FSE_buildCTable_rle(CTable_MatchLength, (BYTE)max); + MLtype = FSE_ENCODING_RLE; + } else if ((zc->flagStaticTables) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) { + MLtype = FSE_ENCODING_STATIC; + } else if ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (ML_defaultNormLog-1)))) { + FSE_buildCTable(CTable_MatchLength, ML_defaultNorm, MaxML, ML_defaultNormLog); + MLtype = FSE_ENCODING_RAW; + } else { + size_t nbSeq_1 = nbSeq; + const U32 tableLog = FSE_optimalTableLog(MLFSELog, nbSeq, max); + if (count[mlCodeTable[nbSeq-1]]>1) { count[mlCodeTable[nbSeq-1]]--; nbSeq_1--; } + FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max); + { size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */ + if (FSE_isError(NCountSize)) return ERROR(GENERIC); + op += NCountSize; } + FSE_buildCTable(CTable_MatchLength, norm, max, tableLog); + MLtype = FSE_ENCODING_DYNAMIC; + } } + + *seqHead = (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2)); + zc->flagStaticTables = 0; + + /* Encoding Sequences */ + { BIT_CStream_t blockStream; + FSE_CState_t stateMatchLength; + FSE_CState_t stateOffsetBits; + FSE_CState_t stateLitLength; + + { size_t const errorCode = BIT_initCStream(&blockStream, op, oend-op); + if (ERR_isError(errorCode)) return ERROR(dstSize_tooSmall); } /* not enough space remaining */ + + /* first symbols */ + FSE_initCState2(&stateMatchLength, CTable_MatchLength, mlCodeTable[nbSeq-1]); + FSE_initCState2(&stateOffsetBits, CTable_OffsetBits, ofCodeTable[nbSeq-1]); + FSE_initCState2(&stateLitLength, CTable_LitLength, llCodeTable[nbSeq-1]); + BIT_addBits(&blockStream, llTable[nbSeq-1], LL_bits[llCodeTable[nbSeq-1]]); + if (MEM_32bits()) BIT_flushBits(&blockStream); + BIT_addBits(&blockStream, mlTable[nbSeq-1], ML_bits[mlCodeTable[nbSeq-1]]); + if (MEM_32bits()) BIT_flushBits(&blockStream); + BIT_addBits(&blockStream, offsetTable[nbSeq-1], ofCodeTable[nbSeq-1]); + BIT_flushBits(&blockStream); + + { size_t n; + for (n=nbSeq-2 ; n<nbSeq ; n--) { /* intentional underflow */ + const BYTE ofCode = ofCodeTable[n]; + const BYTE mlCode = mlCodeTable[n]; + const BYTE llCode = llCodeTable[n]; + const U32 llBits = LL_bits[llCode]; + const U32 mlBits = ML_bits[mlCode]; + const U32 ofBits = ofCode; /* 32b*/ /* 64b*/ + /* (7)*/ /* (7)*/ + FSE_encodeSymbol(&blockStream, &stateOffsetBits, ofCode); /* 15 */ /* 15 */ + FSE_encodeSymbol(&blockStream, &stateMatchLength, mlCode); /* 24 */ /* 24 */ + if (MEM_32bits()) BIT_flushBits(&blockStream); /* (7)*/ + FSE_encodeSymbol(&blockStream, &stateLitLength, llCode); /* 16 */ /* 33 */ + if (MEM_32bits() || 
(ofBits+mlBits+llBits >= 64-7-(LLFSELog+MLFSELog+OffFSELog))) + BIT_flushBits(&blockStream); /* (7)*/ + BIT_addBits(&blockStream, llTable[n], llBits); + if (MEM_32bits() && ((llBits+mlBits)>24)) BIT_flushBits(&blockStream); + BIT_addBits(&blockStream, mlTable[n], mlBits); + if (MEM_32bits()) BIT_flushBits(&blockStream); /* (7)*/ + BIT_addBits(&blockStream, offsetTable[n], ofBits); /* 31 */ + BIT_flushBits(&blockStream); /* (7)*/ + } } + + FSE_flushCState(&blockStream, &stateMatchLength); + FSE_flushCState(&blockStream, &stateOffsetBits); + FSE_flushCState(&blockStream, &stateLitLength); + + { size_t const streamSize = BIT_closeCStream(&blockStream); + if (streamSize==0) return ERROR(dstSize_tooSmall); /* not enough space */ + op += streamSize; + } } + + /* check compressibility */ +_check_compressibility: + { size_t const minGain = ZSTD_minGain(srcSize); + size_t const maxCSize = srcSize - minGain; + if ((size_t)(op-ostart) >= maxCSize) return 0; } + + return op - ostart; +} + + +/*! ZSTD_storeSeq() : + Store a sequence (literal length, literals, offset code and match length code) into seqStore_t. + `offsetCode` : distance to match, or 0 == repCode. + `matchCode` : matchLength - MINMATCH +*/ +MEM_STATIC void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const BYTE* literals, size_t offsetCode, size_t matchCode) +{ +#if 0 /* for debug */ + static const BYTE* g_start = NULL; + const U32 pos = (U32)(literals - g_start); + if (g_start==NULL) g_start = literals; + if ((pos > 2587900) && (pos < 2588050)) + printf("Cpos %6u :%5u literals & match %3u bytes at distance %6u \n", + pos, (U32)litLength, (U32)matchCode+MINMATCH, (U32)offsetCode); +#endif + ZSTD_statsUpdatePrices(&seqStorePtr->stats, litLength, literals, offsetCode, matchCode); + + /* copy Literals */ + ZSTD_wildcopy(seqStorePtr->lit, literals, litLength); + seqStorePtr->lit += litLength; + + /* literal Length */ + if (litLength>0xFFFF) { seqStorePtr->longLengthID = 1; seqStorePtr->longLengthPos = (U32)(seqStorePtr->litLength - seqStorePtr->litLengthStart); } + *seqStorePtr->litLength++ = (U16)litLength; + + /* match offset */ + *(seqStorePtr->offset++) = (U32)offsetCode + 1; + + /* match Length */ + if (matchCode>0xFFFF) { seqStorePtr->longLengthID = 2; seqStorePtr->longLengthPos = (U32)(seqStorePtr->matchLength - seqStorePtr->matchLengthStart); } + *seqStorePtr->matchLength++ = (U16)matchCode; +} + + +/*-************************************* +* Match length counter +***************************************/ +static unsigned ZSTD_NbCommonBytes (register size_t val) +{ + if (MEM_isLittleEndian()) { + if (MEM_64bits()) { +# if defined(_MSC_VER) && defined(_WIN64) + unsigned long r = 0; + _BitScanForward64( &r, (U64)val ); + return (unsigned)(r>>3); +# elif defined(__GNUC__) && (__GNUC__ >= 3) + return (__builtin_ctzll((U64)val) >> 3); +# else + static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5, 3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5, 5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4, 4, 5, 7, 2, 6, 5, 7, 6, 7, 7 }; + return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58]; +# endif + } else { /* 32 bits */ +# if defined(_MSC_VER) + unsigned long r=0; + _BitScanForward( &r, (U32)val ); + return (unsigned)(r>>3); +# elif defined(__GNUC__) && (__GNUC__ >= 3) + return (__builtin_ctz((U32)val) >> 3); +# else + static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, 3, 2, 2, 1, 3, 2, 0, 1, 3, 3, 1, 2, 2, 2, 2, 0, 3, 1, 2, 0, 1, 0, 1, 
1 }; + return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27]; +# endif + } + } else { /* Big Endian CPU */ + if (MEM_64bits()) { +# if defined(_MSC_VER) && defined(_WIN64) + unsigned long r = 0; + _BitScanReverse64( &r, val ); + return (unsigned)(r>>3); +# elif defined(__GNUC__) && (__GNUC__ >= 3) + return (__builtin_clzll(val) >> 3); +# else + unsigned r; + const unsigned n32 = sizeof(size_t)*4; /* calculate this way due to compiler complaining in 32-bits mode */ + if (!(val>>n32)) { r=4; } else { r=0; val>>=n32; } + if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; } + r += (!val); + return r; +# endif + } else { /* 32 bits */ +# if defined(_MSC_VER) + unsigned long r = 0; + _BitScanReverse( &r, (unsigned long)val ); + return (unsigned)(r>>3); +# elif defined(__GNUC__) && (__GNUC__ >= 3) + return (__builtin_clz((U32)val) >> 3); +# else + unsigned r; + if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; } + r += (!val); + return r; +# endif + } } +} + + +static size_t ZSTD_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* pInLimit) +{ + const BYTE* const pStart = pIn; + + while ((pIn<pInLimit-(sizeof(size_t)-1))) { + size_t diff = MEM_readST(pMatch) ^ MEM_readST(pIn); + if (!diff) { pIn+=sizeof(size_t); pMatch+=sizeof(size_t); continue; } + pIn += ZSTD_NbCommonBytes(diff); + return (size_t)(pIn - pStart); + } + if (MEM_64bits()) if ((pIn<(pInLimit-3)) && (MEM_read32(pMatch) == MEM_read32(pIn))) { pIn+=4; pMatch+=4; } + if ((pIn<(pInLimit-1)) && (MEM_read16(pMatch) == MEM_read16(pIn))) { pIn+=2; pMatch+=2; } + if ((pIn<pInLimit) && (*pMatch == *pIn)) pIn++; + return (size_t)(pIn - pStart); +} + +/** ZSTD_count_2segments() : +* can count match length with `ip` & `match` in 2 different segments. +* convention : on reaching mEnd, match count continue starting from iStart +*/ +static size_t ZSTD_count_2segments(const BYTE* ip, const BYTE* match, const BYTE* iEnd, const BYTE* mEnd, const BYTE* iStart) +{ + size_t matchLength; + const BYTE* vEnd = ip + (mEnd - match); + if (vEnd > iEnd) vEnd = iEnd; + matchLength = ZSTD_count(ip, match, vEnd); + if (match + matchLength == mEnd) + matchLength += ZSTD_count(ip+matchLength, iStart, iEnd); + return matchLength; +} + + +/*-************************************* +* Hashes +***************************************/ +static const U32 prime3bytes = 506832829U; +static U32 ZSTD_hash3(U32 u, U32 h) { return ((u << (32-24)) * prime3bytes) >> (32-h) ; } +static size_t ZSTD_hash3Ptr(const void* ptr, U32 h) { return ZSTD_hash3(MEM_readLE32(ptr), h); } + +static const U32 prime4bytes = 2654435761U; +static U32 ZSTD_hash4(U32 u, U32 h) { return (u * prime4bytes) >> (32-h) ; } +static size_t ZSTD_hash4Ptr(const void* ptr, U32 h) { return ZSTD_hash4(MEM_read32(ptr), h); } + +static const U64 prime5bytes = 889523592379ULL; +static size_t ZSTD_hash5(U64 u, U32 h) { return (size_t)(((u << (64-40)) * prime5bytes) >> (64-h)) ; } +static size_t ZSTD_hash5Ptr(const void* p, U32 h) { return ZSTD_hash5(MEM_readLE64(p), h); } + +static const U64 prime6bytes = 227718039650203ULL; +static size_t ZSTD_hash6(U64 u, U32 h) { return (size_t)(((u << (64-48)) * prime6bytes) >> (64-h)) ; } +static size_t ZSTD_hash6Ptr(const void* p, U32 h) { return ZSTD_hash6(MEM_readLE64(p), h); } + +static const U64 prime7bytes = 58295818150454627ULL; +static size_t ZSTD_hash7(U64 u, U32 h) { return (size_t)(((u << (64-56)) * prime7bytes) >> (64-h)) ; } +static size_t ZSTD_hash7Ptr(const void* p, U32 h) { return ZSTD_hash7(MEM_readLE64(p), h); } + +static size_t 
ZSTD_hashPtr(const void* p, U32 hBits, U32 mls) +{ + switch(mls) + { + default: + case 4: return ZSTD_hash4Ptr(p, hBits); + case 5: return ZSTD_hash5Ptr(p, hBits); + case 6: return ZSTD_hash6Ptr(p, hBits); + case 7: return ZSTD_hash7Ptr(p, hBits); + } +} + + +/*-************************************* +* Fast Scan +***************************************/ +static void ZSTD_fillHashTable (ZSTD_CCtx* zc, const void* end, const U32 mls) +{ + U32* const hashTable = zc->hashTable; + const U32 hBits = zc->params.cParams.hashLog; + const BYTE* const base = zc->base; + const BYTE* ip = base + zc->nextToUpdate; + const BYTE* const iend = ((const BYTE*)end) - 8; + const size_t fastHashFillStep = 3; + + while(ip <= iend) { + hashTable[ZSTD_hashPtr(ip, hBits, mls)] = (U32)(ip - base); + ip += fastHashFillStep; + } +} + + +FORCE_INLINE +void ZSTD_compressBlock_fast_generic(ZSTD_CCtx* zc, + const void* src, size_t srcSize, + const U32 mls) +{ + U32* const hashTable = zc->hashTable; + const U32 hBits = zc->params.cParams.hashLog; + seqStore_t* seqStorePtr = &(zc->seqStore); + const BYTE* const base = zc->base; + const BYTE* const istart = (const BYTE*)src; + const BYTE* ip = istart; + const BYTE* anchor = istart; + const U32 lowIndex = zc->dictLimit; + const BYTE* const lowest = base + lowIndex; + const BYTE* const iend = istart + srcSize; + const BYTE* const ilimit = iend - 8; + size_t offset_2=REPCODE_STARTVALUE, offset_1=REPCODE_STARTVALUE; + + /* init */ + ZSTD_resetSeqStore(seqStorePtr); + if (ip < lowest+REPCODE_STARTVALUE) ip = lowest+REPCODE_STARTVALUE; + + /* Main Search Loop */ + while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */ + size_t mlCode; + size_t offset; + const size_t h = ZSTD_hashPtr(ip, hBits, mls); + const U32 matchIndex = hashTable[h]; + const BYTE* match = base + matchIndex; + const U32 current = (U32)(ip-base); + hashTable[h] = current; /* update hash table */ + + if (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1)) { /* note : by construction, offset_1 <= current */ + mlCode = ZSTD_count(ip+1+EQUAL_READ32, ip+1+EQUAL_READ32-offset_1, iend) + EQUAL_READ32; + ip++; + ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, 0, mlCode-MINMATCH); + } else { + if ( (matchIndex <= lowIndex) || + (MEM_read32(match) != MEM_read32(ip)) ) { + ip += ((ip-anchor) >> g_searchStrength) + 1; + continue; + } + mlCode = ZSTD_count(ip+EQUAL_READ32, match+EQUAL_READ32, iend) + EQUAL_READ32; + offset = ip-match; + while ((ip>anchor) && (match>lowest) && (ip[-1] == match[-1])) { ip--; match--; mlCode++; } /* catch up */ + offset_2 = offset_1; + offset_1 = offset; + + ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mlCode-MINMATCH); + } + + /* match found */ + ip += mlCode; + anchor = ip; + + if (ip <= ilimit) { + /* Fill Table */ + hashTable[ZSTD_hashPtr(base+current+2, hBits, mls)] = current+2; /* here because current+2 could be > iend-8 */ + hashTable[ZSTD_hashPtr(ip-2, hBits, mls)] = (U32)(ip-2-base); + /* check immediate repcode */ + while ( (ip <= ilimit) + && (MEM_read32(ip) == MEM_read32(ip - offset_2)) ) { + /* store sequence */ + size_t const rlCode = ZSTD_count(ip+EQUAL_READ32, ip+EQUAL_READ32-offset_2, iend) + EQUAL_READ32; + { size_t const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; } /* swap offset_2 <=> offset_1 */ + hashTable[ZSTD_hashPtr(ip, hBits, mls)] = (U32)(ip-base); + ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, rlCode-MINMATCH); + ip += rlCode; + anchor = ip; + continue; /* faster when present ... (?) 
*/ + } } } + + /* Last Literals */ + { size_t const lastLLSize = iend - anchor; + memcpy(seqStorePtr->lit, anchor, lastLLSize); + seqStorePtr->lit += lastLLSize; + } +} + + +static void ZSTD_compressBlock_fast(ZSTD_CCtx* ctx, + const void* src, size_t srcSize) +{ + const U32 mls = ctx->params.cParams.searchLength; + switch(mls) + { + default: + case 4 : + ZSTD_compressBlock_fast_generic(ctx, src, srcSize, 4); return; + case 5 : + ZSTD_compressBlock_fast_generic(ctx, src, srcSize, 5); return; + case 6 : + ZSTD_compressBlock_fast_generic(ctx, src, srcSize, 6); return; + case 7 : + ZSTD_compressBlock_fast_generic(ctx, src, srcSize, 7); return; + } +} + + +static void ZSTD_compressBlock_fast_extDict_generic(ZSTD_CCtx* ctx, + const void* src, size_t srcSize, + const U32 mls) +{ + U32* hashTable = ctx->hashTable; + const U32 hBits = ctx->params.cParams.hashLog; + seqStore_t* seqStorePtr = &(ctx->seqStore); + const BYTE* const base = ctx->base; + const BYTE* const dictBase = ctx->dictBase; + const BYTE* const istart = (const BYTE*)src; + const BYTE* ip = istart; + const BYTE* anchor = istart; + const U32 lowLimit = ctx->lowLimit; + const BYTE* const dictStart = dictBase + lowLimit; + const U32 dictLimit = ctx->dictLimit; + const BYTE* const lowPrefixPtr = base + dictLimit; + const BYTE* const dictEnd = dictBase + dictLimit; + const BYTE* const iend = istart + srcSize; + const BYTE* const ilimit = iend - 8; + + U32 offset_2=REPCODE_STARTVALUE, offset_1=REPCODE_STARTVALUE; + + + /* init */ + ZSTD_resetSeqStore(seqStorePtr); + /* skip first position to avoid read overflow during repcode match check */ + hashTable[ZSTD_hashPtr(ip+0, hBits, mls)] = (U32)(ip-base+0); + ip += REPCODE_STARTVALUE; + + /* Main Search Loop */ + while (ip < ilimit) { /* < instead of <=, because (ip+1) */ + const size_t h = ZSTD_hashPtr(ip, hBits, mls); + const U32 matchIndex = hashTable[h]; + const BYTE* matchBase = matchIndex < dictLimit ? dictBase : base; + const BYTE* match = matchBase + matchIndex; + const U32 current = (U32)(ip-base); + const U32 repIndex = current + 1 - offset_1; + const BYTE* repBase = repIndex < dictLimit ? dictBase : base; + const BYTE* repMatch = repBase + repIndex; + size_t mlCode; + U32 offset; + hashTable[h] = current; /* update hash table */ + + if ( ((repIndex >= dictLimit) || (repIndex <= dictLimit-4)) + && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { + const BYTE* repMatchEnd = repIndex < dictLimit ? dictEnd : iend; + mlCode = ZSTD_count_2segments(ip+1+EQUAL_READ32, repMatch+EQUAL_READ32, iend, repMatchEnd, lowPrefixPtr) + EQUAL_READ32; + ip++; + ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, 0, mlCode-MINMATCH); + } else { + if ( (matchIndex < lowLimit) || + (MEM_read32(match) != MEM_read32(ip)) ) { + ip += ((ip-anchor) >> g_searchStrength) + 1; + continue; + } + { const BYTE* matchEnd = matchIndex < dictLimit ? dictEnd : iend; + const BYTE* lowMatchPtr = matchIndex < dictLimit ? 
dictStart : lowPrefixPtr; + mlCode = ZSTD_count_2segments(ip+EQUAL_READ32, match+EQUAL_READ32, iend, matchEnd, lowPrefixPtr) + EQUAL_READ32; + while ((ip>anchor) && (match>lowMatchPtr) && (ip[-1] == match[-1])) { ip--; match--; mlCode++; } /* catch up */ + offset = current - matchIndex; + offset_2 = offset_1; + offset_1 = offset; + ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mlCode-MINMATCH); + } } + + /* found a match : store it */ + ip += mlCode; + anchor = ip; + + if (ip <= ilimit) { + /* Fill Table */ + hashTable[ZSTD_hashPtr(base+current+2, hBits, mls)] = current+2; + hashTable[ZSTD_hashPtr(ip-2, hBits, mls)] = (U32)(ip-2-base); + /* check immediate repcode */ + while (ip <= ilimit) { + U32 const current2 = (U32)(ip-base); + U32 const repIndex2 = current2 - offset_2; + const BYTE* repMatch2 = repIndex2 < dictLimit ? dictBase + repIndex2 : base + repIndex2; + if ( ((repIndex2 <= dictLimit-4) || (repIndex2 >= dictLimit)) + && (MEM_read32(repMatch2) == MEM_read32(ip)) ) { + const BYTE* const repEnd2 = repIndex2 < dictLimit ? dictEnd : iend; + size_t repLength2 = ZSTD_count_2segments(ip+EQUAL_READ32, repMatch2+EQUAL_READ32, iend, repEnd2, lowPrefixPtr) + EQUAL_READ32; + U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */ + ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, repLength2-MINMATCH); + hashTable[ZSTD_hashPtr(ip, hBits, mls)] = current2; + ip += repLength2; + anchor = ip; + continue; + } + break; + } } } + + /* Last Literals */ + { size_t const lastLLSize = iend - anchor; + memcpy(seqStorePtr->lit, anchor, lastLLSize); + seqStorePtr->lit += lastLLSize; + } +} + + +static void ZSTD_compressBlock_fast_extDict(ZSTD_CCtx* ctx, + const void* src, size_t srcSize) +{ + const U32 mls = ctx->params.cParams.searchLength; + switch(mls) + { + default: + case 4 : + ZSTD_compressBlock_fast_extDict_generic(ctx, src, srcSize, 4); return; + case 5 : + ZSTD_compressBlock_fast_extDict_generic(ctx, src, srcSize, 5); return; + case 6 : + ZSTD_compressBlock_fast_extDict_generic(ctx, src, srcSize, 6); return; + case 7 : + ZSTD_compressBlock_fast_extDict_generic(ctx, src, srcSize, 7); return; + } +} + + + + +/*-************************************* +* Binary Tree search +***************************************/ +/** ZSTD_insertBt1() : add one or multiple positions to tree. +* ip : assumed <= iend-8 . +* @return : nb of positions added */ +static U32 ZSTD_insertBt1(ZSTD_CCtx* zc, const BYTE* const ip, const U32 mls, const BYTE* const iend, U32 nbCompares, + U32 extDict) +{ + U32* const hashTable = zc->hashTable; + const U32 hashLog = zc->params.cParams.hashLog; + const size_t h = ZSTD_hashPtr(ip, hashLog, mls); + U32* const bt = zc->chainTable; + const U32 btLog = zc->params.cParams.chainLog - 1; + const U32 btMask= (1 << btLog) - 1; + U32 matchIndex = hashTable[h]; + size_t commonLengthSmaller=0, commonLengthLarger=0; + const BYTE* const base = zc->base; + const BYTE* const dictBase = zc->dictBase; + const U32 dictLimit = zc->dictLimit; + const BYTE* const dictEnd = dictBase + dictLimit; + const BYTE* const prefixStart = base + dictLimit; + const BYTE* match = base + matchIndex; + const U32 current = (U32)(ip-base); + const U32 btLow = btMask >= current ? 
0 : current - btMask; + U32* smallerPtr = bt + 2*(current&btMask); + U32* largerPtr = smallerPtr + 1; + U32 dummy32; /* to be nullified at the end */ + const U32 windowLow = zc->lowLimit; + U32 matchEndIdx = current+8; + size_t bestLength = 8; + U32 predictedSmall = *(bt + 2*((current-1)&btMask) + 0); + U32 predictedLarge = *(bt + 2*((current-1)&btMask) + 1); + predictedSmall += (predictedSmall>0); + predictedLarge += (predictedLarge>0); + + hashTable[h] = current; /* Update Hash Table */ + + while (nbCompares-- && (matchIndex > windowLow)) { + U32* nextPtr = bt + 2*(matchIndex & btMask); + size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ +#if 0 /* note : can create issues when hlog small <= 11 */ + const U32* predictPtr = bt + 2*((matchIndex-1) & btMask); /* written this way, as bt is a roll buffer */ + if (matchIndex == predictedSmall) { + /* no need to check length, result known */ + *smallerPtr = matchIndex; + if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */ + smallerPtr = nextPtr+1; /* new "smaller" => larger of match */ + matchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */ + predictedSmall = predictPtr[1] + (predictPtr[1]>0); + continue; + } + if (matchIndex == predictedLarge) { + *largerPtr = matchIndex; + if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */ + largerPtr = nextPtr; + matchIndex = nextPtr[0]; + predictedLarge = predictPtr[0] + (predictPtr[0]>0); + continue; + } +#endif + if ((!extDict) || (matchIndex+matchLength >= dictLimit)) { + match = base + matchIndex; + if (match[matchLength] == ip[matchLength]) + matchLength += ZSTD_count(ip+matchLength+1, match+matchLength+1, iend) +1; + } else { + match = dictBase + matchIndex; + matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart); + if (matchIndex+matchLength >= dictLimit) + match = base + matchIndex; /* to prepare for next usage of match[matchLength] */ + } + + if (matchLength > bestLength) { + bestLength = matchLength; + if (matchLength > matchEndIdx - matchIndex) + matchEndIdx = matchIndex + (U32)matchLength; + } + + if (ip+matchLength == iend) /* equal : no way to know if inf or sup */ + break; /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt the tree */ + + if (match[matchLength] < ip[matchLength]) { /* necessarily within correct buffer */ + /* match is smaller than current */ + *smallerPtr = matchIndex; /* update smaller idx */ + commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */ + if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */ + smallerPtr = nextPtr+1; /* new "smaller" => larger of match */ + matchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */ + } else { + /* match is larger than current */ + *largerPtr = matchIndex; + commonLengthLarger = matchLength; + if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */ + largerPtr = nextPtr; + matchIndex = nextPtr[0]; + } } + + *smallerPtr = *largerPtr = 0; + if (bestLength > 384) return MIN(192, (U32)(bestLength - 384)); + if (matchEndIdx > current + 8) return matchEndIdx - current - 8; + return 1; +} + + +static size_t ZSTD_insertBtAndFindBestMatch ( + ZSTD_CCtx* zc, + const BYTE* const ip, const BYTE* const iend, + 
size_t* offsetPtr, + U32 nbCompares, const U32 mls, + U32 extDict) +{ + U32* const hashTable = zc->hashTable; + const U32 hashLog = zc->params.cParams.hashLog; + const size_t h = ZSTD_hashPtr(ip, hashLog, mls); + U32* const bt = zc->chainTable; + const U32 btLog = zc->params.cParams.chainLog - 1; + const U32 btMask= (1 << btLog) - 1; + U32 matchIndex = hashTable[h]; + size_t commonLengthSmaller=0, commonLengthLarger=0; + const BYTE* const base = zc->base; + const BYTE* const dictBase = zc->dictBase; + const U32 dictLimit = zc->dictLimit; + const BYTE* const dictEnd = dictBase + dictLimit; + const BYTE* const prefixStart = base + dictLimit; + const U32 current = (U32)(ip-base); + const U32 btLow = btMask >= current ? 0 : current - btMask; + const U32 windowLow = zc->lowLimit; + U32* smallerPtr = bt + 2*(current&btMask); + U32* largerPtr = bt + 2*(current&btMask) + 1; + U32 matchEndIdx = current+8; + U32 dummy32; /* to be nullified at the end */ + size_t bestLength = 0; + + hashTable[h] = current; /* Update Hash Table */ + + while (nbCompares-- && (matchIndex > windowLow)) { + U32* nextPtr = bt + 2*(matchIndex & btMask); + size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ + const BYTE* match; + + if ((!extDict) || (matchIndex+matchLength >= dictLimit)) { + match = base + matchIndex; + if (match[matchLength] == ip[matchLength]) + matchLength += ZSTD_count(ip+matchLength+1, match+matchLength+1, iend) +1; + } else { + match = dictBase + matchIndex; + matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart); + if (matchIndex+matchLength >= dictLimit) + match = base + matchIndex; /* to prepare for next usage of match[matchLength] */ + } + + if (matchLength > bestLength) { + if (matchLength > matchEndIdx - matchIndex) + matchEndIdx = matchIndex + (U32)matchLength; + if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit(current-matchIndex+1) - ZSTD_highbit((U32)offsetPtr[0]+1)) ) + bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + current - matchIndex; + if (ip+matchLength == iend) /* equal : no way to know if inf or sup */ + break; /* drop, to guarantee consistency (miss a little bit of compression) */ + } + + if (match[matchLength] < ip[matchLength]) { + /* match is smaller than current */ + *smallerPtr = matchIndex; /* update smaller idx */ + commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */ + if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */ + smallerPtr = nextPtr+1; /* new "smaller" => larger of match */ + matchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */ + } else { + /* match is larger than current */ + *largerPtr = matchIndex; + commonLengthLarger = matchLength; + if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */ + largerPtr = nextPtr; + matchIndex = nextPtr[0]; + } } + + *smallerPtr = *largerPtr = 0; + + zc->nextToUpdate = (matchEndIdx > current + 8) ? 
matchEndIdx - 8 : current+1; + return bestLength; +} + + +static void ZSTD_updateTree(ZSTD_CCtx* zc, const BYTE* const ip, const BYTE* const iend, const U32 nbCompares, const U32 mls) +{ + const BYTE* const base = zc->base; + const U32 target = (U32)(ip - base); + U32 idx = zc->nextToUpdate; + + while(idx < target) + idx += ZSTD_insertBt1(zc, base+idx, mls, iend, nbCompares, 0); +} + +/** ZSTD_BtFindBestMatch() : Tree updater, providing best match */ +static size_t ZSTD_BtFindBestMatch ( + ZSTD_CCtx* zc, + const BYTE* const ip, const BYTE* const iLimit, + size_t* offsetPtr, + const U32 maxNbAttempts, const U32 mls) +{ + if (ip < zc->base + zc->nextToUpdate) return 0; /* skipped area */ + ZSTD_updateTree(zc, ip, iLimit, maxNbAttempts, mls); + return ZSTD_insertBtAndFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, mls, 0); +} + + +static size_t ZSTD_BtFindBestMatch_selectMLS ( + ZSTD_CCtx* zc, /* Index table will be updated */ + const BYTE* ip, const BYTE* const iLimit, + size_t* offsetPtr, + const U32 maxNbAttempts, const U32 matchLengthSearch) +{ + switch(matchLengthSearch) + { + default : + case 4 : return ZSTD_BtFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4); + case 5 : return ZSTD_BtFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5); + case 6 : return ZSTD_BtFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6); + } +} + + +static void ZSTD_updateTree_extDict(ZSTD_CCtx* zc, const BYTE* const ip, const BYTE* const iend, const U32 nbCompares, const U32 mls) +{ + const BYTE* const base = zc->base; + const U32 target = (U32)(ip - base); + U32 idx = zc->nextToUpdate; + + while (idx < target) idx += ZSTD_insertBt1(zc, base+idx, mls, iend, nbCompares, 1); +} + + + +/** Tree updater, providing best match */ +static size_t ZSTD_BtFindBestMatch_extDict ( + ZSTD_CCtx* zc, + const BYTE* const ip, const BYTE* const iLimit, + size_t* offsetPtr, + const U32 maxNbAttempts, const U32 mls) +{ + if (ip < zc->base + zc->nextToUpdate) return 0; /* skipped area */ + ZSTD_updateTree_extDict(zc, ip, iLimit, maxNbAttempts, mls); + return ZSTD_insertBtAndFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, mls, 1); +} + + +static size_t ZSTD_BtFindBestMatch_selectMLS_extDict ( + ZSTD_CCtx* zc, /* Index table will be updated */ + const BYTE* ip, const BYTE* const iLimit, + size_t* offsetPtr, + const U32 maxNbAttempts, const U32 matchLengthSearch) +{ + switch(matchLengthSearch) + { + default : + case 4 : return ZSTD_BtFindBestMatch_extDict(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4); + case 5 : return ZSTD_BtFindBestMatch_extDict(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5); + case 6 : return ZSTD_BtFindBestMatch_extDict(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6); + } +} + + + +/* *********************** +* Hash Chain +*************************/ + +#define NEXT_IN_CHAIN(d, mask) chainTable[(d) & mask] + + +/* Update chains up to ip (excluded) + Assumption : always within prefix (ie. 
not within extDict) */ +FORCE_INLINE +U32 ZSTD_insertAndFindFirstIndex (ZSTD_CCtx* zc, const BYTE* ip, U32 mls) +{ + U32* const hashTable = zc->hashTable; + const U32 hashLog = zc->params.cParams.hashLog; + U32* const chainTable = zc->chainTable; + const U32 chainMask = (1 << zc->params.cParams.chainLog) - 1; + const BYTE* const base = zc->base; + const U32 target = (U32)(ip - base); + U32 idx = zc->nextToUpdate; + + while(idx < target) { + size_t const h = ZSTD_hashPtr(base+idx, hashLog, mls); + NEXT_IN_CHAIN(idx, chainMask) = hashTable[h]; + hashTable[h] = idx; + idx++; + } + + zc->nextToUpdate = target; + return hashTable[ZSTD_hashPtr(ip, hashLog, mls)]; +} + + + +FORCE_INLINE /* inlining is important to hardwire a hot branch (template emulation) */ +size_t ZSTD_HcFindBestMatch_generic ( + ZSTD_CCtx* zc, /* Index table will be updated */ + const BYTE* const ip, const BYTE* const iLimit, + size_t* offsetPtr, + const U32 maxNbAttempts, const U32 mls, const U32 extDict) +{ + U32* const chainTable = zc->chainTable; + const U32 chainSize = (1 << zc->params.cParams.chainLog); + const U32 chainMask = chainSize-1; + const BYTE* const base = zc->base; + const BYTE* const dictBase = zc->dictBase; + const U32 dictLimit = zc->dictLimit; + const BYTE* const prefixStart = base + dictLimit; + const BYTE* const dictEnd = dictBase + dictLimit; + const U32 lowLimit = zc->lowLimit; + const U32 current = (U32)(ip-base); + const U32 minChain = current > chainSize ? current - chainSize : 0; + int nbAttempts=maxNbAttempts; + size_t ml=EQUAL_READ32-1; + + /* HC4 match finder */ + U32 matchIndex = ZSTD_insertAndFindFirstIndex (zc, ip, mls); + + for ( ; (matchIndex>lowLimit) && (nbAttempts) ; nbAttempts--) { + const BYTE* match; + size_t currentMl=0; + if ((!extDict) || matchIndex >= dictLimit) { + match = base + matchIndex; + if (match[ml] == ip[ml]) /* potentially better */ + currentMl = ZSTD_count(ip, match, iLimit); + } else { + match = dictBase + matchIndex; + if (MEM_read32(match) == MEM_read32(ip)) /* assumption : matchIndex <= dictLimit-4 (by table construction) */ + currentMl = ZSTD_count_2segments(ip+EQUAL_READ32, match+EQUAL_READ32, iLimit, dictEnd, prefixStart) + EQUAL_READ32; + } + + /* save best solution */ + if (currentMl > ml) { ml = currentMl; *offsetPtr = ZSTD_REP_MOVE + current - matchIndex; if (ip+currentMl == iLimit) break; /* best possible, and avoid read overflow*/ } + + if (matchIndex <= minChain) break; + matchIndex = NEXT_IN_CHAIN(matchIndex, chainMask); + } + + return ml; +} + + +FORCE_INLINE size_t ZSTD_HcFindBestMatch_selectMLS ( + ZSTD_CCtx* zc, + const BYTE* ip, const BYTE* const iLimit, + size_t* offsetPtr, + const U32 maxNbAttempts, const U32 matchLengthSearch) +{ + switch(matchLengthSearch) + { + default : + case 4 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4, 0); + case 5 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5, 0); + case 6 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6, 0); + } +} + + +FORCE_INLINE size_t ZSTD_HcFindBestMatch_extDict_selectMLS ( + ZSTD_CCtx* zc, + const BYTE* ip, const BYTE* const iLimit, + size_t* offsetPtr, + const U32 maxNbAttempts, const U32 matchLengthSearch) +{ + switch(matchLengthSearch) + { + default : + case 4 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4, 1); + case 5 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5, 1); + case 6 : return 
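+/* Editor's note (illustrative sketch only, not library code): the hash
+   chain is a ring buffer threaded through chainTable, newest position
+   first. A minimal walk with toy names looks like:
+
+       U32 m = hashTable[ZSTD_hashPtr(ip, hashLog, mls)];  // newest candidate
+       while (m > lowLimit && nbAttempts--)
+           m = NEXT_IN_CHAIN(m, chainMask);                // next older candidate
+
+   Positions older than current - chainSize have been overwritten in the
+   ring, hence the minChain early exit in ZSTD_HcFindBestMatch_generic. */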
ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6, 1); + } +} + + +/* ******************************* +* Common parser - lazy strategy +*********************************/ +FORCE_INLINE +void ZSTD_compressBlock_lazy_generic(ZSTD_CCtx* ctx, + const void* src, size_t srcSize, + const U32 searchMethod, const U32 depth) +{ + seqStore_t* seqStorePtr = &(ctx->seqStore); + const BYTE* const istart = (const BYTE*)src; + const BYTE* ip = istart; + const BYTE* anchor = istart; + const BYTE* const iend = istart + srcSize; + const BYTE* const ilimit = iend - 8; + const BYTE* const base = ctx->base + ctx->dictLimit; + + U32 const maxSearches = 1 << ctx->params.cParams.searchLog; + U32 const mls = ctx->params.cParams.searchLength; + + typedef size_t (*searchMax_f)(ZSTD_CCtx* zc, const BYTE* ip, const BYTE* iLimit, + size_t* offsetPtr, + U32 maxNbAttempts, U32 matchLengthSearch); + searchMax_f searchMax = searchMethod ? ZSTD_BtFindBestMatch_selectMLS : ZSTD_HcFindBestMatch_selectMLS; + + /* init */ + U32 rep[ZSTD_REP_INIT]; + { U32 i ; for (i=0; i<ZSTD_REP_INIT; i++) rep[i]=REPCODE_STARTVALUE; } + + ctx->nextToUpdate3 = ctx->nextToUpdate; + ZSTD_resetSeqStore(seqStorePtr); + if ((ip-base) < REPCODE_STARTVALUE) ip = base + REPCODE_STARTVALUE; + + /* Match Loop */ + while (ip < ilimit) { + size_t matchLength=0; + size_t offset=0; + const BYTE* start=ip+1; + + /* check repCode */ + if (MEM_read32(ip+1) == MEM_read32(ip+1 - rep[0])) { + /* repcode : we take it */ + matchLength = ZSTD_count(ip+1+EQUAL_READ32, ip+1+EQUAL_READ32-rep[0], iend) + EQUAL_READ32; + if (depth==0) goto _storeSequence; + } + + /* first search (depth 0) */ + { size_t offsetFound = 99999999; + size_t const ml2 = searchMax(ctx, ip, iend, &offsetFound, maxSearches, mls); + if (ml2 > matchLength) + matchLength = ml2, start = ip, offset=offsetFound; + } + + if (matchLength < EQUAL_READ32) { + ip += ((ip-anchor) >> g_searchStrength) + 1; /* jump faster over incompressible sections */ + continue; + } + + /* let's try to find a better solution */ + if (depth>=1) + while (ip<ilimit) { + ip ++; + if ((offset) && (MEM_read32(ip) == MEM_read32(ip - rep[0]))) { + size_t const mlRep = ZSTD_count(ip+EQUAL_READ32, ip+EQUAL_READ32-rep[0], iend) + EQUAL_READ32; + int const gain2 = (int)(mlRep * 3); + int const gain1 = (int)(matchLength*3 - ZSTD_highbit((U32)offset+1) + 1); + if ((mlRep >= EQUAL_READ32) && (gain2 > gain1)) + matchLength = mlRep, offset = 0, start = ip; + } + { size_t offset2=99999999; + size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls); + int const gain2 = (int)(ml2*4 - ZSTD_highbit((U32)offset2+1)); /* raw approx */ + int const gain1 = (int)(matchLength*4 - ZSTD_highbit((U32)offset+1) + 4); + if ((ml2 >= EQUAL_READ32) && (gain2 > gain1)) { + matchLength = ml2, offset = offset2, start = ip; + continue; /* search a better one */ + } } + + /* let's find an even better one */ + if ((depth==2) && (ip<ilimit)) { + ip ++; + if ((offset) && (MEM_read32(ip) == MEM_read32(ip - rep[0]))) { + size_t const ml2 = ZSTD_count(ip+EQUAL_READ32, ip+EQUAL_READ32-rep[0], iend) + EQUAL_READ32; + int const gain2 = (int)(ml2 * 4); + int const gain1 = (int)(matchLength*4 - ZSTD_highbit((U32)offset+1) + 1); + if ((ml2 >= EQUAL_READ32) && (gain2 > gain1)) + matchLength = ml2, offset = 0, start = ip; + } + { size_t offset2=99999999; + size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls); + int const gain2 = (int)(ml2*4 - ZSTD_highbit((U32)offset2+1)); /* raw approx */ + int const gain1 = 
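+/* Editor's note, a worked example of this gain heuristic (numbers are
+   illustrative only): with matchLength=6 at offset=64,
+       gain1 = 6*4 - ZSTD_highbit(65) + 7 = 24 - 6 + 7 = 25;
+   a candidate ml2=8 at offset2=1024 scores
+       gain2 = 8*4 - ZSTD_highbit(1025) = 32 - 10 = 22 < 25,
+   so the longer but more distant match is rejected: roughly 4 price units
+   per extra byte, minus a log2 penalty for larger offsets, plus a constant
+   bonus for keeping the match already found. */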
(int)(matchLength*4 - ZSTD_highbit((U32)offset+1) + 7); + if ((ml2 >= EQUAL_READ32) && (gain2 > gain1)) { + matchLength = ml2, offset = offset2, start = ip; + continue; + } } } + break; /* nothing found : store previous solution */ + } + + /* catch up */ + if (offset) { + while ((start>anchor) && (start>base+offset-ZSTD_REP_MOVE) && (start[-1] == start[-1-offset+ZSTD_REP_MOVE])) /* only search for offset within prefix */ + { start--; matchLength++; } + rep[1] = rep[0]; rep[0] = (U32)(offset - ZSTD_REP_MOVE); + } + + /* store sequence */ +_storeSequence: + { size_t const litLength = start - anchor; + ZSTD_storeSeq(seqStorePtr, litLength, anchor, offset, matchLength-MINMATCH); + anchor = ip = start + matchLength; + } + + /* check immediate repcode */ + while ( (ip <= ilimit) + && (MEM_read32(ip) == MEM_read32(ip - rep[1])) ) { + /* store sequence */ + matchLength = ZSTD_count(ip+EQUAL_READ32, ip+EQUAL_READ32-rep[1], iend) + EQUAL_READ32; + offset = rep[1]; rep[1] = rep[0]; rep[0] = (U32)offset; /* swap repcodes */ + ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, matchLength-MINMATCH); + ip += matchLength; + anchor = ip; + continue; /* faster when present ... (?) */ + } } + + /* Last Literals */ + { size_t const lastLLSize = iend - anchor; + memcpy(seqStorePtr->lit, anchor, lastLLSize); + seqStorePtr->lit += lastLLSize; + ZSTD_statsUpdatePrices(&seqStorePtr->stats, lastLLSize, anchor, 0, 0); + } +} + + +static void ZSTD_compressBlock_btlazy2(ZSTD_CCtx* ctx, const void* src, size_t srcSize) +{ + ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 1, 2); +} + +static void ZSTD_compressBlock_lazy2(ZSTD_CCtx* ctx, const void* src, size_t srcSize) +{ + ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 0, 2); +} + +static void ZSTD_compressBlock_lazy(ZSTD_CCtx* ctx, const void* src, size_t srcSize) +{ + ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 0, 1); +} + +static void ZSTD_compressBlock_greedy(ZSTD_CCtx* ctx, const void* src, size_t srcSize) +{ + ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 0, 0); +} + + +FORCE_INLINE +void ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx* ctx, + const void* src, size_t srcSize, + const U32 searchMethod, const U32 depth) +{ + seqStore_t* seqStorePtr = &(ctx->seqStore); + const BYTE* const istart = (const BYTE*)src; + const BYTE* ip = istart; + const BYTE* anchor = istart; + const BYTE* const iend = istart + srcSize; + const BYTE* const ilimit = iend - 8; + const BYTE* const base = ctx->base; + const U32 dictLimit = ctx->dictLimit; + const BYTE* const prefixStart = base + dictLimit; + const BYTE* const dictBase = ctx->dictBase; + const BYTE* const dictEnd = dictBase + dictLimit; + const BYTE* const dictStart = dictBase + ctx->lowLimit; + + const U32 maxSearches = 1 << ctx->params.cParams.searchLog; + const U32 mls = ctx->params.cParams.searchLength; + + typedef size_t (*searchMax_f)(ZSTD_CCtx* zc, const BYTE* ip, const BYTE* iLimit, + size_t* offsetPtr, + U32 maxNbAttempts, U32 matchLengthSearch); + searchMax_f searchMax = searchMethod ? 
ZSTD_BtFindBestMatch_selectMLS_extDict : ZSTD_HcFindBestMatch_extDict_selectMLS; + + /* init */ + U32 rep[ZSTD_REP_INIT]; + { U32 i; for (i=0; i<ZSTD_REP_INIT; i++) rep[i]=REPCODE_STARTVALUE; } + + ctx->nextToUpdate3 = ctx->nextToUpdate; + ZSTD_resetSeqStore(seqStorePtr); + if ((ip - prefixStart) < REPCODE_STARTVALUE) ip += REPCODE_STARTVALUE; + + /* Match Loop */ + while (ip < ilimit) { + size_t matchLength=0; + size_t offset=0; + const BYTE* start=ip+1; + U32 current = (U32)(ip-base); + + /* check repCode */ + { + const U32 repIndex = (U32)(current+1 - rep[0]); + const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; + const BYTE* const repMatch = repBase + repIndex; + if ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow */ + if (MEM_read32(ip+1) == MEM_read32(repMatch)) { + /* repcode detected we should take it */ + const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; + matchLength = ZSTD_count_2segments(ip+1+EQUAL_READ32, repMatch+EQUAL_READ32, iend, repEnd, prefixStart) + EQUAL_READ32; + if (depth==0) goto _storeSequence; + } } + + /* first search (depth 0) */ + { size_t offsetFound = 99999999; + size_t const ml2 = searchMax(ctx, ip, iend, &offsetFound, maxSearches, mls); + if (ml2 > matchLength) + matchLength = ml2, start = ip, offset=offsetFound; + } + + if (matchLength < EQUAL_READ32) { + ip += ((ip-anchor) >> g_searchStrength) + 1; /* jump faster over incompressible sections */ + continue; + } + + /* let's try to find a better solution */ + if (depth>=1) + while (ip<ilimit) { + ip ++; + current++; + /* check repCode */ + if (offset) { + const U32 repIndex = (U32)(current - rep[0]); + const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; + const BYTE* const repMatch = repBase + repIndex; + if ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow */ + if (MEM_read32(ip) == MEM_read32(repMatch)) { + /* repcode detected */ + const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; + size_t const repLength = ZSTD_count_2segments(ip+EQUAL_READ32, repMatch+EQUAL_READ32, iend, repEnd, prefixStart) + EQUAL_READ32; + int const gain2 = (int)(repLength * 3); + int const gain1 = (int)(matchLength*3 - ZSTD_highbit((U32)offset+1) + 1); + if ((repLength >= EQUAL_READ32) && (gain2 > gain1)) + matchLength = repLength, offset = 0, start = ip; + } } + + /* search match, depth 1 */ + { size_t offset2=99999999; + size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls); + int const gain2 = (int)(ml2*4 - ZSTD_highbit((U32)offset2+1)); /* raw approx */ + int const gain1 = (int)(matchLength*4 - ZSTD_highbit((U32)offset+1) + 4); + if ((ml2 >= EQUAL_READ32) && (gain2 > gain1)) { + matchLength = ml2, offset = offset2, start = ip; + continue; /* search a better one */ + } } + + /* let's find an even better one */ + if ((depth==2) && (ip<ilimit)) { + ip ++; + current++; + /* check repCode */ + if (offset) { + const U32 repIndex = (U32)(current - rep[0]); + const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; + const BYTE* const repMatch = repBase + repIndex; + if ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow */ + if (MEM_read32(ip) == MEM_read32(repMatch)) { + /* repcode detected */ + const BYTE* const repEnd = repIndex < dictLimit ? 
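+/* Editor's note (our reading of the two-segment match): when the repcode
+   candidate lives in the old segment (repIndex < dictLimit) it is bounded
+   by dictEnd, and ZSTD_count_2segments() splits the comparison in two:
+
+       len  = match length counted inside [repMatch, repEnd)
+       len += continuation counted from prefixStart once repEnd is reached
+
+   so a repeated sequence may straddle the dictionary/prefix boundary. */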
dictEnd : iend; + size_t repLength = ZSTD_count_2segments(ip+EQUAL_READ32, repMatch+EQUAL_READ32, iend, repEnd, prefixStart) + EQUAL_READ32; + int gain2 = (int)(repLength * 4); + int gain1 = (int)(matchLength*4 - ZSTD_highbit((U32)offset+1) + 1); + if ((repLength >= EQUAL_READ32) && (gain2 > gain1)) + matchLength = repLength, offset = 0, start = ip; + } } + + /* search match, depth 2 */ + { size_t offset2=99999999; + size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls); + int const gain2 = (int)(ml2*4 - ZSTD_highbit((U32)offset2+1)); /* raw approx */ + int const gain1 = (int)(matchLength*4 - ZSTD_highbit((U32)offset+1) + 7); + if ((ml2 >= EQUAL_READ32) && (gain2 > gain1)) { + matchLength = ml2, offset = offset2, start = ip; + continue; + } } } + break; /* nothing found : store previous solution */ + } + + /* catch up */ + if (offset) { + U32 matchIndex = (U32)((start-base) - (offset - ZSTD_REP_MOVE)); + const BYTE* match = (matchIndex < dictLimit) ? dictBase + matchIndex : base + matchIndex; + const BYTE* const mStart = (matchIndex < dictLimit) ? dictStart : prefixStart; + while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; } /* catch up */ + rep[1] = rep[0]; rep[0] = (U32)(offset - ZSTD_REP_MOVE); + } + + /* store sequence */ +_storeSequence: + { size_t const litLength = start - anchor; + ZSTD_storeSeq(seqStorePtr, litLength, anchor, offset, matchLength-MINMATCH); + anchor = ip = start + matchLength; + } + + /* check immediate repcode */ + while (ip <= ilimit) { + const U32 repIndex = (U32)((ip-base) - rep[1]); + const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; + const BYTE* const repMatch = repBase + repIndex; + if ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow */ + if (MEM_read32(ip) == MEM_read32(repMatch)) { + /* repcode detected we should take it */ + const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; + matchLength = ZSTD_count_2segments(ip+EQUAL_READ32, repMatch+EQUAL_READ32, iend, repEnd, prefixStart) + EQUAL_READ32; + offset = rep[1]; rep[1] = rep[0]; rep[0] = (U32)offset; /* swap offset history */ + ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, matchLength-MINMATCH); + ip += matchLength; + anchor = ip; + continue; /* faster when present ... (?) 
*/ + } + break; + } } + + /* Last Literals */ + { size_t const lastLLSize = iend - anchor; + memcpy(seqStorePtr->lit, anchor, lastLLSize); + seqStorePtr->lit += lastLLSize; + } +} + + +void ZSTD_compressBlock_greedy_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize) +{ + ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 0, 0); +} + +static void ZSTD_compressBlock_lazy_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize) +{ + ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 0, 1); +} + +static void ZSTD_compressBlock_lazy2_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize) +{ + ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 0, 2); +} + +static void ZSTD_compressBlock_btlazy2_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize) +{ + ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 1, 2); +} + + + +/* The optimal parser */ +#include "zstd_opt.h" + +static void ZSTD_compressBlock_btopt(ZSTD_CCtx* ctx, const void* src, size_t srcSize) +{ + ZSTD_compressBlock_opt_generic(ctx, src, srcSize); +} + +static void ZSTD_compressBlock_btopt_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize) +{ + ZSTD_compressBlock_opt_extDict_generic(ctx, src, srcSize); +} + + +typedef void (*ZSTD_blockCompressor) (ZSTD_CCtx* ctx, const void* src, size_t srcSize); + +static ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, int extDict) +{ + static const ZSTD_blockCompressor blockCompressor[2][6] = { +#if 1 + { ZSTD_compressBlock_fast, ZSTD_compressBlock_greedy, ZSTD_compressBlock_lazy, ZSTD_compressBlock_lazy2, ZSTD_compressBlock_btlazy2, ZSTD_compressBlock_btopt }, +#else + { ZSTD_compressBlock_fast_extDict, ZSTD_compressBlock_greedy_extDict, ZSTD_compressBlock_lazy_extDict,ZSTD_compressBlock_lazy2_extDict, ZSTD_compressBlock_btlazy2_extDict, ZSTD_compressBlock_btopt_extDict }, +#endif + { ZSTD_compressBlock_fast_extDict, ZSTD_compressBlock_greedy_extDict, ZSTD_compressBlock_lazy_extDict,ZSTD_compressBlock_lazy2_extDict, ZSTD_compressBlock_btlazy2_extDict, ZSTD_compressBlock_btopt_extDict } + }; + + return blockCompressor[extDict][(U32)strat]; +} + + +static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, void* dst, size_t dstCapacity, const void* src, size_t srcSize) +{ + ZSTD_blockCompressor blockCompressor = ZSTD_selectBlockCompressor(zc->params.cParams.strategy, zc->lowLimit < zc->dictLimit); + if (srcSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1) return 0; /* don't even attempt compression below a certain srcSize */ + blockCompressor(zc, src, srcSize); + return ZSTD_compressSequences(zc, dst, dstCapacity, srcSize); +} + + + + +static size_t ZSTD_compress_generic (ZSTD_CCtx* zc, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize) +{ + size_t blockSize = zc->blockSize; + size_t remaining = srcSize; + const BYTE* ip = (const BYTE*)src; + BYTE* const ostart = (BYTE*)dst; + BYTE* op = ostart; + const U32 maxDist = 1 << zc->params.cParams.windowLog; + ZSTD_stats_t* stats = &zc->seqStore.stats; + + ZSTD_statsInit(stats); + + while (remaining) { + size_t cSize; + ZSTD_statsResetFreqs(stats); + + if (dstCapacity < ZSTD_blockHeaderSize + MIN_CBLOCK_SIZE) return ERROR(dstSize_tooSmall); /* not enough space to store compressed block */ + if (remaining < blockSize) blockSize = remaining; + + if ((U32)(ip+blockSize - zc->base) > zc->loadedDictEnd + maxDist) { + /* enforce maxDist */ + U32 const newLowLimit = (U32)(ip+blockSize - zc->base) - maxDist; + if (zc->lowLimit < newLowLimit) zc->lowLimit = newLowLimit; + if 
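+/* Editor's note, a hedged numeric example of the window rule enforced
+   here: with windowLog = 20, maxDist = 1 MB. Once ip + blockSize reaches
+   more than 1 MB past base (beyond loadedDictEnd), lowLimit advances so
+   that for any match the searchers may return
+
+       offset = current - matchIndex <= maxDist   // since matchIndex >= lowLimit
+
+   and the if() just below drags dictLimit along so it never lags behind
+   lowLimit. */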
(zc->dictLimit < zc->lowLimit) zc->dictLimit = zc->lowLimit; + } + + cSize = ZSTD_compressBlock_internal(zc, op+ZSTD_blockHeaderSize, dstCapacity-ZSTD_blockHeaderSize, ip, blockSize); + if (ZSTD_isError(cSize)) return cSize; + + if (cSize == 0) { /* block is not compressible */ + cSize = ZSTD_noCompressBlock(op, dstCapacity, ip, blockSize); + if (ZSTD_isError(cSize)) return cSize; + } else { + op[0] = (BYTE)(cSize>>16); + op[1] = (BYTE)(cSize>>8); + op[2] = (BYTE)cSize; + op[0] += (BYTE)(bt_compressed << 6); /* is a compressed block */ + cSize += 3; + } + + remaining -= blockSize; + dstCapacity -= cSize; + ip += blockSize; + op += cSize; + } + + ZSTD_statsPrint(stats, zc->params.cParams.searchLength); + return op-ostart; +} + + +static size_t ZSTD_writeFrameHeader(void* dst, size_t dstCapacity, + ZSTD_parameters params, U64 pledgedSrcSize) +{ BYTE* const op = (BYTE*)dst; + U32 const fcsId = params.fParams.contentSizeFlag ? + (pledgedSrcSize>0) + (pledgedSrcSize>=256) + (pledgedSrcSize>=65536+256) : /* 0-3 */ + 0; + BYTE const fdescriptor = (BYTE)((params.cParams.windowLog - ZSTD_WINDOWLOG_ABSOLUTEMIN) /* windowLog : 4 KB - 128 MB */ + | (fcsId << 6) ); + size_t const hSize = ZSTD_frameHeaderSize_min + ZSTD_fcs_fieldSize[fcsId]; + if (hSize > dstCapacity) return ERROR(dstSize_tooSmall); + + MEM_writeLE32(dst, ZSTD_MAGICNUMBER); + op[4] = fdescriptor; + switch(fcsId) + { + default: /* impossible */ + case 0 : break; + case 1 : op[5] = (BYTE)(pledgedSrcSize); break; + case 2 : MEM_writeLE16(op+5, (U16)(pledgedSrcSize-256)); break; + case 3 : MEM_writeLE64(op+5, (U64)(pledgedSrcSize)); break; + } + return hSize; +} + + +static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* zc, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + U32 frame) +{ + const BYTE* const ip = (const BYTE*) src; + size_t fhSize = 0; + + if (zc->stage==0) return ERROR(stage_wrong); + if (frame && (zc->stage==1)) { /* copy saved header */ + fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, zc->params, srcSize); + if (ZSTD_isError(fhSize)) return fhSize; + dstCapacity -= fhSize; + dst = (char*)dst + fhSize; + zc->stage = 2; + } + + /* Check if blocks follow each other */ + if (src != zc->nextSrc) { + /* not contiguous */ + size_t const delta = zc->nextSrc - ip; + zc->lowLimit = zc->dictLimit; + zc->dictLimit = (U32)(zc->nextSrc - zc->base); + zc->dictBase = zc->base; + zc->base -= delta; + zc->nextToUpdate = zc->dictLimit; + if (zc->dictLimit - zc->lowLimit < 8) zc->lowLimit = zc->dictLimit; /* too small extDict */ + } + + /* preemptive overflow correction */ + if (zc->lowLimit > (1<<30)) { + U32 const btplus = (zc->params.cParams.strategy == ZSTD_btlazy2) || (zc->params.cParams.strategy == ZSTD_btopt); + U32 const chainMask = (1 << (zc->params.cParams.chainLog - btplus)) - 1; + U32 const newLowLimit = zc->lowLimit & chainMask; /* preserve position % chainSize */ + U32 const correction = zc->lowLimit - newLowLimit; + ZSTD_reduceIndex(zc, correction); + zc->base += correction; + zc->dictBase += correction; + zc->lowLimit = newLowLimit; + zc->dictLimit -= correction; + if (zc->nextToUpdate < correction) zc->nextToUpdate = 0; + else zc->nextToUpdate -= correction; + } + + /* if input and dictionary overlap : reduce dictionary (presumed modified by input) */ + if ((ip+srcSize > zc->dictBase + zc->lowLimit) && (ip < zc->dictBase + zc->dictLimit)) { + zc->lowLimit = (U32)(ip + srcSize - zc->dictBase); + if (zc->lowLimit > zc->dictLimit) zc->lowLimit = zc->dictLimit; + } + + zc->nextSrc = ip + srcSize; + { 
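+/* Editor's note (illustrative reader, not library code): the 3-byte block
+   header written in ZSTD_compress_generic() packs type and size as a
+   big-endian 24-bit field, 2 bits of block type over 22 bits of size:
+
+       U32 const hdr   = ((U32)op[0]<<16) | ((U32)op[1]<<8) | op[2];
+       U32 const btype = hdr >> 22;        // bt_compressed was added into op[0]
+       U32 const bsize = hdr & 0x3FFFFF;   // compressed size, < 4 MB
+
+   Blocks that do not compress fall back to ZSTD_noCompressBlock() instead. */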
size_t const cSize = frame ? + ZSTD_compress_generic (zc, dst, dstCapacity, src, srcSize) : + ZSTD_compressBlock_internal (zc, dst, dstCapacity, src, srcSize); + if (ZSTD_isError(cSize)) return cSize; + return cSize + fhSize; + } +} + + +size_t ZSTD_compressContinue (ZSTD_CCtx* zc, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize) +{ + return ZSTD_compressContinue_internal(zc, dst, dstCapacity, src, srcSize, 1); +} + + +size_t ZSTD_compressBlock(ZSTD_CCtx* zc, void* dst, size_t dstCapacity, const void* src, size_t srcSize) +{ + if (srcSize > ZSTD_BLOCKSIZE_MAX) return ERROR(srcSize_wrong); + ZSTD_LOG_BLOCK("%p: ZSTD_compressBlock searchLength=%d\n", zc->base, zc->params.cParams.searchLength); + return ZSTD_compressContinue_internal(zc, dst, dstCapacity, src, srcSize, 0); +} + + +static size_t ZSTD_loadDictionaryContent(ZSTD_CCtx* zc, const void* src, size_t srcSize) +{ + const BYTE* const ip = (const BYTE*) src; + const BYTE* const iend = ip + srcSize; + + /* input becomes current prefix */ + zc->lowLimit = zc->dictLimit; + zc->dictLimit = (U32)(zc->nextSrc - zc->base); + zc->dictBase = zc->base; + zc->base += ip - zc->nextSrc; + zc->nextToUpdate = zc->dictLimit; + zc->loadedDictEnd = (U32)(iend - zc->base); + + zc->nextSrc = iend; + if (srcSize <= 8) return 0; + + switch(zc->params.cParams.strategy) + { + case ZSTD_fast: + ZSTD_fillHashTable (zc, iend, zc->params.cParams.searchLength); + break; + + case ZSTD_greedy: + case ZSTD_lazy: + case ZSTD_lazy2: + ZSTD_insertAndFindFirstIndex (zc, iend-8, zc->params.cParams.searchLength); + break; + + case ZSTD_btlazy2: + case ZSTD_btopt: + ZSTD_updateTree(zc, iend-8, iend, 1 << zc->params.cParams.searchLog, zc->params.cParams.searchLength); + break; + + default: + return ERROR(GENERIC); /* strategy doesn't exist; impossible */ + } + + zc->nextToUpdate = zc->loadedDictEnd; + return 0; +} + + +/* Dictionary format : + Magic == ZSTD_DICT_MAGIC (4 bytes) + HUF_writeCTable(256) + Dictionary content +*/ +/*! 
ZSTD_loadDictEntropyStats() : + @return : size read from dictionary */ +static size_t ZSTD_loadDictEntropyStats(ZSTD_CCtx* zc, const void* dict, size_t dictSize) +{ + /* note : magic number already checked */ + size_t offcodeHeaderSize, matchlengthHeaderSize, litlengthHeaderSize, errorCode; + short offcodeNCount[MaxOff+1]; + unsigned offcodeMaxValue = MaxOff, offcodeLog = OffFSELog; + short matchlengthNCount[MaxML+1]; + unsigned matchlengthMaxValue = MaxML, matchlengthLog = MLFSELog; + short litlengthNCount[MaxLL+1]; + unsigned litlengthMaxValue = MaxLL, litlengthLog = LLFSELog; + + size_t const hufHeaderSize = HUF_readCTable(zc->hufTable, 255, dict, dictSize); + if (HUF_isError(hufHeaderSize)) return ERROR(dictionary_corrupted); + zc->flagStaticTables = 1; + dict = (const char*)dict + hufHeaderSize; + dictSize -= hufHeaderSize; + + offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dict, dictSize); + if (FSE_isError(offcodeHeaderSize)) return ERROR(dictionary_corrupted); + errorCode = FSE_buildCTable(zc->offcodeCTable, offcodeNCount, offcodeMaxValue, offcodeLog); + if (FSE_isError(errorCode)) return ERROR(dictionary_corrupted); + dict = (const char*)dict + offcodeHeaderSize; + dictSize -= offcodeHeaderSize; + + matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dict, dictSize); + if (FSE_isError(matchlengthHeaderSize)) return ERROR(dictionary_corrupted); + errorCode = FSE_buildCTable(zc->matchlengthCTable, matchlengthNCount, matchlengthMaxValue, matchlengthLog); + if (FSE_isError(errorCode)) return ERROR(dictionary_corrupted); + dict = (const char*)dict + matchlengthHeaderSize; + dictSize -= matchlengthHeaderSize; + + litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dict, dictSize); + if (FSE_isError(litlengthHeaderSize)) return ERROR(dictionary_corrupted); + errorCode = FSE_buildCTable(zc->litlengthCTable, litlengthNCount, litlengthMaxValue, litlengthLog); + if (FSE_isError(errorCode)) return ERROR(dictionary_corrupted); + + return hufHeaderSize + offcodeHeaderSize + matchlengthHeaderSize + litlengthHeaderSize; +} + +/** ZSTD_compress_insertDictionary() : +* @return : 0, or an error code */ +static size_t ZSTD_compress_insertDictionary(ZSTD_CCtx* zc, const void* dict, size_t dictSize) +{ + if ((dict==NULL) || (dictSize<=4)) return 0; + + /* default : dict is pure content */ + if (MEM_readLE32(dict) != ZSTD_DICT_MAGIC) return ZSTD_loadDictionaryContent(zc, dict, dictSize); + + /* known magic number : dict is parsed for entropy stats and content */ + { size_t const eSize = ZSTD_loadDictEntropyStats(zc, (const char*)dict+4 /* skip magic */, dictSize-4) + 4; + if (ZSTD_isError(eSize)) return eSize; + return ZSTD_loadDictionaryContent(zc, (const char*)dict+eSize, dictSize-eSize); + } +} + + +/*! ZSTD_compressBegin_internal() : +* @return : 0, or an error code */ +static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* zc, + const void* dict, size_t dictSize, + ZSTD_parameters params, U64 pledgedSrcSize) +{ + { U32 const hashLog3 = (pledgedSrcSize || pledgedSrcSize >= 8192) ? ZSTD_HASHLOG3_MAX : ((pledgedSrcSize >= 2048) ? ZSTD_HASHLOG3_MIN + 1 : ZSTD_HASHLOG3_MIN); + zc->hashLog3 = (params.cParams.searchLength==3) ? hashLog3 : 0; } + + { size_t const resetError = ZSTD_resetCCtx_advanced(zc, params, 1); + if (ZSTD_isError(resetError)) return resetError; } + + return ZSTD_compress_insertDictionary(zc, dict, dictSize); +} + + +/*! 
ZSTD_compressBegin_advanced() :
+*   @return : 0, or an error code */
+size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* zc,
+                                   const void* dict, size_t dictSize,
+                                   ZSTD_parameters params, U64 pledgedSrcSize)
+{
+    /* compression parameters verification and optimization */
+    { size_t const errorCode = ZSTD_checkCParams_advanced(params.cParams, pledgedSrcSize);
+      if (ZSTD_isError(errorCode)) return errorCode; }
+
+    return ZSTD_compressBegin_internal(zc, dict, dictSize, params, pledgedSrcSize);
+}
+
+
+size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* zc, const void* dict, size_t dictSize, int compressionLevel)
+{
+    ZSTD_parameters params;
+    params.cParams = ZSTD_getCParams(compressionLevel, 0, dictSize);
+    params.fParams.contentSizeFlag = 0;
+    ZSTD_adjustCParams(&params.cParams, 0, dictSize);
+    ZSTD_LOG_BLOCK("%p: ZSTD_compressBegin_usingDict compressionLevel=%d\n", zc->base, compressionLevel);
+    return ZSTD_compressBegin_internal(zc, dict, dictSize, params, 0);
+}
+
+
+size_t ZSTD_compressBegin(ZSTD_CCtx* zc, int compressionLevel)
+{
+    ZSTD_LOG_BLOCK("%p: ZSTD_compressBegin compressionLevel=%d\n", zc->base, compressionLevel);
+    return ZSTD_compressBegin_usingDict(zc, NULL, 0, compressionLevel);
+}
+
+
+/*! ZSTD_compressEnd() :
+*   Write frame epilogue.
+*   @return : nb of bytes written into dst (or an error code) */
+size_t ZSTD_compressEnd(ZSTD_CCtx* zc, void* dst, size_t dstCapacity)
+{
+    BYTE* op = (BYTE*)dst;
+    size_t fhSize = 0;
+
+    /* not even init ! */
+    if (zc->stage==0) return ERROR(stage_wrong);
+
+    /* special case : empty frame */
+    if (zc->stage==1) {
+        fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, zc->params, 0);
+        if (ZSTD_isError(fhSize)) return fhSize;
+        dstCapacity -= fhSize;
+        op += fhSize;
+        zc->stage = 2;
+    }
+
+    /* frame epilogue */
+    if (dstCapacity < 3) return ERROR(dstSize_tooSmall);
+    op[0] = (BYTE)(bt_end << 6);
+    op[1] = 0;
+    op[2] = 0;
+
+    zc->stage = 0;   /* return to "created, but not init" status */
+    return 3+fhSize;
+}
+
+
+size_t ZSTD_compress_usingPreparedCCtx(ZSTD_CCtx* cctx, const ZSTD_CCtx* preparedCCtx,
+                                       void* dst, size_t dstCapacity,
+                                       const void* src, size_t srcSize)
+{
+    { size_t const errorCode = ZSTD_copyCCtx(cctx, preparedCCtx);
+      if (ZSTD_isError(errorCode)) return errorCode;
+    }
+    { size_t const cSize = ZSTD_compressContinue(cctx, dst, dstCapacity, src, srcSize);
+      if (ZSTD_isError(cSize)) return cSize;
+
+      { size_t const endSize = ZSTD_compressEnd(cctx, (char*)dst+cSize, dstCapacity-cSize);
+        if (ZSTD_isError(endSize)) return endSize;
+        return cSize + endSize;
+    } }
+}
+
+
+static size_t ZSTD_compress_internal (ZSTD_CCtx* ctx,
+                                      void* dst, size_t dstCapacity,
+                                      const void* src, size_t srcSize,
+                                      const void* dict, size_t dictSize,
+                                      ZSTD_parameters params)
+{
+    BYTE* const ostart = (BYTE*)dst;
+    BYTE* op = ostart;
+
+    /* Init */
+    { size_t const errorCode = ZSTD_compressBegin_internal(ctx, dict, dictSize, params, srcSize);
+      if(ZSTD_isError(errorCode)) return errorCode; }
+
+    /* body (compression) */
+    { size_t const oSize = ZSTD_compressContinue (ctx, op, dstCapacity, src, srcSize);
+      if(ZSTD_isError(oSize)) return oSize;
+      op += oSize;
+      dstCapacity -= oSize; }
+
+    /* Close frame */
+    { size_t const oSize = ZSTD_compressEnd(ctx, op, dstCapacity);
+      if(ZSTD_isError(oSize)) return oSize;
+      op += oSize; }
+
+    return (op - ostart);
+}
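+/* Editor's note: ZSTD_compress_internal() above shows the canonical
+   sequence for the streaming entry points. A minimal usage sketch (hedged:
+   error checks elided; cctx, out, outCap, in, inSize are illustrative
+   names, not identifiers from this file):
+
+       ZSTD_compressBegin(cctx, 5);                       // init at level 5, returns 0 or an error
+       size_t cSize = ZSTD_compressContinue(cctx, out, outCap, in, inSize);
+       cSize += ZSTD_compressEnd(cctx, (char*)out + cSize, outCap - cSize);
+
+   The first ZSTD_compressContinue() call emits the frame header, and
+   ZSTD_compressEnd() appends the 3-byte epilogue; every call returns a
+   byte count (or 0) that should be tested with ZSTD_isError(). */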
+
+size_t ZSTD_compress_advanced (ZSTD_CCtx* ctx,
+                               void* dst, size_t dstCapacity,
+                               const void* src, size_t srcSize,
+                               const void* dict, size_t dictSize,
+                               ZSTD_parameters params)
+{
+    size_t const errorCode = ZSTD_checkCParams_advanced(params.cParams, srcSize);
+    if (ZSTD_isError(errorCode)) return errorCode;
+    return ZSTD_compress_internal(ctx, dst, dstCapacity, src, srcSize, dict, dictSize, params);
+}
+
+size_t ZSTD_compress_usingDict(ZSTD_CCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize, const void* dict, size_t dictSize, int compressionLevel)
+{
+    ZSTD_parameters params;
+    ZSTD_LOG_BLOCK("%p: ZSTD_compress_usingDict srcSize=%d dictSize=%d compressionLevel=%d\n", ctx->base, (int)srcSize, (int)dictSize, compressionLevel);
+    params.cParams = ZSTD_getCParams(compressionLevel, srcSize, dictSize);
+    params.fParams.contentSizeFlag = 1;
+    ZSTD_adjustCParams(&params.cParams, srcSize, dictSize);
+    return ZSTD_compress_internal(ctx, dst, dstCapacity, src, srcSize, dict, dictSize, params);
+}
+
+size_t ZSTD_compressCCtx (ZSTD_CCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize, int compressionLevel)
+{
+    ZSTD_LOG_BLOCK("%p: ZSTD_compressCCtx srcSize=%d compressionLevel=%d\n", ctx->base, (int)srcSize, compressionLevel);
+    return ZSTD_compress_usingDict(ctx, dst, dstCapacity, src, srcSize, NULL, 0, compressionLevel);
+}
+
+size_t ZSTD_compress(void* dst, size_t dstCapacity, const void* src, size_t srcSize, int compressionLevel)
+{
+    size_t result;
+    ZSTD_CCtx ctxBody;
+    memset(&ctxBody, 0, sizeof(ctxBody));
+    result = ZSTD_compressCCtx(&ctxBody, dst, dstCapacity, src, srcSize, compressionLevel);
+    free(ctxBody.workSpace);   /* can't free ctxBody, since it's on stack; just free heap content */
+    return result;
+}
+
+
+/*-===== Pre-defined compression levels =====-*/
+
+#define ZSTD_DEFAULT_CLEVEL 5
+#define ZSTD_MAX_CLEVEL     22
+unsigned ZSTD_maxCLevel(void) { return ZSTD_MAX_CLEVEL; }
+
+static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEVEL+1] = {
+{   /* "default" */
+    /* W,  C,  H,  S,  L, TL, strat */
+    {  0,  0,  0,  0,  0,  0, ZSTD_fast    },  /* level  0 - never used */
+    { 19, 13, 14,  1,  7,  4, ZSTD_fast    },  /* level  1 */
+    { 19, 15, 16,  1,  6,  4, ZSTD_fast    },  /* level  2 */
+    { 20, 18, 20,  1,  6,  4, ZSTD_fast    },  /* level  3 */
+    { 20, 13, 17,  2,  5,  4, ZSTD_greedy  },  /* level  4.*/
+    { 20, 15, 18,  3,  5,  4, ZSTD_greedy  },  /* level  5 */
+    { 21, 16, 19,  2,  5,  4, ZSTD_lazy    },  /* level  6 */
+    { 21, 17, 20,  3,  5,  4, ZSTD_lazy    },  /* level  7 */
+    { 21, 18, 20,  3,  5,  4, ZSTD_lazy2   },  /* level  8.*/
+    { 21, 20, 20,  3,  5,  4, ZSTD_lazy2   },  /* level  9 */
+    { 21, 19, 21,  4,  5,  4, ZSTD_lazy2   },  /* level 10 */
+    { 22, 20, 22,  4,  5,  4, ZSTD_lazy2   },  /* level 11 */
+    { 22, 20, 22,  5,  5,  4, ZSTD_lazy2   },  /* level 12 */
+    { 22, 21, 22,  5,  5,  4, ZSTD_lazy2   },  /* level 13 */
+    { 22, 21, 22,  6,  5,  4, ZSTD_lazy2   },  /* level 14 */
+    { 22, 21, 21,  5,  5,  4, ZSTD_btlazy2 },  /* level 15 */
+    { 23, 22, 22,  5,  5,  4, ZSTD_btlazy2 },  /* level 16 */
+    { 23, 23, 22,  5,  5,  4, ZSTD_btlazy2 },  /* level 17.*/
+    { 23, 23, 22,  6,  5, 24, ZSTD_btopt   },  /* level 18.*/
+    { 23, 23, 22,  6,  3, 48, ZSTD_btopt   },  /* level 19.*/
+    { 25, 26, 23,  7,  3, 64, ZSTD_btopt   },  /* level 20.*/
+    { 26, 26, 23,  7,  3,256, ZSTD_btopt   },  /* level 21.*/
+    { 27, 27, 25,  9,  3,512, ZSTD_btopt   },  /* level 22.*/
+},
+{   /* for srcSize <= 256 KB */
+    /* W,  C,  H,  S,  L,  T, strat */
+    {  0,  0,  0,  0,  0,  0, ZSTD_fast    },  /* level  0 */
+    { 18, 13, 14,  1,  6,  4, ZSTD_fast    },  /* level  1 */
+    { 18, 15, 17,  1,  5,  4, ZSTD_fast    },  /* level  2 */
+    { 18, 13, 15,  1,  5,  4, ZSTD_greedy  },  /* level  3.*/
+    { 18, 15, 17,  1,  5,  4, ZSTD_greedy  },  /* level  4.*/
+    { 18, 16, 17,  4,  5,  4, ZSTD_greedy  },  /* level  5 */
+    { 18, 17, 17,  5,  5,  4, ZSTD_greedy  },
/* level 6 */ + { 18, 17, 17, 4, 4, 4, ZSTD_lazy }, /* level 7 */ + { 18, 17, 17, 4, 4, 4, ZSTD_lazy2 }, /* level 8 */ + { 18, 17, 17, 5, 4, 4, ZSTD_lazy2 }, /* level 9 */ + { 18, 17, 17, 6, 4, 4, ZSTD_lazy2 }, /* level 10 */ + { 18, 18, 17, 6, 4, 4, ZSTD_lazy2 }, /* level 11.*/ + { 18, 18, 17, 7, 4, 4, ZSTD_lazy2 }, /* level 12.*/ + { 18, 19, 17, 7, 4, 4, ZSTD_btlazy2 }, /* level 13 */ + { 18, 18, 18, 4, 4, 16, ZSTD_btopt }, /* level 14.*/ + { 18, 18, 18, 8, 4, 24, ZSTD_btopt }, /* level 15.*/ + { 18, 19, 18, 8, 3, 48, ZSTD_btopt }, /* level 16.*/ + { 18, 19, 18, 8, 3, 96, ZSTD_btopt }, /* level 17.*/ + { 18, 19, 18, 9, 3,128, ZSTD_btopt }, /* level 18.*/ + { 18, 19, 18, 10, 3,256, ZSTD_btopt }, /* level 19.*/ + { 18, 19, 18, 11, 3,512, ZSTD_btopt }, /* level 20.*/ + { 18, 19, 18, 12, 3,512, ZSTD_btopt }, /* level 21.*/ + { 18, 19, 18, 13, 3,512, ZSTD_btopt }, /* level 22.*/ +}, +{ /* for srcSize <= 128 KB */ + /* W, C, H, S, L, T, strat */ + { 0, 0, 0, 0, 0, 0, ZSTD_fast }, /* level 0 - never used */ + { 17, 12, 13, 1, 6, 4, ZSTD_fast }, /* level 1 */ + { 17, 13, 16, 1, 5, 4, ZSTD_fast }, /* level 2 */ + { 17, 13, 14, 2, 5, 4, ZSTD_greedy }, /* level 3 */ + { 17, 13, 15, 3, 4, 4, ZSTD_greedy }, /* level 4 */ + { 17, 15, 17, 4, 4, 4, ZSTD_greedy }, /* level 5 */ + { 17, 16, 17, 3, 4, 4, ZSTD_lazy }, /* level 6 */ + { 17, 15, 17, 4, 4, 4, ZSTD_lazy2 }, /* level 7 */ + { 17, 17, 17, 4, 4, 4, ZSTD_lazy2 }, /* level 8 */ + { 17, 17, 17, 5, 4, 4, ZSTD_lazy2 }, /* level 9 */ + { 17, 17, 17, 6, 4, 4, ZSTD_lazy2 }, /* level 10 */ + { 17, 17, 17, 7, 4, 4, ZSTD_lazy2 }, /* level 11 */ + { 17, 17, 17, 8, 4, 4, ZSTD_lazy2 }, /* level 12 */ + { 17, 18, 17, 6, 4, 4, ZSTD_btlazy2 }, /* level 13.*/ + { 17, 17, 17, 7, 3, 8, ZSTD_btopt }, /* level 14.*/ + { 17, 17, 17, 7, 3, 16, ZSTD_btopt }, /* level 15.*/ + { 17, 18, 17, 7, 3, 32, ZSTD_btopt }, /* level 16.*/ + { 17, 18, 17, 7, 3, 64, ZSTD_btopt }, /* level 17.*/ + { 17, 18, 17, 7, 3,256, ZSTD_btopt }, /* level 18.*/ + { 17, 18, 17, 8, 3,256, ZSTD_btopt }, /* level 19.*/ + { 17, 18, 17, 9, 3,256, ZSTD_btopt }, /* level 20.*/ + { 17, 18, 17, 10, 3,256, ZSTD_btopt }, /* level 21.*/ + { 17, 18, 17, 11, 3,256, ZSTD_btopt }, /* level 22.*/ +}, +{ /* for srcSize <= 16 KB */ + /* W, C, H, S, L, T, strat */ + { 0, 0, 0, 0, 0, 0, ZSTD_fast }, /* level 0 -- never used */ + { 14, 14, 14, 1, 4, 4, ZSTD_fast }, /* level 1 */ + { 14, 14, 15, 1, 4, 4, ZSTD_fast }, /* level 2 */ + { 14, 14, 14, 4, 4, 4, ZSTD_greedy }, /* level 3.*/ + { 14, 14, 14, 3, 4, 4, ZSTD_lazy }, /* level 4.*/ + { 14, 14, 14, 4, 4, 4, ZSTD_lazy2 }, /* level 5 */ + { 14, 14, 14, 5, 4, 4, ZSTD_lazy2 }, /* level 6 */ + { 14, 14, 14, 6, 4, 4, ZSTD_lazy2 }, /* level 7.*/ + { 14, 14, 14, 7, 4, 4, ZSTD_lazy2 }, /* level 8.*/ + { 14, 15, 14, 6, 4, 4, ZSTD_btlazy2 }, /* level 9.*/ + { 14, 15, 14, 3, 3, 6, ZSTD_btopt }, /* level 10.*/ + { 14, 15, 14, 6, 3, 8, ZSTD_btopt }, /* level 11.*/ + { 14, 15, 14, 6, 3, 16, ZSTD_btopt }, /* level 12.*/ + { 14, 15, 14, 6, 3, 24, ZSTD_btopt }, /* level 13.*/ + { 14, 15, 15, 6, 3, 48, ZSTD_btopt }, /* level 14.*/ + { 14, 15, 15, 6, 3, 64, ZSTD_btopt }, /* level 15.*/ + { 14, 15, 15, 6, 3, 96, ZSTD_btopt }, /* level 16.*/ + { 14, 15, 15, 6, 3,128, ZSTD_btopt }, /* level 17.*/ + { 14, 15, 15, 6, 3,256, ZSTD_btopt }, /* level 18.*/ + { 14, 15, 15, 7, 3,256, ZSTD_btopt }, /* level 19.*/ + { 14, 15, 15, 8, 3,256, ZSTD_btopt }, /* level 20.*/ + { 14, 15, 15, 9, 3,256, ZSTD_btopt }, /* level 21.*/ + { 14, 15, 15, 10, 3,256, ZSTD_btopt }, /* level 22.*/ +}, +}; + +/*! 
ZSTD_getCParams() :
+*   @return ZSTD_compressionParameters structure for a selected compression level and srcSize.
+*   `srcSize` value is optional, select 0 if not known */
+ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, U64 srcSize, size_t dictSize)
+{
+    ZSTD_compressionParameters cp;
+    size_t const addedSize = srcSize ? 0 : 500;
+    U64 const rSize = srcSize+dictSize ? srcSize+dictSize+addedSize : (U64)-1;
+    U32 const tableID = (rSize <= 256 KB) + (rSize <= 128 KB) + (rSize <= 16 KB);   /* intentional underflow for srcSizeHint == 0 */
+    if (compressionLevel < 0) compressionLevel = ZSTD_DEFAULT_CLEVEL;
+    if (compressionLevel==0) compressionLevel = 1;
+    if (compressionLevel > ZSTD_MAX_CLEVEL) compressionLevel = ZSTD_MAX_CLEVEL;
+    cp = ZSTD_defaultCParameters[tableID][compressionLevel];
+    if (MEM_32bits()) {   /* auto-correction, for 32-bits mode */
+        if (cp.windowLog > ZSTD_WINDOWLOG_MAX) cp.windowLog = ZSTD_WINDOWLOG_MAX;
+        if (cp.chainLog > ZSTD_CHAINLOG_MAX) cp.chainLog = ZSTD_CHAINLOG_MAX;
+        if (cp.hashLog > ZSTD_HASHLOG_MAX) cp.hashLog = ZSTD_HASHLOG_MAX;
+    }
+    return cp;
+}
diff --git a/contrib/libs/zstd06/compress/zstd_opt.h b/contrib/libs/zstd06/compress/zstd_opt.h
new file mode 100644
index 0000000000..200eaf02a3
--- /dev/null
+++ b/contrib/libs/zstd06/compress/zstd_opt.h
@@ -0,0 +1,1033 @@
+#include <contrib/libs/zstd06/renames.h>
+/*
+    ZSTD Optimal mode
+    Copyright (C) 2016, Przemyslaw Skibinski, Yann Collet.
+
+    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions are
+    met:
+
+    * Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above
+    copyright notice, this list of conditions and the following disclaimer
+    in the documentation and/or other materials provided with the
+    distribution.
+
+    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ + You can contact the author at : + - Zstd source repository : https://www.zstd.net +*/ + +/* Note : this file is intended to be included within zstd_compress.c */ + + +#define ZSTD_FREQ_DIV 5 + +/*-************************************* +* Price functions for optimal parser +***************************************/ +FORCE_INLINE void ZSTD_setLog2Prices(seqStore_t* ssPtr) +{ + ssPtr->log2matchLengthSum = ZSTD_highbit(ssPtr->matchLengthSum+1); + ssPtr->log2litLengthSum = ZSTD_highbit(ssPtr->litLengthSum+1); + ssPtr->log2litSum = ZSTD_highbit(ssPtr->litSum+1); + ssPtr->log2offCodeSum = ZSTD_highbit(ssPtr->offCodeSum+1); + ssPtr->factor = 1 + ((ssPtr->litSum>>5) / ssPtr->litLengthSum) + ((ssPtr->litSum<<1) / (ssPtr->litSum + ssPtr->matchSum)); +} + + +MEM_STATIC void ZSTD_rescaleFreqs(seqStore_t* ssPtr) +{ + unsigned u; + + ssPtr->cachedLiterals = NULL; + ssPtr->cachedPrice = ssPtr->cachedLitLength = 0; + + if (ssPtr->litLengthSum == 0) { + ssPtr->litSum = (2<<Litbits); + ssPtr->litLengthSum = MaxLL+1; + ssPtr->matchLengthSum = MaxML+1; + ssPtr->offCodeSum = (MaxOff+1); + ssPtr->matchSum = (2<<Litbits); + + for (u=0; u<=MaxLit; u++) + ssPtr->litFreq[u] = 2; + for (u=0; u<=MaxLL; u++) + ssPtr->litLengthFreq[u] = 1; + for (u=0; u<=MaxML; u++) + ssPtr->matchLengthFreq[u] = 1; + for (u=0; u<=MaxOff; u++) + ssPtr->offCodeFreq[u] = 1; + } else { + ssPtr->matchLengthSum = 0; + ssPtr->litLengthSum = 0; + ssPtr->offCodeSum = 0; + ssPtr->matchSum = 0; + ssPtr->litSum = 0; + + for (u=0; u<=MaxLit; u++) { + ssPtr->litFreq[u] = 1 + (ssPtr->litFreq[u]>>ZSTD_FREQ_DIV); + ssPtr->litSum += ssPtr->litFreq[u]; + } + for (u=0; u<=MaxLL; u++) { + ssPtr->litLengthFreq[u] = 1 + (ssPtr->litLengthFreq[u]>>ZSTD_FREQ_DIV); + ssPtr->litLengthSum += ssPtr->litLengthFreq[u]; + } + for (u=0; u<=MaxML; u++) { + ssPtr->matchLengthFreq[u] = 1 + (ssPtr->matchLengthFreq[u]>>ZSTD_FREQ_DIV); + ssPtr->matchLengthSum += ssPtr->matchLengthFreq[u]; + ssPtr->matchSum += ssPtr->matchLengthFreq[u] * (u + 3); + } + for (u=0; u<=MaxOff; u++) { + ssPtr->offCodeFreq[u] = 1 + (ssPtr->offCodeFreq[u]>>ZSTD_FREQ_DIV); + ssPtr->offCodeSum += ssPtr->offCodeFreq[u]; + } + } + + ZSTD_setLog2Prices(ssPtr); +} + + +FORCE_INLINE U32 ZSTD_getLiteralPrice(seqStore_t* ssPtr, U32 litLength, const BYTE* literals) +{ + U32 price, u; + + if (litLength == 0) + return ssPtr->log2litLengthSum - ZSTD_highbit(ssPtr->litLengthFreq[0]+1); + + /* literals */ + if (ssPtr->cachedLiterals == literals) { + U32 additional = litLength - ssPtr->cachedLitLength; + const BYTE* literals2 = ssPtr->cachedLiterals + ssPtr->cachedLitLength; + price = ssPtr->cachedPrice + additional * ssPtr->log2litSum; + for (u=0; u < additional; u++) + price -= ZSTD_highbit(ssPtr->litFreq[literals2[u]]+1); + ssPtr->cachedPrice = price; + ssPtr->cachedLitLength = litLength; + } else { + price = litLength * ssPtr->log2litSum; + for (u=0; u < litLength; u++) + price -= ZSTD_highbit(ssPtr->litFreq[literals[u]]+1); + + if (litLength >= 12) { + ssPtr->cachedLiterals = literals; + ssPtr->cachedPrice = price; + ssPtr->cachedLitLength = litLength; + } + } + + /* literal Length */ + { static const BYTE LL_Code[64] = { 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 16, 17, 17, 18, 18, 19, 19, + 20, 20, 20, 20, 21, 21, 21, 21, + 22, 22, 22, 22, 22, 22, 22, 22, + 23, 23, 23, 23, 23, 23, 23, 23, + 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24 }; + const BYTE LL_deltaCode = 19; + const BYTE llCode = (litLength>63) ? 
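+/* Editor's note, worked examples for this mapping (values illustrative):
+   litLength=10  -> llCode = LL_Code[10] = 10 (direct table lookup);
+   litLength=100 -> llCode = ZSTD_highbit(100) + 19 = 6 + 19 = 25.
+   The table ends at code 24 for litLength 48..63, and highbit(64)+19 = 25,
+   so the two branches join up monotonically at the 63/64 boundary. */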
(BYTE)ZSTD_highbit(litLength) + LL_deltaCode : LL_Code[litLength]; + price += LL_bits[llCode] + ssPtr->log2litLengthSum - ZSTD_highbit(ssPtr->litLengthFreq[llCode]+1); + } + + return price; +} + + +FORCE_INLINE U32 ZSTD_getPrice(seqStore_t* seqStorePtr, U32 litLength, const BYTE* literals, U32 offset, U32 matchLength) +{ + /* offset */ + BYTE offCode = (BYTE)ZSTD_highbit(offset+1); + U32 price = offCode + seqStorePtr->log2offCodeSum - ZSTD_highbit(seqStorePtr->offCodeFreq[offCode]+1); + + /* match Length */ + { static const BYTE ML_Code[128] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + 32, 32, 33, 33, 34, 34, 35, 35, 36, 36, 36, 36, 37, 37, 37, 37, + 38, 38, 38, 38, 38, 38, 38, 38, 39, 39, 39, 39, 39, 39, 39, 39, + 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, + 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, + 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, + 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42 }; + const BYTE ML_deltaCode = 36; + const BYTE mlCode = (matchLength>127) ? (BYTE)ZSTD_highbit(matchLength) + ML_deltaCode : ML_Code[matchLength]; + price += ML_bits[mlCode] + seqStorePtr->log2matchLengthSum - ZSTD_highbit(seqStorePtr->matchLengthFreq[mlCode]+1); + } + + return price + ZSTD_getLiteralPrice(seqStorePtr, litLength, literals) + seqStorePtr->factor; +} + + +MEM_STATIC void ZSTD_updatePrice(seqStore_t* seqStorePtr, U32 litLength, const BYTE* literals, U32 offset, U32 matchLength) +{ + U32 u; + + /* literals */ + seqStorePtr->litSum += litLength; + for (u=0; u < litLength; u++) + seqStorePtr->litFreq[literals[u]]++; + + /* literal Length */ + { static const BYTE LL_Code[64] = { 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 16, 17, 17, 18, 18, 19, 19, + 20, 20, 20, 20, 21, 21, 21, 21, + 22, 22, 22, 22, 22, 22, 22, 22, + 23, 23, 23, 23, 23, 23, 23, 23, + 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24 }; + const BYTE LL_deltaCode = 19; + const BYTE llCode = (litLength>63) ? (BYTE)ZSTD_highbit(litLength) + LL_deltaCode : LL_Code[litLength]; + seqStorePtr->litLengthFreq[llCode]++; + seqStorePtr->litLengthSum++; + } + + /* match offset */ + { BYTE offCode = (BYTE)ZSTD_highbit(offset+1); + seqStorePtr->offCodeSum++; + seqStorePtr->offCodeFreq[offCode]++; + } + + /* match Length */ + { static const BYTE ML_Code[128] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + 32, 32, 33, 33, 34, 34, 35, 35, 36, 36, 36, 36, 37, 37, 37, 37, + 38, 38, 38, 38, 38, 38, 38, 38, 39, 39, 39, 39, 39, 39, 39, 39, + 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, + 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, + 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, + 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42 }; + const BYTE ML_deltaCode = 36; + const BYTE mlCode = (matchLength>127) ? 
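+/* Editor's note on the price model used throughout (a sketch of the idea,
+   not upstream documentation): the cost of a symbol s is approximated as
+
+       price(s) ~= log2(totalCount) - log2(freq[s]) ~= -log2(P(s)) bits
+
+   e.g. freq=32 out of a total of 1024 gives ~10 - 5 = 5 bits, matching the
+   ideal entropy cost of a symbol with probability 1/32. ZSTD_highbit(x+1)
+   serves as the integer log2 for both terms. */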
(BYTE)ZSTD_highbit(matchLength) + ML_deltaCode : ML_Code[matchLength]; + seqStorePtr->matchLengthFreq[mlCode]++; + seqStorePtr->matchLengthSum++; + } + + ZSTD_setLog2Prices(seqStorePtr); +} + + +#define SET_PRICE(pos, mlen_, offset_, litlen_, price_) \ + { \ + while (last_pos < pos) { opt[last_pos+1].price = 1<<30; last_pos++; } \ + opt[pos].mlen = mlen_; \ + opt[pos].off = offset_; \ + opt[pos].litlen = litlen_; \ + opt[pos].price = price_; \ + ZSTD_LOG_PARSER("%d: SET price[%d/%d]=%d litlen=%d len=%d off=%d\n", (int)(inr-base), (int)pos, (int)last_pos, opt[pos].price, opt[pos].litlen, opt[pos].mlen, opt[pos].off); \ + } + + + + +/* Update hashTable3 up to ip (excluded) + Assumption : always within prefix (ie. not within extDict) */ +FORCE_INLINE +U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_CCtx* zc, const BYTE* ip) +{ + U32* const hashTable3 = zc->hashTable3; + U32 const hashLog3 = zc->hashLog3; + const BYTE* const base = zc->base; + U32 idx = zc->nextToUpdate3; + const U32 target = zc->nextToUpdate3 = (U32)(ip - base); + const size_t hash3 = ZSTD_hash3Ptr(ip, hashLog3); + + while(idx < target) { + hashTable3[ZSTD_hash3Ptr(base+idx, hashLog3)] = idx; + idx++; + } + + return hashTable3[hash3]; +} + + +/*-************************************* +* Binary Tree search +***************************************/ +static U32 ZSTD_insertBtAndGetAllMatches ( + ZSTD_CCtx* zc, + const BYTE* const ip, const BYTE* const iLimit, + U32 nbCompares, const U32 mls, + U32 extDict, ZSTD_match_t* matches, const U32 minMatchLen) +{ + const BYTE* const base = zc->base; + const U32 current = (U32)(ip-base); + const U32 hashLog = zc->params.cParams.hashLog; + const size_t h = ZSTD_hashPtr(ip, hashLog, mls); + U32* const hashTable = zc->hashTable; + U32 matchIndex = hashTable[h]; + U32* const bt = zc->chainTable; + const U32 btLog = zc->params.cParams.chainLog - 1; + const U32 btMask= (1U << btLog) - 1; + size_t commonLengthSmaller=0, commonLengthLarger=0; + const BYTE* const dictBase = zc->dictBase; + const U32 dictLimit = zc->dictLimit; + const BYTE* const dictEnd = dictBase + dictLimit; + const BYTE* const prefixStart = base + dictLimit; + const U32 btLow = btMask >= current ? 0 : current - btMask; + const U32 windowLow = zc->lowLimit; + U32* smallerPtr = bt + 2*(current&btMask); + U32* largerPtr = bt + 2*(current&btMask) + 1; + U32 matchEndIdx = current+8; + U32 dummy32; /* to be nullified at the end */ + U32 mnum = 0; + + const U32 minMatch = (mls == 3) ? 
3 : 4; + size_t bestLength = minMatchLen-1; + + if (minMatch == 3) { /* HC3 match finder */ + U32 const matchIndex3 = ZSTD_insertAndFindFirstIndexHash3 (zc, ip); + if (matchIndex3>windowLow && (current - matchIndex3 < (1<<18))) { + const BYTE* match; + size_t currentMl=0; + if ((!extDict) || matchIndex3 >= dictLimit) { + match = base + matchIndex3; + if (match[bestLength] == ip[bestLength]) currentMl = ZSTD_count(ip, match, iLimit); + } else { + match = dictBase + matchIndex3; + if (MEM_readMINMATCH(match, MINMATCH) == MEM_readMINMATCH(ip, MINMATCH)) /* assumption : matchIndex3 <= dictLimit-4 (by table construction) */ + currentMl = ZSTD_count_2segments(ip+MINMATCH, match+MINMATCH, iLimit, dictEnd, prefixStart) + MINMATCH; + } + + /* save best solution */ + if (currentMl > bestLength) { + bestLength = currentMl; + matches[mnum].off = ZSTD_REP_MOVE + current - matchIndex3; + matches[mnum].len = (U32)currentMl; + mnum++; + if (currentMl > ZSTD_OPT_NUM) goto update; + if (ip+currentMl == iLimit) goto update; /* best possible, and avoid read overflow*/ + } + } + } + + hashTable[h] = current; /* Update Hash Table */ + + while (nbCompares-- && (matchIndex > windowLow)) { + U32* nextPtr = bt + 2*(matchIndex & btMask); + size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ + const BYTE* match; + + if ((!extDict) || (matchIndex+matchLength >= dictLimit)) { + match = base + matchIndex; + if (match[matchLength] == ip[matchLength]) { +#if ZSTD_OPT_DEBUG >= 5 + size_t ml; + if (matchIndex < dictLimit) + ml = ZSTD_count_2segments(ip, dictBase + matchIndex, iLimit, dictEnd, prefixStart); + else + ml = ZSTD_count(ip, match, ip+matchLength); + if (ml < matchLength) + printf("%d: ERROR_NOEXT: offset=%d matchLength=%d matchIndex=%d dictLimit=%d ml=%d\n", current, (int)(current - matchIndex), (int)matchLength, (int)matchIndex, (int)dictLimit, (int)ml), exit(0); +#endif + matchLength += ZSTD_count(ip+matchLength+1, match+matchLength+1, iLimit) +1; + } + } else { + match = dictBase + matchIndex; +#if ZSTD_OPT_DEBUG >= 5 + if (memcmp(match, ip, matchLength) != 0) + printf("%d: ERROR_EXT: matchLength=%d ZSTD_count=%d\n", current, (int)matchLength, (int)ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, dictEnd, prefixStart)), exit(0); +#endif + matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, dictEnd, prefixStart); + ZSTD_LOG_PARSER("%d: ZSTD_INSERTBTANDGETALLMATCHES=%d offset=%d dictBase=%p dictEnd=%p prefixStart=%p ip=%p match=%p\n", (int)current, (int)matchLength, (int)(current - matchIndex), dictBase, dictEnd, prefixStart, ip, match); + if (matchIndex+matchLength >= dictLimit) + match = base + matchIndex; /* to prepare for next usage of match[matchLength] */ + } + + if (matchLength > bestLength) { + if (matchLength > matchEndIdx - matchIndex) matchEndIdx = matchIndex + (U32)matchLength; + bestLength = matchLength; + matches[mnum].off = ZSTD_REP_MOVE + current - matchIndex; + matches[mnum].len = (U32)matchLength; + mnum++; + if (matchLength > ZSTD_OPT_NUM) break; + if (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */ + break; /* drop, to guarantee consistency (miss a little bit of compression) */ + } + + if (match[matchLength] < ip[matchLength]) { + /* match is smaller than current */ + *smallerPtr = matchIndex; /* update smaller idx */ + commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */ + if (matchIndex <= btLow) { smallerPtr=&dummy32; 
break; } /* beyond tree size, stop the search */ + smallerPtr = nextPtr+1; /* new "smaller" => larger of match */ + matchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */ + } else { + /* match is larger than current */ + *largerPtr = matchIndex; + commonLengthLarger = matchLength; + if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */ + largerPtr = nextPtr; + matchIndex = nextPtr[0]; + } } + + *smallerPtr = *largerPtr = 0; + +update: + zc->nextToUpdate = (matchEndIdx > current + 8) ? matchEndIdx - 8 : current+1; + return mnum; +} + + +/** Tree updater, providing best match */ +static U32 ZSTD_BtGetAllMatches ( + ZSTD_CCtx* zc, + const BYTE* const ip, const BYTE* const iLimit, + const U32 maxNbAttempts, const U32 mls, ZSTD_match_t* matches, const U32 minMatchLen) +{ + if (ip < zc->base + zc->nextToUpdate) return 0; /* skipped area */ + ZSTD_updateTree(zc, ip, iLimit, maxNbAttempts, mls); + return ZSTD_insertBtAndGetAllMatches(zc, ip, iLimit, maxNbAttempts, mls, 0, matches, minMatchLen); +} + + +static U32 ZSTD_BtGetAllMatches_selectMLS ( + ZSTD_CCtx* zc, /* Index table will be updated */ + const BYTE* ip, const BYTE* const iHighLimit, + const U32 maxNbAttempts, const U32 matchLengthSearch, ZSTD_match_t* matches, const U32 minMatchLen) +{ + switch(matchLengthSearch) + { + case 3 : return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 3, matches, minMatchLen); + default : + case 4 : return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 4, matches, minMatchLen); + case 5 : return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 5, matches, minMatchLen); + case 6 : return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 6, matches, minMatchLen); + } +} + +/** Tree updater, providing best match */ +static U32 ZSTD_BtGetAllMatches_extDict ( + ZSTD_CCtx* zc, + const BYTE* const ip, const BYTE* const iLimit, + const U32 maxNbAttempts, const U32 mls, ZSTD_match_t* matches, const U32 minMatchLen) +{ + if (ip < zc->base + zc->nextToUpdate) return 0; /* skipped area */ + ZSTD_updateTree_extDict(zc, ip, iLimit, maxNbAttempts, mls); + return ZSTD_insertBtAndGetAllMatches(zc, ip, iLimit, maxNbAttempts, mls, 1, matches, minMatchLen); +} + + +static U32 ZSTD_BtGetAllMatches_selectMLS_extDict ( + ZSTD_CCtx* zc, /* Index table will be updated */ + const BYTE* ip, const BYTE* const iHighLimit, + const U32 maxNbAttempts, const U32 matchLengthSearch, ZSTD_match_t* matches, const U32 minMatchLen) +{ + switch(matchLengthSearch) + { + case 3 : return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 3, matches, minMatchLen); + default : + case 4 : return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 4, matches, minMatchLen); + case 5 : return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 5, matches, minMatchLen); + case 6 : return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 6, matches, minMatchLen); + } +} + + +/*-******************************* +* Optimal parser +*********************************/ +FORCE_INLINE +void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx, + const void* src, size_t srcSize) +{ + seqStore_t* seqStorePtr = &(ctx->seqStore); + const BYTE* const istart = (const BYTE*)src; + const BYTE* ip = istart; + const BYTE* anchor = istart; + const BYTE* const iend = istart + srcSize; + const BYTE* const ilimit = iend - 8; + const BYTE* const base = ctx->base; + const BYTE* const prefixStart = base + ctx->dictLimit; + + const U32 
maxSearches = 1U << ctx->params.cParams.searchLog; + const U32 sufficient_len = ctx->params.cParams.targetLength; + const U32 mls = ctx->params.cParams.searchLength; + const U32 minMatch = (ctx->params.cParams.searchLength == 3) ? 3 : 4; + + ZSTD_optimal_t* opt = seqStorePtr->priceTable; + ZSTD_match_t* matches = seqStorePtr->matchTable; + const BYTE* inr; + + /* init */ + U32 offset, rep[ZSTD_REP_INIT]; + { U32 i; for (i=0; i<ZSTD_REP_INIT; i++) rep[i]=REPCODE_STARTVALUE; } + + ctx->nextToUpdate3 = ctx->nextToUpdate; + ZSTD_resetSeqStore(seqStorePtr); + ZSTD_rescaleFreqs(seqStorePtr); + if ((ip-prefixStart) < REPCODE_STARTVALUE) ip = prefixStart + REPCODE_STARTVALUE; + + ZSTD_LOG_BLOCK("%d: COMPBLOCK_OPT_GENERIC srcSz=%d maxSrch=%d mls=%d sufLen=%d\n", (int)(ip-base), (int)srcSize, maxSearches, mls, sufficient_len); + + /* Match Loop */ + while (ip < ilimit) { + U32 cur, match_num, last_pos, litlen, price; + U32 u, mlen, best_mlen, best_off, litLength; + memset(opt, 0, sizeof(ZSTD_optimal_t)); + last_pos = 0; + litlen = (U32)(ip - anchor); + + /* check repCode */ + { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) + if (MEM_readMINMATCH(ip, minMatch) == MEM_readMINMATCH(ip - rep[i], minMatch)) { + /* repcode : we take it */ + mlen = (U32)ZSTD_count(ip+minMatch, ip+minMatch-rep[i], iend) + minMatch; + ZSTD_LOG_PARSER("%d: start try REP rep[%d]=%d mlen=%d\n", (int)(ip-base), i, (int)rep[i], (int)mlen); + if (mlen > sufficient_len || mlen >= ZSTD_OPT_NUM) { + best_mlen = mlen; best_off = i; cur = 0; last_pos = 1; + goto _storeSequence; + } + best_off = (i<=1 && ip == anchor) ? 1-i : i; + do { + price = ZSTD_getPrice(seqStorePtr, litlen, anchor, best_off, mlen - MINMATCH); + if (mlen > last_pos || price < opt[mlen].price) + SET_PRICE(mlen, mlen, i, litlen, price); /* note : macro modifies last_pos */ + mlen--; + } while (mlen >= minMatch); + } } + + match_num = ZSTD_BtGetAllMatches_selectMLS(ctx, ip, iend, maxSearches, mls, matches, minMatch); + + ZSTD_LOG_PARSER("%d: match_num=%d last_pos=%d\n", (int)(ip-base), match_num, last_pos); + if (!last_pos && !match_num) { ip++; continue; } + + if (match_num && (matches[match_num-1].len > sufficient_len || matches[match_num-1].len >= ZSTD_OPT_NUM)) { + best_mlen = matches[match_num-1].len; + best_off = matches[match_num-1].off; + cur = 0; + last_pos = 1; + goto _storeSequence; + } + + /* set prices using matches at position = 0 */ + best_mlen = (last_pos) ? last_pos : minMatch; + for (u = 0; u < match_num; u++) { + mlen = (u>0) ? 
matches[u-1].len+1 : best_mlen; + best_mlen = matches[u].len; + ZSTD_LOG_PARSER("%d: start Found mlen=%d off=%d best_mlen=%d last_pos=%d\n", (int)(ip-base), matches[u].len, matches[u].off, (int)best_mlen, (int)last_pos); + while (mlen <= best_mlen) { + price = ZSTD_getPrice(seqStorePtr, litlen, anchor, matches[u].off, mlen - MINMATCH); + if (mlen > last_pos || price < opt[mlen].price) + SET_PRICE(mlen, mlen, matches[u].off, litlen, price); /* note : macro modifies last_pos */ + mlen++; + } } + + if (last_pos < minMatch) { ip++; continue; } + + /* initialize opt[0] */ + { U32 i ; for (i=0; i<ZSTD_REP_INIT; i++) opt[0].rep[i] = rep[i]; } + opt[0].mlen = 1; + opt[0].litlen = litlen; + + /* check further positions */ + for (cur = 1; cur <= last_pos; cur++) { + inr = ip + cur; + + if (opt[cur-1].mlen == 1) { + litlen = opt[cur-1].litlen + 1; + if (cur > litlen) { + price = opt[cur - litlen].price + ZSTD_getLiteralPrice(seqStorePtr, litlen, inr-litlen); + } else + price = ZSTD_getLiteralPrice(seqStorePtr, litlen, anchor); + } else { + litlen = 1; + price = opt[cur - 1].price + ZSTD_getLiteralPrice(seqStorePtr, litlen, inr-1); + } + + if (cur > last_pos || price <= opt[cur].price) // || ((price == opt[cur].price) && (opt[cur-1].mlen == 1) && (cur != litlen))) + SET_PRICE(cur, 1, 0, litlen, price); + + if (cur == last_pos) break; + + if (inr > ilimit) /* last match must start at a minimum distance of 8 from oend */ + continue; + + mlen = opt[cur].mlen; + if (opt[cur].off >= ZSTD_REP_NUM) { + opt[cur].rep[2] = opt[cur-mlen].rep[1]; + opt[cur].rep[1] = opt[cur-mlen].rep[0]; + opt[cur].rep[0] = opt[cur].off - ZSTD_REP_MOVE; + ZSTD_LOG_ENCODE("%d: COPYREP_OFF cur=%d mlen=%d rep[0]=%d rep[1]=%d\n", (int)(inr-base), cur, mlen, opt[cur].rep[0], opt[cur].rep[1]); + } else { + opt[cur].rep[2] = (opt[cur].off > 1) ? opt[cur-mlen].rep[1] : opt[cur-mlen].rep[2]; + opt[cur].rep[1] = (opt[cur].off > 0) ? opt[cur-mlen].rep[0] : opt[cur-mlen].rep[1]; + opt[cur].rep[0] = opt[cur-mlen].rep[opt[cur].off]; + ZSTD_LOG_ENCODE("%d: COPYREP_NOR cur=%d mlen=%d rep[0]=%d rep[1]=%d\n", (int)(inr-base), cur, mlen, opt[cur].rep[0], opt[cur].rep[1]); + } + + ZSTD_LOG_PARSER("%d: CURRENT_NoExt price[%d/%d]=%d off=%d mlen=%d litlen=%d rep[0]=%d rep[1]=%d\n", (int)(inr-base), cur, last_pos, opt[cur].price, opt[cur].off, opt[cur].mlen, opt[cur].litlen, opt[cur].rep[0], opt[cur].rep[1]); + + best_mlen = minMatch; + { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) + if (MEM_readMINMATCH(inr, minMatch) == MEM_readMINMATCH(inr - opt[cur].rep[i], minMatch)) { /* check rep */ + mlen = (U32)ZSTD_count(inr+minMatch, inr+minMatch - opt[cur].rep[i], iend) + minMatch; + ZSTD_LOG_PARSER("%d: Found REP %d/%d mlen=%d off=%d rep=%d opt[%d].off=%d\n", (int)(inr-base), i, ZSTD_REP_NUM, mlen, i, opt[cur].rep[i], cur, opt[cur].off); + + if (mlen > sufficient_len || cur + mlen >= ZSTD_OPT_NUM) { + ZSTD_LOG_PARSER("%d: REP sufficient_len=%d best_mlen=%d best_off=%d last_pos=%d\n", (int)(inr-base), sufficient_len, best_mlen, best_off, last_pos); + best_mlen = mlen; best_off = i; last_pos = cur + 1; + goto _storeSequence; + } + + best_off = (i<=1 && opt[cur].mlen != 1) ? 
1-i : i; + if (opt[cur].mlen == 1) { + litlen = opt[cur].litlen; + if (cur > litlen) { + price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, inr-litlen, best_off, mlen - MINMATCH); + } else + price = ZSTD_getPrice(seqStorePtr, litlen, anchor, best_off, mlen - MINMATCH); + } else { + litlen = 0; + price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, best_off, mlen - MINMATCH); + } + + if (mlen > best_mlen) best_mlen = mlen; + ZSTD_LOG_PARSER("%d: Found REP mlen=%d off=%d price=%d litlen=%d\n", (int)(inr-base), mlen, best_off, price, litlen); + + do { + if (cur + mlen > last_pos || price <= opt[cur + mlen].price) + SET_PRICE(cur + mlen, mlen, i, litlen, price); + mlen--; + } while (mlen >= minMatch); + } } + + match_num = ZSTD_BtGetAllMatches_selectMLS(ctx, inr, iend, maxSearches, mls, matches, best_mlen); + ZSTD_LOG_PARSER("%d: ZSTD_GetAllMatches match_num=%d\n", (int)(inr-base), match_num); + + if (match_num > 0 && (matches[match_num-1].len > sufficient_len || cur + matches[match_num-1].len >= ZSTD_OPT_NUM)) { + best_mlen = matches[match_num-1].len; + best_off = matches[match_num-1].off; + last_pos = cur + 1; + goto _storeSequence; + } + + /* set prices using matches at position = cur */ + for (u = 0; u < match_num; u++) { + mlen = (u>0) ? matches[u-1].len+1 : best_mlen; + best_mlen = matches[u].len; + + // ZSTD_LOG_PARSER("%d: Found1 cur=%d mlen=%d off=%d best_mlen=%d last_pos=%d\n", (int)(inr-base), cur, matches[u].len, matches[u].off, best_mlen, last_pos); + while (mlen <= best_mlen) { + if (opt[cur].mlen == 1) { + litlen = opt[cur].litlen; + if (cur > litlen) + price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, ip+cur-litlen, matches[u].off, mlen - MINMATCH); + else + price = ZSTD_getPrice(seqStorePtr, litlen, anchor, matches[u].off, mlen - MINMATCH); + } else { + litlen = 0; + price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, matches[u].off, mlen - MINMATCH); + } + + // ZSTD_LOG_PARSER("%d: Found2 mlen=%d best_mlen=%d off=%d price=%d litlen=%d\n", (int)(inr-base), mlen, best_mlen, matches[u].off, price, litlen); + if (cur + mlen > last_pos || (price < opt[cur + mlen].price)) + SET_PRICE(cur + mlen, mlen, matches[u].off, litlen, price); + + mlen++; + } } } // for (cur = 1; cur <= last_pos; cur++) + + best_mlen = opt[last_pos].mlen; + best_off = opt[last_pos].off; + cur = last_pos - best_mlen; + + /* store sequence */ +_storeSequence: /* cur, last_pos, best_mlen, best_off have to be set */ + for (u = 1; u <= last_pos; u++) + ZSTD_LOG_PARSER("%d: price[%d/%d]=%d off=%d mlen=%d litlen=%d rep[0]=%d rep[1]=%d\n", (int)(ip-base+u), u, last_pos, opt[u].price, opt[u].off, opt[u].mlen, opt[u].litlen, opt[u].rep[0], opt[u].rep[1]); + ZSTD_LOG_PARSER("%d: cur=%d/%d best_mlen=%d best_off=%d rep[0]=%d\n", (int)(ip-base+cur), (int)cur, (int)last_pos, (int)best_mlen, (int)best_off, opt[cur].rep[0]); + + opt[0].mlen = 1; + + while (1) { + mlen = opt[cur].mlen; + offset = opt[cur].off; + opt[cur].mlen = best_mlen; + opt[cur].off = best_off; + best_mlen = mlen; + best_off = offset; + if (mlen > cur) break; + cur -= mlen; + } + + for (u = 0; u <= last_pos;) { + ZSTD_LOG_PARSER("%d: price2[%d/%d]=%d off=%d mlen=%d litlen=%d rep[0]=%d rep[1]=%d\n", (int)(ip-base+u), u, last_pos, opt[u].price, opt[u].off, opt[u].mlen, opt[u].litlen, opt[u].rep[0], opt[u].rep[1]); + u += opt[u].mlen; + } + + for (cur=0; cur < last_pos; ) { + ZSTD_LOG_PARSER("%d: price3[%d/%d]=%d off=%d mlen=%d litlen=%d rep[0]=%d rep[1]=%d\n", (int)(ip-base+cur), cur, last_pos, 
opt[cur].price, opt[cur].off, opt[cur].mlen, opt[cur].litlen, opt[cur].rep[0], opt[cur].rep[1]); + mlen = opt[cur].mlen; + if (mlen == 1) { ip++; cur++; continue; } + offset = opt[cur].off; + cur += mlen; + litLength = (U32)(ip - anchor); + // ZSTD_LOG_ENCODE("%d/%d: ENCODE literals=%d mlen=%d off=%d rep[0]=%d rep[1]=%d\n", (int)(ip-base), (int)(iend-base), (int)(litLength), (int)mlen, (int)(offset), (int)rep[0], (int)rep[1]); + + if (offset >= ZSTD_REP_NUM) { + rep[2] = rep[1]; + rep[1] = rep[0]; + rep[0] = offset - ZSTD_REP_MOVE; + } else { + if (offset != 0) { + best_off = rep[offset]; + if (offset != 1) rep[2] = rep[1]; + rep[1] = rep[0]; + rep[0] = best_off; + } + if (litLength == 0 && offset<=1) offset = 1-offset; + } + + ZSTD_LOG_ENCODE("%d/%d: ENCODE literals=%d mlen=%d off=%d rep[0]=%d rep[1]=%d\n", (int)(ip-base), (int)(iend-base), (int)(litLength), (int)mlen, (int)(offset), (int)rep[0], (int)rep[1]); + +#if ZSTD_OPT_DEBUG >= 5 + U32 ml2; + if (offset >= ZSTD_REP_NUM) + ml2 = (U32)ZSTD_count(ip, ip-(offset-ZSTD_REP_MOVE), iend); + else + ml2 = (U32)ZSTD_count(ip, ip-rep[0], iend); + if ((offset >= 8) && (ml2 < mlen || ml2 < minMatch)) { + printf("%d: ERROR_NoExt iend=%d mlen=%d offset=%d ml2=%d\n", (int)(ip - base), (int)(iend - ip), (int)mlen, (int)offset, (int)ml2); exit(0); } + if (ip < anchor) { + printf("%d: ERROR_NoExt ip < anchor iend=%d mlen=%d offset=%d\n", (int)(ip - base), (int)(iend - ip), (int)mlen, (int)offset); exit(0); } + if (ip + mlen > iend) { + printf("%d: ERROR_NoExt ip + mlen >= iend iend=%d mlen=%d offset=%d\n", (int)(ip - base), (int)(iend - ip), (int)mlen, (int)offset); exit(0); } +#endif + + ZSTD_updatePrice(seqStorePtr, litLength, anchor, offset, mlen-MINMATCH); + ZSTD_storeSeq(seqStorePtr, litLength, anchor, offset, mlen-MINMATCH); + anchor = ip = ip + mlen; + } } /* for (cur=0; cur < last_pos; ) */ + + { /* Last Literals */ + size_t lastLLSize = iend - anchor; + ZSTD_LOG_ENCODE("%d: lastLLSize literals=%u\n", (int)(ip-base), (U32)lastLLSize); + memcpy(seqStorePtr->lit, anchor, lastLLSize); + seqStorePtr->lit += lastLLSize; + } +} + + +FORCE_INLINE +void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx, + const void* src, size_t srcSize) +{ + seqStore_t* seqStorePtr = &(ctx->seqStore); + const BYTE* const istart = (const BYTE*)src; + const BYTE* ip = istart; + const BYTE* anchor = istart; + const BYTE* const iend = istart + srcSize; + const BYTE* const ilimit = iend - 8; + const BYTE* const base = ctx->base; + const U32 dictLimit = ctx->dictLimit; + const BYTE* const prefixStart = base + dictLimit; + const BYTE* const dictBase = ctx->dictBase; + const BYTE* const dictEnd = dictBase + dictLimit; + + const U32 maxSearches = 1U << ctx->params.cParams.searchLog; + const U32 sufficient_len = ctx->params.cParams.targetLength; + const U32 mls = ctx->params.cParams.searchLength; + const U32 minMatch = (ctx->params.cParams.searchLength == 3) ? 
3 : 4; + + ZSTD_optimal_t* opt = seqStorePtr->priceTable; + ZSTD_match_t* matches = seqStorePtr->matchTable; + const BYTE* inr; + + /* init */ + U32 offset, rep[ZSTD_REP_INIT]; + { U32 i; for (i=0; i<ZSTD_REP_INIT; i++) rep[i]=REPCODE_STARTVALUE; } + + ctx->nextToUpdate3 = ctx->nextToUpdate; + ZSTD_resetSeqStore(seqStorePtr); + ZSTD_rescaleFreqs(seqStorePtr); + if ((ip - prefixStart) < REPCODE_STARTVALUE) ip += REPCODE_STARTVALUE; + + ZSTD_LOG_BLOCK("%d: COMPBLOCK_OPT_EXTDICT srcSz=%d maxSrch=%d mls=%d sufLen=%d\n", (int)(ip-base), (int)srcSize, maxSearches, mls, sufficient_len); + + /* Match Loop */ + while (ip < ilimit) { + U32 cur, match_num, last_pos, litlen, price; + U32 u, mlen, best_mlen, best_off, litLength; + U32 current = (U32)(ip-base); + memset(opt, 0, sizeof(ZSTD_optimal_t)); + last_pos = 0; + inr = ip; + opt[0].litlen = (U32)(ip - anchor); + + /* check repCode */ + { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) { + const U32 repIndex = (U32)(current - rep[i]); + const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; + const BYTE* const repMatch = repBase + repIndex; + if ( ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow */ + && (MEM_readMINMATCH(ip, minMatch) == MEM_readMINMATCH(repMatch, minMatch)) ) { + /* repcode detected we should take it */ + const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; + mlen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iend, repEnd, prefixStart) + minMatch; + + ZSTD_LOG_PARSER("%d: start try REP rep[%d]=%d mlen=%d\n", (int)(ip-base), i, (int)rep[i], (int)mlen); + if (mlen > sufficient_len || mlen >= ZSTD_OPT_NUM) { + best_mlen = mlen; best_off = i; cur = 0; last_pos = 1; + goto _storeSequence; + } + + best_off = (i<=1 && ip == anchor) ? 1-i : i; + litlen = opt[0].litlen; + do { + price = ZSTD_getPrice(seqStorePtr, litlen, anchor, best_off, mlen - MINMATCH); + if (mlen > last_pos || price < opt[mlen].price) + SET_PRICE(mlen, mlen, i, litlen, price); /* note : macro modifies last_pos */ + mlen--; + } while (mlen >= minMatch); + } } } + + match_num = ZSTD_BtGetAllMatches_selectMLS_extDict(ctx, ip, iend, maxSearches, mls, matches, minMatch); /* first search (depth 0) */ + + ZSTD_LOG_PARSER("%d: match_num=%d last_pos=%d\n", (int)(ip-base), match_num, last_pos); + if (!last_pos && !match_num) { ip++; continue; } + + { U32 i; for (i=0; i<ZSTD_REP_INIT; i++) opt[0].rep[i] = rep[i]; } + opt[0].mlen = 1; + + if (match_num && (matches[match_num-1].len > sufficient_len || matches[match_num-1].len >= ZSTD_OPT_NUM)) { + best_mlen = matches[match_num-1].len; + best_off = matches[match_num-1].off; + cur = 0; + last_pos = 1; + goto _storeSequence; + } + + best_mlen = (last_pos) ? last_pos : minMatch; + + // set prices using matches at position = 0 + for (u = 0; u < match_num; u++) { + mlen = (u>0) ? 
matches[u-1].len+1 : best_mlen; + best_mlen = matches[u].len; + ZSTD_LOG_PARSER("%d: start Found mlen=%d off=%d best_mlen=%d last_pos=%d\n", (int)(ip-base), matches[u].len, matches[u].off, (int)best_mlen, (int)last_pos); + litlen = opt[0].litlen; + while (mlen <= best_mlen) { + price = ZSTD_getPrice(seqStorePtr, litlen, anchor, matches[u].off, mlen - MINMATCH); + if (mlen > last_pos || price < opt[mlen].price) + SET_PRICE(mlen, mlen, matches[u].off, litlen, price); + mlen++; + } } + + if (last_pos < minMatch) { + // ip += ((ip-anchor) >> g_searchStrength) + 1; /* jump faster over incompressible sections */ + ip++; continue; + } + + /* check further positions */ + for (cur = 1; cur <= last_pos; cur++) { + inr = ip + cur; + + if (opt[cur-1].mlen == 1) { + litlen = opt[cur-1].litlen + 1; + if (cur > litlen) { + price = opt[cur - litlen].price + ZSTD_getLiteralPrice(seqStorePtr, litlen, inr-litlen); + } else + price = ZSTD_getLiteralPrice(seqStorePtr, litlen, anchor); + } else { + litlen = 1; + price = opt[cur - 1].price + ZSTD_getLiteralPrice(seqStorePtr, litlen, inr-1); + } + + if (cur > last_pos || price <= opt[cur].price) // || ((price == opt[cur].price) && (opt[cur-1].mlen == 1) && (cur != litlen))) + SET_PRICE(cur, 1, 0, litlen, price); + + if (cur == last_pos) break; + + if (inr > ilimit) /* last match must start at a minimum distance of 8 from oend */ + continue; + + mlen = opt[cur].mlen; + if (opt[cur].off >= ZSTD_REP_NUM) { + opt[cur].rep[2] = opt[cur-mlen].rep[1]; + opt[cur].rep[1] = opt[cur-mlen].rep[0]; + opt[cur].rep[0] = opt[cur].off - ZSTD_REP_MOVE; + ZSTD_LOG_ENCODE("%d: COPYREP_OFF cur=%d mlen=%d rep[0]=%d rep[1]=%d\n", (int)(inr-base), cur, mlen, opt[cur].rep[0], opt[cur].rep[1]); + } else { + opt[cur].rep[2] = (opt[cur].off > 1) ? opt[cur-mlen].rep[1] : opt[cur-mlen].rep[2]; + opt[cur].rep[1] = (opt[cur].off > 0) ? opt[cur-mlen].rep[0] : opt[cur-mlen].rep[1]; + opt[cur].rep[0] = opt[cur-mlen].rep[opt[cur].off]; + ZSTD_LOG_ENCODE("%d: COPYREP_NOR cur=%d mlen=%d rep[0]=%d rep[1]=%d\n", (int)(inr-base), cur, mlen, opt[cur].rep[0], opt[cur].rep[1]); + } + + ZSTD_LOG_PARSER("%d: CURRENT_Ext price[%d/%d]=%d off=%d mlen=%d litlen=%d rep[0]=%d rep[1]=%d\n", (int)(inr-base), cur, last_pos, opt[cur].price, opt[cur].off, opt[cur].mlen, opt[cur].litlen, opt[cur].rep[0], opt[cur].rep[1]); + best_mlen = 0; + + { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) { + const U32 repIndex = (U32)(current+cur - opt[cur].rep[i]); + const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; + const BYTE* const repMatch = repBase + repIndex; + if ( ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow */ + && (MEM_readMINMATCH(inr, minMatch) == MEM_readMINMATCH(repMatch, minMatch)) ) { + /* repcode detected */ + const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; + mlen = (U32)ZSTD_count_2segments(inr+minMatch, repMatch+minMatch, iend, repEnd, prefixStart) + minMatch; + ZSTD_LOG_PARSER("%d: Found REP %d/%d mlen=%d off=%d rep=%d opt[%d].off=%d\n", (int)(inr-base), i, ZSTD_REP_NUM, mlen, i, opt[cur].rep[i], cur, opt[cur].off); + + if (mlen > sufficient_len || cur + mlen >= ZSTD_OPT_NUM) { + ZSTD_LOG_PARSER("%d: REP sufficient_len=%d best_mlen=%d best_off=%d last_pos=%d\n", (int)(inr-base), sufficient_len, best_mlen, best_off, last_pos); + best_mlen = mlen; best_off = i; last_pos = cur + 1; + goto _storeSequence; + } + + best_off = (i<=1 && opt[cur].mlen != 1) ? 
1-i : i; + if (opt[cur].mlen == 1) { + litlen = opt[cur].litlen; + if (cur > litlen) { + price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, inr-litlen, best_off, mlen - MINMATCH); + } else + price = ZSTD_getPrice(seqStorePtr, litlen, anchor, best_off, mlen - MINMATCH); + } else { + litlen = 0; + price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, best_off, mlen - MINMATCH); + } + + best_mlen = mlen; + ZSTD_LOG_PARSER("%d: Found REP mlen=%d off=%d price=%d litlen=%d\n", (int)(inr-base), mlen, best_off, price, litlen); + + do { + if (cur + mlen > last_pos || price <= opt[cur + mlen].price) + SET_PRICE(cur + mlen, mlen, i, litlen, price); + mlen--; + } while (mlen >= minMatch); + } } } + + match_num = ZSTD_BtGetAllMatches_selectMLS_extDict(ctx, inr, iend, maxSearches, mls, matches, minMatch); + ZSTD_LOG_PARSER("%d: ZSTD_GetAllMatches match_num=%d\n", (int)(inr-base), match_num); + + if (match_num > 0 && matches[match_num-1].len > sufficient_len) { + best_mlen = matches[match_num-1].len; + best_off = matches[match_num-1].off; + last_pos = cur + 1; + goto _storeSequence; + } + + best_mlen = (best_mlen > minMatch) ? best_mlen : minMatch; + + /* set prices using matches at position = cur */ + for (u = 0; u < match_num; u++) { + mlen = (u>0) ? matches[u-1].len+1 : best_mlen; + best_mlen = (cur + matches[u].len < ZSTD_OPT_NUM) ? matches[u].len : ZSTD_OPT_NUM - cur; + + // ZSTD_LOG_PARSER("%d: Found1 cur=%d mlen=%d off=%d best_mlen=%d last_pos=%d\n", (int)(inr-base), cur, matches[u].len, matches[u].off, best_mlen, last_pos); + while (mlen <= best_mlen) { + if (opt[cur].mlen == 1) { + litlen = opt[cur].litlen; + if (cur > litlen) + price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, ip+cur-litlen, matches[u].off, mlen - MINMATCH); + else + price = ZSTD_getPrice(seqStorePtr, litlen, anchor, matches[u].off, mlen - MINMATCH); + } else { + litlen = 0; + price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, matches[u].off, mlen - MINMATCH); + } + + // ZSTD_LOG_PARSER("%d: Found2 mlen=%d best_mlen=%d off=%d price=%d litlen=%d\n", (int)(inr-base), mlen, best_mlen, matches[u].off, price, litlen); + if (cur + mlen > last_pos || (price < opt[cur + mlen].price)) + SET_PRICE(cur + mlen, mlen, matches[u].off, litlen, price); + + mlen++; + } } } /* for (cur = 1; cur <= last_pos; cur++) */ + + best_mlen = opt[last_pos].mlen; + best_off = opt[last_pos].off; + cur = last_pos - best_mlen; + + /* store sequence */ +_storeSequence: /* cur, last_pos, best_mlen, best_off have to be set */ + for (u = 1; u <= last_pos; u++) + ZSTD_LOG_PARSER("%d: price[%u/%d]=%d off=%d mlen=%d litlen=%d rep[0]=%d rep[1]=%d\n", (int)(ip-base+u), u, last_pos, opt[u].price, opt[u].off, opt[u].mlen, opt[u].litlen, opt[u].rep[0], opt[u].rep[1]); + ZSTD_LOG_PARSER("%d: cur=%d/%d best_mlen=%d best_off=%d rep[0]=%d\n", (int)(ip-base+cur), (int)cur, (int)last_pos, (int)best_mlen, (int)best_off, opt[cur].rep[0]); + + opt[0].mlen = 1; + + while (1) { + mlen = opt[cur].mlen; + offset = opt[cur].off; + opt[cur].mlen = best_mlen; + opt[cur].off = best_off; + best_mlen = mlen; + best_off = offset; + if (mlen > cur) break; + cur -= mlen; + } + + for (u = 0; u <= last_pos; ) { + ZSTD_LOG_PARSER("%d: price2[%d/%d]=%d off=%d mlen=%d litlen=%d rep[0]=%d rep[1]=%d\n", (int)(ip-base+u), u, last_pos, opt[u].price, opt[u].off, opt[u].mlen, opt[u].litlen, opt[u].rep[0], opt[u].rep[1]); + u += opt[u].mlen; + } + + for (cur=0; cur < last_pos; ) { + ZSTD_LOG_PARSER("%d: price3[%d/%d]=%d off=%d mlen=%d litlen=%d 
rep[0]=%d rep[1]=%d\n", (int)(ip-base+cur), cur, last_pos, opt[cur].price, opt[cur].off, opt[cur].mlen, opt[cur].litlen, opt[cur].rep[0], opt[cur].rep[1]);
+        mlen = opt[cur].mlen;
+        if (mlen == 1) { ip++; cur++; continue; }
+        offset = opt[cur].off;
+        cur += mlen;
+        litLength = (U32)(ip - anchor);
+        // ZSTD_LOG_ENCODE("%d/%d: ENCODE1 literals=%d mlen=%d off=%d rep[0]=%d rep[1]=%d\n", (int)(ip-base), (int)(iend-base), (int)(litLength), (int)mlen, (int)(offset), (int)rep[0], (int)rep[1]);
+
+        /* update repcode history : a real offset shifts the history, a repcode index is promoted to rep[0] */
+        if (offset >= ZSTD_REP_NUM) {
+            rep[2] = rep[1];
+            rep[1] = rep[0];
+            rep[0] = offset - ZSTD_REP_MOVE;
+        } else {
+            if (offset != 0) {
+                best_off = rep[offset];
+                if (offset != 1) rep[2] = rep[1];
+                rep[1] = rep[0];
+                rep[0] = best_off;
+            }
+            if (litLength == 0 && offset<=1) offset = 1-offset;
+        }
+
+        ZSTD_LOG_ENCODE("%d/%d: ENCODE literals=%d mlen=%d off=%d rep[0]=%d rep[1]=%d\n", (int)(ip-base), (int)(iend-base), (int)(litLength), (int)mlen, (int)(offset), (int)rep[0], (int)rep[1]);
+
+#if ZSTD_OPT_DEBUG >= 5
+        U32 ml2;
+        if (offset >= ZSTD_REP_NUM) {
+            best_off = offset - ZSTD_REP_MOVE;
+            if (best_off > (size_t)(ip - prefixStart)) {
+                const BYTE* match = dictEnd - (best_off - (ip - prefixStart));
+                ml2 = ZSTD_count_2segments(ip, match, iend, dictEnd, prefixStart);
+                ZSTD_LOG_PARSER("%d: ZSTD_count_2segments=%d offset=%d dictBase=%p dictEnd=%p prefixStart=%p ip=%p match=%p\n", (int)current, (int)ml2, (int)best_off, dictBase, dictEnd, prefixStart, ip, match);
+            }
+            else ml2 = (U32)ZSTD_count(ip, ip-offset, iend);
+        }
+        else ml2 = (U32)ZSTD_count(ip, ip-rep[0], iend);
+        if ((offset >= 8) && (ml2 < mlen || ml2 < minMatch)) {
+            printf("%d: ERROR_Ext iend=%d mlen=%d offset=%d ml2=%d\n", (int)(ip - base), (int)(iend - ip), (int)mlen, (int)offset, (int)ml2); exit(0); }
+        if (ip < anchor) {
+            printf("%d: ERROR_Ext ip < anchor iend=%d mlen=%d offset=%d\n", (int)(ip - base), (int)(iend - ip), (int)mlen, (int)offset); exit(0); }
+        if (ip + mlen > iend) {
+            printf("%d: ERROR_Ext ip + mlen >= iend iend=%d mlen=%d offset=%d\n", (int)(ip - base), (int)(iend - ip), (int)mlen, (int)offset); exit(0); }
+#endif
+
+        ZSTD_updatePrice(seqStorePtr, litLength, anchor, offset, mlen-MINMATCH);
+        ZSTD_storeSeq(seqStorePtr, litLength, anchor, offset, mlen-MINMATCH);
+        anchor = ip = ip + mlen;
+    }    }   /* for (cur=0; cur < last_pos; ) */
+
+    {   /* Last Literals */
+        size_t lastLLSize = iend - anchor;
+        ZSTD_LOG_ENCODE("%d: lastLLSize literals=%u\n", (int)(ip-base), (U32)(lastLLSize));
+        memcpy(seqStorePtr->lit, anchor, lastLLSize);
+        seqStorePtr->lit += lastLLSize;
+    }
+}
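
Note on the code above: ZSTD_compressBlock_opt_generic and its extDict twin are the btopt optimal parser. For each block position they gather repcode and binary-tree match candidates, price every (literal run, match) choice with ZSTD_getPrice, keep the cheapest arrival cost per position in opt[], then walk the chosen path backwards and emit it through ZSTD_storeSeq. The sketch below is a minimal, hypothetical driver for reaching this code path through the public zstd.h API; it assumes only the standard entry points (ZSTD_compress, ZSTD_compressBound, ZSTD_isError, ZSTD_getErrorName) and assumes that a high level such as 20 maps to a btopt strategy, a mapping that is version-dependent.

    #include <stdio.h>
    #include <stdlib.h>
    #include "zstd.h"

    int main(void)
    {
        /* repetitive sample input : gives the match finder something to price */
        char src[4096];
        size_t i;
        for (i = 0; i < sizeof(src); i++) src[i] = "abcabcab"[i % 8];

        size_t const dstCapacity = ZSTD_compressBound(sizeof(src));  /* worst-case bound */
        void* const dst = malloc(dstCapacity);
        if (dst == NULL) return 1;

        /* level 20 is assumed to select an optimal-parser (btopt) strategy;
           the exact level-to-strategy mapping depends on the library version */
        size_t const cSize = ZSTD_compress(dst, dstCapacity, src, sizeof(src), 20);
        if (ZSTD_isError(cSize)) {
            fprintf(stderr, "compression failed : %s\n", ZSTD_getErrorName(cSize));
            free(dst);
            return 1;
        }

        printf("compressed %u bytes into %u bytes\n", (unsigned)sizeof(src), (unsigned)cSize);
        free(dst);
        return 0;
    }

The driver only exercises the parser indirectly (frame and block handling sit in between), so treat it as API plumbing for experimentation rather than a statement about which strategy any particular build will pick.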