path: root/contrib/libs/zstd/lib/compress
author    robot-contrib <robot-contrib@yandex-team.com>  2023-04-20 12:08:07 +0300
committer robot-contrib <robot-contrib@yandex-team.com>  2023-04-20 12:08:07 +0300
commit    eb16979262f120e52cda528d30de5a0cfb4ed0c9 (patch)
tree      4642aac240bc0889ba11a987e6748d4fabbc8230 /contrib/libs/zstd/lib/compress
parent    d0c642847472b3222a1a73a941917d393cc6ddf3 (diff)
download  ydb-eb16979262f120e52cda528d30de5a0cfb4ed0c9.tar.gz
Update contrib/libs/zstd to 1.5.5
Diffstat (limited to 'contrib/libs/zstd/lib/compress')
-rw-r--r--  contrib/libs/zstd/lib/compress/zstd_compress.c            313
-rw-r--r--  contrib/libs/zstd/lib/compress/zstd_compress_internal.h    80
-rw-r--r--  contrib/libs/zstd/lib/compress/zstd_cwksp.h               182
-rw-r--r--  contrib/libs/zstd/lib/compress/zstd_lazy.c                154
-rw-r--r--  contrib/libs/zstd/lib/compress/zstd_opt.c                   2
-rw-r--r--  contrib/libs/zstd/lib/compress/zstdmt_compress.c            9
6 files changed, 498 insertions, 242 deletions
diff --git a/contrib/libs/zstd/lib/compress/zstd_compress.c b/contrib/libs/zstd/lib/compress/zstd_compress.c
index b55f684cd7..d6133e70b4 100644
--- a/contrib/libs/zstd/lib/compress/zstd_compress.c
+++ b/contrib/libs/zstd/lib/compress/zstd_compress.c
@@ -11,6 +11,7 @@
/*-*************************************
* Dependencies
***************************************/
+#include "../common/allocations.h" /* ZSTD_customMalloc, ZSTD_customCalloc, ZSTD_customFree */
#include "../common/zstd_deps.h" /* INT_MAX, ZSTD_memset, ZSTD_memcpy */
#include "../common/mem.h"
#include "hist.h" /* HIST_countFast_wksp */
@@ -26,7 +27,7 @@
#include "zstd_opt.h"
#include "zstd_ldm.h"
#include "zstd_compress_superblock.h"
-#include "../common/bits.h" /* ZSTD_highbit32 */
+#include "../common/bits.h" /* ZSTD_highbit32, ZSTD_rotateRight_U64 */
/* ***************************************************************
* Tuning parameters
@@ -1177,16 +1178,39 @@ size_t ZSTD_CCtx_setParametersUsingCCtxParams(
size_t ZSTD_CCtx_setCParams(ZSTD_CCtx* cctx, ZSTD_compressionParameters cparams)
{
+ ZSTD_STATIC_ASSERT(sizeof(cparams) == 7 * 4 /* all params are listed below */);
DEBUGLOG(4, "ZSTD_CCtx_setCParams");
- assert(cctx != NULL);
- if (cctx->streamStage != zcss_init) {
- /* All parameters in @cparams are allowed to be updated during MT compression.
- * This must be signaled, so that MT compression picks up the changes */
- cctx->cParamsChanged = 1;
- }
- /* only update if parameters are valid */
+ /* only update if all parameters are valid */
FORWARD_IF_ERROR(ZSTD_checkCParams(cparams), "");
- cctx->requestedParams.cParams = cparams;
+ FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(cctx, ZSTD_c_windowLog, cparams.windowLog), "");
+ FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(cctx, ZSTD_c_chainLog, cparams.chainLog), "");
+ FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(cctx, ZSTD_c_hashLog, cparams.hashLog), "");
+ FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(cctx, ZSTD_c_searchLog, cparams.searchLog), "");
+ FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(cctx, ZSTD_c_minMatch, cparams.minMatch), "");
+ FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(cctx, ZSTD_c_targetLength, cparams.targetLength), "");
+ FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(cctx, ZSTD_c_strategy, cparams.strategy), "");
+ return 0;
+}
+
+size_t ZSTD_CCtx_setFParams(ZSTD_CCtx* cctx, ZSTD_frameParameters fparams)
+{
+ ZSTD_STATIC_ASSERT(sizeof(fparams) == 3 * 4 /* all params are listed below */);
+ DEBUGLOG(4, "ZSTD_CCtx_setFParams");
+ FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(cctx, ZSTD_c_contentSizeFlag, fparams.contentSizeFlag != 0), "");
+ FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, fparams.checksumFlag != 0), "");
+ FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(cctx, ZSTD_c_dictIDFlag, fparams.noDictIDFlag == 0), "");
+ return 0;
+}
+
+size_t ZSTD_CCtx_setParams(ZSTD_CCtx* cctx, ZSTD_parameters params)
+{
+ DEBUGLOG(4, "ZSTD_CCtx_setParams");
+ /* First check cParams, because we want to update all or none. */
+ FORWARD_IF_ERROR(ZSTD_checkCParams(params.cParams), "");
+ /* Next set fParams, because this could fail if the cctx isn't in init stage. */
+ FORWARD_IF_ERROR(ZSTD_CCtx_setFParams(cctx, params.fParams), "");
+ /* Finally set cParams, which should succeed. */
+ FORWARD_IF_ERROR(ZSTD_CCtx_setCParams(cctx, params.cParams), "");
return 0;
}
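Note: the rewrite above routes every cParams field through ZSTD_CCtx_setParameter(), so each value is bounds-checked and multi-threaded compression is signalled through the usual path instead of the removed cParamsChanged flag. A minimal usage sketch of the new setters, assuming the zstd 1.5.5 experimental API (ZSTD_STATIC_LINKING_ONLY) and an illustrative level-19 configuration:

    #define ZSTD_STATIC_LINKING_ONLY
    #include <zstd.h>

    static size_t apply_level19_params(ZSTD_CCtx* cctx, unsigned long long srcSize)
    {
        ZSTD_parameters params;
        params.cParams = ZSTD_getCParams(19, srcSize, 0 /* no dictionary */);
        params.fParams.contentSizeFlag = 1;
        params.fParams.checksumFlag    = 1;
        params.fParams.noDictIDFlag    = 0;
        /* checks cParams first, then applies fParams, then cParams field by field */
        return ZSTD_CCtx_setParams(cctx, params);
    }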
@@ -1208,9 +1232,9 @@ static void ZSTD_dedicatedDictSearch_revertCParams(
ZSTD_compressionParameters* cParams);
/**
- * Initializes the local dict using the requested parameters.
- * NOTE: This does not use the pledged src size, because it may be used for more
- * than one compression.
+ * Initializes the local dictionary using requested parameters.
+ * NOTE: Initialization does not employ the pledged src size,
+ * because the dictionary may be used for multiple compressions.
*/
static size_t ZSTD_initLocalDict(ZSTD_CCtx* cctx)
{
@@ -1223,8 +1247,8 @@ static size_t ZSTD_initLocalDict(ZSTD_CCtx* cctx)
return 0;
}
if (dl->cdict != NULL) {
- assert(cctx->cdict == dl->cdict);
/* Local dictionary already initialized. */
+ assert(cctx->cdict == dl->cdict);
return 0;
}
assert(dl->dictSize > 0);
@@ -1244,26 +1268,30 @@ static size_t ZSTD_initLocalDict(ZSTD_CCtx* cctx)
}
size_t ZSTD_CCtx_loadDictionary_advanced(
- ZSTD_CCtx* cctx, const void* dict, size_t dictSize,
- ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType)
+ ZSTD_CCtx* cctx,
+ const void* dict, size_t dictSize,
+ ZSTD_dictLoadMethod_e dictLoadMethod,
+ ZSTD_dictContentType_e dictContentType)
{
- RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong,
- "Can't load a dictionary when ctx is not in init stage.");
DEBUGLOG(4, "ZSTD_CCtx_loadDictionary_advanced (size: %u)", (U32)dictSize);
- ZSTD_clearAllDicts(cctx); /* in case one already exists */
- if (dict == NULL || dictSize == 0) /* no dictionary mode */
+ RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong,
+ "Can't load a dictionary when cctx is not in init stage.");
+ ZSTD_clearAllDicts(cctx); /* erase any previously set dictionary */
+ if (dict == NULL || dictSize == 0) /* no dictionary */
return 0;
if (dictLoadMethod == ZSTD_dlm_byRef) {
cctx->localDict.dict = dict;
} else {
+ /* copy dictionary content inside CCtx to own its lifetime */
void* dictBuffer;
RETURN_ERROR_IF(cctx->staticSize, memory_allocation,
- "no malloc for static CCtx");
+ "static CCtx can't allocate for an internal copy of dictionary");
dictBuffer = ZSTD_customMalloc(dictSize, cctx->customMem);
- RETURN_ERROR_IF(!dictBuffer, memory_allocation, "NULL pointer!");
+ RETURN_ERROR_IF(dictBuffer==NULL, memory_allocation,
+ "allocation failed for dictionary content");
ZSTD_memcpy(dictBuffer, dict, dictSize);
- cctx->localDict.dictBuffer = dictBuffer;
- cctx->localDict.dict = dictBuffer;
+ cctx->localDict.dictBuffer = dictBuffer; /* owned ptr to free */
+ cctx->localDict.dict = dictBuffer; /* read-only reference */
}
cctx->localDict.dictSize = dictSize;
cctx->localDict.dictContentType = dictContentType;
@@ -1333,7 +1361,7 @@ size_t ZSTD_CCtx_reset(ZSTD_CCtx* cctx, ZSTD_ResetDirective reset)
if ( (reset == ZSTD_reset_parameters)
|| (reset == ZSTD_reset_session_and_parameters) ) {
RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong,
- "Can't reset parameters only when not in init stage.");
+ "Reset parameters is only possible during init stage.");
ZSTD_clearAllDicts(cctx);
ZSTD_memset(&cctx->externalMatchCtx, 0, sizeof(cctx->externalMatchCtx));
return ZSTD_CCtxParams_reset(&cctx->requestedParams);
@@ -1592,7 +1620,7 @@ ZSTD_sizeof_matchState(const ZSTD_compressionParameters* const cParams,
+ ZSTD_cwksp_aligned_alloc_size((ZSTD_OPT_NUM+1) * sizeof(ZSTD_match_t))
+ ZSTD_cwksp_aligned_alloc_size((ZSTD_OPT_NUM+1) * sizeof(ZSTD_optimal_t));
size_t const lazyAdditionalSpace = ZSTD_rowMatchFinderUsed(cParams->strategy, useRowMatchFinder)
- ? ZSTD_cwksp_aligned_alloc_size(hSize*sizeof(U16))
+ ? ZSTD_cwksp_aligned_alloc_size(hSize)
: 0;
size_t const optSpace = (forCCtx && (cParams->strategy >= ZSTD_btopt))
? optPotentialSpace
@@ -1883,6 +1911,19 @@ typedef enum {
ZSTD_resetTarget_CCtx
} ZSTD_resetTarget_e;
+/* Mixes bits in a 64-bit value, based on XXH3_rrmxmx */
+static U64 ZSTD_bitmix(U64 val, U64 len) {
+ val ^= ZSTD_rotateRight_U64(val, 49) ^ ZSTD_rotateRight_U64(val, 24);
+ val *= 0x9FB21C651E98DF25ULL;
+ val ^= (val >> 35) + len ;
+ val *= 0x9FB21C651E98DF25ULL;
+ return val ^ (val >> 28);
+}
+
+/* Mixes in the hashSalt and hashSaltEntropy to create a new hashSalt */
+static void ZSTD_advanceHashSalt(ZSTD_matchState_t* ms) {
+ ms->hashSalt = ZSTD_bitmix(ms->hashSalt, 8) ^ ZSTD_bitmix((U64) ms->hashSaltEntropy, 4);
+}
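Note: ZSTD_bitmix() is a 64-bit finalizer in the style of XXH3's rrmxmx step, and ZSTD_advanceHashSalt() folds the collected hashSaltEntropy into the previous salt so each CCtx reset probes the reused tag table with a fresh salt. A standalone sketch (same constants as above, entropy value made up) showing how successive salts evolve:

    #include <stdint.h>
    #include <stdio.h>

    static uint64_t rotr64(uint64_t v, unsigned c) { return (v >> c) | (v << (64 - c)); }

    static uint64_t bitmix(uint64_t val, uint64_t len) {   /* mirrors ZSTD_bitmix() above */
        val ^= rotr64(val, 49) ^ rotr64(val, 24);
        val *= 0x9FB21C651E98DF25ULL;
        val ^= (val >> 35) + len;
        val *= 0x9FB21C651E98DF25ULL;
        return val ^ (val >> 28);
    }

    int main(void) {
        uint64_t salt = 0;
        uint32_t const entropy = 12345;    /* stands in for ms->hashSaltEntropy */
        for (int i = 0; i < 3; i++) {
            salt = bitmix(salt, 8) ^ bitmix((uint64_t)entropy, 4);   /* ZSTD_advanceHashSalt() */
            printf("salt after reset %d: %016llx\n", i + 1, (unsigned long long)salt);
        }
        return 0;
    }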
static size_t
ZSTD_reset_matchState(ZSTD_matchState_t* ms,
@@ -1910,6 +1951,7 @@ ZSTD_reset_matchState(ZSTD_matchState_t* ms,
}
ms->hashLog3 = hashLog3;
+ ms->lazySkipping = 0;
ZSTD_invalidateMatchState(ms);
@@ -1931,6 +1973,27 @@ ZSTD_reset_matchState(ZSTD_matchState_t* ms,
ZSTD_cwksp_clean_tables(ws);
}
+ if (ZSTD_rowMatchFinderUsed(cParams->strategy, useRowMatchFinder)) {
+ /* Row match finder needs an additional table of hashes ("tags") */
+ size_t const tagTableSize = hSize;
+ /* We want to generate a new salt in case we reset a Cctx, but we always want to use
+ * 0 when we reset a Cdict */
+ if(forWho == ZSTD_resetTarget_CCtx) {
+ ms->tagTable = (BYTE*) ZSTD_cwksp_reserve_aligned_init_once(ws, tagTableSize);
+ ZSTD_advanceHashSalt(ms);
+ } else {
+ /* When we are not salting we want to always memset the memory */
+ ms->tagTable = (BYTE*) ZSTD_cwksp_reserve_aligned(ws, tagTableSize);
+ ZSTD_memset(ms->tagTable, 0, tagTableSize);
+ ms->hashSalt = 0;
+ }
+ { /* Switch to 32-entry rows if searchLog is 5 (or more) */
+ U32 const rowLog = BOUNDED(4, cParams->searchLog, 6);
+ assert(cParams->hashLog >= rowLog);
+ ms->rowHashLog = cParams->hashLog - rowLog;
+ }
+ }
+
/* opt parser space */
if ((forWho == ZSTD_resetTarget_CCtx) && (cParams->strategy >= ZSTD_btopt)) {
DEBUGLOG(4, "reserving optimal parser space");
@@ -1942,19 +2005,6 @@ ZSTD_reset_matchState(ZSTD_matchState_t* ms,
ms->opt.priceTable = (ZSTD_optimal_t*)ZSTD_cwksp_reserve_aligned(ws, (ZSTD_OPT_NUM+1) * sizeof(ZSTD_optimal_t));
}
- if (ZSTD_rowMatchFinderUsed(cParams->strategy, useRowMatchFinder)) {
- { /* Row match finder needs an additional table of hashes ("tags") */
- size_t const tagTableSize = hSize*sizeof(U16);
- ms->tagTable = (U16*)ZSTD_cwksp_reserve_aligned(ws, tagTableSize);
- if (ms->tagTable) ZSTD_memset(ms->tagTable, 0, tagTableSize);
- }
- { /* Switch to 32-entry rows if searchLog is 5 (or more) */
- U32 const rowLog = BOUNDED(4, cParams->searchLog, 6);
- assert(cParams->hashLog >= rowLog);
- ms->rowHashLog = cParams->hashLog - rowLog;
- }
- }
-
ms->cParams = *cParams;
RETURN_ERROR_IF(ZSTD_cwksp_reserve_failed(ws), memory_allocation,
@@ -2101,13 +2151,46 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
ZSTD_reset_compressedBlockState(zc->blockState.prevCBlock);
+ FORWARD_IF_ERROR(ZSTD_reset_matchState(
+ &zc->blockState.matchState,
+ ws,
+ &params->cParams,
+ params->useRowMatchFinder,
+ crp,
+ needsIndexReset,
+ ZSTD_resetTarget_CCtx), "");
+
+ zc->seqStore.sequencesStart = (seqDef*)ZSTD_cwksp_reserve_aligned(ws, maxNbSeq * sizeof(seqDef));
+
+ /* ldm hash table */
+ if (params->ldmParams.enableLdm == ZSTD_ps_enable) {
+ /* TODO: avoid memset? */
+ size_t const ldmHSize = ((size_t)1) << params->ldmParams.hashLog;
+ zc->ldmState.hashTable = (ldmEntry_t*)ZSTD_cwksp_reserve_aligned(ws, ldmHSize * sizeof(ldmEntry_t));
+ ZSTD_memset(zc->ldmState.hashTable, 0, ldmHSize * sizeof(ldmEntry_t));
+ zc->ldmSequences = (rawSeq*)ZSTD_cwksp_reserve_aligned(ws, maxNbLdmSeq * sizeof(rawSeq));
+ zc->maxNbLdmSequences = maxNbLdmSeq;
+
+ ZSTD_window_init(&zc->ldmState.window);
+ zc->ldmState.loadedDictEnd = 0;
+ }
+
+ /* reserve space for block-level external sequences */
+ if (params->useSequenceProducer) {
+ size_t const maxNbExternalSeq = ZSTD_sequenceBound(blockSize);
+ zc->externalMatchCtx.seqBufferCapacity = maxNbExternalSeq;
+ zc->externalMatchCtx.seqBuffer =
+ (ZSTD_Sequence*)ZSTD_cwksp_reserve_aligned(ws, maxNbExternalSeq * sizeof(ZSTD_Sequence));
+ }
+
+ /* buffers */
+
/* ZSTD_wildcopy() is used to copy into the literals buffer,
* so we have to oversize the buffer by WILDCOPY_OVERLENGTH bytes.
*/
zc->seqStore.litStart = ZSTD_cwksp_reserve_buffer(ws, blockSize + WILDCOPY_OVERLENGTH);
zc->seqStore.maxNbLit = blockSize;
- /* buffers */
zc->bufferedPolicy = zbuff;
zc->inBuffSize = buffInSize;
zc->inBuff = (char*)ZSTD_cwksp_reserve_buffer(ws, buffInSize);
@@ -2130,40 +2213,9 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
zc->seqStore.llCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE));
zc->seqStore.mlCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE));
zc->seqStore.ofCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE));
- zc->seqStore.sequencesStart = (seqDef*)ZSTD_cwksp_reserve_aligned(ws, maxNbSeq * sizeof(seqDef));
-
- FORWARD_IF_ERROR(ZSTD_reset_matchState(
- &zc->blockState.matchState,
- ws,
- &params->cParams,
- params->useRowMatchFinder,
- crp,
- needsIndexReset,
- ZSTD_resetTarget_CCtx), "");
-
- /* ldm hash table */
- if (params->ldmParams.enableLdm == ZSTD_ps_enable) {
- /* TODO: avoid memset? */
- size_t const ldmHSize = ((size_t)1) << params->ldmParams.hashLog;
- zc->ldmState.hashTable = (ldmEntry_t*)ZSTD_cwksp_reserve_aligned(ws, ldmHSize * sizeof(ldmEntry_t));
- ZSTD_memset(zc->ldmState.hashTable, 0, ldmHSize * sizeof(ldmEntry_t));
- zc->ldmSequences = (rawSeq*)ZSTD_cwksp_reserve_aligned(ws, maxNbLdmSeq * sizeof(rawSeq));
- zc->maxNbLdmSequences = maxNbLdmSeq;
-
- ZSTD_window_init(&zc->ldmState.window);
- zc->ldmState.loadedDictEnd = 0;
- }
-
- /* reserve space for block-level external sequences */
- if (params->useSequenceProducer) {
- size_t const maxNbExternalSeq = ZSTD_sequenceBound(blockSize);
- zc->externalMatchCtx.seqBufferCapacity = maxNbExternalSeq;
- zc->externalMatchCtx.seqBuffer =
- (ZSTD_Sequence*)ZSTD_cwksp_reserve_aligned(ws, maxNbExternalSeq * sizeof(ZSTD_Sequence));
- }
DEBUGLOG(3, "wksp: finished allocating, %zd bytes remain available", ZSTD_cwksp_available_space(ws));
- assert(ZSTD_cwksp_estimated_space_within_bounds(ws, neededSpace, resizeWorkspace));
+ assert(ZSTD_cwksp_estimated_space_within_bounds(ws, neededSpace));
zc->initialized = 1;
@@ -2338,10 +2390,11 @@ static size_t ZSTD_resetCCtx_byCopyingCDict(ZSTD_CCtx* cctx,
}
/* copy tag table */
if (ZSTD_rowMatchFinderUsed(cdict_cParams->strategy, cdict->useRowMatchFinder)) {
- size_t const tagTableSize = hSize*sizeof(U16);
+ size_t const tagTableSize = hSize;
ZSTD_memcpy(cctx->blockState.matchState.tagTable,
- cdict->matchState.tagTable,
- tagTableSize);
+ cdict->matchState.tagTable,
+ tagTableSize);
+ cctx->blockState.matchState.hashSalt = cdict->matchState.hashSalt;
}
}
@@ -3858,9 +3911,10 @@ ZSTD_seqStore_resolveOffCodes(repcodes_t* const dRepcodes, repcodes_t* const cRe
const seqStore_t* const seqStore, U32 const nbSeq)
{
U32 idx = 0;
+ U32 const longLitLenIdx = seqStore->longLengthType == ZSTD_llt_literalLength ? seqStore->longLengthPos : nbSeq;
for (; idx < nbSeq; ++idx) {
seqDef* const seq = seqStore->sequencesStart + idx;
- U32 const ll0 = (seq->litLength == 0);
+ U32 const ll0 = (seq->litLength == 0) && (idx != longLitLenIdx);
U32 const offBase = seq->offBase;
assert(offBase > 0);
if (OFFBASE_IS_REPCODE(offBase)) {
@@ -4576,31 +4630,51 @@ static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* cctx,
}
}
-size_t ZSTD_compressContinue (ZSTD_CCtx* cctx,
- void* dst, size_t dstCapacity,
- const void* src, size_t srcSize)
+size_t ZSTD_compressContinue_public(ZSTD_CCtx* cctx,
+ void* dst, size_t dstCapacity,
+ const void* src, size_t srcSize)
{
DEBUGLOG(5, "ZSTD_compressContinue (srcSize=%u)", (unsigned)srcSize);
return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 1 /* frame mode */, 0 /* last chunk */);
}
+/* NOTE: Must just wrap ZSTD_compressContinue_public() */
+size_t ZSTD_compressContinue(ZSTD_CCtx* cctx,
+ void* dst, size_t dstCapacity,
+ const void* src, size_t srcSize)
+{
+ return ZSTD_compressContinue_public(cctx, dst, dstCapacity, src, srcSize);
+}
-size_t ZSTD_getBlockSize(const ZSTD_CCtx* cctx)
+static size_t ZSTD_getBlockSize_deprecated(const ZSTD_CCtx* cctx)
{
ZSTD_compressionParameters const cParams = cctx->appliedParams.cParams;
assert(!ZSTD_checkCParams(cParams));
return MIN(cctx->appliedParams.maxBlockSize, (size_t)1 << cParams.windowLog);
}
-size_t ZSTD_compressBlock(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize)
+/* NOTE: Must just wrap ZSTD_getBlockSize_deprecated() */
+size_t ZSTD_getBlockSize(const ZSTD_CCtx* cctx)
+{
+ return ZSTD_getBlockSize_deprecated(cctx);
+}
+
+/* NOTE: Must just wrap ZSTD_compressBlock_deprecated() */
+size_t ZSTD_compressBlock_deprecated(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize)
{
DEBUGLOG(5, "ZSTD_compressBlock: srcSize = %u", (unsigned)srcSize);
- { size_t const blockSizeMax = ZSTD_getBlockSize(cctx);
+ { size_t const blockSizeMax = ZSTD_getBlockSize_deprecated(cctx);
RETURN_ERROR_IF(srcSize > blockSizeMax, srcSize_wrong, "input is larger than a block"); }
return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 0 /* frame mode */, 0 /* last chunk */);
}
+/* NOTE: Must just wrap ZSTD_compressBlock_deprecated() */
+size_t ZSTD_compressBlock(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize)
+{
+ return ZSTD_compressBlock_deprecated(cctx, dst, dstCapacity, src, srcSize);
+}
+
/*! ZSTD_loadDictionaryContent() :
* @return : 0, or an error code
*/
@@ -4644,31 +4718,42 @@ static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms,
ip = iend - maxDictSize;
src = ip;
srcSize = maxDictSize;
- } }
+ }
+ }
if (srcSize > ZSTD_CHUNKSIZE_MAX) {
/* We must have cleared our windows when our source is this large. */
assert(ZSTD_window_isEmpty(ms->window));
if (loadLdmDict) assert(ZSTD_window_isEmpty(ls->window));
}
+ ZSTD_window_update(&ms->window, src, srcSize, /* forceNonContiguous */ 0);
DEBUGLOG(4, "ZSTD_loadDictionaryContent(): useRowMatchFinder=%d", (int)params->useRowMatchFinder);
- ZSTD_window_update(&ms->window, src, srcSize, /* forceNonContiguous */ 0);
- ms->loadedDictEnd = params->forceWindow ? 0 : (U32)(iend - ms->window.base);
- ms->forceNonContiguous = params->deterministicRefPrefix;
- if (loadLdmDict) {
+ if (loadLdmDict) { /* Load the entire dict into LDM matchfinders. */
ZSTD_window_update(&ls->window, src, srcSize, /* forceNonContiguous */ 0);
ls->loadedDictEnd = params->forceWindow ? 0 : (U32)(iend - ls->window.base);
+ ZSTD_ldm_fillHashTable(ls, ip, iend, &params->ldmParams);
+ }
+
+ /* If the dict is larger than we can reasonably index in our tables, only load the suffix. */
+ if (params->cParams.strategy < ZSTD_btultra) {
+ U32 maxDictSize = 8U << MIN(MAX(params->cParams.hashLog, params->cParams.chainLog), 28);
+ if (srcSize > maxDictSize) {
+ ip = iend - maxDictSize;
+ src = ip;
+ srcSize = maxDictSize;
+ }
}
+ ms->nextToUpdate = (U32)(ip - ms->window.base);
+ ms->loadedDictEnd = params->forceWindow ? 0 : (U32)(iend - ms->window.base);
+ ms->forceNonContiguous = params->deterministicRefPrefix;
+
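Note: for strategies below ZSTD_btultra the content loaded into the regular match finders is now capped at 8U << MIN(MAX(hashLog, chainLog), 28) bytes, as computed a few lines above, while the long-distance matcher still indexes the whole dictionary. Worked example: with hashLog = 20 and chainLog = 22 the cap is 8 << 22 = 32 MiB, and the clamp at 28 puts the absolute ceiling at 8 << 28 = 2 GiB.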
if (srcSize <= HASH_READ_SIZE) return 0;
ZSTD_overflowCorrectIfNeeded(ms, ws, params, ip, iend);
- if (loadLdmDict)
- ZSTD_ldm_fillHashTable(ls, ip, iend, &params->ldmParams);
-
switch(params->cParams.strategy)
{
case ZSTD_fast:
@@ -4688,7 +4773,7 @@ static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms,
} else {
assert(params->useRowMatchFinder != ZSTD_ps_auto);
if (params->useRowMatchFinder == ZSTD_ps_enable) {
- size_t const tagTableSize = ((size_t)1 << params->cParams.hashLog) * sizeof(U16);
+ size_t const tagTableSize = ((size_t)1 << params->cParams.hashLog);
ZSTD_memset(ms->tagTable, 0, tagTableSize);
ZSTD_row_update(ms, iend-HASH_READ_SIZE);
DEBUGLOG(4, "Using row-based hash table for lazy dict");
@@ -4991,8 +5076,8 @@ size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx,
&cctxParams, pledgedSrcSize);
}
-size_t
-ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel)
+static size_t
+ZSTD_compressBegin_usingDict_deprecated(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel)
{
ZSTD_CCtx_params cctxParams;
{ ZSTD_parameters const params = ZSTD_getParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_noAttachDict);
@@ -5003,9 +5088,15 @@ ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize,
&cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, ZSTDb_not_buffered);
}
+size_t
+ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel)
+{
+ return ZSTD_compressBegin_usingDict_deprecated(cctx, dict, dictSize, compressionLevel);
+}
+
size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel)
{
- return ZSTD_compressBegin_usingDict(cctx, NULL, 0, compressionLevel);
+ return ZSTD_compressBegin_usingDict_deprecated(cctx, NULL, 0, compressionLevel);
}
@@ -5075,9 +5166,9 @@ void ZSTD_CCtx_trace(ZSTD_CCtx* cctx, size_t extraCSize)
#endif
}
-size_t ZSTD_compressEnd (ZSTD_CCtx* cctx,
- void* dst, size_t dstCapacity,
- const void* src, size_t srcSize)
+size_t ZSTD_compressEnd_public(ZSTD_CCtx* cctx,
+ void* dst, size_t dstCapacity,
+ const void* src, size_t srcSize)
{
size_t endResult;
size_t const cSize = ZSTD_compressContinue_internal(cctx,
@@ -5101,6 +5192,14 @@ size_t ZSTD_compressEnd (ZSTD_CCtx* cctx,
return cSize + endResult;
}
+/* NOTE: Must just wrap ZSTD_compressEnd_public() */
+size_t ZSTD_compressEnd(ZSTD_CCtx* cctx,
+ void* dst, size_t dstCapacity,
+ const void* src, size_t srcSize)
+{
+ return ZSTD_compressEnd_public(cctx, dst, dstCapacity, src, srcSize);
+}
+
size_t ZSTD_compress_advanced (ZSTD_CCtx* cctx,
void* dst, size_t dstCapacity,
const void* src, size_t srcSize,
@@ -5129,7 +5228,7 @@ size_t ZSTD_compress_advanced_internal(
FORWARD_IF_ERROR( ZSTD_compressBegin_internal(cctx,
dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast, NULL,
params, srcSize, ZSTDb_not_buffered) , "");
- return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize);
+ return ZSTD_compressEnd_public(cctx, dst, dstCapacity, src, srcSize);
}
size_t ZSTD_compress_usingDict(ZSTD_CCtx* cctx,
@@ -5451,6 +5550,7 @@ const ZSTD_CDict* ZSTD_initStaticCDict(
params.cParams = cParams;
params.useRowMatchFinder = useRowMatchFinder;
cdict->useRowMatchFinder = useRowMatchFinder;
+ cdict->compressionLevel = ZSTD_NO_CLEVEL;
if (ZSTD_isError( ZSTD_initCDict_internal(cdict,
dict, dictSize,
@@ -5530,12 +5630,17 @@ size_t ZSTD_compressBegin_usingCDict_advanced(
/* ZSTD_compressBegin_usingCDict() :
* cdict must be != NULL */
-size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict)
+size_t ZSTD_compressBegin_usingCDict_deprecated(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict)
{
ZSTD_frameParameters const fParams = { 0 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ };
return ZSTD_compressBegin_usingCDict_internal(cctx, cdict, fParams, ZSTD_CONTENTSIZE_UNKNOWN);
}
+size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict)
+{
+ return ZSTD_compressBegin_usingCDict_deprecated(cctx, cdict);
+}
+
/*! ZSTD_compress_usingCDict_internal():
* Implementation of various ZSTD_compress_usingCDict* functions.
*/
@@ -5545,7 +5650,7 @@ static size_t ZSTD_compress_usingCDict_internal(ZSTD_CCtx* cctx,
const ZSTD_CDict* cdict, ZSTD_frameParameters fParams)
{
FORWARD_IF_ERROR(ZSTD_compressBegin_usingCDict_internal(cctx, cdict, fParams, srcSize), ""); /* will check if cdict != NULL */
- return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize);
+ return ZSTD_compressEnd_public(cctx, dst, dstCapacity, src, srcSize);
}
/*! ZSTD_compress_usingCDict_advanced():
@@ -5803,7 +5908,7 @@ static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs,
|| zcs->appliedParams.outBufferMode == ZSTD_bm_stable) /* OR we are allowed to return dstSizeTooSmall */
&& (zcs->inBuffPos == 0) ) {
/* shortcut to compression pass directly into output buffer */
- size_t const cSize = ZSTD_compressEnd(zcs,
+ size_t const cSize = ZSTD_compressEnd_public(zcs,
op, oend-op, ip, iend-ip);
DEBUGLOG(4, "ZSTD_compressEnd : cSize=%u", (unsigned)cSize);
FORWARD_IF_ERROR(cSize, "ZSTD_compressEnd failed");
@@ -5861,9 +5966,9 @@ static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs,
if (inputBuffered) {
unsigned const lastBlock = (flushMode == ZSTD_e_end) && (ip==iend);
cSize = lastBlock ?
- ZSTD_compressEnd(zcs, cDst, oSize,
+ ZSTD_compressEnd_public(zcs, cDst, oSize,
zcs->inBuff + zcs->inToCompress, iSize) :
- ZSTD_compressContinue(zcs, cDst, oSize,
+ ZSTD_compressContinue_public(zcs, cDst, oSize,
zcs->inBuff + zcs->inToCompress, iSize);
FORWARD_IF_ERROR(cSize, "%s", lastBlock ? "ZSTD_compressEnd failed" : "ZSTD_compressContinue failed");
zcs->frameEnded = lastBlock;
@@ -5879,8 +5984,8 @@ static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs,
} else { /* !inputBuffered, hence ZSTD_bm_stable */
unsigned const lastBlock = (flushMode == ZSTD_e_end) && (ip + iSize == iend);
cSize = lastBlock ?
- ZSTD_compressEnd(zcs, cDst, oSize, ip, iSize) :
- ZSTD_compressContinue(zcs, cDst, oSize, ip, iSize);
+ ZSTD_compressEnd_public(zcs, cDst, oSize, ip, iSize) :
+ ZSTD_compressContinue_public(zcs, cDst, oSize, ip, iSize);
/* Consume the input prior to error checking to mirror buffered mode. */
if (ip) ip += iSize;
FORWARD_IF_ERROR(cSize, "%s", lastBlock ? "ZSTD_compressEnd failed" : "ZSTD_compressContinue failed");
diff --git a/contrib/libs/zstd/lib/compress/zstd_compress_internal.h b/contrib/libs/zstd/lib/compress/zstd_compress_internal.h
index cbb85e527e..10f68d010e 100644
--- a/contrib/libs/zstd/lib/compress/zstd_compress_internal.h
+++ b/contrib/libs/zstd/lib/compress/zstd_compress_internal.h
@@ -226,8 +226,10 @@ struct ZSTD_matchState_t {
U32 hashLog3; /* dispatch table for matches of len==3 : larger == faster, more memory */
U32 rowHashLog; /* For row-based matchfinder: Hashlog based on nb of rows in the hashTable.*/
- U16* tagTable; /* For row-based matchFinder: A row-based table containing the hashes and head index. */
+ BYTE* tagTable; /* For row-based matchFinder: A row-based table containing the hashes and head index. */
U32 hashCache[ZSTD_ROW_HASH_CACHE_SIZE]; /* For row-based matchFinder: a cache of hashes to improve speed */
+ U64 hashSalt; /* For row-based matchFinder: salts the hash for re-use of tag table */
+ U32 hashSaltEntropy; /* For row-based matchFinder: collects entropy for salt generation */
U32* hashTable;
U32* hashTable3;
@@ -247,6 +249,13 @@ struct ZSTD_matchState_t {
* This behavior is controlled from the cctx ms.
* This parameter has no effect in the cdict ms. */
int prefetchCDictTables;
+
+ /* When == 0, lazy match finders insert every position.
+ * When != 0, lazy match finders only insert positions they search.
+ * This allows them to skip much faster over incompressible data,
+ * at a small cost to compression ratio.
+ */
+ int lazySkipping;
};
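Note: the new lazySkipping flag feeds the skip logic in zstd_lazy.c further down: once the per-iteration step ((ip - anchor) >> kSearchStrength) + 1 grows beyond kLazySkippingStep (8 bytes), the lazy match finders stop inserting every position until the next match. A standalone sketch of the step growth, assuming kSearchStrength is 8 as in zstd_lazy.c:

    #include <stddef.h>
    #include <stdio.h>

    #define kSearchStrength   8   /* assumed value, as in zstd_lazy.c */
    #define kLazySkippingStep 8   /* added by this diff */

    int main(void) {
        /* distance already scanned without finding a match of length >= 4 */
        size_t const gaps[] = { 100, 1000, 2048, 10000 };
        for (size_t i = 0; i < sizeof(gaps)/sizeof(gaps[0]); i++) {
            size_t const step = (gaps[i] >> kSearchStrength) + 1;
            printf("gap=%5zu  step=%3zu  lazySkipping=%d\n",
                   gaps[i], step, step > kLazySkippingStep);
        }
        return 0;
    }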
typedef struct {
@@ -787,28 +796,35 @@ ZSTD_count_2segments(const BYTE* ip, const BYTE* match,
* Hashes
***************************************/
static const U32 prime3bytes = 506832829U;
-static U32 ZSTD_hash3(U32 u, U32 h) { assert(h <= 32); return ((u << (32-24)) * prime3bytes) >> (32-h) ; }
-MEM_STATIC size_t ZSTD_hash3Ptr(const void* ptr, U32 h) { return ZSTD_hash3(MEM_readLE32(ptr), h); } /* only in zstd_opt.h */
+static U32 ZSTD_hash3(U32 u, U32 h, U32 s) { assert(h <= 32); return (((u << (32-24)) * prime3bytes) ^ s) >> (32-h) ; }
+MEM_STATIC size_t ZSTD_hash3Ptr(const void* ptr, U32 h) { return ZSTD_hash3(MEM_readLE32(ptr), h, 0); } /* only in zstd_opt.h */
+MEM_STATIC size_t ZSTD_hash3PtrS(const void* ptr, U32 h, U32 s) { return ZSTD_hash3(MEM_readLE32(ptr), h, s); }
static const U32 prime4bytes = 2654435761U;
-static U32 ZSTD_hash4(U32 u, U32 h) { assert(h <= 32); return (u * prime4bytes) >> (32-h) ; }
-static size_t ZSTD_hash4Ptr(const void* ptr, U32 h) { return ZSTD_hash4(MEM_readLE32(ptr), h); }
+static U32 ZSTD_hash4(U32 u, U32 h, U32 s) { assert(h <= 32); return ((u * prime4bytes) ^ s) >> (32-h) ; }
+static size_t ZSTD_hash4Ptr(const void* ptr, U32 h) { return ZSTD_hash4(MEM_readLE32(ptr), h, 0); }
+static size_t ZSTD_hash4PtrS(const void* ptr, U32 h, U32 s) { return ZSTD_hash4(MEM_readLE32(ptr), h, s); }
static const U64 prime5bytes = 889523592379ULL;
-static size_t ZSTD_hash5(U64 u, U32 h) { assert(h <= 64); return (size_t)(((u << (64-40)) * prime5bytes) >> (64-h)) ; }
-static size_t ZSTD_hash5Ptr(const void* p, U32 h) { return ZSTD_hash5(MEM_readLE64(p), h); }
+static size_t ZSTD_hash5(U64 u, U32 h, U64 s) { assert(h <= 64); return (size_t)((((u << (64-40)) * prime5bytes) ^ s) >> (64-h)) ; }
+static size_t ZSTD_hash5Ptr(const void* p, U32 h) { return ZSTD_hash5(MEM_readLE64(p), h, 0); }
+static size_t ZSTD_hash5PtrS(const void* p, U32 h, U64 s) { return ZSTD_hash5(MEM_readLE64(p), h, s); }
static const U64 prime6bytes = 227718039650203ULL;
-static size_t ZSTD_hash6(U64 u, U32 h) { assert(h <= 64); return (size_t)(((u << (64-48)) * prime6bytes) >> (64-h)) ; }
-static size_t ZSTD_hash6Ptr(const void* p, U32 h) { return ZSTD_hash6(MEM_readLE64(p), h); }
+static size_t ZSTD_hash6(U64 u, U32 h, U64 s) { assert(h <= 64); return (size_t)((((u << (64-48)) * prime6bytes) ^ s) >> (64-h)) ; }
+static size_t ZSTD_hash6Ptr(const void* p, U32 h) { return ZSTD_hash6(MEM_readLE64(p), h, 0); }
+static size_t ZSTD_hash6PtrS(const void* p, U32 h, U64 s) { return ZSTD_hash6(MEM_readLE64(p), h, s); }
static const U64 prime7bytes = 58295818150454627ULL;
-static size_t ZSTD_hash7(U64 u, U32 h) { assert(h <= 64); return (size_t)(((u << (64-56)) * prime7bytes) >> (64-h)) ; }
-static size_t ZSTD_hash7Ptr(const void* p, U32 h) { return ZSTD_hash7(MEM_readLE64(p), h); }
+static size_t ZSTD_hash7(U64 u, U32 h, U64 s) { assert(h <= 64); return (size_t)((((u << (64-56)) * prime7bytes) ^ s) >> (64-h)) ; }
+static size_t ZSTD_hash7Ptr(const void* p, U32 h) { return ZSTD_hash7(MEM_readLE64(p), h, 0); }
+static size_t ZSTD_hash7PtrS(const void* p, U32 h, U64 s) { return ZSTD_hash7(MEM_readLE64(p), h, s); }
static const U64 prime8bytes = 0xCF1BBCDCB7A56463ULL;
-static size_t ZSTD_hash8(U64 u, U32 h) { assert(h <= 64); return (size_t)(((u) * prime8bytes) >> (64-h)) ; }
-static size_t ZSTD_hash8Ptr(const void* p, U32 h) { return ZSTD_hash8(MEM_readLE64(p), h); }
+static size_t ZSTD_hash8(U64 u, U32 h, U64 s) { assert(h <= 64); return (size_t)((((u) * prime8bytes) ^ s) >> (64-h)) ; }
+static size_t ZSTD_hash8Ptr(const void* p, U32 h) { return ZSTD_hash8(MEM_readLE64(p), h, 0); }
+static size_t ZSTD_hash8PtrS(const void* p, U32 h, U64 s) { return ZSTD_hash8(MEM_readLE64(p), h, s); }
+
MEM_STATIC FORCE_INLINE_ATTR
size_t ZSTD_hashPtr(const void* p, U32 hBits, U32 mls)
@@ -828,6 +844,24 @@ size_t ZSTD_hashPtr(const void* p, U32 hBits, U32 mls)
}
}
+MEM_STATIC FORCE_INLINE_ATTR
+size_t ZSTD_hashPtrSalted(const void* p, U32 hBits, U32 mls, const U64 hashSalt) {
+ /* Although some of these hashes do support hBits up to 64, some do not.
+ * To be on the safe side, always avoid hBits > 32. */
+ assert(hBits <= 32);
+
+ switch(mls)
+ {
+ default:
+ case 4: return ZSTD_hash4PtrS(p, hBits, (U32)hashSalt);
+ case 5: return ZSTD_hash5PtrS(p, hBits, hashSalt);
+ case 6: return ZSTD_hash6PtrS(p, hBits, hashSalt);
+ case 7: return ZSTD_hash7PtrS(p, hBits, hashSalt);
+ case 8: return ZSTD_hash8PtrS(p, hBits, hashSalt);
+ }
+}
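Note: the *PtrS variants XOR the salt into the multiplied value before the final shift, so the same input bytes land in a different row of the reused tag table after every salt advance. A tiny standalone illustration using the 8-byte constant from above (input and salt values made up):

    #include <stdint.h>
    #include <stdio.h>

    static const uint64_t prime8bytes = 0xCF1BBCDCB7A56463ULL;

    /* mirrors ZSTD_hash8() / ZSTD_hash8PtrS() from the hunk above */
    static size_t hash8(uint64_t u, unsigned hBits, uint64_t salt) {
        return (size_t)(((u * prime8bytes) ^ salt) >> (64 - hBits));
    }

    int main(void) {
        uint64_t const bytes = 0x0123456789ABCDEFULL;   /* pretend MEM_readLE64(p) */
        printf("unsalted bucket: %zu\n", hash8(bytes, 17, 0));
        printf("salted bucket:   %zu\n", hash8(bytes, 17, 0x9E3779B97F4A7C15ULL));
        return 0;
    }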
+
+
/** ZSTD_ipow() :
* Return base^exponent.
*/
@@ -1475,4 +1509,24 @@ ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_sequencePosition*
const ZSTD_Sequence* const inSeqs, size_t inSeqsSize,
const void* src, size_t blockSize, ZSTD_paramSwitch_e externalRepSearch);
+
+/* ===============================================================
+ * Deprecated definitions that are still used internally to avoid
+ * deprecation warnings. These functions are exactly equivalent to
+ * their public variants, but avoid the deprecation warnings.
+ * =============================================================== */
+
+size_t ZSTD_compressBegin_usingCDict_deprecated(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict);
+
+size_t ZSTD_compressContinue_public(ZSTD_CCtx* cctx,
+ void* dst, size_t dstCapacity,
+ const void* src, size_t srcSize);
+
+size_t ZSTD_compressEnd_public(ZSTD_CCtx* cctx,
+ void* dst, size_t dstCapacity,
+ const void* src, size_t srcSize);
+
+size_t ZSTD_compressBlock_deprecated(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
+
+
#endif /* ZSTD_COMPRESS_H */
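Note: the declarations above exist so internal call sites can keep using these entry points without triggering the library's own deprecation warnings: the public, deprecated symbol becomes a thin wrapper around a non-deprecated internal name. A generic sketch of the pattern (names and macro below are illustrative, not part of zstd):

    #include <stddef.h>

    #if defined(__GNUC__) || defined(__clang__)
    #  define MY_DEPRECATED(msg) __attribute__((deprecated(msg)))
    #else
    #  define MY_DEPRECATED(msg)
    #endif

    /* internal, warning-free entry point: in-library callers use this name */
    static size_t widget_process_internal(int x) { return (size_t)(x * 2); }

    /* public entry point carries the deprecation attribute and only forwards */
    MY_DEPRECATED("use the newer API instead")
    size_t widget_process(int x);
    size_t widget_process(int x) { return widget_process_internal(x); }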
diff --git a/contrib/libs/zstd/lib/compress/zstd_cwksp.h b/contrib/libs/zstd/lib/compress/zstd_cwksp.h
index 97676693b5..cc7fb1c715 100644
--- a/contrib/libs/zstd/lib/compress/zstd_cwksp.h
+++ b/contrib/libs/zstd/lib/compress/zstd_cwksp.h
@@ -14,7 +14,9 @@
/*-*************************************
* Dependencies
***************************************/
+#include "../common/allocations.h" /* ZSTD_customMalloc, ZSTD_customFree */
#include "../common/zstd_internal.h"
+#include "../common/portability_macros.h"
#if defined (__cplusplus)
extern "C" {
@@ -44,8 +46,9 @@ extern "C" {
***************************************/
typedef enum {
ZSTD_cwksp_alloc_objects,
- ZSTD_cwksp_alloc_buffers,
- ZSTD_cwksp_alloc_aligned
+ ZSTD_cwksp_alloc_aligned_init_once,
+ ZSTD_cwksp_alloc_aligned,
+ ZSTD_cwksp_alloc_buffers
} ZSTD_cwksp_alloc_phase_e;
/**
@@ -98,8 +101,8 @@ typedef enum {
*
* Workspace Layout:
*
- * [ ... workspace ... ]
- * [objects][tables ... ->] free space [<- ... aligned][<- ... buffers]
+ * [ ... workspace ... ]
+ * [objects][tables ->] free space [<- buffers][<- aligned][<- init once]
*
* The various objects that live in the workspace are divided into the
* following categories, and are allocated separately:
@@ -123,9 +126,18 @@ typedef enum {
* uint32_t arrays, all of whose values are between 0 and (nextSrc - base).
* Their sizes depend on the cparams. These tables are 64-byte aligned.
*
- * - Aligned: these buffers are used for various purposes that require 4 byte
- * alignment, but don't require any initialization before they're used. These
- * buffers are each aligned to 64 bytes.
+ * - Init once: these buffers require to be initialized at least once before
+ * use. They should be used when we want to skip memory initialization
+ * while not triggering memory checkers (like Valgrind) when reading from
+ * this memory without writing to it first.
+ * These buffers should be used carefully as they might contain data
+ * from previous compressions.
+ * Buffers are aligned to 64 bytes.
+ *
+ * - Aligned: these buffers don't require any initialization before they're
+ * used. The user of the buffer should make sure they write into a buffer
+ * location before reading from it.
+ * Buffers are aligned to 64 bytes.
*
* - Buffers: these buffers are used for various purposes that don't require
* any alignment or initialization before they're used. This means they can
@@ -137,8 +149,9 @@ typedef enum {
* correctly packed into the workspace buffer. That order is:
*
* 1. Objects
- * 2. Buffers
- * 3. Aligned/Tables
+ * 2. Init once / Tables
+ * 3. Aligned / Tables
+ * 4. Buffers / Tables
*
* Attempts to reserve objects of different types out of order will fail.
*/
@@ -150,6 +163,7 @@ typedef struct {
void* tableEnd;
void* tableValidEnd;
void* allocStart;
+ void* initOnceStart;
BYTE allocFailed;
int workspaceOversizedDuration;
@@ -162,6 +176,7 @@ typedef struct {
***************************************/
MEM_STATIC size_t ZSTD_cwksp_available_space(ZSTD_cwksp* ws);
+MEM_STATIC void* ZSTD_cwksp_initialAllocStart(ZSTD_cwksp* ws);
MEM_STATIC void ZSTD_cwksp_assert_internal_consistency(ZSTD_cwksp* ws) {
(void)ws;
@@ -171,6 +186,20 @@ MEM_STATIC void ZSTD_cwksp_assert_internal_consistency(ZSTD_cwksp* ws) {
assert(ws->tableEnd <= ws->allocStart);
assert(ws->tableValidEnd <= ws->allocStart);
assert(ws->allocStart <= ws->workspaceEnd);
+ assert(ws->initOnceStart <= ZSTD_cwksp_initialAllocStart(ws));
+ assert(ws->workspace <= ws->initOnceStart);
+#if ZSTD_MEMORY_SANITIZER
+ {
+ intptr_t const offset = __msan_test_shadow(ws->initOnceStart,
+ (U8*)ZSTD_cwksp_initialAllocStart(ws) - (U8*)ws->initOnceStart);
+#if defined(ZSTD_MSAN_PRINT)
+ if(offset!=-1) {
+ __msan_print_shadow((U8*)ws->initOnceStart + offset - 8, 32);
+ }
+#endif
+ assert(offset==-1);
+ };
+#endif
}
/**
@@ -217,14 +246,10 @@ MEM_STATIC size_t ZSTD_cwksp_aligned_alloc_size(size_t size) {
* for internal purposes (currently only alignment).
*/
MEM_STATIC size_t ZSTD_cwksp_slack_space_required(void) {
- /* For alignment, the wksp will always allocate an additional n_1=[1, 64] bytes
- * to align the beginning of tables section, as well as another n_2=[0, 63] bytes
- * to align the beginning of the aligned section.
- *
- * n_1 + n_2 == 64 bytes if the cwksp is freshly allocated, due to tables and
- * aligneds being sized in multiples of 64 bytes.
+ /* For alignment, the wksp will always allocate an additional 2*ZSTD_CWKSP_ALIGNMENT_BYTES
+ * bytes to align the beginning of tables section and end of buffers;
*/
- size_t const slackSpace = ZSTD_CWKSP_ALIGNMENT_BYTES;
+ size_t const slackSpace = ZSTD_CWKSP_ALIGNMENT_BYTES * 2;
return slackSpace;
}
@@ -237,11 +262,19 @@ MEM_STATIC size_t ZSTD_cwksp_bytes_to_align_ptr(void* ptr, const size_t alignByt
size_t const alignBytesMask = alignBytes - 1;
size_t const bytes = (alignBytes - ((size_t)ptr & (alignBytesMask))) & alignBytesMask;
assert((alignBytes & alignBytesMask) == 0);
- assert(bytes != ZSTD_CWKSP_ALIGNMENT_BYTES);
+ assert(bytes < alignBytes);
return bytes;
}
/**
+ * Returns the initial value for allocStart which is used to determine the position from
+ * which we can allocate from the end of the workspace.
+ */
+MEM_STATIC void* ZSTD_cwksp_initialAllocStart(ZSTD_cwksp* ws) {
+ return (void*)((size_t)ws->workspaceEnd & ~(ZSTD_CWKSP_ALIGNMENT_BYTES-1));
+}
+
+/**
* Internal function. Do not use directly.
* Reserves the given number of bytes within the aligned/buffer segment of the wksp,
* which counts from the end of the wksp (as opposed to the object/table segment).
@@ -281,27 +314,16 @@ ZSTD_cwksp_internal_advance_phase(ZSTD_cwksp* ws, ZSTD_cwksp_alloc_phase_e phase
{
assert(phase >= ws->phase);
if (phase > ws->phase) {
- /* Going from allocating objects to allocating buffers */
- if (ws->phase < ZSTD_cwksp_alloc_buffers &&
- phase >= ZSTD_cwksp_alloc_buffers) {
+ /* Going from allocating objects to allocating initOnce / tables */
+ if (ws->phase < ZSTD_cwksp_alloc_aligned_init_once &&
+ phase >= ZSTD_cwksp_alloc_aligned_init_once) {
ws->tableValidEnd = ws->objectEnd;
- }
+ ws->initOnceStart = ZSTD_cwksp_initialAllocStart(ws);
- /* Going from allocating buffers to allocating aligneds/tables */
- if (ws->phase < ZSTD_cwksp_alloc_aligned &&
- phase >= ZSTD_cwksp_alloc_aligned) {
- { /* Align the start of the "aligned" to 64 bytes. Use [1, 64] bytes. */
- size_t const bytesToAlign =
- ZSTD_CWKSP_ALIGNMENT_BYTES - ZSTD_cwksp_bytes_to_align_ptr(ws->allocStart, ZSTD_CWKSP_ALIGNMENT_BYTES);
- DEBUGLOG(5, "reserving aligned alignment addtl space: %zu", bytesToAlign);
- ZSTD_STATIC_ASSERT((ZSTD_CWKSP_ALIGNMENT_BYTES & (ZSTD_CWKSP_ALIGNMENT_BYTES - 1)) == 0); /* power of 2 */
- RETURN_ERROR_IF(!ZSTD_cwksp_reserve_internal_buffer_space(ws, bytesToAlign),
- memory_allocation, "aligned phase - alignment initial allocation failed!");
- }
{ /* Align the start of the tables to 64 bytes. Use [0, 63] bytes */
- void* const alloc = ws->objectEnd;
+ void *const alloc = ws->objectEnd;
size_t const bytesToAlign = ZSTD_cwksp_bytes_to_align_ptr(alloc, ZSTD_CWKSP_ALIGNMENT_BYTES);
- void* const objectEnd = (BYTE*)alloc + bytesToAlign;
+ void *const objectEnd = (BYTE *) alloc + bytesToAlign;
DEBUGLOG(5, "reserving table alignment addtl space: %zu", bytesToAlign);
RETURN_ERROR_IF(objectEnd > ws->workspaceEnd, memory_allocation,
"table phase - alignment initial allocation failed!");
@@ -309,7 +331,9 @@ ZSTD_cwksp_internal_advance_phase(ZSTD_cwksp* ws, ZSTD_cwksp_alloc_phase_e phase
ws->tableEnd = objectEnd; /* table area starts being empty */
if (ws->tableValidEnd < ws->tableEnd) {
ws->tableValidEnd = ws->tableEnd;
- } } }
+ }
+ }
+ }
ws->phase = phase;
ZSTD_cwksp_assert_internal_consistency(ws);
}
@@ -321,7 +345,7 @@ ZSTD_cwksp_internal_advance_phase(ZSTD_cwksp* ws, ZSTD_cwksp_alloc_phase_e phase
*/
MEM_STATIC int ZSTD_cwksp_owns_buffer(const ZSTD_cwksp* ws, const void* ptr)
{
- return (ptr != NULL) && (ws->workspace <= ptr) && (ptr <= ws->workspaceEnd);
+ return (ptr != NULL) && (ws->workspace <= ptr) && (ptr < ws->workspaceEnd);
}
/**
@@ -368,6 +392,36 @@ MEM_STATIC BYTE* ZSTD_cwksp_reserve_buffer(ZSTD_cwksp* ws, size_t bytes)
/**
* Reserves and returns memory sized on and aligned on ZSTD_CWKSP_ALIGNMENT_BYTES (64 bytes).
+ * This memory has been initialized at least once in the past.
+ * This doesn't mean it has been initialized this time, and it might contain data from previous
+ * operations.
+ * The main usage is for algorithms that might need read access into uninitialized memory.
+ * The algorithm must maintain safety under these conditions and must make sure it doesn't
+ * leak any of the past data (directly or in side channels).
+ */
+MEM_STATIC void* ZSTD_cwksp_reserve_aligned_init_once(ZSTD_cwksp* ws, size_t bytes)
+{
+ size_t const alignedBytes = ZSTD_cwksp_align(bytes, ZSTD_CWKSP_ALIGNMENT_BYTES);
+ void* ptr = ZSTD_cwksp_reserve_internal(ws, alignedBytes, ZSTD_cwksp_alloc_aligned_init_once);
+ assert(((size_t)ptr & (ZSTD_CWKSP_ALIGNMENT_BYTES-1))== 0);
+ if(ptr && ptr < ws->initOnceStart) {
+ /* We assume the memory following the current allocation is either:
+ * 1. Not usable as initOnce memory (end of workspace)
+ * 2. Another initOnce buffer that has been allocated before (and so was previously memset)
+ * 3. An ASAN redzone, in which case we don't want to write on it
+ * For these reasons it should be fine to not explicitly zero every byte up to ws->initOnceStart.
+ * Note that we assume here that MSAN and ASAN cannot run in the same time. */
+ ZSTD_memset(ptr, 0, MIN((size_t)((U8*)ws->initOnceStart - (U8*)ptr), alignedBytes));
+ ws->initOnceStart = ptr;
+ }
+#if ZSTD_MEMORY_SANITIZER
+ assert(__msan_test_shadow(ptr, bytes) == -1);
+#endif
+ return ptr;
+}
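Note: memory returned by reserve_aligned_init_once() was zeroed the first time it entered the init-once region and may still hold bytes from an earlier compression afterwards; the row match finder's tag table can live there because hash salting makes stale tags harmless. Reservations must follow the new phase order (objects, then init-once/tables, then aligned, then buffers). A hedged sketch of that call order using the internal cwksp API (only builds inside the library tree; sizes are arbitrary):

    #include "zstd_cwksp.h"
    #include <stdlib.h>

    static void cwksp_phase_order_demo(void)
    {
        ZSTD_cwksp ws;
        size_t const wkspSize = (size_t)1 << 20;
        void* const mem = malloc(wkspSize);
        ZSTD_cwksp_init(&ws, mem, wkspSize, ZSTD_cwksp_dynamic_alloc);

        (void)ZSTD_cwksp_reserve_object(&ws, 64);                      /* 1. objects */
        (void)ZSTD_cwksp_reserve_table(&ws, (1u << 14) * sizeof(U32)); /* 2. tables */
        (void)ZSTD_cwksp_reserve_aligned_init_once(&ws, 1u << 14);     /* 2. init once, e.g. the tag table */
        (void)ZSTD_cwksp_reserve_aligned(&ws, 4096);                   /* 3. aligned */
        (void)ZSTD_cwksp_reserve_buffer(&ws, 1u << 15);                /* 4. buffers */

        free(mem);
    }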
+
+/**
+ * Reserves and returns memory sized on and aligned on ZSTD_CWKSP_ALIGNMENT_BYTES (64 bytes).
*/
MEM_STATIC void* ZSTD_cwksp_reserve_aligned(ZSTD_cwksp* ws, size_t bytes)
{
@@ -384,13 +438,17 @@ MEM_STATIC void* ZSTD_cwksp_reserve_aligned(ZSTD_cwksp* ws, size_t bytes)
*/
MEM_STATIC void* ZSTD_cwksp_reserve_table(ZSTD_cwksp* ws, size_t bytes)
{
- const ZSTD_cwksp_alloc_phase_e phase = ZSTD_cwksp_alloc_aligned;
+ const ZSTD_cwksp_alloc_phase_e phase = ZSTD_cwksp_alloc_aligned_init_once;
void* alloc;
void* end;
void* top;
- if (ZSTD_isError(ZSTD_cwksp_internal_advance_phase(ws, phase))) {
- return NULL;
+ /* We can only start allocating tables after we are done reserving space for objects at the
+ * start of the workspace */
+ if(ws->phase < phase) {
+ if (ZSTD_isError(ZSTD_cwksp_internal_advance_phase(ws, phase))) {
+ return NULL;
+ }
}
alloc = ws->tableEnd;
end = (BYTE *)alloc + bytes;
@@ -469,11 +527,19 @@ MEM_STATIC void ZSTD_cwksp_mark_tables_dirty(ZSTD_cwksp* ws)
#if ZSTD_MEMORY_SANITIZER && !defined (ZSTD_MSAN_DONT_POISON_WORKSPACE)
/* To validate that the table re-use logic is sound, and that we don't
* access table space that we haven't cleaned, we re-"poison" the table
- * space every time we mark it dirty. */
+ * space every time we mark it dirty.
+ * Since tableValidEnd space and initOnce space may overlap we don't poison
+ * the initOnce portion as it breaks its promise. This means that this poisoning
+ * check isn't always applied fully. */
{
size_t size = (BYTE*)ws->tableValidEnd - (BYTE*)ws->objectEnd;
assert(__msan_test_shadow(ws->objectEnd, size) == -1);
- __msan_poison(ws->objectEnd, size);
+ if((BYTE*)ws->tableValidEnd < (BYTE*)ws->initOnceStart) {
+ __msan_poison(ws->objectEnd, size);
+ } else {
+ assert(ws->initOnceStart >= ws->objectEnd);
+ __msan_poison(ws->objectEnd, (BYTE*)ws->initOnceStart - (BYTE*)ws->objectEnd);
+ }
}
#endif
@@ -538,11 +604,14 @@ MEM_STATIC void ZSTD_cwksp_clear(ZSTD_cwksp* ws) {
#if ZSTD_MEMORY_SANITIZER && !defined (ZSTD_MSAN_DONT_POISON_WORKSPACE)
/* To validate that the context re-use logic is sound, and that we don't
* access stuff that this compression hasn't initialized, we re-"poison"
- * the workspace (or at least the non-static, non-table parts of it)
- * every time we start a new compression. */
+ * the workspace except for the areas in which we expect memory re-use
+ * without initialization (objects, valid tables area and init once
+ * memory). */
{
- size_t size = (BYTE*)ws->workspaceEnd - (BYTE*)ws->tableValidEnd;
- __msan_poison(ws->tableValidEnd, size);
+ if((BYTE*)ws->tableValidEnd < (BYTE*)ws->initOnceStart) {
+ size_t size = (BYTE*)ws->initOnceStart - (BYTE*)ws->tableValidEnd;
+ __msan_poison(ws->tableValidEnd, size);
+ }
}
#endif
@@ -558,10 +627,10 @@ MEM_STATIC void ZSTD_cwksp_clear(ZSTD_cwksp* ws) {
#endif
ws->tableEnd = ws->objectEnd;
- ws->allocStart = ws->workspaceEnd;
+ ws->allocStart = ZSTD_cwksp_initialAllocStart(ws);
ws->allocFailed = 0;
- if (ws->phase > ZSTD_cwksp_alloc_buffers) {
- ws->phase = ZSTD_cwksp_alloc_buffers;
+ if (ws->phase > ZSTD_cwksp_alloc_aligned_init_once) {
+ ws->phase = ZSTD_cwksp_alloc_aligned_init_once;
}
ZSTD_cwksp_assert_internal_consistency(ws);
}
@@ -578,6 +647,7 @@ MEM_STATIC void ZSTD_cwksp_init(ZSTD_cwksp* ws, void* start, size_t size, ZSTD_c
ws->workspaceEnd = (BYTE*)start + size;
ws->objectEnd = ws->workspace;
ws->tableValidEnd = ws->objectEnd;
+ ws->initOnceStart = ZSTD_cwksp_initialAllocStart(ws);
ws->phase = ZSTD_cwksp_alloc_objects;
ws->isStatic = isStatic;
ZSTD_cwksp_clear(ws);
@@ -630,17 +700,11 @@ MEM_STATIC int ZSTD_cwksp_reserve_failed(const ZSTD_cwksp* ws) {
* Returns if the estimated space needed for a wksp is within an acceptable limit of the
* actual amount of space used.
*/
-MEM_STATIC int ZSTD_cwksp_estimated_space_within_bounds(const ZSTD_cwksp* const ws,
- size_t const estimatedSpace, int resizedWorkspace) {
- if (resizedWorkspace) {
- /* Resized/newly allocated wksp should have exact bounds */
- return ZSTD_cwksp_used(ws) == estimatedSpace;
- } else {
- /* Due to alignment, when reusing a workspace, we can actually consume 63 fewer or more bytes
- * than estimatedSpace. See the comments in zstd_cwksp.h for details.
- */
- return (ZSTD_cwksp_used(ws) >= estimatedSpace - 63) && (ZSTD_cwksp_used(ws) <= estimatedSpace + 63);
- }
+MEM_STATIC int ZSTD_cwksp_estimated_space_within_bounds(const ZSTD_cwksp *const ws, size_t const estimatedSpace) {
+ /* We have an alignment space between objects and tables between tables and buffers, so we can have up to twice
+ * the alignment bytes difference between estimation and actual usage */
+ return (estimatedSpace - ZSTD_cwksp_slack_space_required()) <= ZSTD_cwksp_used(ws) &&
+ ZSTD_cwksp_used(ws) <= estimatedSpace;
}
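Note: with ZSTD_CWKSP_ALIGNMENT_BYTES = 64, ZSTD_cwksp_slack_space_required() now returns 128, so the check accepts any measured usage inside the window [estimatedSpace - 128, estimatedSpace]. Worked example: an estimate of 1,048,576 bytes passes for any actual usage between 1,048,448 and 1,048,576 bytes.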
diff --git a/contrib/libs/zstd/lib/compress/zstd_lazy.c b/contrib/libs/zstd/lib/compress/zstd_lazy.c
index a247342729..5ba88e8678 100644
--- a/contrib/libs/zstd/lib/compress/zstd_lazy.c
+++ b/contrib/libs/zstd/lib/compress/zstd_lazy.c
@@ -12,6 +12,8 @@
#include "zstd_lazy.h"
#include "../common/bits.h" /* ZSTD_countTrailingZeros64 */
+#define kLazySkippingStep 8
+
/*-*************************************
* Binary Tree search
@@ -618,7 +620,7 @@ size_t ZSTD_dedicatedDictSearch_lazy_search(size_t* offsetPtr, size_t ml, U32 nb
FORCE_INLINE_TEMPLATE U32 ZSTD_insertAndFindFirstIndex_internal(
ZSTD_matchState_t* ms,
const ZSTD_compressionParameters* const cParams,
- const BYTE* ip, U32 const mls)
+ const BYTE* ip, U32 const mls, U32 const lazySkipping)
{
U32* const hashTable = ms->hashTable;
const U32 hashLog = cParams->hashLog;
@@ -633,6 +635,9 @@ FORCE_INLINE_TEMPLATE U32 ZSTD_insertAndFindFirstIndex_internal(
NEXT_IN_CHAIN(idx, chainMask) = hashTable[h];
hashTable[h] = idx;
idx++;
+ /* Stop inserting every position when in the lazy skipping mode. */
+ if (lazySkipping)
+ break;
}
ms->nextToUpdate = target;
@@ -641,7 +646,7 @@ FORCE_INLINE_TEMPLATE U32 ZSTD_insertAndFindFirstIndex_internal(
U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip) {
const ZSTD_compressionParameters* const cParams = &ms->cParams;
- return ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, ms->cParams.minMatch);
+ return ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, ms->cParams.minMatch, /* lazySkipping*/ 0);
}
/* inlining is important to hardwire a hot branch (template emulation) */
@@ -685,7 +690,7 @@ size_t ZSTD_HcFindBestMatch(
}
/* HC4 match finder */
- matchIndex = ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, mls);
+ matchIndex = ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, mls, ms->lazySkipping);
for ( ; (matchIndex>=lowLimit) & (nbAttempts>0) ; nbAttempts--) {
size_t currentMl=0;
@@ -758,7 +763,6 @@ size_t ZSTD_HcFindBestMatch(
* (SIMD) Row-based matchfinder
***********************************/
/* Constants for row-based hash */
-#define ZSTD_ROW_HASH_TAG_OFFSET 16 /* byte offset of hashes in the match state's tagTable from the beginning of a row */
#define ZSTD_ROW_HASH_TAG_MASK ((1u << ZSTD_ROW_HASH_TAG_BITS) - 1)
#define ZSTD_ROW_HASH_MAX_ENTRIES 64 /* absolute maximum number of entries per row, for all configurations */
@@ -774,39 +778,15 @@ MEM_STATIC U32 ZSTD_VecMask_next(ZSTD_VecMask val) {
return ZSTD_countTrailingZeros64(val);
}
-/* ZSTD_rotateRight_*():
- * Rotates a bitfield to the right by "count" bits.
- * https://en.wikipedia.org/w/index.php?title=Circular_shift&oldid=991635599#Implementing_circular_shifts
- */
-FORCE_INLINE_TEMPLATE
-U64 ZSTD_rotateRight_U64(U64 const value, U32 count) {
- assert(count < 64);
- count &= 0x3F; /* for fickle pattern recognition */
- return (value >> count) | (U64)(value << ((0U - count) & 0x3F));
-}
-
-FORCE_INLINE_TEMPLATE
-U32 ZSTD_rotateRight_U32(U32 const value, U32 count) {
- assert(count < 32);
- count &= 0x1F; /* for fickle pattern recognition */
- return (value >> count) | (U32)(value << ((0U - count) & 0x1F));
-}
-
-FORCE_INLINE_TEMPLATE
-U16 ZSTD_rotateRight_U16(U16 const value, U32 count) {
- assert(count < 16);
- count &= 0x0F; /* for fickle pattern recognition */
- return (value >> count) | (U16)(value << ((0U - count) & 0x0F));
-}
-
/* ZSTD_row_nextIndex():
* Returns the next index to insert at within a tagTable row, and updates the "head"
- * value to reflect the update. Essentially cycles backwards from [0, {entries per row})
+ * value to reflect the update. Essentially cycles backwards from [1, {entries per row})
*/
FORCE_INLINE_TEMPLATE U32 ZSTD_row_nextIndex(BYTE* const tagRow, U32 const rowMask) {
- U32 const next = (*tagRow - 1) & rowMask;
- *tagRow = (BYTE)next;
- return next;
+ U32 next = (*tagRow-1) & rowMask;
+ next += (next == 0) ? rowMask : 0; /* skip first position */
+ *tagRow = (BYTE)next;
+ return next;
}
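Note: with one-byte tags the first byte of each row doubles as the row's head counter, so ZSTD_row_nextIndex() now cycles through [1, rowEntries) and the search loops below skip matchPos == 0. A standalone sketch of the new cycling order for a 16-entry row:

    #include <stdio.h>

    /* mirrors the new ZSTD_row_nextIndex(): cycles backwards through [1, rowEntries) */
    static unsigned row_next_index(unsigned char* tagRow, unsigned rowMask) {
        unsigned next = (*tagRow - 1) & rowMask;
        next += (next == 0) ? rowMask : 0;   /* slot 0 holds the row head, never a tag */
        *tagRow = (unsigned char)next;
        return next;
    }

    int main(void) {
        unsigned char head = 0;        /* freshly zeroed tag-row head */
        unsigned const rowMask = 15;   /* 16 entries per row */
        for (int i = 0; i < 18; i++)
            printf("%u ", row_next_index(&head, rowMask));
        printf("\n");                  /* 15 14 13 ... 2 1 15 14 13 */
        return 0;
    }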
/* ZSTD_isAligned():
@@ -820,7 +800,7 @@ MEM_STATIC int ZSTD_isAligned(void const* ptr, size_t align) {
/* ZSTD_row_prefetch():
* Performs prefetching for the hashTable and tagTable at a given row.
*/
-FORCE_INLINE_TEMPLATE void ZSTD_row_prefetch(U32 const* hashTable, U16 const* tagTable, U32 const relRow, U32 const rowLog) {
+FORCE_INLINE_TEMPLATE void ZSTD_row_prefetch(U32 const* hashTable, BYTE const* tagTable, U32 const relRow, U32 const rowLog) {
PREFETCH_L1(hashTable + relRow);
if (rowLog >= 5) {
PREFETCH_L1(hashTable + relRow + 16);
@@ -844,13 +824,13 @@ FORCE_INLINE_TEMPLATE void ZSTD_row_fillHashCache(ZSTD_matchState_t* ms, const B
U32 idx, const BYTE* const iLimit)
{
U32 const* const hashTable = ms->hashTable;
- U16 const* const tagTable = ms->tagTable;
+ BYTE const* const tagTable = ms->tagTable;
U32 const hashLog = ms->rowHashLog;
U32 const maxElemsToPrefetch = (base + idx) > iLimit ? 0 : (U32)(iLimit - (base + idx) + 1);
U32 const lim = idx + MIN(ZSTD_ROW_HASH_CACHE_SIZE, maxElemsToPrefetch);
for (; idx < lim; ++idx) {
- U32 const hash = (U32)ZSTD_hashPtr(base + idx, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls);
+ U32 const hash = (U32)ZSTD_hashPtrSalted(base + idx, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls, ms->hashSalt);
U32 const row = (hash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog;
ZSTD_row_prefetch(hashTable, tagTable, row, rowLog);
ms->hashCache[idx & ZSTD_ROW_HASH_CACHE_MASK] = hash;
@@ -866,11 +846,12 @@ FORCE_INLINE_TEMPLATE void ZSTD_row_fillHashCache(ZSTD_matchState_t* ms, const B
* base + idx + ZSTD_ROW_HASH_CACHE_SIZE. Also prefetches the appropriate rows from hashTable and tagTable.
*/
FORCE_INLINE_TEMPLATE U32 ZSTD_row_nextCachedHash(U32* cache, U32 const* hashTable,
- U16 const* tagTable, BYTE const* base,
+ BYTE const* tagTable, BYTE const* base,
U32 idx, U32 const hashLog,
- U32 const rowLog, U32 const mls)
+ U32 const rowLog, U32 const mls,
+ U64 const hashSalt)
{
- U32 const newHash = (U32)ZSTD_hashPtr(base+idx+ZSTD_ROW_HASH_CACHE_SIZE, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls);
+ U32 const newHash = (U32)ZSTD_hashPtrSalted(base+idx+ZSTD_ROW_HASH_CACHE_SIZE, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls, hashSalt);
U32 const row = (newHash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog;
ZSTD_row_prefetch(hashTable, tagTable, row, rowLog);
{ U32 const hash = cache[idx & ZSTD_ROW_HASH_CACHE_MASK];
@@ -888,22 +869,21 @@ FORCE_INLINE_TEMPLATE void ZSTD_row_update_internalImpl(ZSTD_matchState_t* ms,
U32 const rowMask, U32 const useCache)
{
U32* const hashTable = ms->hashTable;
- U16* const tagTable = ms->tagTable;
+ BYTE* const tagTable = ms->tagTable;
U32 const hashLog = ms->rowHashLog;
const BYTE* const base = ms->window.base;
DEBUGLOG(6, "ZSTD_row_update_internalImpl(): updateStartIdx=%u, updateEndIdx=%u", updateStartIdx, updateEndIdx);
for (; updateStartIdx < updateEndIdx; ++updateStartIdx) {
- U32 const hash = useCache ? ZSTD_row_nextCachedHash(ms->hashCache, hashTable, tagTable, base, updateStartIdx, hashLog, rowLog, mls)
- : (U32)ZSTD_hashPtr(base + updateStartIdx, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls);
+ U32 const hash = useCache ? ZSTD_row_nextCachedHash(ms->hashCache, hashTable, tagTable, base, updateStartIdx, hashLog, rowLog, mls, ms->hashSalt)
+ : (U32)ZSTD_hashPtrSalted(base + updateStartIdx, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls, ms->hashSalt);
U32 const relRow = (hash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog;
U32* const row = hashTable + relRow;
- BYTE* tagRow = (BYTE*)(tagTable + relRow); /* Though tagTable is laid out as a table of U16, each tag is only 1 byte.
- Explicit cast allows us to get exact desired position within each row */
+ BYTE* tagRow = tagTable + relRow;
U32 const pos = ZSTD_row_nextIndex(tagRow, rowMask);
- assert(hash == ZSTD_hashPtr(base + updateStartIdx, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls));
- ((BYTE*)tagRow)[pos + ZSTD_ROW_HASH_TAG_OFFSET] = hash & ZSTD_ROW_HASH_TAG_MASK;
+ assert(hash == ZSTD_hashPtrSalted(base + updateStartIdx, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls, ms->hashSalt));
+ tagRow[pos] = hash & ZSTD_ROW_HASH_TAG_MASK;
row[pos] = updateStartIdx;
}
}
@@ -1059,7 +1039,7 @@ ZSTD_row_getNEONMask(const U32 rowEntries, const BYTE* const src, const BYTE tag
FORCE_INLINE_TEMPLATE ZSTD_VecMask
ZSTD_row_getMatchMask(const BYTE* const tagRow, const BYTE tag, const U32 headGrouped, const U32 rowEntries)
{
- const BYTE* const src = tagRow + ZSTD_ROW_HASH_TAG_OFFSET;
+ const BYTE* const src = tagRow;
assert((rowEntries == 16) || (rowEntries == 32) || rowEntries == 64);
assert(rowEntries <= ZSTD_ROW_HASH_MAX_ENTRIES);
assert(ZSTD_row_matchMaskGroupWidth(rowEntries) * rowEntries <= sizeof(ZSTD_VecMask) * 8);
@@ -1144,7 +1124,7 @@ size_t ZSTD_RowFindBestMatch(
const U32 rowLog)
{
U32* const hashTable = ms->hashTable;
- U16* const tagTable = ms->tagTable;
+ BYTE* const tagTable = ms->tagTable;
U32* const hashCache = ms->hashCache;
const U32 hashLog = ms->rowHashLog;
const ZSTD_compressionParameters* const cParams = &ms->cParams;
@@ -1163,8 +1143,10 @@ size_t ZSTD_RowFindBestMatch(
const U32 rowMask = rowEntries - 1;
const U32 cappedSearchLog = MIN(cParams->searchLog, rowLog); /* nb of searches is capped at nb entries per row */
const U32 groupWidth = ZSTD_row_matchMaskGroupWidth(rowEntries);
+ const U64 hashSalt = ms->hashSalt;
U32 nbAttempts = 1U << cappedSearchLog;
size_t ml=4-1;
+ U32 hash;
/* DMS/DDS variables that may be referenced later */
const ZSTD_matchState_t* const dms = ms->dictMatchState;
@@ -1188,7 +1170,7 @@ size_t ZSTD_RowFindBestMatch(
if (dictMode == ZSTD_dictMatchState) {
/* Prefetch DMS rows */
U32* const dmsHashTable = dms->hashTable;
- U16* const dmsTagTable = dms->tagTable;
+ BYTE* const dmsTagTable = dms->tagTable;
U32 const dmsHash = (U32)ZSTD_hashPtr(ip, dms->rowHashLog + ZSTD_ROW_HASH_TAG_BITS, mls);
U32 const dmsRelRow = (dmsHash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog;
dmsTag = dmsHash & ZSTD_ROW_HASH_TAG_MASK;
@@ -1198,9 +1180,19 @@ size_t ZSTD_RowFindBestMatch(
}
/* Update the hashTable and tagTable up to (but not including) ip */
- ZSTD_row_update_internal(ms, ip, mls, rowLog, rowMask, 1 /* useCache */);
+ if (!ms->lazySkipping) {
+ ZSTD_row_update_internal(ms, ip, mls, rowLog, rowMask, 1 /* useCache */);
+ hash = ZSTD_row_nextCachedHash(hashCache, hashTable, tagTable, base, curr, hashLog, rowLog, mls, hashSalt);
+ } else {
+ /* Stop inserting every position when in the lazy skipping mode.
+ * The hash cache is also not kept up to date in this mode.
+ */
+ hash = (U32)ZSTD_hashPtrSalted(ip, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls, hashSalt);
+ ms->nextToUpdate = curr;
+ }
+ ms->hashSaltEntropy += hash; /* collect salt entropy */
+
{ /* Get the hash for ip, compute the appropriate row */
- U32 const hash = ZSTD_row_nextCachedHash(hashCache, hashTable, tagTable, base, curr, hashLog, rowLog, mls);
U32 const relRow = (hash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog;
U32 const tag = hash & ZSTD_ROW_HASH_TAG_MASK;
U32* const row = hashTable + relRow;
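
Editor's note: in the normal path the hash for the current position comes from a small cache that runs a few positions ahead of ip; in the new lazy-skipping mode that cache is not maintained, so the hash is computed directly with the salted hash and ms->nextToUpdate is pinned to the current position. The sketch below shows one plausible shape of such a look-ahead cache, assuming a cache of 8 precomputed hashes, a toy hash, and invented names -- it is a reading of the pattern visible here, not the library's implementation.

    #include <stdint.h>
    #include <stddef.h>

    #define CACHE_SIZE 8                          /* hypothetical: 8 hashes ahead */
    #define CACHE_MASK (CACHE_SIZE - 1)

    static uint32_t toy_hash(const uint8_t* p)    /* stand-in hash of 4 bytes     */
    {
        uint32_t x = (uint32_t)p[0] | ((uint32_t)p[1] << 8)
                   | ((uint32_t)p[2] << 16) | ((uint32_t)p[3] << 24);
        return x * 2654435761u;                   /* Knuth multiplicative mix     */
    }

    /* Prime the cache for positions idx .. idx+CACHE_SIZE-1 before first use. */
    static void fill_cache(uint32_t* cache, const uint8_t* base, size_t idx)
    {
        size_t i;
        for (i = 0; i < CACHE_SIZE; ++i)
            cache[(idx + i) & CACHE_MASK] = toy_hash(base + idx + i);
    }

    /* Return the precomputed hash for position idx and refill that slot with
     * the hash of idx + CACHE_SIZE, so the cache always stays ahead of the scan. */
    static uint32_t next_cached_hash(uint32_t* cache, const uint8_t* base, size_t idx)
    {
        uint32_t const hash = cache[idx & CACHE_MASK];
        cache[idx & CACHE_MASK] = toy_hash(base + idx + CACHE_SIZE);
        return hash;
    }

    int main(void)
    {
        uint8_t buf[64] = {0};
        uint32_t cache[CACHE_SIZE];
        fill_cache(cache, buf, 0);
        return (int)(next_cached_hash(cache, buf, 0) & 1);   /* use the result */
    }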
@@ -1212,9 +1204,10 @@ size_t ZSTD_RowFindBestMatch(
ZSTD_VecMask matches = ZSTD_row_getMatchMask(tagRow, (BYTE)tag, headGrouped, rowEntries);
/* Cycle through the matches and prefetch */
- for (; (matches > 0) && (nbAttempts > 0); --nbAttempts, matches &= (matches - 1)) {
+ for (; (matches > 0) && (nbAttempts > 0); matches &= (matches - 1)) {
U32 const matchPos = ((headGrouped + ZSTD_VecMask_next(matches)) / groupWidth) & rowMask;
U32 const matchIndex = row[matchPos];
+ if(matchPos == 0) continue;
assert(numMatches < rowEntries);
if (matchIndex < lowLimit)
break;
@@ -1224,13 +1217,14 @@ size_t ZSTD_RowFindBestMatch(
PREFETCH_L1(dictBase + matchIndex);
}
matchBuffer[numMatches++] = matchIndex;
+ --nbAttempts;
}
/* Speed opt: insert current byte into hashtable too. This allows us to avoid one iteration of the loop
in ZSTD_row_update_internal() at the next search. */
{
U32 const pos = ZSTD_row_nextIndex(tagRow, rowMask);
- tagRow[pos + ZSTD_ROW_HASH_TAG_OFFSET] = (BYTE)tag;
+ tagRow[pos] = (BYTE)tag;
row[pos] = ms->nextToUpdate++;
}
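
Editor's note: both candidate loops in this change alter how the search budget is spent: candidates are still walked by clearing the lowest set bit of the match mask, but nbAttempts is now decremented only for entries that are actually collected, and slot 0 of the row is skipped. A minimal standalone sketch of that iteration pattern follows; the bit-scan helper, the simplified position mapping, and the "reserved slot 0" wording are illustrative assumptions, not code taken from the library.

    #include <stdint.h>

    /* Index of the lowest set bit (mask must be non-zero): a portable
     * stand-in for a ctz intrinsic. */
    static unsigned lowest_bit(uint64_t mask)
    {
        unsigned n = 0;
        while ((mask & 1) == 0) { mask >>= 1; ++n; }
        return n;
    }

    /* Demonstrates the budgeted iteration:
     *  - clear one candidate bit per iteration with  matches &= matches - 1
     *  - skip slot 0 (assumed reserved by the new row layout)
     *  - spend one attempt only when a candidate is actually collected. */
    static unsigned collect_candidates(uint64_t matches, const uint32_t* row,
                                       uint32_t lowLimit, unsigned nbAttempts,
                                       uint32_t* outBuffer)
    {
        unsigned numMatches = 0;
        for (; (matches > 0) && (nbAttempts > 0); matches &= (matches - 1)) {
            unsigned const pos = lowest_bit(matches);
            if (pos == 0) continue;               /* reserved slot             */
            if (row[pos] < lowLimit) break;       /* candidate index too old   */
            outBuffer[numMatches++] = row[pos];
            --nbAttempts;                         /* budget spent on real work */
        }
        return numMatches;
    }

    int main(void)
    {
        uint32_t row[8] = { 0, 40, 40, 10, 0, 0, 55, 0 };
        uint32_t found[8];
        /* bits 1,2,3,6 set; with lowLimit=20 the walk collects 40,40 then stops at 10 */
        return (int)collect_candidates(0x4Eull, row, 20, 4, found);   /* returns 2 */
    }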
@@ -1281,13 +1275,15 @@ size_t ZSTD_RowFindBestMatch(
size_t currMatch = 0;
ZSTD_VecMask matches = ZSTD_row_getMatchMask(dmsTagRow, (BYTE)dmsTag, headGrouped, rowEntries);
- for (; (matches > 0) && (nbAttempts > 0); --nbAttempts, matches &= (matches - 1)) {
+ for (; (matches > 0) && (nbAttempts > 0); matches &= (matches - 1)) {
U32 const matchPos = ((headGrouped + ZSTD_VecMask_next(matches)) / groupWidth) & rowMask;
U32 const matchIndex = dmsRow[matchPos];
+ if(matchPos == 0) continue;
if (matchIndex < dmsLowestIndex)
break;
PREFETCH_L1(dmsBase + matchIndex);
matchBuffer[numMatches++] = matchIndex;
+ --nbAttempts;
}
/* Return the longest match */
@@ -1544,10 +1540,11 @@ ZSTD_compressBlock_lazy_generic(
assert(offset_2 <= dictAndPrefixLength);
}
+ /* Reset the lazy skipping state */
+ ms->lazySkipping = 0;
+
if (searchMethod == search_rowHash) {
- ZSTD_row_fillHashCache(ms, base, rowLog,
- MIN(ms->cParams.minMatch, 6 /* mls caps out at 6 */),
- ms->nextToUpdate, ilimit);
+ ZSTD_row_fillHashCache(ms, base, rowLog, mls, ms->nextToUpdate, ilimit);
}
/* Match Loop */
@@ -1591,7 +1588,16 @@ ZSTD_compressBlock_lazy_generic(
}
if (matchLength < 4) {
- ip += ((ip-anchor) >> kSearchStrength) + 1; /* jump faster over incompressible sections */
+            size_t const step = ((size_t)(ip-anchor) >> kSearchStrength) + 1;   /* jump faster over incompressible sections */
+ ip += step;
+ /* Enter the lazy skipping mode once we are skipping more than 8 bytes at a time.
+ * In this mode we stop inserting every position into our tables, and only insert
+ * positions that we search, which is one in step positions.
+ * The exact cutoff is flexible, I've just chosen a number that is reasonably high,
+ * so we minimize the compression ratio loss in "normal" scenarios. This mode gets
+ * triggered once we've gone 2KB without finding any matches.
+ */
+ ms->lazySkipping = step > kLazySkippingStep;
continue;
}
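
Editor's note: the skip-step arithmetic in this hunk explains the "8 bytes at a time" / "2KB" figures in the comment. With step = ((ip-anchor) >> kSearchStrength) + 1, lazy skipping starts once step exceeds kLazySkippingStep. Assuming kSearchStrength == 8 and kLazySkippingStep == 8 (values consistent with those figures, not shown in this hunk), step > 8 requires (ip-anchor) >> 8 >= 8, i.e. at least 2048 bytes since the last match. A tiny self-contained check:

    #include <assert.h>
    #include <stddef.h>

    /* Assumed values, consistent with the "8 bytes at a time" / "2KB" comment. */
    #define kSearchStrength   8
    #define kLazySkippingStep 8

    static int enters_lazy_skipping(size_t bytesSinceLastMatch)
    {
        size_t const step = (bytesSinceLastMatch >> kSearchStrength) + 1;
        return step > kLazySkippingStep;
    }

    int main(void)
    {
        assert(!enters_lazy_skipping(2047));   /* still in normal mode           */
        assert( enters_lazy_skipping(2048));   /* 2 KB without a match: skip on  */
        return 0;
    }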
@@ -1695,6 +1701,13 @@ _storeSequence:
ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offBase, matchLength);
anchor = ip = start + matchLength;
}
+ if (ms->lazySkipping) {
+ /* We've found a match, disable lazy skipping mode, and refill the hash cache. */
+ if (searchMethod == search_rowHash) {
+ ZSTD_row_fillHashCache(ms, base, rowLog, mls, ms->nextToUpdate, ilimit);
+ }
+ ms->lazySkipping = 0;
+ }
/* check immediate repcode */
if (isDxS) {
@@ -1912,12 +1925,13 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
DEBUGLOG(5, "ZSTD_compressBlock_lazy_extDict_generic (searchFunc=%u)", (U32)searchMethod);
+ /* Reset the lazy skipping state */
+ ms->lazySkipping = 0;
+
/* init */
ip += (ip == prefixStart);
if (searchMethod == search_rowHash) {
- ZSTD_row_fillHashCache(ms, base, rowLog,
- MIN(ms->cParams.minMatch, 6 /* mls caps out at 6 */),
- ms->nextToUpdate, ilimit);
+ ZSTD_row_fillHashCache(ms, base, rowLog, mls, ms->nextToUpdate, ilimit);
}
/* Match Loop */
@@ -1955,7 +1969,16 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
}
if (matchLength < 4) {
- ip += ((ip-anchor) >> kSearchStrength) + 1; /* jump faster over incompressible sections */
+ size_t const step = ((size_t)(ip-anchor) >> kSearchStrength);
+ ip += step + 1; /* jump faster over incompressible sections */
+ /* Enter the lazy skipping mode once we are skipping more than 8 bytes at a time.
+ * In this mode we stop inserting every position into our tables, and only insert
+ * positions that we search, which is one in step positions.
+ * The exact cutoff is flexible, I've just chosen a number that is reasonably high,
+ * so we minimize the compression ratio loss in "normal" scenarios. This mode gets
+ * triggered once we've gone 2KB without finding any matches.
+ */
+ ms->lazySkipping = step > kLazySkippingStep;
continue;
}
@@ -2041,6 +2064,13 @@ _storeSequence:
ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offBase, matchLength);
anchor = ip = start + matchLength;
}
+ if (ms->lazySkipping) {
+ /* We've found a match, disable lazy skipping mode, and refill the hash cache. */
+ if (searchMethod == search_rowHash) {
+ ZSTD_row_fillHashCache(ms, base, rowLog, mls, ms->nextToUpdate, ilimit);
+ }
+ ms->lazySkipping = 0;
+ }
/* check immediate repcode */
while (ip <= ilimit) {
diff --git a/contrib/libs/zstd/lib/compress/zstd_opt.c b/contrib/libs/zstd/lib/compress/zstd_opt.c
index fdd7f9d8b5..f02a760946 100644
--- a/contrib/libs/zstd/lib/compress/zstd_opt.c
+++ b/contrib/libs/zstd/lib/compress/zstd_opt.c
@@ -1086,6 +1086,8 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
ZSTD_optimal_t lastSequence;
ZSTD_optLdm_t optLdm;
+ ZSTD_memset(&lastSequence, 0, sizeof(ZSTD_optimal_t));
+
optLdm.seqStore = ms->ldmSeqStore ? *ms->ldmSeqStore : kNullRawSeqStore;
optLdm.endPosInBlock = optLdm.startPosInBlock = optLdm.offset = 0;
ZSTD_opt_getNextMatchAndUpdateSeqStore(&optLdm, (U32)(ip-istart), (U32)(iend-ip));
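
Editor's note: the only change to zstd_opt.c in this update is zero-filling lastSequence before use, so every field starts from a defined value no matter which branch later writes it (and, presumably, to quiet maybe-uninitialized diagnostics). A generic sketch of the pattern, with a made-up struct rather than zstd's ZSTD_optimal_t:

    #include <string.h>

    /* Hypothetical stand-in for a state struct that is filled field-by-field
     * across several branches later on. */
    typedef struct {
        unsigned off;
        unsigned litlen;
        unsigned mlen;
        int      price;
    } optimal_t;

    int main(void)
    {
        optimal_t lastSequence;
        memset(&lastSequence, 0, sizeof(lastSequence));  /* defined starting state     */
        /* ... later code may overwrite only some of the fields ... */
        return lastSequence.price;                       /* reads are now well-defined */
    }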
diff --git a/contrib/libs/zstd/lib/compress/zstdmt_compress.c b/contrib/libs/zstd/lib/compress/zstdmt_compress.c
index 7a2c71720a..6786075569 100644
--- a/contrib/libs/zstd/lib/compress/zstdmt_compress.c
+++ b/contrib/libs/zstd/lib/compress/zstdmt_compress.c
@@ -20,6 +20,7 @@
/* ====== Dependencies ====== */
+#include "../common/allocations.h" /* ZSTD_customMalloc, ZSTD_customCalloc, ZSTD_customFree */
#include "../common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memset, INT_MAX, UINT_MAX */
#include "../common/mem.h" /* MEM_STATIC */
#include "../common/pool.h" /* threadpool */
@@ -719,7 +720,7 @@ static void ZSTDMT_compressionJob(void* jobDescription)
ZSTDMT_serialState_update(job->serial, cctx, rawSeqStore, job->src, job->jobID);
if (!job->firstJob) { /* flush and overwrite frame header when it's not first job */
- size_t const hSize = ZSTD_compressContinue(cctx, dstBuff.start, dstBuff.capacity, job->src.start, 0);
+ size_t const hSize = ZSTD_compressContinue_public(cctx, dstBuff.start, dstBuff.capacity, job->src.start, 0);
if (ZSTD_isError(hSize)) JOB_ERROR(hSize);
DEBUGLOG(5, "ZSTDMT_compressionJob: flush and overwrite %u bytes of frame header (not first job)", (U32)hSize);
ZSTD_invalidateRepCodes(cctx);
@@ -737,7 +738,7 @@ static void ZSTDMT_compressionJob(void* jobDescription)
DEBUGLOG(5, "ZSTDMT_compressionJob: compress %u bytes in %i blocks", (U32)job->src.size, nbChunks);
assert(job->cSize == 0);
for (chunkNb = 1; chunkNb < nbChunks; chunkNb++) {
- size_t const cSize = ZSTD_compressContinue(cctx, op, oend-op, ip, chunkSize);
+ size_t const cSize = ZSTD_compressContinue_public(cctx, op, oend-op, ip, chunkSize);
if (ZSTD_isError(cSize)) JOB_ERROR(cSize);
ip += chunkSize;
op += cSize; assert(op < oend);
@@ -757,8 +758,8 @@ static void ZSTDMT_compressionJob(void* jobDescription)
size_t const lastBlockSize1 = job->src.size & (chunkSize-1);
size_t const lastBlockSize = ((lastBlockSize1==0) & (job->src.size>=chunkSize)) ? chunkSize : lastBlockSize1;
size_t const cSize = (job->lastJob) ?
- ZSTD_compressEnd (cctx, op, oend-op, ip, lastBlockSize) :
- ZSTD_compressContinue(cctx, op, oend-op, ip, lastBlockSize);
+ ZSTD_compressEnd_public(cctx, op, oend-op, ip, lastBlockSize) :
+ ZSTD_compressContinue_public(cctx, op, oend-op, ip, lastBlockSize);
if (ZSTD_isError(cSize)) JOB_ERROR(cSize);
lastCBlockSize = cSize;
} }
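
Editor's note: in the last hunk, lastBlockSize1 = job->src.size & (chunkSize-1) is a power-of-two modulo (chunkSize is a power of two, so the AND keeps the remainder), and the follow-up expression promotes a remainder of 0 to a full chunkSize whenever the source holds at least one chunk, so the final block is never empty for non-empty input. A small check of that arithmetic, with an assumed chunkSize of 64 KB:

    #include <assert.h>
    #include <stddef.h>

    /* chunkSize is assumed to be a power of two (64 KB here, for illustration). */
    static size_t last_block_size(size_t srcSize, size_t chunkSize)
    {
        size_t const rem = srcSize & (chunkSize - 1);             /* srcSize % chunkSize */
        return ((rem == 0) & (srcSize >= chunkSize)) ? chunkSize : rem;
    }

    int main(void)
    {
        size_t const chunkSize = 64 * 1024;
        assert(last_block_size(2 * chunkSize,       chunkSize) == chunkSize); /* exact multiple */
        assert(last_block_size(2 * chunkSize + 100, chunkSize) == 100);       /* trailing bytes */
        assert(last_block_size(0,                   chunkSize) == 0);         /* empty input    */
        return 0;
    }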