author    robot-contrib <robot-contrib@yandex-team.com>  2023-04-20 12:08:07 +0300
committer robot-contrib <robot-contrib@yandex-team.com>  2023-04-20 12:08:07 +0300
commit    eb16979262f120e52cda528d30de5a0cfb4ed0c9 (patch)
tree      4642aac240bc0889ba11a987e6748d4fabbc8230
parent    d0c642847472b3222a1a73a941917d393cc6ddf3 (diff)
download  ydb-eb16979262f120e52cda528d30de5a0cfb4ed0c9.tar.gz
Update contrib/libs/zstd to 1.5.5
-rw-r--r--  contrib/libs/zstd/CHANGELOG                               |  19
-rw-r--r--  contrib/libs/zstd/README.md                               |  15
-rw-r--r--  contrib/libs/zstd/lib/common/allocations.h                |  55
-rw-r--r--  contrib/libs/zstd/lib/common/bits.h                       |  35
-rw-r--r--  contrib/libs/zstd/lib/common/bitstream.h                  |   2
-rw-r--r--  contrib/libs/zstd/lib/common/compiler.h                   |   4
-rw-r--r--  contrib/libs/zstd/lib/common/pool.c                       |   2
-rw-r--r--  contrib/libs/zstd/lib/common/threading.c                  |   2
-rw-r--r--  contrib/libs/zstd/lib/common/zstd_common.c                |  35
-rw-r--r--  contrib/libs/zstd/lib/common/zstd_internal.h              |   5
-rw-r--r--  contrib/libs/zstd/lib/compress/zstd_compress.c            | 313
-rw-r--r--  contrib/libs/zstd/lib/compress/zstd_compress_internal.h   |  80
-rw-r--r--  contrib/libs/zstd/lib/compress/zstd_cwksp.h               | 182
-rw-r--r--  contrib/libs/zstd/lib/compress/zstd_lazy.c                | 154
-rw-r--r--  contrib/libs/zstd/lib/compress/zstd_opt.c                 |   2
-rw-r--r--  contrib/libs/zstd/lib/compress/zstdmt_compress.c          |   9
-rw-r--r--  contrib/libs/zstd/lib/decompress/huf_decompress.c         |   8
-rw-r--r--  contrib/libs/zstd/lib/decompress/zstd_ddict.c             |   1
-rw-r--r--  contrib/libs/zstd/lib/decompress/zstd_decompress.c        |  79
-rw-r--r--  contrib/libs/zstd/lib/decompress/zstd_decompress_block.c  |  33
-rw-r--r--  contrib/libs/zstd/lib/decompress/zstd_decompress_block.h  |   5
-rw-r--r--  contrib/libs/zstd/lib/dictBuilder/zdict.c                 |   4
-rw-r--r--  contrib/libs/zstd/lib/zstd.h                              | 470
-rw-r--r--  contrib/libs/zstd/programs/benchzstd.c                    |  49
-rw-r--r--  contrib/libs/zstd/programs/benchzstd.h                    |  51
-rw-r--r--  contrib/libs/zstd/programs/fileio.c                       | 297
-rw-r--r--  contrib/libs/zstd/programs/fileio.h                       |   1
-rw-r--r--  contrib/libs/zstd/programs/fileio_types.h                 |  12
-rw-r--r--  contrib/libs/zstd/programs/timefn.c                       |   2
-rw-r--r--  contrib/libs/zstd/programs/util.c                         |  92
-rw-r--r--  contrib/libs/zstd/programs/util.h                         |  13
-rw-r--r--  contrib/libs/zstd/programs/zstdcli.c                      |  18
32 files changed, 1353 insertions(+), 696 deletions(-)
diff --git a/contrib/libs/zstd/CHANGELOG b/contrib/libs/zstd/CHANGELOG
index 4010c1ff5d..c7a7506ee3 100644
--- a/contrib/libs/zstd/CHANGELOG
+++ b/contrib/libs/zstd/CHANGELOG
@@ -1,3 +1,22 @@
+v1.5.5 (Apr 2023)
+fix: fix rare corruption bug affecting the high compression mode, reported by @danlark1 (#3517, @terrelln)
+perf: improve mid-level compression speed (#3529, #3533, #3543, @yoniko and #3552, @terrelln)
+lib: deprecated bufferless block-level API (#3534) by @terrelln
+cli: mmap large dictionaries to save memory, by @daniellerozenblit
+cli: improve speed of --patch-from mode (~+50%) (#3545) by @daniellerozenblit
+cli: improve i/o speed (~+10%) when processing lots of small files (#3479) by @felixhandte
+cli: zstd no longer crashes when requested to write into write-protected directory (#3541) by @felixhandte
+cli: fix decompression into block device using -o, reported by @georgmu (#3583)
+build: fix zstd CLI compiled with lzma support but not zlib support (#3494) by @Hello71
+build: cmake no longer requires 3.18 as minimum version (#3510) by @kou
+build: fix MSVC+ClangCL linking issue (#3569) by @tru
+build: fix zstd-dll, version of zstd CLI that links to the dynamic library (#3496) by @yoniko
+build: fix MSVC warnings (#3495) by @embg
+doc: updated zstd specification to clarify corner cases, by @Cyan4973
+doc: document how to create fat binaries for macos (#3568) by @rickmark
+misc: improve seekable format ingestion speed (~+100%) for very small chunk sizes (#3544) by @Cyan4973
+misc: tests/fullbench can benchmark multiple files (#3516) by @dloidolt
+
v1.5.4 (Feb 2023)
perf: +20% faster huffman decompression for targets that can't compile x64 assembly (#3449, @terrelln)
perf: up to +10% faster streaming compression at levels 1-2 (#3114, @embg)
diff --git a/contrib/libs/zstd/README.md b/contrib/libs/zstd/README.md
index 6bcf757d86..f91e68fdb1 100644
--- a/contrib/libs/zstd/README.md
+++ b/contrib/libs/zstd/README.md
@@ -13,15 +13,12 @@ a list of known ports and bindings is provided on [Zstandard homepage](https://f
**Development branch status:**
[![Build Status][travisDevBadge]][travisLink]
-[![Build status][AppveyorDevBadge]][AppveyorLink]
[![Build status][CircleDevBadge]][CircleLink]
[![Build status][CirrusDevBadge]][CirrusLink]
[![Fuzzing Status][OSSFuzzBadge]][OSSFuzzLink]
[travisDevBadge]: https://api.travis-ci.com/facebook/zstd.svg?branch=dev "Continuous Integration test suite"
[travisLink]: https://travis-ci.com/facebook/zstd
-[AppveyorDevBadge]: https://ci.appveyor.com/api/projects/status/xt38wbdxjk5mrbem/branch/dev?svg=true "Windows test suite"
-[AppveyorLink]: https://ci.appveyor.com/project/YannCollet/zstd-p0yf0
[CircleDevBadge]: https://circleci.com/gh/facebook/zstd/tree/dev.svg?style=shield "Short test suite"
[CircleLink]: https://circleci.com/gh/facebook/zstd
[CirrusDevBadge]: https://api.cirrus-ci.com/github/facebook/zstd.svg?branch=dev
@@ -154,6 +151,18 @@ to create `zstd` binary, and `libzstd` dynamic and static libraries.
By default, `CMAKE_BUILD_TYPE` is set to `Release`.
+#### Support for Fat (Universal2) Output
+
+`zstd` can be built and installed with support for both Apple Silicon (M1/M2) as well as Intel by using CMake's Universal2 support.
+To perform a Fat/Universal2 build and install use the following commands:
+
+```bash
+cmake -B build-cmake-debug -S build/cmake -G Ninja -DCMAKE_OSX_ARCHITECTURES="x86_64;x86_64h;arm64"
+cd build-cmake-debug
+ninja
+sudo ninja install
+```
+
### Meson
A Meson project is provided within [`build/meson`](build/meson). Follow
diff --git a/contrib/libs/zstd/lib/common/allocations.h b/contrib/libs/zstd/lib/common/allocations.h
new file mode 100644
index 0000000000..a3153c4bac
--- /dev/null
+++ b/contrib/libs/zstd/lib/common/allocations.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+/* This file provides custom allocation primitives
+ */
+
+#define ZSTD_DEPS_NEED_MALLOC
+#include "zstd_deps.h" /* ZSTD_malloc, ZSTD_calloc, ZSTD_free, ZSTD_memset */
+
+#include "mem.h" /* MEM_STATIC */
+#define ZSTD_STATIC_LINKING_ONLY
+#include "../zstd.h" /* ZSTD_customMem */
+
+#ifndef ZSTD_ALLOCATIONS_H
+#define ZSTD_ALLOCATIONS_H
+
+/* custom memory allocation functions */
+
+MEM_STATIC void* ZSTD_customMalloc(size_t size, ZSTD_customMem customMem)
+{
+ if (customMem.customAlloc)
+ return customMem.customAlloc(customMem.opaque, size);
+ return ZSTD_malloc(size);
+}
+
+MEM_STATIC void* ZSTD_customCalloc(size_t size, ZSTD_customMem customMem)
+{
+ if (customMem.customAlloc) {
+ /* calloc implemented as malloc+memset;
+ * not as efficient as calloc, but next best guess for custom malloc */
+ void* const ptr = customMem.customAlloc(customMem.opaque, size);
+ ZSTD_memset(ptr, 0, size);
+ return ptr;
+ }
+ return ZSTD_calloc(1, size);
+}
+
+MEM_STATIC void ZSTD_customFree(void* ptr, ZSTD_customMem customMem)
+{
+ if (ptr!=NULL) {
+ if (customMem.customFree)
+ customMem.customFree(customMem.opaque, ptr);
+ else
+ ZSTD_free(ptr);
+ }
+}
+
+#endif /* ZSTD_ALLOCATIONS_H */
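For orientation, here is a minimal sketch of how these primitives get exercised through the public API. The counting allocator and its `g_live` counter are hypothetical; `ZSTD_customMem` and `ZSTD_createCCtx_advanced()` are the real entry points, declared behind `ZSTD_STATIC_LINKING_ONLY`.

```c
#include <stdio.h>
#include <stdlib.h>
#define ZSTD_STATIC_LINKING_ONLY   /* ZSTD_customMem, ZSTD_createCCtx_advanced */
#include <zstd.h>

/* Hypothetical tracking allocator: counts live allocations. */
static size_t g_live = 0;

static void* trackAlloc(void* opaque, size_t size)
{
    (void)opaque;
    g_live++;
    return malloc(size);
}

static void trackFree(void* opaque, void* address)
{
    (void)opaque;
    if (address != NULL) g_live--;
    free(address);
}

int main(void)
{
    ZSTD_customMem const cmem = { trackAlloc, trackFree, NULL };
    ZSTD_CCtx* const cctx = ZSTD_createCCtx_advanced(cmem);
    /* ... compress as usual ... */
    ZSTD_freeCCtx(cctx);
    printf("live allocations after free: %zu\n", g_live);  /* expect 0 */
    return 0;
}
```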
diff --git a/contrib/libs/zstd/lib/common/bits.h b/contrib/libs/zstd/lib/common/bits.h
index 7939f3d0f3..def56c474c 100644
--- a/contrib/libs/zstd/lib/common/bits.h
+++ b/contrib/libs/zstd/lib/common/bits.h
@@ -17,7 +17,7 @@ MEM_STATIC unsigned ZSTD_countTrailingZeros32_fallback(U32 val)
{
assert(val != 0);
{
- static const int DeBruijnBytePos[32] = {0, 1, 28, 2, 29, 14, 24, 3,
+ static const U32 DeBruijnBytePos[32] = {0, 1, 28, 2, 29, 14, 24, 3,
30, 22, 20, 15, 25, 17, 4, 8,
31, 27, 13, 23, 21, 19, 16, 7,
26, 12, 18, 6, 11, 5, 10, 9};
@@ -30,7 +30,7 @@ MEM_STATIC unsigned ZSTD_countTrailingZeros32(U32 val)
assert(val != 0);
# if defined(_MSC_VER)
# if STATIC_BMI2 == 1
- return _tzcnt_u32(val);
+ return (unsigned)_tzcnt_u32(val);
# else
if (val != 0) {
unsigned long r;
@@ -69,7 +69,7 @@ MEM_STATIC unsigned ZSTD_countLeadingZeros32(U32 val)
assert(val != 0);
# if defined(_MSC_VER)
# if STATIC_BMI2 == 1
- return _lzcnt_u32(val);
+ return (unsigned)_lzcnt_u32(val);
# else
if (val != 0) {
unsigned long r;
@@ -92,7 +92,7 @@ MEM_STATIC unsigned ZSTD_countTrailingZeros64(U64 val)
assert(val != 0);
# if defined(_MSC_VER) && defined(_WIN64)
# if STATIC_BMI2 == 1
- return _tzcnt_u64(val);
+ return (unsigned)_tzcnt_u64(val);
# else
if (val != 0) {
unsigned long r;
@@ -123,7 +123,7 @@ MEM_STATIC unsigned ZSTD_countLeadingZeros64(U64 val)
assert(val != 0);
# if defined(_MSC_VER) && defined(_WIN64)
# if STATIC_BMI2 == 1
- return _lzcnt_u64(val);
+ return (unsigned)_lzcnt_u64(val);
# else
if (val != 0) {
unsigned long r;
@@ -172,4 +172,29 @@ MEM_STATIC unsigned ZSTD_highbit32(U32 val) /* compress, dictBuilder, decodeCo
return 31 - ZSTD_countLeadingZeros32(val);
}
+/* ZSTD_rotateRight_*():
+ * Rotates a bitfield to the right by "count" bits.
+ * https://en.wikipedia.org/w/index.php?title=Circular_shift&oldid=991635599#Implementing_circular_shifts
+ */
+MEM_STATIC
+U64 ZSTD_rotateRight_U64(U64 const value, U32 count) {
+ assert(count < 64);
+ count &= 0x3F; /* for fickle pattern recognition */
+ return (value >> count) | (U64)(value << ((0U - count) & 0x3F));
+}
+
+MEM_STATIC
+U32 ZSTD_rotateRight_U32(U32 const value, U32 count) {
+ assert(count < 32);
+ count &= 0x1F; /* for fickle pattern recognition */
+ return (value >> count) | (U32)(value << ((0U - count) & 0x1F));
+}
+
+MEM_STATIC
+U16 ZSTD_rotateRight_U16(U16 const value, U32 count) {
+ assert(count < 16);
+ count &= 0x0F; /* for fickle pattern recognition */
+ return (value >> count) | (U16)(value << ((0U - count) & 0x0F));
+}
+
#endif /* ZSTD_BITS_H */
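A note on the rotate idiom above: the `(0U - count) & 0x3F` mask keeps the left-shift amount in [0, 63] even when `count == 0`, where the naive `value << (64 - count)` would shift by the full word width, which is undefined behavior in C. A standalone restatement with hypothetical names and a couple of sanity checks:

```c
#include <assert.h>
#include <stdint.h>

/* Illustrative restatement of the rotate idiom used above.
 * "(0U - count) & 63" equals (64 - count) % 64, so count == 0
 * never produces the undefined "shift by 64". */
static uint64_t rotr64(uint64_t value, uint32_t count)
{
    count &= 63;
    return (value >> count) | (value << ((0U - count) & 63));
}

int main(void)
{
    assert(rotr64(0x0123456789ABCDEFULL, 0) == 0x0123456789ABCDEFULL);
    assert(rotr64(0x1ULL, 1)  == 0x8000000000000000ULL);
    assert(rotr64(0xFFULL, 8) == 0xFF00000000000000ULL);
    return 0;
}
```

Mainstream compilers recognize this masked form and emit a single rotate instruction, which is presumably the "pattern recognition" the comments allude to.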
diff --git a/contrib/libs/zstd/lib/common/bitstream.h b/contrib/libs/zstd/lib/common/bitstream.h
index db1b4cf136..72b0b3df22 100644
--- a/contrib/libs/zstd/lib/common/bitstream.h
+++ b/contrib/libs/zstd/lib/common/bitstream.h
@@ -396,7 +396,7 @@ MEM_STATIC BIT_DStream_status BIT_reloadDStreamFast(BIT_DStream_t* bitD)
* This function is safe, it guarantees it will not read beyond src buffer.
* @return : status of `BIT_DStream_t` internal register.
* when status == BIT_DStream_unfinished, internal register is filled with at least 25 or 57 bits */
-MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD)
+MEM_STATIC FORCE_INLINE_ATTR BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD)
{
if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8)) /* overflow detected, like end of stream */
return BIT_DStream_overflow;
diff --git a/contrib/libs/zstd/lib/common/compiler.h b/contrib/libs/zstd/lib/common/compiler.h
index d4f2f285d7..73f8d01998 100644
--- a/contrib/libs/zstd/lib/common/compiler.h
+++ b/contrib/libs/zstd/lib/common/compiler.h
@@ -311,6 +311,10 @@ void __msan_poison(const volatile void *a, size_t size);
/* Returns the offset of the first (at least partially) poisoned byte in the
memory range, or -1 if the whole range is good. */
intptr_t __msan_test_shadow(const volatile void *x, size_t size);
+
+/* Print shadow and origin for the memory range to stderr in a human-readable
+ format. */
+void __msan_print_shadow(const volatile void *x, size_t size);
#endif
#if ZSTD_ADDRESS_SANITIZER && !defined(ZSTD_ASAN_DONT_POISON_WORKSPACE)
diff --git a/contrib/libs/zstd/lib/common/pool.c b/contrib/libs/zstd/lib/common/pool.c
index f3d9d08547..d5ca5a7808 100644
--- a/contrib/libs/zstd/lib/common/pool.c
+++ b/contrib/libs/zstd/lib/common/pool.c
@@ -10,9 +10,9 @@
/* ====== Dependencies ======= */
+#include "../common/allocations.h" /* ZSTD_customCalloc, ZSTD_customFree */
#include "zstd_deps.h" /* size_t */
#include "debug.h" /* assert */
-#include "zstd_internal.h" /* ZSTD_customCalloc, ZSTD_customFree */
#include "pool.h"
/* ====== Compiler specifics ====== */
diff --git a/contrib/libs/zstd/lib/common/threading.c b/contrib/libs/zstd/lib/common/threading.c
index f2341105a1..ca155b9b9d 100644
--- a/contrib/libs/zstd/lib/common/threading.c
+++ b/contrib/libs/zstd/lib/common/threading.c
@@ -47,7 +47,7 @@ static unsigned __stdcall worker(void *arg)
void* (*start_routine)(void*);
void* thread_arg;
- /* Inialized thread_arg and start_routine and signal main thread that we don't need it
+    /* Initialize thread_arg and start_routine, and signal main thread that we don't need it
* to wait any longer.
*/
{
diff --git a/contrib/libs/zstd/lib/common/zstd_common.c b/contrib/libs/zstd/lib/common/zstd_common.c
index 3208552475..3f04c22abf 100644
--- a/contrib/libs/zstd/lib/common/zstd_common.c
+++ b/contrib/libs/zstd/lib/common/zstd_common.c
@@ -14,7 +14,6 @@
* Dependencies
***************************************/
#define ZSTD_DEPS_NEED_MALLOC
-#include "zstd_deps.h" /* ZSTD_malloc, ZSTD_calloc, ZSTD_free, ZSTD_memset */
#include "error_private.h"
#include "zstd_internal.h"
@@ -47,37 +46,3 @@ ZSTD_ErrorCode ZSTD_getErrorCode(size_t code) { return ERR_getErrorCode(code); }
/*! ZSTD_getErrorString() :
* provides error code string from enum */
const char* ZSTD_getErrorString(ZSTD_ErrorCode code) { return ERR_getErrorString(code); }
-
-
-
-/*=**************************************************************
-* Custom allocator
-****************************************************************/
-void* ZSTD_customMalloc(size_t size, ZSTD_customMem customMem)
-{
- if (customMem.customAlloc)
- return customMem.customAlloc(customMem.opaque, size);
- return ZSTD_malloc(size);
-}
-
-void* ZSTD_customCalloc(size_t size, ZSTD_customMem customMem)
-{
- if (customMem.customAlloc) {
- /* calloc implemented as malloc+memset;
- * not as efficient as calloc, but next best guess for custom malloc */
- void* const ptr = customMem.customAlloc(customMem.opaque, size);
- ZSTD_memset(ptr, 0, size);
- return ptr;
- }
- return ZSTD_calloc(1, size);
-}
-
-void ZSTD_customFree(void* ptr, ZSTD_customMem customMem)
-{
- if (ptr!=NULL) {
- if (customMem.customFree)
- customMem.customFree(customMem.opaque, ptr);
- else
- ZSTD_free(ptr);
- }
-}
diff --git a/contrib/libs/zstd/lib/common/zstd_internal.h b/contrib/libs/zstd/lib/common/zstd_internal.h
index 54792712fc..d1bcada74b 100644
--- a/contrib/libs/zstd/lib/common/zstd_internal.h
+++ b/contrib/libs/zstd/lib/common/zstd_internal.h
@@ -350,11 +350,6 @@ typedef struct {
const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx); /* compress & dictBuilder */
int ZSTD_seqToCodes(const seqStore_t* seqStorePtr); /* compress, dictBuilder, decodeCorpus (shouldn't get its definition from here) */
-/* custom memory allocation functions */
-void* ZSTD_customMalloc(size_t size, ZSTD_customMem customMem);
-void* ZSTD_customCalloc(size_t size, ZSTD_customMem customMem);
-void ZSTD_customFree(void* ptr, ZSTD_customMem customMem);
-
/* ZSTD_invalidateRepCodes() :
* ensures next compression will not use repcodes from previous block.
diff --git a/contrib/libs/zstd/lib/compress/zstd_compress.c b/contrib/libs/zstd/lib/compress/zstd_compress.c
index b55f684cd7..d6133e70b4 100644
--- a/contrib/libs/zstd/lib/compress/zstd_compress.c
+++ b/contrib/libs/zstd/lib/compress/zstd_compress.c
@@ -11,6 +11,7 @@
/*-*************************************
* Dependencies
***************************************/
+#include "../common/allocations.h" /* ZSTD_customMalloc, ZSTD_customCalloc, ZSTD_customFree */
#include "../common/zstd_deps.h" /* INT_MAX, ZSTD_memset, ZSTD_memcpy */
#include "../common/mem.h"
#include "hist.h" /* HIST_countFast_wksp */
@@ -26,7 +27,7 @@
#include "zstd_opt.h"
#include "zstd_ldm.h"
#include "zstd_compress_superblock.h"
-#include "../common/bits.h" /* ZSTD_highbit32 */
+#include "../common/bits.h" /* ZSTD_highbit32, ZSTD_rotateRight_U64 */
/* ***************************************************************
* Tuning parameters
@@ -1177,16 +1178,39 @@ size_t ZSTD_CCtx_setParametersUsingCCtxParams(
size_t ZSTD_CCtx_setCParams(ZSTD_CCtx* cctx, ZSTD_compressionParameters cparams)
{
+ ZSTD_STATIC_ASSERT(sizeof(cparams) == 7 * 4 /* all params are listed below */);
DEBUGLOG(4, "ZSTD_CCtx_setCParams");
- assert(cctx != NULL);
- if (cctx->streamStage != zcss_init) {
- /* All parameters in @cparams are allowed to be updated during MT compression.
- * This must be signaled, so that MT compression picks up the changes */
- cctx->cParamsChanged = 1;
- }
- /* only update if parameters are valid */
+ /* only update if all parameters are valid */
FORWARD_IF_ERROR(ZSTD_checkCParams(cparams), "");
- cctx->requestedParams.cParams = cparams;
+ FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(cctx, ZSTD_c_windowLog, cparams.windowLog), "");
+ FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(cctx, ZSTD_c_chainLog, cparams.chainLog), "");
+ FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(cctx, ZSTD_c_hashLog, cparams.hashLog), "");
+ FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(cctx, ZSTD_c_searchLog, cparams.searchLog), "");
+ FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(cctx, ZSTD_c_minMatch, cparams.minMatch), "");
+ FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(cctx, ZSTD_c_targetLength, cparams.targetLength), "");
+ FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(cctx, ZSTD_c_strategy, cparams.strategy), "");
+ return 0;
+}
+
+size_t ZSTD_CCtx_setFParams(ZSTD_CCtx* cctx, ZSTD_frameParameters fparams)
+{
+ ZSTD_STATIC_ASSERT(sizeof(fparams) == 3 * 4 /* all params are listed below */);
+ DEBUGLOG(4, "ZSTD_CCtx_setFParams");
+ FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(cctx, ZSTD_c_contentSizeFlag, fparams.contentSizeFlag != 0), "");
+ FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, fparams.checksumFlag != 0), "");
+ FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(cctx, ZSTD_c_dictIDFlag, fparams.noDictIDFlag == 0), "");
+ return 0;
+}
+
+size_t ZSTD_CCtx_setParams(ZSTD_CCtx* cctx, ZSTD_parameters params)
+{
+ DEBUGLOG(4, "ZSTD_CCtx_setParams");
+ /* First check cParams, because we want to update all or none. */
+ FORWARD_IF_ERROR(ZSTD_checkCParams(params.cParams), "");
+ /* Next set fParams, because this could fail if the cctx isn't in init stage. */
+ FORWARD_IF_ERROR(ZSTD_CCtx_setFParams(cctx, params.fParams), "");
+ /* Finally set cParams, which should succeed. */
+ FORWARD_IF_ERROR(ZSTD_CCtx_setCParams(cctx, params.cParams), "");
return 0;
}
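The rewritten ZSTD_CCtx_setCParams() now routes every field through ZSTD_CCtx_setParameter(), so each field is validated and multithreaded compression observes the change without the hand-rolled cParamsChanged flag. A minimal usage sketch of the new all-or-nothing setter; ZSTD_CCtx_setParams() and ZSTD_getParams() live in the static/experimental section, and applyLevel19Params is a hypothetical helper:

```c
#define ZSTD_STATIC_LINKING_ONLY   /* ZSTD_CCtx_setParams, ZSTD_getParams */
#include <zstd.h>

/* Hypothetical helper: apply a complete ZSTD_parameters bundle in one call,
 * instead of setting ZSTD_c_windowLog, ZSTD_c_checksumFlag, ... one by one.
 * cParams are validated first, so the update is all-or-nothing. */
static size_t applyLevel19Params(ZSTD_CCtx* cctx, unsigned long long srcSize)
{
    ZSTD_parameters const params = ZSTD_getParams(19, srcSize, 0 /* no dict */);
    return ZSTD_CCtx_setParams(cctx, params);  /* 0 on success, else an error code */
}
```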
@@ -1208,9 +1232,9 @@ static void ZSTD_dedicatedDictSearch_revertCParams(
ZSTD_compressionParameters* cParams);
/**
- * Initializes the local dict using the requested parameters.
- * NOTE: This does not use the pledged src size, because it may be used for more
- * than one compression.
+ * Initializes the local dictionary using requested parameters.
+ * NOTE: Initialization does not employ the pledged src size,
+ * because the dictionary may be used for multiple compressions.
*/
static size_t ZSTD_initLocalDict(ZSTD_CCtx* cctx)
{
@@ -1223,8 +1247,8 @@ static size_t ZSTD_initLocalDict(ZSTD_CCtx* cctx)
return 0;
}
if (dl->cdict != NULL) {
- assert(cctx->cdict == dl->cdict);
/* Local dictionary already initialized. */
+ assert(cctx->cdict == dl->cdict);
return 0;
}
assert(dl->dictSize > 0);
@@ -1244,26 +1268,30 @@ static size_t ZSTD_initLocalDict(ZSTD_CCtx* cctx)
}
size_t ZSTD_CCtx_loadDictionary_advanced(
- ZSTD_CCtx* cctx, const void* dict, size_t dictSize,
- ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType)
+ ZSTD_CCtx* cctx,
+ const void* dict, size_t dictSize,
+ ZSTD_dictLoadMethod_e dictLoadMethod,
+ ZSTD_dictContentType_e dictContentType)
{
- RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong,
- "Can't load a dictionary when ctx is not in init stage.");
DEBUGLOG(4, "ZSTD_CCtx_loadDictionary_advanced (size: %u)", (U32)dictSize);
- ZSTD_clearAllDicts(cctx); /* in case one already exists */
- if (dict == NULL || dictSize == 0) /* no dictionary mode */
+ RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong,
+ "Can't load a dictionary when cctx is not in init stage.");
+ ZSTD_clearAllDicts(cctx); /* erase any previously set dictionary */
+ if (dict == NULL || dictSize == 0) /* no dictionary */
return 0;
if (dictLoadMethod == ZSTD_dlm_byRef) {
cctx->localDict.dict = dict;
} else {
+ /* copy dictionary content inside CCtx to own its lifetime */
void* dictBuffer;
RETURN_ERROR_IF(cctx->staticSize, memory_allocation,
- "no malloc for static CCtx");
+ "static CCtx can't allocate for an internal copy of dictionary");
dictBuffer = ZSTD_customMalloc(dictSize, cctx->customMem);
- RETURN_ERROR_IF(!dictBuffer, memory_allocation, "NULL pointer!");
+ RETURN_ERROR_IF(dictBuffer==NULL, memory_allocation,
+ "allocation failed for dictionary content");
ZSTD_memcpy(dictBuffer, dict, dictSize);
- cctx->localDict.dictBuffer = dictBuffer;
- cctx->localDict.dict = dictBuffer;
+ cctx->localDict.dictBuffer = dictBuffer; /* owned ptr to free */
+ cctx->localDict.dict = dictBuffer; /* read-only reference */
}
cctx->localDict.dictSize = dictSize;
cctx->localDict.dictContentType = dictContentType;
@@ -1333,7 +1361,7 @@ size_t ZSTD_CCtx_reset(ZSTD_CCtx* cctx, ZSTD_ResetDirective reset)
if ( (reset == ZSTD_reset_parameters)
|| (reset == ZSTD_reset_session_and_parameters) ) {
RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong,
- "Can't reset parameters only when not in init stage.");
+ "Reset parameters is only possible during init stage.");
ZSTD_clearAllDicts(cctx);
ZSTD_memset(&cctx->externalMatchCtx, 0, sizeof(cctx->externalMatchCtx));
return ZSTD_CCtxParams_reset(&cctx->requestedParams);
@@ -1592,7 +1620,7 @@ ZSTD_sizeof_matchState(const ZSTD_compressionParameters* const cParams,
+ ZSTD_cwksp_aligned_alloc_size((ZSTD_OPT_NUM+1) * sizeof(ZSTD_match_t))
+ ZSTD_cwksp_aligned_alloc_size((ZSTD_OPT_NUM+1) * sizeof(ZSTD_optimal_t));
size_t const lazyAdditionalSpace = ZSTD_rowMatchFinderUsed(cParams->strategy, useRowMatchFinder)
- ? ZSTD_cwksp_aligned_alloc_size(hSize*sizeof(U16))
+ ? ZSTD_cwksp_aligned_alloc_size(hSize)
: 0;
size_t const optSpace = (forCCtx && (cParams->strategy >= ZSTD_btopt))
? optPotentialSpace
@@ -1883,6 +1911,19 @@ typedef enum {
ZSTD_resetTarget_CCtx
} ZSTD_resetTarget_e;
+/* Mixes the bits of a 64-bit value, based on XXH3_rrmxmx */
+static U64 ZSTD_bitmix(U64 val, U64 len) {
+ val ^= ZSTD_rotateRight_U64(val, 49) ^ ZSTD_rotateRight_U64(val, 24);
+ val *= 0x9FB21C651E98DF25ULL;
+ val ^= (val >> 35) + len ;
+ val *= 0x9FB21C651E98DF25ULL;
+ return val ^ (val >> 28);
+}
+
+/* Mixes in the hashSalt and hashSaltEntropy to create a new hashSalt */
+static void ZSTD_advanceHashSalt(ZSTD_matchState_t* ms) {
+ ms->hashSalt = ZSTD_bitmix(ms->hashSalt, 8) ^ ZSTD_bitmix((U64) ms->hashSaltEntropy, 4);
+}
static size_t
ZSTD_reset_matchState(ZSTD_matchState_t* ms,
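ZSTD_bitmix() applies an rrmxmx-style finalizer (borrowed from XXH3) so each context reset derives a fresh, well-mixed salt; this is what lets the row-matchfinder tag table be reused without zeroing, since stale entries hash as if random. A standalone restatement, with hypothetical names and using the GCC/Clang builtin __builtin_popcountll, illustrating the avalanche behavior:

```c
#include <stdint.h>
#include <stdio.h>

static uint64_t rotr64(uint64_t v, unsigned r) { return (v >> r) | (v << (64 - r)); }

/* Same mixing steps as ZSTD_bitmix() above (rrmxmx-style finalizer). */
static uint64_t bitmix(uint64_t val, uint64_t len)
{
    val ^= rotr64(val, 49) ^ rotr64(val, 24);
    val *= 0x9FB21C651E98DF25ULL;
    val ^= (val >> 35) + len;
    val *= 0x9FB21C651E98DF25ULL;
    return val ^ (val >> 28);
}

int main(void)
{
    /* Flipping one input bit flips roughly half of the output bits. */
    uint64_t const a = bitmix(0, 8);
    uint64_t const b = bitmix(1, 8);
    printf("%016llx vs %016llx, differing bits: %d\n",
           (unsigned long long)a, (unsigned long long)b,
           __builtin_popcountll(a ^ b));
    return 0;
}
```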
@@ -1910,6 +1951,7 @@ ZSTD_reset_matchState(ZSTD_matchState_t* ms,
}
ms->hashLog3 = hashLog3;
+ ms->lazySkipping = 0;
ZSTD_invalidateMatchState(ms);
@@ -1931,6 +1973,27 @@ ZSTD_reset_matchState(ZSTD_matchState_t* ms,
ZSTD_cwksp_clean_tables(ws);
}
+ if (ZSTD_rowMatchFinderUsed(cParams->strategy, useRowMatchFinder)) {
+ /* Row match finder needs an additional table of hashes ("tags") */
+ size_t const tagTableSize = hSize;
+ /* We want to generate a new salt in case we reset a Cctx, but we always want to use
+ * 0 when we reset a Cdict */
+ if(forWho == ZSTD_resetTarget_CCtx) {
+ ms->tagTable = (BYTE*) ZSTD_cwksp_reserve_aligned_init_once(ws, tagTableSize);
+ ZSTD_advanceHashSalt(ms);
+ } else {
+ /* When we are not salting we want to always memset the memory */
+ ms->tagTable = (BYTE*) ZSTD_cwksp_reserve_aligned(ws, tagTableSize);
+ ZSTD_memset(ms->tagTable, 0, tagTableSize);
+ ms->hashSalt = 0;
+ }
+ { /* Switch to 32-entry rows if searchLog is 5 (or more) */
+ U32 const rowLog = BOUNDED(4, cParams->searchLog, 6);
+ assert(cParams->hashLog >= rowLog);
+ ms->rowHashLog = cParams->hashLog - rowLog;
+ }
+ }
+
/* opt parser space */
if ((forWho == ZSTD_resetTarget_CCtx) && (cParams->strategy >= ZSTD_btopt)) {
DEBUGLOG(4, "reserving optimal parser space");
@@ -1942,19 +2005,6 @@ ZSTD_reset_matchState(ZSTD_matchState_t* ms,
ms->opt.priceTable = (ZSTD_optimal_t*)ZSTD_cwksp_reserve_aligned(ws, (ZSTD_OPT_NUM+1) * sizeof(ZSTD_optimal_t));
}
- if (ZSTD_rowMatchFinderUsed(cParams->strategy, useRowMatchFinder)) {
- { /* Row match finder needs an additional table of hashes ("tags") */
- size_t const tagTableSize = hSize*sizeof(U16);
- ms->tagTable = (U16*)ZSTD_cwksp_reserve_aligned(ws, tagTableSize);
- if (ms->tagTable) ZSTD_memset(ms->tagTable, 0, tagTableSize);
- }
- { /* Switch to 32-entry rows if searchLog is 5 (or more) */
- U32 const rowLog = BOUNDED(4, cParams->searchLog, 6);
- assert(cParams->hashLog >= rowLog);
- ms->rowHashLog = cParams->hashLog - rowLog;
- }
- }
-
ms->cParams = *cParams;
RETURN_ERROR_IF(ZSTD_cwksp_reserve_failed(ws), memory_allocation,
@@ -2101,13 +2151,46 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
ZSTD_reset_compressedBlockState(zc->blockState.prevCBlock);
+ FORWARD_IF_ERROR(ZSTD_reset_matchState(
+ &zc->blockState.matchState,
+ ws,
+ &params->cParams,
+ params->useRowMatchFinder,
+ crp,
+ needsIndexReset,
+ ZSTD_resetTarget_CCtx), "");
+
+ zc->seqStore.sequencesStart = (seqDef*)ZSTD_cwksp_reserve_aligned(ws, maxNbSeq * sizeof(seqDef));
+
+ /* ldm hash table */
+ if (params->ldmParams.enableLdm == ZSTD_ps_enable) {
+ /* TODO: avoid memset? */
+ size_t const ldmHSize = ((size_t)1) << params->ldmParams.hashLog;
+ zc->ldmState.hashTable = (ldmEntry_t*)ZSTD_cwksp_reserve_aligned(ws, ldmHSize * sizeof(ldmEntry_t));
+ ZSTD_memset(zc->ldmState.hashTable, 0, ldmHSize * sizeof(ldmEntry_t));
+ zc->ldmSequences = (rawSeq*)ZSTD_cwksp_reserve_aligned(ws, maxNbLdmSeq * sizeof(rawSeq));
+ zc->maxNbLdmSequences = maxNbLdmSeq;
+
+ ZSTD_window_init(&zc->ldmState.window);
+ zc->ldmState.loadedDictEnd = 0;
+ }
+
+ /* reserve space for block-level external sequences */
+ if (params->useSequenceProducer) {
+ size_t const maxNbExternalSeq = ZSTD_sequenceBound(blockSize);
+ zc->externalMatchCtx.seqBufferCapacity = maxNbExternalSeq;
+ zc->externalMatchCtx.seqBuffer =
+ (ZSTD_Sequence*)ZSTD_cwksp_reserve_aligned(ws, maxNbExternalSeq * sizeof(ZSTD_Sequence));
+ }
+
+ /* buffers */
+
/* ZSTD_wildcopy() is used to copy into the literals buffer,
* so we have to oversize the buffer by WILDCOPY_OVERLENGTH bytes.
*/
zc->seqStore.litStart = ZSTD_cwksp_reserve_buffer(ws, blockSize + WILDCOPY_OVERLENGTH);
zc->seqStore.maxNbLit = blockSize;
- /* buffers */
zc->bufferedPolicy = zbuff;
zc->inBuffSize = buffInSize;
zc->inBuff = (char*)ZSTD_cwksp_reserve_buffer(ws, buffInSize);
@@ -2130,40 +2213,9 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
zc->seqStore.llCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE));
zc->seqStore.mlCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE));
zc->seqStore.ofCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE));
- zc->seqStore.sequencesStart = (seqDef*)ZSTD_cwksp_reserve_aligned(ws, maxNbSeq * sizeof(seqDef));
-
- FORWARD_IF_ERROR(ZSTD_reset_matchState(
- &zc->blockState.matchState,
- ws,
- &params->cParams,
- params->useRowMatchFinder,
- crp,
- needsIndexReset,
- ZSTD_resetTarget_CCtx), "");
-
- /* ldm hash table */
- if (params->ldmParams.enableLdm == ZSTD_ps_enable) {
- /* TODO: avoid memset? */
- size_t const ldmHSize = ((size_t)1) << params->ldmParams.hashLog;
- zc->ldmState.hashTable = (ldmEntry_t*)ZSTD_cwksp_reserve_aligned(ws, ldmHSize * sizeof(ldmEntry_t));
- ZSTD_memset(zc->ldmState.hashTable, 0, ldmHSize * sizeof(ldmEntry_t));
- zc->ldmSequences = (rawSeq*)ZSTD_cwksp_reserve_aligned(ws, maxNbLdmSeq * sizeof(rawSeq));
- zc->maxNbLdmSequences = maxNbLdmSeq;
-
- ZSTD_window_init(&zc->ldmState.window);
- zc->ldmState.loadedDictEnd = 0;
- }
-
- /* reserve space for block-level external sequences */
- if (params->useSequenceProducer) {
- size_t const maxNbExternalSeq = ZSTD_sequenceBound(blockSize);
- zc->externalMatchCtx.seqBufferCapacity = maxNbExternalSeq;
- zc->externalMatchCtx.seqBuffer =
- (ZSTD_Sequence*)ZSTD_cwksp_reserve_aligned(ws, maxNbExternalSeq * sizeof(ZSTD_Sequence));
- }
DEBUGLOG(3, "wksp: finished allocating, %zd bytes remain available", ZSTD_cwksp_available_space(ws));
- assert(ZSTD_cwksp_estimated_space_within_bounds(ws, neededSpace, resizeWorkspace));
+ assert(ZSTD_cwksp_estimated_space_within_bounds(ws, neededSpace));
zc->initialized = 1;
@@ -2338,10 +2390,11 @@ static size_t ZSTD_resetCCtx_byCopyingCDict(ZSTD_CCtx* cctx,
}
/* copy tag table */
if (ZSTD_rowMatchFinderUsed(cdict_cParams->strategy, cdict->useRowMatchFinder)) {
- size_t const tagTableSize = hSize*sizeof(U16);
+ size_t const tagTableSize = hSize;
ZSTD_memcpy(cctx->blockState.matchState.tagTable,
- cdict->matchState.tagTable,
- tagTableSize);
+ cdict->matchState.tagTable,
+ tagTableSize);
+ cctx->blockState.matchState.hashSalt = cdict->matchState.hashSalt;
}
}
@@ -3858,9 +3911,10 @@ ZSTD_seqStore_resolveOffCodes(repcodes_t* const dRepcodes, repcodes_t* const cRe
const seqStore_t* const seqStore, U32 const nbSeq)
{
U32 idx = 0;
+ U32 const longLitLenIdx = seqStore->longLengthType == ZSTD_llt_literalLength ? seqStore->longLengthPos : nbSeq;
for (; idx < nbSeq; ++idx) {
seqDef* const seq = seqStore->sequencesStart + idx;
- U32 const ll0 = (seq->litLength == 0);
+ U32 const ll0 = (seq->litLength == 0) && (idx != longLitLenIdx);
U32 const offBase = seq->offBase;
assert(offBase > 0);
if (OFFBASE_IS_REPCODE(offBase)) {
@@ -4576,31 +4630,51 @@ static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* cctx,
}
}
-size_t ZSTD_compressContinue (ZSTD_CCtx* cctx,
- void* dst, size_t dstCapacity,
- const void* src, size_t srcSize)
+size_t ZSTD_compressContinue_public(ZSTD_CCtx* cctx,
+ void* dst, size_t dstCapacity,
+ const void* src, size_t srcSize)
{
DEBUGLOG(5, "ZSTD_compressContinue (srcSize=%u)", (unsigned)srcSize);
return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 1 /* frame mode */, 0 /* last chunk */);
}
+/* NOTE: Must just wrap ZSTD_compressContinue_public() */
+size_t ZSTD_compressContinue(ZSTD_CCtx* cctx,
+ void* dst, size_t dstCapacity,
+ const void* src, size_t srcSize)
+{
+ return ZSTD_compressContinue_public(cctx, dst, dstCapacity, src, srcSize);
+}
-size_t ZSTD_getBlockSize(const ZSTD_CCtx* cctx)
+static size_t ZSTD_getBlockSize_deprecated(const ZSTD_CCtx* cctx)
{
ZSTD_compressionParameters const cParams = cctx->appliedParams.cParams;
assert(!ZSTD_checkCParams(cParams));
return MIN(cctx->appliedParams.maxBlockSize, (size_t)1 << cParams.windowLog);
}
-size_t ZSTD_compressBlock(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize)
+/* NOTE: Must just wrap ZSTD_getBlockSize_deprecated() */
+size_t ZSTD_getBlockSize(const ZSTD_CCtx* cctx)
+{
+ return ZSTD_getBlockSize_deprecated(cctx);
+}
+
+/* NOTE: Must just wrap ZSTD_compressBlock_deprecated() */
+size_t ZSTD_compressBlock_deprecated(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize)
{
DEBUGLOG(5, "ZSTD_compressBlock: srcSize = %u", (unsigned)srcSize);
- { size_t const blockSizeMax = ZSTD_getBlockSize(cctx);
+ { size_t const blockSizeMax = ZSTD_getBlockSize_deprecated(cctx);
RETURN_ERROR_IF(srcSize > blockSizeMax, srcSize_wrong, "input is larger than a block"); }
return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 0 /* frame mode */, 0 /* last chunk */);
}
+/* NOTE: Must just wrap ZSTD_compressBlock_deprecated() */
+size_t ZSTD_compressBlock(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize)
+{
+ return ZSTD_compressBlock_deprecated(cctx, dst, dstCapacity, src, srcSize);
+}
+
/*! ZSTD_loadDictionaryContent() :
* @return : 0, or an error code
*/
@@ -4644,31 +4718,42 @@ static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms,
ip = iend - maxDictSize;
src = ip;
srcSize = maxDictSize;
- } }
+ }
+ }
if (srcSize > ZSTD_CHUNKSIZE_MAX) {
/* We must have cleared our windows when our source is this large. */
assert(ZSTD_window_isEmpty(ms->window));
if (loadLdmDict) assert(ZSTD_window_isEmpty(ls->window));
}
+ ZSTD_window_update(&ms->window, src, srcSize, /* forceNonContiguous */ 0);
DEBUGLOG(4, "ZSTD_loadDictionaryContent(): useRowMatchFinder=%d", (int)params->useRowMatchFinder);
- ZSTD_window_update(&ms->window, src, srcSize, /* forceNonContiguous */ 0);
- ms->loadedDictEnd = params->forceWindow ? 0 : (U32)(iend - ms->window.base);
- ms->forceNonContiguous = params->deterministicRefPrefix;
- if (loadLdmDict) {
+ if (loadLdmDict) { /* Load the entire dict into LDM matchfinders. */
ZSTD_window_update(&ls->window, src, srcSize, /* forceNonContiguous */ 0);
ls->loadedDictEnd = params->forceWindow ? 0 : (U32)(iend - ls->window.base);
+ ZSTD_ldm_fillHashTable(ls, ip, iend, &params->ldmParams);
+ }
+
+ /* If the dict is larger than we can reasonably index in our tables, only load the suffix. */
+ if (params->cParams.strategy < ZSTD_btultra) {
+ U32 maxDictSize = 8U << MIN(MAX(params->cParams.hashLog, params->cParams.chainLog), 28);
+ if (srcSize > maxDictSize) {
+ ip = iend - maxDictSize;
+ src = ip;
+ srcSize = maxDictSize;
+ }
}
+ ms->nextToUpdate = (U32)(ip - ms->window.base);
+ ms->loadedDictEnd = params->forceWindow ? 0 : (U32)(iend - ms->window.base);
+ ms->forceNonContiguous = params->deterministicRefPrefix;
+
if (srcSize <= HASH_READ_SIZE) return 0;
ZSTD_overflowCorrectIfNeeded(ms, ws, params, ip, iend);
- if (loadLdmDict)
- ZSTD_ldm_fillHashTable(ls, ip, iend, &params->ldmParams);
-
switch(params->cParams.strategy)
{
case ZSTD_fast:
@@ -4688,7 +4773,7 @@ static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms,
} else {
assert(params->useRowMatchFinder != ZSTD_ps_auto);
if (params->useRowMatchFinder == ZSTD_ps_enable) {
- size_t const tagTableSize = ((size_t)1 << params->cParams.hashLog) * sizeof(U16);
+ size_t const tagTableSize = ((size_t)1 << params->cParams.hashLog);
ZSTD_memset(ms->tagTable, 0, tagTableSize);
ZSTD_row_update(ms, iend-HASH_READ_SIZE);
DEBUGLOG(4, "Using row-based hash table for lazy dict");
@@ -4991,8 +5076,8 @@ size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx,
&cctxParams, pledgedSrcSize);
}
-size_t
-ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel)
+static size_t
+ZSTD_compressBegin_usingDict_deprecated(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel)
{
ZSTD_CCtx_params cctxParams;
{ ZSTD_parameters const params = ZSTD_getParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_noAttachDict);
@@ -5003,9 +5088,15 @@ ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize,
&cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, ZSTDb_not_buffered);
}
+size_t
+ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel)
+{
+ return ZSTD_compressBegin_usingDict_deprecated(cctx, dict, dictSize, compressionLevel);
+}
+
size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel)
{
- return ZSTD_compressBegin_usingDict(cctx, NULL, 0, compressionLevel);
+ return ZSTD_compressBegin_usingDict_deprecated(cctx, NULL, 0, compressionLevel);
}
@@ -5075,9 +5166,9 @@ void ZSTD_CCtx_trace(ZSTD_CCtx* cctx, size_t extraCSize)
#endif
}
-size_t ZSTD_compressEnd (ZSTD_CCtx* cctx,
- void* dst, size_t dstCapacity,
- const void* src, size_t srcSize)
+size_t ZSTD_compressEnd_public(ZSTD_CCtx* cctx,
+ void* dst, size_t dstCapacity,
+ const void* src, size_t srcSize)
{
size_t endResult;
size_t const cSize = ZSTD_compressContinue_internal(cctx,
@@ -5101,6 +5192,14 @@ size_t ZSTD_compressEnd (ZSTD_CCtx* cctx,
return cSize + endResult;
}
+/* NOTE: Must just wrap ZSTD_compressEnd_public() */
+size_t ZSTD_compressEnd(ZSTD_CCtx* cctx,
+ void* dst, size_t dstCapacity,
+ const void* src, size_t srcSize)
+{
+ return ZSTD_compressEnd_public(cctx, dst, dstCapacity, src, srcSize);
+}
+
size_t ZSTD_compress_advanced (ZSTD_CCtx* cctx,
void* dst, size_t dstCapacity,
const void* src, size_t srcSize,
@@ -5129,7 +5228,7 @@ size_t ZSTD_compress_advanced_internal(
FORWARD_IF_ERROR( ZSTD_compressBegin_internal(cctx,
dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast, NULL,
params, srcSize, ZSTDb_not_buffered) , "");
- return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize);
+ return ZSTD_compressEnd_public(cctx, dst, dstCapacity, src, srcSize);
}
size_t ZSTD_compress_usingDict(ZSTD_CCtx* cctx,
@@ -5451,6 +5550,7 @@ const ZSTD_CDict* ZSTD_initStaticCDict(
params.cParams = cParams;
params.useRowMatchFinder = useRowMatchFinder;
cdict->useRowMatchFinder = useRowMatchFinder;
+ cdict->compressionLevel = ZSTD_NO_CLEVEL;
if (ZSTD_isError( ZSTD_initCDict_internal(cdict,
dict, dictSize,
@@ -5530,12 +5630,17 @@ size_t ZSTD_compressBegin_usingCDict_advanced(
/* ZSTD_compressBegin_usingCDict() :
* cdict must be != NULL */
-size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict)
+size_t ZSTD_compressBegin_usingCDict_deprecated(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict)
{
ZSTD_frameParameters const fParams = { 0 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ };
return ZSTD_compressBegin_usingCDict_internal(cctx, cdict, fParams, ZSTD_CONTENTSIZE_UNKNOWN);
}
+size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict)
+{
+ return ZSTD_compressBegin_usingCDict_deprecated(cctx, cdict);
+}
+
/*! ZSTD_compress_usingCDict_internal():
* Implementation of various ZSTD_compress_usingCDict* functions.
*/
@@ -5545,7 +5650,7 @@ static size_t ZSTD_compress_usingCDict_internal(ZSTD_CCtx* cctx,
const ZSTD_CDict* cdict, ZSTD_frameParameters fParams)
{
FORWARD_IF_ERROR(ZSTD_compressBegin_usingCDict_internal(cctx, cdict, fParams, srcSize), ""); /* will check if cdict != NULL */
- return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize);
+ return ZSTD_compressEnd_public(cctx, dst, dstCapacity, src, srcSize);
}
/*! ZSTD_compress_usingCDict_advanced():
@@ -5803,7 +5908,7 @@ static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs,
|| zcs->appliedParams.outBufferMode == ZSTD_bm_stable) /* OR we are allowed to return dstSizeTooSmall */
&& (zcs->inBuffPos == 0) ) {
/* shortcut to compression pass directly into output buffer */
- size_t const cSize = ZSTD_compressEnd(zcs,
+ size_t const cSize = ZSTD_compressEnd_public(zcs,
op, oend-op, ip, iend-ip);
DEBUGLOG(4, "ZSTD_compressEnd : cSize=%u", (unsigned)cSize);
FORWARD_IF_ERROR(cSize, "ZSTD_compressEnd failed");
@@ -5861,9 +5966,9 @@ static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs,
if (inputBuffered) {
unsigned const lastBlock = (flushMode == ZSTD_e_end) && (ip==iend);
cSize = lastBlock ?
- ZSTD_compressEnd(zcs, cDst, oSize,
+ ZSTD_compressEnd_public(zcs, cDst, oSize,
zcs->inBuff + zcs->inToCompress, iSize) :
- ZSTD_compressContinue(zcs, cDst, oSize,
+ ZSTD_compressContinue_public(zcs, cDst, oSize,
zcs->inBuff + zcs->inToCompress, iSize);
FORWARD_IF_ERROR(cSize, "%s", lastBlock ? "ZSTD_compressEnd failed" : "ZSTD_compressContinue failed");
zcs->frameEnded = lastBlock;
@@ -5879,8 +5984,8 @@ static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs,
} else { /* !inputBuffered, hence ZSTD_bm_stable */
unsigned const lastBlock = (flushMode == ZSTD_e_end) && (ip + iSize == iend);
cSize = lastBlock ?
- ZSTD_compressEnd(zcs, cDst, oSize, ip, iSize) :
- ZSTD_compressContinue(zcs, cDst, oSize, ip, iSize);
+ ZSTD_compressEnd_public(zcs, cDst, oSize, ip, iSize) :
+ ZSTD_compressContinue_public(zcs, cDst, oSize, ip, iSize);
/* Consume the input prior to error checking to mirror buffered mode. */
if (ip) ip += iSize;
FORWARD_IF_ERROR(cSize, "%s", lastBlock ? "ZSTD_compressEnd failed" : "ZSTD_compressContinue failed");
diff --git a/contrib/libs/zstd/lib/compress/zstd_compress_internal.h b/contrib/libs/zstd/lib/compress/zstd_compress_internal.h
index cbb85e527e..10f68d010e 100644
--- a/contrib/libs/zstd/lib/compress/zstd_compress_internal.h
+++ b/contrib/libs/zstd/lib/compress/zstd_compress_internal.h
@@ -226,8 +226,10 @@ struct ZSTD_matchState_t {
U32 hashLog3; /* dispatch table for matches of len==3 : larger == faster, more memory */
U32 rowHashLog; /* For row-based matchfinder: Hashlog based on nb of rows in the hashTable.*/
- U16* tagTable; /* For row-based matchFinder: A row-based table containing the hashes and head index. */
+ BYTE* tagTable; /* For row-based matchFinder: A row-based table containing the hashes and head index. */
U32 hashCache[ZSTD_ROW_HASH_CACHE_SIZE]; /* For row-based matchFinder: a cache of hashes to improve speed */
+ U64 hashSalt; /* For row-based matchFinder: salts the hash for re-use of tag table */
+ U32 hashSaltEntropy; /* For row-based matchFinder: collects entropy for salt generation */
U32* hashTable;
U32* hashTable3;
@@ -247,6 +249,13 @@ struct ZSTD_matchState_t {
* This behavior is controlled from the cctx ms.
* This parameter has no effect in the cdict ms. */
int prefetchCDictTables;
+
+ /* When == 0, lazy match finders insert every position.
+ * When != 0, lazy match finders only insert positions they search.
+ * This allows them to skip much faster over incompressible data,
+ * at a small cost to compression ratio.
+ */
+ int lazySkipping;
};
typedef struct {
@@ -787,28 +796,35 @@ ZSTD_count_2segments(const BYTE* ip, const BYTE* match,
* Hashes
***************************************/
static const U32 prime3bytes = 506832829U;
-static U32 ZSTD_hash3(U32 u, U32 h) { assert(h <= 32); return ((u << (32-24)) * prime3bytes) >> (32-h) ; }
-MEM_STATIC size_t ZSTD_hash3Ptr(const void* ptr, U32 h) { return ZSTD_hash3(MEM_readLE32(ptr), h); } /* only in zstd_opt.h */
+static U32 ZSTD_hash3(U32 u, U32 h, U32 s) { assert(h <= 32); return (((u << (32-24)) * prime3bytes) ^ s) >> (32-h) ; }
+MEM_STATIC size_t ZSTD_hash3Ptr(const void* ptr, U32 h) { return ZSTD_hash3(MEM_readLE32(ptr), h, 0); } /* only in zstd_opt.h */
+MEM_STATIC size_t ZSTD_hash3PtrS(const void* ptr, U32 h, U32 s) { return ZSTD_hash3(MEM_readLE32(ptr), h, s); }
static const U32 prime4bytes = 2654435761U;
-static U32 ZSTD_hash4(U32 u, U32 h) { assert(h <= 32); return (u * prime4bytes) >> (32-h) ; }
-static size_t ZSTD_hash4Ptr(const void* ptr, U32 h) { return ZSTD_hash4(MEM_readLE32(ptr), h); }
+static U32 ZSTD_hash4(U32 u, U32 h, U32 s) { assert(h <= 32); return ((u * prime4bytes) ^ s) >> (32-h) ; }
+static size_t ZSTD_hash4Ptr(const void* ptr, U32 h) { return ZSTD_hash4(MEM_readLE32(ptr), h, 0); }
+static size_t ZSTD_hash4PtrS(const void* ptr, U32 h, U32 s) { return ZSTD_hash4(MEM_readLE32(ptr), h, s); }
static const U64 prime5bytes = 889523592379ULL;
-static size_t ZSTD_hash5(U64 u, U32 h) { assert(h <= 64); return (size_t)(((u << (64-40)) * prime5bytes) >> (64-h)) ; }
-static size_t ZSTD_hash5Ptr(const void* p, U32 h) { return ZSTD_hash5(MEM_readLE64(p), h); }
+static size_t ZSTD_hash5(U64 u, U32 h, U64 s) { assert(h <= 64); return (size_t)((((u << (64-40)) * prime5bytes) ^ s) >> (64-h)) ; }
+static size_t ZSTD_hash5Ptr(const void* p, U32 h) { return ZSTD_hash5(MEM_readLE64(p), h, 0); }
+static size_t ZSTD_hash5PtrS(const void* p, U32 h, U64 s) { return ZSTD_hash5(MEM_readLE64(p), h, s); }
static const U64 prime6bytes = 227718039650203ULL;
-static size_t ZSTD_hash6(U64 u, U32 h) { assert(h <= 64); return (size_t)(((u << (64-48)) * prime6bytes) >> (64-h)) ; }
-static size_t ZSTD_hash6Ptr(const void* p, U32 h) { return ZSTD_hash6(MEM_readLE64(p), h); }
+static size_t ZSTD_hash6(U64 u, U32 h, U64 s) { assert(h <= 64); return (size_t)((((u << (64-48)) * prime6bytes) ^ s) >> (64-h)) ; }
+static size_t ZSTD_hash6Ptr(const void* p, U32 h) { return ZSTD_hash6(MEM_readLE64(p), h, 0); }
+static size_t ZSTD_hash6PtrS(const void* p, U32 h, U64 s) { return ZSTD_hash6(MEM_readLE64(p), h, s); }
static const U64 prime7bytes = 58295818150454627ULL;
-static size_t ZSTD_hash7(U64 u, U32 h) { assert(h <= 64); return (size_t)(((u << (64-56)) * prime7bytes) >> (64-h)) ; }
-static size_t ZSTD_hash7Ptr(const void* p, U32 h) { return ZSTD_hash7(MEM_readLE64(p), h); }
+static size_t ZSTD_hash7(U64 u, U32 h, U64 s) { assert(h <= 64); return (size_t)((((u << (64-56)) * prime7bytes) ^ s) >> (64-h)) ; }
+static size_t ZSTD_hash7Ptr(const void* p, U32 h) { return ZSTD_hash7(MEM_readLE64(p), h, 0); }
+static size_t ZSTD_hash7PtrS(const void* p, U32 h, U64 s) { return ZSTD_hash7(MEM_readLE64(p), h, s); }
static const U64 prime8bytes = 0xCF1BBCDCB7A56463ULL;
-static size_t ZSTD_hash8(U64 u, U32 h) { assert(h <= 64); return (size_t)(((u) * prime8bytes) >> (64-h)) ; }
-static size_t ZSTD_hash8Ptr(const void* p, U32 h) { return ZSTD_hash8(MEM_readLE64(p), h); }
+static size_t ZSTD_hash8(U64 u, U32 h, U64 s) { assert(h <= 64); return (size_t)((((u) * prime8bytes) ^ s) >> (64-h)) ; }
+static size_t ZSTD_hash8Ptr(const void* p, U32 h) { return ZSTD_hash8(MEM_readLE64(p), h, 0); }
+static size_t ZSTD_hash8PtrS(const void* p, U32 h, U64 s) { return ZSTD_hash8(MEM_readLE64(p), h, s); }
+
MEM_STATIC FORCE_INLINE_ATTR
size_t ZSTD_hashPtr(const void* p, U32 hBits, U32 mls)
@@ -828,6 +844,24 @@ size_t ZSTD_hashPtr(const void* p, U32 hBits, U32 mls)
}
}
+MEM_STATIC FORCE_INLINE_ATTR
+size_t ZSTD_hashPtrSalted(const void* p, U32 hBits, U32 mls, const U64 hashSalt) {
+ /* Although some of these hashes do support hBits up to 64, some do not.
+ * To be on the safe side, always avoid hBits > 32. */
+ assert(hBits <= 32);
+
+ switch(mls)
+ {
+ default:
+ case 4: return ZSTD_hash4PtrS(p, hBits, (U32)hashSalt);
+ case 5: return ZSTD_hash5PtrS(p, hBits, hashSalt);
+ case 6: return ZSTD_hash6PtrS(p, hBits, hashSalt);
+ case 7: return ZSTD_hash7PtrS(p, hBits, hashSalt);
+ case 8: return ZSTD_hash8PtrS(p, hBits, hashSalt);
+ }
+}
+
+
/** ZSTD_ipow() :
* Return base^exponent.
*/
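Each salted variant XORs the salt into the multiplicative product before the final down-shift, so the same key lands in a different bucket under each salt; combined with the init-once tag table, stale entries from a previous compression behave like random noise rather than systematic false positives. A toy demonstration of that shape (hash4 here is an illustrative restatement, not the library function):

```c
#include <stdint.h>
#include <stdio.h>

static const uint32_t prime4bytes = 2654435761U;

/* Same shape as ZSTD_hash4() with a salt: ((u * prime) ^ salt) >> (32 - hBits). */
static uint32_t hash4(uint32_t u, uint32_t hBits, uint32_t salt)
{
    return ((u * prime4bytes) ^ salt) >> (32 - hBits);
}

int main(void)
{
    uint32_t const key = 0x64636261;  /* "abcd" read little-endian */
    printf("unsalted: %u\n", hash4(key, 16, 0));
    printf("salt A  : %u\n", hash4(key, 16, 0xDEADBEEF));
    printf("salt B  : %u\n", hash4(key, 16, 0x12345678));
    return 0;
}
```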
@@ -1475,4 +1509,24 @@ ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_sequencePosition*
const ZSTD_Sequence* const inSeqs, size_t inSeqsSize,
const void* src, size_t blockSize, ZSTD_paramSwitch_e externalRepSearch);
+
+/* ===============================================================
+ * Deprecated definitions that are still used internally to avoid
+ * deprecation warnings. These functions are exactly equivalent to
+ * their public variants, but avoid the deprecation warnings.
+ * =============================================================== */
+
+size_t ZSTD_compressBegin_usingCDict_deprecated(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict);
+
+size_t ZSTD_compressContinue_public(ZSTD_CCtx* cctx,
+ void* dst, size_t dstCapacity,
+ const void* src, size_t srcSize);
+
+size_t ZSTD_compressEnd_public(ZSTD_CCtx* cctx,
+ void* dst, size_t dstCapacity,
+ const void* src, size_t srcSize);
+
+size_t ZSTD_compressBlock_deprecated(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
+
+
#endif /* ZSTD_COMPRESS_H */
diff --git a/contrib/libs/zstd/lib/compress/zstd_cwksp.h b/contrib/libs/zstd/lib/compress/zstd_cwksp.h
index 97676693b5..cc7fb1c715 100644
--- a/contrib/libs/zstd/lib/compress/zstd_cwksp.h
+++ b/contrib/libs/zstd/lib/compress/zstd_cwksp.h
@@ -14,7 +14,9 @@
/*-*************************************
* Dependencies
***************************************/
+#include "../common/allocations.h" /* ZSTD_customMalloc, ZSTD_customFree */
#include "../common/zstd_internal.h"
+#include "../common/portability_macros.h"
#if defined (__cplusplus)
extern "C" {
@@ -44,8 +46,9 @@ extern "C" {
***************************************/
typedef enum {
ZSTD_cwksp_alloc_objects,
- ZSTD_cwksp_alloc_buffers,
- ZSTD_cwksp_alloc_aligned
+ ZSTD_cwksp_alloc_aligned_init_once,
+ ZSTD_cwksp_alloc_aligned,
+ ZSTD_cwksp_alloc_buffers
} ZSTD_cwksp_alloc_phase_e;
/**
@@ -98,8 +101,8 @@ typedef enum {
*
* Workspace Layout:
*
- * [ ... workspace ... ]
- * [objects][tables ... ->] free space [<- ... aligned][<- ... buffers]
+ * [ ... workspace ... ]
+ * [objects][tables ->] free space [<- buffers][<- aligned][<- init once]
*
* The various objects that live in the workspace are divided into the
* following categories, and are allocated separately:
@@ -123,9 +126,18 @@ typedef enum {
* uint32_t arrays, all of whose values are between 0 and (nextSrc - base).
* Their sizes depend on the cparams. These tables are 64-byte aligned.
*
- * - Aligned: these buffers are used for various purposes that require 4 byte
- * alignment, but don't require any initialization before they're used. These
- * buffers are each aligned to 64 bytes.
+ * - Init once: these buffers must be initialized at least once before
+ *   use. They should be used when we want to skip memory initialization
+ *   while not triggering memory checkers (like Valgrind) when reading
+ *   from this memory without writing to it first.
+ * These buffers should be used carefully as they might contain data
+ * from previous compressions.
+ * Buffers are aligned to 64 bytes.
+ *
+ * - Aligned: these buffers don't require any initialization before they're
+ * used. The user of the buffer should make sure they write into a buffer
+ * location before reading from it.
+ * Buffers are aligned to 64 bytes.
*
* - Buffers: these buffers are used for various purposes that don't require
* any alignment or initialization before they're used. This means they can
@@ -137,8 +149,9 @@ typedef enum {
* correctly packed into the workspace buffer. That order is:
*
* 1. Objects
- * 2. Buffers
- * 3. Aligned/Tables
+ * 2. Init once / Tables
+ * 3. Aligned / Tables
+ * 4. Buffers / Tables
*
* Attempts to reserve objects of different types out of order will fail.
*/
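The phase enum above is ordered so that the workspace can only move forward through the phases (objects, then init-once/tables, then aligned, then buffers); out-of-order reservation requests fail instead of corrupting the layout. A toy model of that one-way phase machine (names and error convention are illustrative, not the library's code):

```c
#include <stdio.h>

typedef enum {            /* mirrors the ZSTD_cwksp_alloc_phase_e ordering */
    alloc_objects,
    alloc_aligned_init_once,
    alloc_aligned,
    alloc_buffers
} phase_e;

/* Phases may only advance; requesting an earlier phase is an error. */
static int advance_phase(phase_e* current, phase_e requested)
{
    if (requested < *current) return -1;  /* out-of-order reservation */
    *current = requested;
    return 0;
}

int main(void)
{
    phase_e p = alloc_objects;
    printf("%d\n", advance_phase(&p, alloc_aligned));   /*  0: ok */
    printf("%d\n", advance_phase(&p, alloc_buffers));   /*  0: ok */
    printf("%d\n", advance_phase(&p, alloc_objects));   /* -1: rejected */
    return 0;
}
```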
@@ -150,6 +163,7 @@ typedef struct {
void* tableEnd;
void* tableValidEnd;
void* allocStart;
+ void* initOnceStart;
BYTE allocFailed;
int workspaceOversizedDuration;
@@ -162,6 +176,7 @@ typedef struct {
***************************************/
MEM_STATIC size_t ZSTD_cwksp_available_space(ZSTD_cwksp* ws);
+MEM_STATIC void* ZSTD_cwksp_initialAllocStart(ZSTD_cwksp* ws);
MEM_STATIC void ZSTD_cwksp_assert_internal_consistency(ZSTD_cwksp* ws) {
(void)ws;
@@ -171,6 +186,20 @@ MEM_STATIC void ZSTD_cwksp_assert_internal_consistency(ZSTD_cwksp* ws) {
assert(ws->tableEnd <= ws->allocStart);
assert(ws->tableValidEnd <= ws->allocStart);
assert(ws->allocStart <= ws->workspaceEnd);
+ assert(ws->initOnceStart <= ZSTD_cwksp_initialAllocStart(ws));
+ assert(ws->workspace <= ws->initOnceStart);
+#if ZSTD_MEMORY_SANITIZER
+ {
+ intptr_t const offset = __msan_test_shadow(ws->initOnceStart,
+ (U8*)ZSTD_cwksp_initialAllocStart(ws) - (U8*)ws->initOnceStart);
+#if defined(ZSTD_MSAN_PRINT)
+ if(offset!=-1) {
+ __msan_print_shadow((U8*)ws->initOnceStart + offset - 8, 32);
+ }
+#endif
+ assert(offset==-1);
+ };
+#endif
}
/**
@@ -217,14 +246,10 @@ MEM_STATIC size_t ZSTD_cwksp_aligned_alloc_size(size_t size) {
* for internal purposes (currently only alignment).
*/
MEM_STATIC size_t ZSTD_cwksp_slack_space_required(void) {
- /* For alignment, the wksp will always allocate an additional n_1=[1, 64] bytes
- * to align the beginning of tables section, as well as another n_2=[0, 63] bytes
- * to align the beginning of the aligned section.
- *
- * n_1 + n_2 == 64 bytes if the cwksp is freshly allocated, due to tables and
- * aligneds being sized in multiples of 64 bytes.
+ /* For alignment, the wksp will always allocate an additional 2*ZSTD_CWKSP_ALIGNMENT_BYTES
+ * bytes to align the beginning of tables section and end of buffers;
*/
- size_t const slackSpace = ZSTD_CWKSP_ALIGNMENT_BYTES;
+ size_t const slackSpace = ZSTD_CWKSP_ALIGNMENT_BYTES * 2;
return slackSpace;
}
@@ -237,11 +262,19 @@ MEM_STATIC size_t ZSTD_cwksp_bytes_to_align_ptr(void* ptr, const size_t alignByt
size_t const alignBytesMask = alignBytes - 1;
size_t const bytes = (alignBytes - ((size_t)ptr & (alignBytesMask))) & alignBytesMask;
assert((alignBytes & alignBytesMask) == 0);
- assert(bytes != ZSTD_CWKSP_ALIGNMENT_BYTES);
+ assert(bytes < alignBytes);
return bytes;
}
/**
+ * Returns the initial value for allocStart which is used to determine the position from
+ * which we can allocate from the end of the workspace.
+ */
+MEM_STATIC void* ZSTD_cwksp_initialAllocStart(ZSTD_cwksp* ws) {
+ return (void*)((size_t)ws->workspaceEnd & ~(ZSTD_CWKSP_ALIGNMENT_BYTES-1));
+}
+
+/**
* Internal function. Do not use directly.
* Reserves the given number of bytes within the aligned/buffer segment of the wksp,
* which counts from the end of the wksp (as opposed to the object/table segment).
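Both helpers reduce to mask arithmetic: the table start is rounded up to the next 64-byte boundary, while the end-of-workspace allocation start is rounded down, which is why the slack estimate above doubles to 2*ZSTD_CWKSP_ALIGNMENT_BYTES. A worked example with made-up addresses:

```c
#include <stdint.h>
#include <stdio.h>

#define ALIGN 64u  /* stands in for ZSTD_CWKSP_ALIGNMENT_BYTES */

int main(void)
{
    uintptr_t const start = 0x1007;  /* unaligned table start */
    uintptr_t const end   = 0x2033;  /* unaligned workspace end */

    /* Round up: bytes needed to reach the next 64-byte boundary ([0, 63]). */
    uintptr_t const up = (ALIGN - (start & (ALIGN - 1))) & (ALIGN - 1);
    /* Round down: highest aligned address usable for end-allocated buffers. */
    uintptr_t const down = end & ~(uintptr_t)(ALIGN - 1);

    printf("align up   0x%lx by %lu -> 0x%lx\n",
           (unsigned long)start, (unsigned long)up, (unsigned long)(start + up));
    printf("align down 0x%lx -> 0x%lx\n",
           (unsigned long)end, (unsigned long)down);
    return 0;  /* prints 0x1040 and 0x2000 */
}
```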
@@ -281,27 +314,16 @@ ZSTD_cwksp_internal_advance_phase(ZSTD_cwksp* ws, ZSTD_cwksp_alloc_phase_e phase
{
assert(phase >= ws->phase);
if (phase > ws->phase) {
- /* Going from allocating objects to allocating buffers */
- if (ws->phase < ZSTD_cwksp_alloc_buffers &&
- phase >= ZSTD_cwksp_alloc_buffers) {
+ /* Going from allocating objects to allocating initOnce / tables */
+ if (ws->phase < ZSTD_cwksp_alloc_aligned_init_once &&
+ phase >= ZSTD_cwksp_alloc_aligned_init_once) {
ws->tableValidEnd = ws->objectEnd;
- }
+ ws->initOnceStart = ZSTD_cwksp_initialAllocStart(ws);
- /* Going from allocating buffers to allocating aligneds/tables */
- if (ws->phase < ZSTD_cwksp_alloc_aligned &&
- phase >= ZSTD_cwksp_alloc_aligned) {
- { /* Align the start of the "aligned" to 64 bytes. Use [1, 64] bytes. */
- size_t const bytesToAlign =
- ZSTD_CWKSP_ALIGNMENT_BYTES - ZSTD_cwksp_bytes_to_align_ptr(ws->allocStart, ZSTD_CWKSP_ALIGNMENT_BYTES);
- DEBUGLOG(5, "reserving aligned alignment addtl space: %zu", bytesToAlign);
- ZSTD_STATIC_ASSERT((ZSTD_CWKSP_ALIGNMENT_BYTES & (ZSTD_CWKSP_ALIGNMENT_BYTES - 1)) == 0); /* power of 2 */
- RETURN_ERROR_IF(!ZSTD_cwksp_reserve_internal_buffer_space(ws, bytesToAlign),
- memory_allocation, "aligned phase - alignment initial allocation failed!");
- }
{ /* Align the start of the tables to 64 bytes. Use [0, 63] bytes */
- void* const alloc = ws->objectEnd;
+ void *const alloc = ws->objectEnd;
size_t const bytesToAlign = ZSTD_cwksp_bytes_to_align_ptr(alloc, ZSTD_CWKSP_ALIGNMENT_BYTES);
- void* const objectEnd = (BYTE*)alloc + bytesToAlign;
+ void *const objectEnd = (BYTE *) alloc + bytesToAlign;
DEBUGLOG(5, "reserving table alignment addtl space: %zu", bytesToAlign);
RETURN_ERROR_IF(objectEnd > ws->workspaceEnd, memory_allocation,
"table phase - alignment initial allocation failed!");
@@ -309,7 +331,9 @@ ZSTD_cwksp_internal_advance_phase(ZSTD_cwksp* ws, ZSTD_cwksp_alloc_phase_e phase
ws->tableEnd = objectEnd; /* table area starts being empty */
if (ws->tableValidEnd < ws->tableEnd) {
ws->tableValidEnd = ws->tableEnd;
- } } }
+ }
+ }
+ }
ws->phase = phase;
ZSTD_cwksp_assert_internal_consistency(ws);
}
@@ -321,7 +345,7 @@ ZSTD_cwksp_internal_advance_phase(ZSTD_cwksp* ws, ZSTD_cwksp_alloc_phase_e phase
*/
MEM_STATIC int ZSTD_cwksp_owns_buffer(const ZSTD_cwksp* ws, const void* ptr)
{
- return (ptr != NULL) && (ws->workspace <= ptr) && (ptr <= ws->workspaceEnd);
+ return (ptr != NULL) && (ws->workspace <= ptr) && (ptr < ws->workspaceEnd);
}
/**
@@ -368,6 +392,36 @@ MEM_STATIC BYTE* ZSTD_cwksp_reserve_buffer(ZSTD_cwksp* ws, size_t bytes)
/**
* Reserves and returns memory sized on and aligned on ZSTD_CWKSP_ALIGNMENT_BYTES (64 bytes).
+ * This memory has been initialized at least once in the past.
+ * This doesn't mean it has been initialized this time, and it might contain data from previous
+ * operations.
+ * The main usage is for algorithms that might need read access into uninitialized memory.
+ * The algorithm must maintain safety under these conditions and must make sure it doesn't
+ * leak any of the past data (directly or in side channels).
+ */
+MEM_STATIC void* ZSTD_cwksp_reserve_aligned_init_once(ZSTD_cwksp* ws, size_t bytes)
+{
+ size_t const alignedBytes = ZSTD_cwksp_align(bytes, ZSTD_CWKSP_ALIGNMENT_BYTES);
+ void* ptr = ZSTD_cwksp_reserve_internal(ws, alignedBytes, ZSTD_cwksp_alloc_aligned_init_once);
+ assert(((size_t)ptr & (ZSTD_CWKSP_ALIGNMENT_BYTES-1))== 0);
+ if(ptr && ptr < ws->initOnceStart) {
+ /* We assume the memory following the current allocation is either:
+ * 1. Not usable as initOnce memory (end of workspace)
+ * 2. Another initOnce buffer that has been allocated before (and so was previously memset)
+ * 3. An ASAN redzone, in which case we don't want to write on it
+ * For these reasons it should be fine to not explicitly zero every byte up to ws->initOnceStart.
+     * Note that we assume here that MSAN and ASAN cannot run at the same time. */
+ ZSTD_memset(ptr, 0, MIN((size_t)((U8*)ws->initOnceStart - (U8*)ptr), alignedBytes));
+ ws->initOnceStart = ptr;
+ }
+#if ZSTD_MEMORY_SANITIZER
+ assert(__msan_test_shadow(ptr, bytes) == -1);
+#endif
+ return ptr;
+}
+
+/**
+ * Reserves and returns memory sized on and aligned on ZSTD_CWKSP_ALIGNMENT_BYTES (64 bytes).
*/
MEM_STATIC void* ZSTD_cwksp_reserve_aligned(ZSTD_cwksp* ws, size_t bytes)
{
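The subtle part of ZSTD_cwksp_reserve_aligned_init_once() is that initOnceStart acts as a persistent low-water mark: only bytes never handed out before are zeroed, and anything above the mark keeps whatever a previous compression wrote there. A toy model of that behavior (the fixed-size workspace and all names are illustrative):

```c
#include <stdio.h>
#include <string.h>

static unsigned char  wksp[256];
static unsigned char* allocStart;                     /* reset every compression */
static unsigned char* initOnceStart = wksp + 256;     /* low-water mark, persists */

static unsigned char* reserve_init_once(size_t bytes) /* no bounds checks: sketch only */
{
    unsigned char* const ptr = allocStart - bytes;
    allocStart = ptr;
    if (ptr < initOnceStart) {                        /* first time this low */
        memset(ptr, 0, (size_t)(initOnceStart - ptr));/* zero only the new prefix */
        initOnceStart = ptr;
    }
    return ptr;
}

int main(void)
{
    allocStart = wksp + 256;
    unsigned char* a = reserve_init_once(64);  /* zeroed on first use */
    a[0] = 42;
    allocStart = wksp + 256;                   /* "cwksp_clear": allocStart resets... */
    unsigned char* b = reserve_init_once(64);  /* ...but initOnceStart does not */
    printf("%d %d\n", a == b, b[0]);           /* "1 42": same memory, not re-zeroed */
    return 0;
}
```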
@@ -384,13 +438,17 @@ MEM_STATIC void* ZSTD_cwksp_reserve_aligned(ZSTD_cwksp* ws, size_t bytes)
*/
MEM_STATIC void* ZSTD_cwksp_reserve_table(ZSTD_cwksp* ws, size_t bytes)
{
- const ZSTD_cwksp_alloc_phase_e phase = ZSTD_cwksp_alloc_aligned;
+ const ZSTD_cwksp_alloc_phase_e phase = ZSTD_cwksp_alloc_aligned_init_once;
void* alloc;
void* end;
void* top;
- if (ZSTD_isError(ZSTD_cwksp_internal_advance_phase(ws, phase))) {
- return NULL;
+ /* We can only start allocating tables after we are done reserving space for objects at the
+ * start of the workspace */
+ if(ws->phase < phase) {
+ if (ZSTD_isError(ZSTD_cwksp_internal_advance_phase(ws, phase))) {
+ return NULL;
+ }
}
alloc = ws->tableEnd;
end = (BYTE *)alloc + bytes;
@@ -469,11 +527,19 @@ MEM_STATIC void ZSTD_cwksp_mark_tables_dirty(ZSTD_cwksp* ws)
#if ZSTD_MEMORY_SANITIZER && !defined (ZSTD_MSAN_DONT_POISON_WORKSPACE)
/* To validate that the table re-use logic is sound, and that we don't
* access table space that we haven't cleaned, we re-"poison" the table
- * space every time we mark it dirty. */
+ * space every time we mark it dirty.
+     * space every time we mark it dirty.
+     * Since tableValidEnd space and initOnce space may overlap, we don't poison
+     * the initOnce portion, as that would break its promise. This means that this
+     * poisoning check isn't always applied fully. */
{
size_t size = (BYTE*)ws->tableValidEnd - (BYTE*)ws->objectEnd;
assert(__msan_test_shadow(ws->objectEnd, size) == -1);
- __msan_poison(ws->objectEnd, size);
+ if((BYTE*)ws->tableValidEnd < (BYTE*)ws->initOnceStart) {
+ __msan_poison(ws->objectEnd, size);
+ } else {
+ assert(ws->initOnceStart >= ws->objectEnd);
+ __msan_poison(ws->objectEnd, (BYTE*)ws->initOnceStart - (BYTE*)ws->objectEnd);
+ }
}
#endif
@@ -538,11 +604,14 @@ MEM_STATIC void ZSTD_cwksp_clear(ZSTD_cwksp* ws) {
#if ZSTD_MEMORY_SANITIZER && !defined (ZSTD_MSAN_DONT_POISON_WORKSPACE)
/* To validate that the context re-use logic is sound, and that we don't
* access stuff that this compression hasn't initialized, we re-"poison"
- * the workspace (or at least the non-static, non-table parts of it)
- * every time we start a new compression. */
+ * the workspace except for the areas in which we expect memory re-use
+ * without initialization (objects, valid tables area and init once
+ * memory). */
{
- size_t size = (BYTE*)ws->workspaceEnd - (BYTE*)ws->tableValidEnd;
- __msan_poison(ws->tableValidEnd, size);
+ if((BYTE*)ws->tableValidEnd < (BYTE*)ws->initOnceStart) {
+ size_t size = (BYTE*)ws->initOnceStart - (BYTE*)ws->tableValidEnd;
+ __msan_poison(ws->tableValidEnd, size);
+ }
}
#endif
@@ -558,10 +627,10 @@ MEM_STATIC void ZSTD_cwksp_clear(ZSTD_cwksp* ws) {
#endif
ws->tableEnd = ws->objectEnd;
- ws->allocStart = ws->workspaceEnd;
+ ws->allocStart = ZSTD_cwksp_initialAllocStart(ws);
ws->allocFailed = 0;
- if (ws->phase > ZSTD_cwksp_alloc_buffers) {
- ws->phase = ZSTD_cwksp_alloc_buffers;
+ if (ws->phase > ZSTD_cwksp_alloc_aligned_init_once) {
+ ws->phase = ZSTD_cwksp_alloc_aligned_init_once;
}
ZSTD_cwksp_assert_internal_consistency(ws);
}
@@ -578,6 +647,7 @@ MEM_STATIC void ZSTD_cwksp_init(ZSTD_cwksp* ws, void* start, size_t size, ZSTD_c
ws->workspaceEnd = (BYTE*)start + size;
ws->objectEnd = ws->workspace;
ws->tableValidEnd = ws->objectEnd;
+ ws->initOnceStart = ZSTD_cwksp_initialAllocStart(ws);
ws->phase = ZSTD_cwksp_alloc_objects;
ws->isStatic = isStatic;
ZSTD_cwksp_clear(ws);
@@ -630,17 +700,11 @@ MEM_STATIC int ZSTD_cwksp_reserve_failed(const ZSTD_cwksp* ws) {
* Returns if the estimated space needed for a wksp is within an acceptable limit of the
* actual amount of space used.
*/
-MEM_STATIC int ZSTD_cwksp_estimated_space_within_bounds(const ZSTD_cwksp* const ws,
- size_t const estimatedSpace, int resizedWorkspace) {
- if (resizedWorkspace) {
- /* Resized/newly allocated wksp should have exact bounds */
- return ZSTD_cwksp_used(ws) == estimatedSpace;
- } else {
- /* Due to alignment, when reusing a workspace, we can actually consume 63 fewer or more bytes
- * than estimatedSpace. See the comments in zstd_cwksp.h for details.
- */
- return (ZSTD_cwksp_used(ws) >= estimatedSpace - 63) && (ZSTD_cwksp_used(ws) <= estimatedSpace + 63);
- }
+MEM_STATIC int ZSTD_cwksp_estimated_space_within_bounds(const ZSTD_cwksp *const ws, size_t const estimatedSpace) {
+    /* We have an alignment space between objects and tables, and between tables and buffers, so we can have up to twice
+     * the alignment bytes of difference between estimation and actual usage */
+ return (estimatedSpace - ZSTD_cwksp_slack_space_required()) <= ZSTD_cwksp_used(ws) &&
+ ZSTD_cwksp_used(ws) <= estimatedSpace;
}
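Assuming ZSTD_cwksp_slack_space_required() amounts to two alignment gaps of ZSTD_CWKSP_ALIGNMENT_BYTES each (one between objects and tables, one between tables and buffers), the accepted band can be sketched as:

    #include <stddef.h>

    /* Standalone sketch of the accepted band; 64 stands in for
     * ZSTD_CWKSP_ALIGNMENT_BYTES, 2*64 for ZSTD_cwksp_slack_space_required(). */
    static int toy_within_bounds(size_t used, size_t estimated)
    {
        size_t const slack = 2 * 64;
        return (estimated - slack) <= used && used <= estimated;
    }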
diff --git a/contrib/libs/zstd/lib/compress/zstd_lazy.c b/contrib/libs/zstd/lib/compress/zstd_lazy.c
index a247342729..5ba88e8678 100644
--- a/contrib/libs/zstd/lib/compress/zstd_lazy.c
+++ b/contrib/libs/zstd/lib/compress/zstd_lazy.c
@@ -12,6 +12,8 @@
#include "zstd_lazy.h"
#include "../common/bits.h" /* ZSTD_countTrailingZeros64 */
+#define kLazySkippingStep 8
+
/*-*************************************
* Binary Tree search
@@ -618,7 +620,7 @@ size_t ZSTD_dedicatedDictSearch_lazy_search(size_t* offsetPtr, size_t ml, U32 nb
FORCE_INLINE_TEMPLATE U32 ZSTD_insertAndFindFirstIndex_internal(
ZSTD_matchState_t* ms,
const ZSTD_compressionParameters* const cParams,
- const BYTE* ip, U32 const mls)
+ const BYTE* ip, U32 const mls, U32 const lazySkipping)
{
U32* const hashTable = ms->hashTable;
const U32 hashLog = cParams->hashLog;
@@ -633,6 +635,9 @@ FORCE_INLINE_TEMPLATE U32 ZSTD_insertAndFindFirstIndex_internal(
NEXT_IN_CHAIN(idx, chainMask) = hashTable[h];
hashTable[h] = idx;
idx++;
+ /* Stop inserting every position when in the lazy skipping mode. */
+ if (lazySkipping)
+ break;
}
ms->nextToUpdate = target;
@@ -641,7 +646,7 @@ FORCE_INLINE_TEMPLATE U32 ZSTD_insertAndFindFirstIndex_internal(
U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip) {
const ZSTD_compressionParameters* const cParams = &ms->cParams;
- return ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, ms->cParams.minMatch);
+ return ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, ms->cParams.minMatch, /* lazySkipping*/ 0);
}
/* inlining is important to hardwire a hot branch (template emulation) */
@@ -685,7 +690,7 @@ size_t ZSTD_HcFindBestMatch(
}
/* HC4 match finder */
- matchIndex = ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, mls);
+ matchIndex = ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, mls, ms->lazySkipping);
for ( ; (matchIndex>=lowLimit) & (nbAttempts>0) ; nbAttempts--) {
size_t currentMl=0;
@@ -758,7 +763,6 @@ size_t ZSTD_HcFindBestMatch(
* (SIMD) Row-based matchfinder
***********************************/
/* Constants for row-based hash */
-#define ZSTD_ROW_HASH_TAG_OFFSET 16 /* byte offset of hashes in the match state's tagTable from the beginning of a row */
#define ZSTD_ROW_HASH_TAG_MASK ((1u << ZSTD_ROW_HASH_TAG_BITS) - 1)
#define ZSTD_ROW_HASH_MAX_ENTRIES 64 /* absolute maximum number of entries per row, for all configurations */
@@ -774,39 +778,15 @@ MEM_STATIC U32 ZSTD_VecMask_next(ZSTD_VecMask val) {
return ZSTD_countTrailingZeros64(val);
}
-/* ZSTD_rotateRight_*():
- * Rotates a bitfield to the right by "count" bits.
- * https://en.wikipedia.org/w/index.php?title=Circular_shift&oldid=991635599#Implementing_circular_shifts
- */
-FORCE_INLINE_TEMPLATE
-U64 ZSTD_rotateRight_U64(U64 const value, U32 count) {
- assert(count < 64);
- count &= 0x3F; /* for fickle pattern recognition */
- return (value >> count) | (U64)(value << ((0U - count) & 0x3F));
-}
-
-FORCE_INLINE_TEMPLATE
-U32 ZSTD_rotateRight_U32(U32 const value, U32 count) {
- assert(count < 32);
- count &= 0x1F; /* for fickle pattern recognition */
- return (value >> count) | (U32)(value << ((0U - count) & 0x1F));
-}
-
-FORCE_INLINE_TEMPLATE
-U16 ZSTD_rotateRight_U16(U16 const value, U32 count) {
- assert(count < 16);
- count &= 0x0F; /* for fickle pattern recognition */
- return (value >> count) | (U16)(value << ((0U - count) & 0x0F));
-}
-
/* ZSTD_row_nextIndex():
* Returns the next index to insert at within a tagTable row, and updates the "head"
- * value to reflect the update. Essentially cycles backwards from [0, {entries per row})
+ * value to reflect the update. Essentially cycles backwards from [1, {entries per row})
*/
FORCE_INLINE_TEMPLATE U32 ZSTD_row_nextIndex(BYTE* const tagRow, U32 const rowMask) {
- U32 const next = (*tagRow - 1) & rowMask;
- *tagRow = (BYTE)next;
- return next;
+ U32 next = (*tagRow-1) & rowMask;
+ next += (next == 0) ? rowMask : 0; /* skip first position */
+ *tagRow = (BYTE)next;
+ return next;
}
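The new cycling behaviour can be sanity-checked with a standalone sketch (assuming a 16-entry row, i.e. rowMask == 15): the returned positions run 15, 14, ..., 1 and then wrap back to 15, and position 0 is never produced, since the byte at offset 0 of each tag row now stores the head itself:

    #include <stdio.h>

    typedef unsigned char BYTE;
    typedef unsigned int U32;

    static U32 toy_row_nextIndex(BYTE* tagRow, U32 rowMask)
    {
        U32 next = (*tagRow - 1) & rowMask;
        next += (next == 0) ? rowMask : 0;   /* skip position 0: it holds the head */
        *tagRow = (BYTE)next;
        return next;
    }

    int main(void)
    {
        BYTE head = 0;
        int i;
        for (i = 0; i < 18; i++)
            printf("%u ", toy_row_nextIndex(&head, 15));
        printf("\n");                        /* 15 14 ... 1 15 14 ...; no 0 */
        return 0;
    }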
/* ZSTD_isAligned():
@@ -820,7 +800,7 @@ MEM_STATIC int ZSTD_isAligned(void const* ptr, size_t align) {
/* ZSTD_row_prefetch():
* Performs prefetching for the hashTable and tagTable at a given row.
*/
-FORCE_INLINE_TEMPLATE void ZSTD_row_prefetch(U32 const* hashTable, U16 const* tagTable, U32 const relRow, U32 const rowLog) {
+FORCE_INLINE_TEMPLATE void ZSTD_row_prefetch(U32 const* hashTable, BYTE const* tagTable, U32 const relRow, U32 const rowLog) {
PREFETCH_L1(hashTable + relRow);
if (rowLog >= 5) {
PREFETCH_L1(hashTable + relRow + 16);
@@ -844,13 +824,13 @@ FORCE_INLINE_TEMPLATE void ZSTD_row_fillHashCache(ZSTD_matchState_t* ms, const B
U32 idx, const BYTE* const iLimit)
{
U32 const* const hashTable = ms->hashTable;
- U16 const* const tagTable = ms->tagTable;
+ BYTE const* const tagTable = ms->tagTable;
U32 const hashLog = ms->rowHashLog;
U32 const maxElemsToPrefetch = (base + idx) > iLimit ? 0 : (U32)(iLimit - (base + idx) + 1);
U32 const lim = idx + MIN(ZSTD_ROW_HASH_CACHE_SIZE, maxElemsToPrefetch);
for (; idx < lim; ++idx) {
- U32 const hash = (U32)ZSTD_hashPtr(base + idx, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls);
+ U32 const hash = (U32)ZSTD_hashPtrSalted(base + idx, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls, ms->hashSalt);
U32 const row = (hash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog;
ZSTD_row_prefetch(hashTable, tagTable, row, rowLog);
ms->hashCache[idx & ZSTD_ROW_HASH_CACHE_MASK] = hash;
@@ -866,11 +846,12 @@ FORCE_INLINE_TEMPLATE void ZSTD_row_fillHashCache(ZSTD_matchState_t* ms, const B
* base + idx + ZSTD_ROW_HASH_CACHE_SIZE. Also prefetches the appropriate rows from hashTable and tagTable.
*/
FORCE_INLINE_TEMPLATE U32 ZSTD_row_nextCachedHash(U32* cache, U32 const* hashTable,
- U16 const* tagTable, BYTE const* base,
+ BYTE const* tagTable, BYTE const* base,
U32 idx, U32 const hashLog,
- U32 const rowLog, U32 const mls)
+ U32 const rowLog, U32 const mls,
+ U64 const hashSalt)
{
- U32 const newHash = (U32)ZSTD_hashPtr(base+idx+ZSTD_ROW_HASH_CACHE_SIZE, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls);
+ U32 const newHash = (U32)ZSTD_hashPtrSalted(base+idx+ZSTD_ROW_HASH_CACHE_SIZE, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls, hashSalt);
U32 const row = (newHash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog;
ZSTD_row_prefetch(hashTable, tagTable, row, rowLog);
{ U32 const hash = cache[idx & ZSTD_ROW_HASH_CACHE_MASK];
@@ -888,22 +869,21 @@ FORCE_INLINE_TEMPLATE void ZSTD_row_update_internalImpl(ZSTD_matchState_t* ms,
U32 const rowMask, U32 const useCache)
{
U32* const hashTable = ms->hashTable;
- U16* const tagTable = ms->tagTable;
+ BYTE* const tagTable = ms->tagTable;
U32 const hashLog = ms->rowHashLog;
const BYTE* const base = ms->window.base;
DEBUGLOG(6, "ZSTD_row_update_internalImpl(): updateStartIdx=%u, updateEndIdx=%u", updateStartIdx, updateEndIdx);
for (; updateStartIdx < updateEndIdx; ++updateStartIdx) {
- U32 const hash = useCache ? ZSTD_row_nextCachedHash(ms->hashCache, hashTable, tagTable, base, updateStartIdx, hashLog, rowLog, mls)
- : (U32)ZSTD_hashPtr(base + updateStartIdx, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls);
+ U32 const hash = useCache ? ZSTD_row_nextCachedHash(ms->hashCache, hashTable, tagTable, base, updateStartIdx, hashLog, rowLog, mls, ms->hashSalt)
+ : (U32)ZSTD_hashPtrSalted(base + updateStartIdx, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls, ms->hashSalt);
U32 const relRow = (hash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog;
U32* const row = hashTable + relRow;
- BYTE* tagRow = (BYTE*)(tagTable + relRow); /* Though tagTable is laid out as a table of U16, each tag is only 1 byte.
- Explicit cast allows us to get exact desired position within each row */
+ BYTE* tagRow = tagTable + relRow;
U32 const pos = ZSTD_row_nextIndex(tagRow, rowMask);
- assert(hash == ZSTD_hashPtr(base + updateStartIdx, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls));
- ((BYTE*)tagRow)[pos + ZSTD_ROW_HASH_TAG_OFFSET] = hash & ZSTD_ROW_HASH_TAG_MASK;
+ assert(hash == ZSTD_hashPtrSalted(base + updateStartIdx, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls, ms->hashSalt));
+ tagRow[pos] = hash & ZSTD_ROW_HASH_TAG_MASK;
row[pos] = updateStartIdx;
}
}
@@ -1059,7 +1039,7 @@ ZSTD_row_getNEONMask(const U32 rowEntries, const BYTE* const src, const BYTE tag
FORCE_INLINE_TEMPLATE ZSTD_VecMask
ZSTD_row_getMatchMask(const BYTE* const tagRow, const BYTE tag, const U32 headGrouped, const U32 rowEntries)
{
- const BYTE* const src = tagRow + ZSTD_ROW_HASH_TAG_OFFSET;
+ const BYTE* const src = tagRow;
assert((rowEntries == 16) || (rowEntries == 32) || rowEntries == 64);
assert(rowEntries <= ZSTD_ROW_HASH_MAX_ENTRIES);
assert(ZSTD_row_matchMaskGroupWidth(rowEntries) * rowEntries <= sizeof(ZSTD_VecMask) * 8);
@@ -1144,7 +1124,7 @@ size_t ZSTD_RowFindBestMatch(
const U32 rowLog)
{
U32* const hashTable = ms->hashTable;
- U16* const tagTable = ms->tagTable;
+ BYTE* const tagTable = ms->tagTable;
U32* const hashCache = ms->hashCache;
const U32 hashLog = ms->rowHashLog;
const ZSTD_compressionParameters* const cParams = &ms->cParams;
@@ -1163,8 +1143,10 @@ size_t ZSTD_RowFindBestMatch(
const U32 rowMask = rowEntries - 1;
const U32 cappedSearchLog = MIN(cParams->searchLog, rowLog); /* nb of searches is capped at nb entries per row */
const U32 groupWidth = ZSTD_row_matchMaskGroupWidth(rowEntries);
+ const U64 hashSalt = ms->hashSalt;
U32 nbAttempts = 1U << cappedSearchLog;
size_t ml=4-1;
+ U32 hash;
/* DMS/DDS variables that may be referenced later */
const ZSTD_matchState_t* const dms = ms->dictMatchState;
@@ -1188,7 +1170,7 @@ size_t ZSTD_RowFindBestMatch(
if (dictMode == ZSTD_dictMatchState) {
/* Prefetch DMS rows */
U32* const dmsHashTable = dms->hashTable;
- U16* const dmsTagTable = dms->tagTable;
+ BYTE* const dmsTagTable = dms->tagTable;
U32 const dmsHash = (U32)ZSTD_hashPtr(ip, dms->rowHashLog + ZSTD_ROW_HASH_TAG_BITS, mls);
U32 const dmsRelRow = (dmsHash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog;
dmsTag = dmsHash & ZSTD_ROW_HASH_TAG_MASK;
@@ -1198,9 +1180,19 @@ size_t ZSTD_RowFindBestMatch(
}
/* Update the hashTable and tagTable up to (but not including) ip */
- ZSTD_row_update_internal(ms, ip, mls, rowLog, rowMask, 1 /* useCache */);
+ if (!ms->lazySkipping) {
+ ZSTD_row_update_internal(ms, ip, mls, rowLog, rowMask, 1 /* useCache */);
+ hash = ZSTD_row_nextCachedHash(hashCache, hashTable, tagTable, base, curr, hashLog, rowLog, mls, hashSalt);
+ } else {
+ /* Stop inserting every position when in the lazy skipping mode.
+ * The hash cache is also not kept up to date in this mode.
+ */
+ hash = (U32)ZSTD_hashPtrSalted(ip, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls, hashSalt);
+ ms->nextToUpdate = curr;
+ }
+ ms->hashSaltEntropy += hash; /* collect salt entropy */
+
{ /* Get the hash for ip, compute the appropriate row */
- U32 const hash = ZSTD_row_nextCachedHash(hashCache, hashTable, tagTable, base, curr, hashLog, rowLog, mls);
U32 const relRow = (hash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog;
U32 const tag = hash & ZSTD_ROW_HASH_TAG_MASK;
U32* const row = hashTable + relRow;
@@ -1212,9 +1204,10 @@ size_t ZSTD_RowFindBestMatch(
ZSTD_VecMask matches = ZSTD_row_getMatchMask(tagRow, (BYTE)tag, headGrouped, rowEntries);
/* Cycle through the matches and prefetch */
- for (; (matches > 0) && (nbAttempts > 0); --nbAttempts, matches &= (matches - 1)) {
+ for (; (matches > 0) && (nbAttempts > 0); matches &= (matches - 1)) {
U32 const matchPos = ((headGrouped + ZSTD_VecMask_next(matches)) / groupWidth) & rowMask;
U32 const matchIndex = row[matchPos];
+ if(matchPos == 0) continue;
assert(numMatches < rowEntries);
if (matchIndex < lowLimit)
break;
@@ -1224,13 +1217,14 @@ size_t ZSTD_RowFindBestMatch(
PREFETCH_L1(dictBase + matchIndex);
}
matchBuffer[numMatches++] = matchIndex;
+ --nbAttempts;
}
/* Speed opt: insert current byte into hashtable too. This allows us to avoid one iteration of the loop
in ZSTD_row_update_internal() at the next search. */
{
U32 const pos = ZSTD_row_nextIndex(tagRow, rowMask);
- tagRow[pos + ZSTD_ROW_HASH_TAG_OFFSET] = (BYTE)tag;
+ tagRow[pos] = (BYTE)tag;
row[pos] = ms->nextToUpdate++;
}
@@ -1281,13 +1275,15 @@ size_t ZSTD_RowFindBestMatch(
size_t currMatch = 0;
ZSTD_VecMask matches = ZSTD_row_getMatchMask(dmsTagRow, (BYTE)dmsTag, headGrouped, rowEntries);
- for (; (matches > 0) && (nbAttempts > 0); --nbAttempts, matches &= (matches - 1)) {
+ for (; (matches > 0) && (nbAttempts > 0); matches &= (matches - 1)) {
U32 const matchPos = ((headGrouped + ZSTD_VecMask_next(matches)) / groupWidth) & rowMask;
U32 const matchIndex = dmsRow[matchPos];
+ if(matchPos == 0) continue;
if (matchIndex < dmsLowestIndex)
break;
PREFETCH_L1(dmsBase + matchIndex);
matchBuffer[numMatches++] = matchIndex;
+ --nbAttempts;
}
/* Return the longest match */
@@ -1544,10 +1540,11 @@ ZSTD_compressBlock_lazy_generic(
assert(offset_2 <= dictAndPrefixLength);
}
+ /* Reset the lazy skipping state */
+ ms->lazySkipping = 0;
+
if (searchMethod == search_rowHash) {
- ZSTD_row_fillHashCache(ms, base, rowLog,
- MIN(ms->cParams.minMatch, 6 /* mls caps out at 6 */),
- ms->nextToUpdate, ilimit);
+ ZSTD_row_fillHashCache(ms, base, rowLog, mls, ms->nextToUpdate, ilimit);
}
/* Match Loop */
@@ -1591,7 +1588,16 @@ ZSTD_compressBlock_lazy_generic(
}
if (matchLength < 4) {
- ip += ((ip-anchor) >> kSearchStrength) + 1; /* jump faster over incompressible sections */
+                size_t const step = ((size_t)(ip-anchor) >> kSearchStrength) + 1;   /* jump faster over incompressible sections */
+ ip += step;
+ /* Enter the lazy skipping mode once we are skipping more than 8 bytes at a time.
+ * In this mode we stop inserting every position into our tables, and only insert
+             * positions that we search, which is one in every `step` positions.
+ * The exact cutoff is flexible, I've just chosen a number that is reasonably high,
+ * so we minimize the compression ratio loss in "normal" scenarios. This mode gets
+ * triggered once we've gone 2KB without finding any matches.
+ */
+ ms->lazySkipping = step > kLazySkippingStep;
continue;
}
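With kSearchStrength == 8 (its value in zstd_lazy.c, treated here as an assumption) the step exceeds kLazySkippingStep == 8 exactly once ip - anchor reaches 8 << 8 = 2048 bytes, which is where the "2KB without finding any matches" figure in the comment comes from. A standalone check:

    #include <assert.h>
    #include <stddef.h>

    #define kSearchStrength   8   /* as in zstd_lazy.c */
    #define kLazySkippingStep 8

    /* step grows with the distance since the last match (ip - anchor) */
    static size_t toy_search_step(size_t bytesSinceLastMatch)
    {
        return (bytesSinceLastMatch >> kSearchStrength) + 1;
    }

    int main(void)
    {
        assert(toy_search_step(2047) == 8);  /* not skipping yet */
        assert(toy_search_step(2048) == 9);  /* > kLazySkippingStep: skipping on */
        return 0;
    }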
@@ -1695,6 +1701,13 @@ _storeSequence:
ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offBase, matchLength);
anchor = ip = start + matchLength;
}
+ if (ms->lazySkipping) {
+ /* We've found a match, disable lazy skipping mode, and refill the hash cache. */
+ if (searchMethod == search_rowHash) {
+ ZSTD_row_fillHashCache(ms, base, rowLog, mls, ms->nextToUpdate, ilimit);
+ }
+ ms->lazySkipping = 0;
+ }
/* check immediate repcode */
if (isDxS) {
@@ -1912,12 +1925,13 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
DEBUGLOG(5, "ZSTD_compressBlock_lazy_extDict_generic (searchFunc=%u)", (U32)searchMethod);
+ /* Reset the lazy skipping state */
+ ms->lazySkipping = 0;
+
/* init */
ip += (ip == prefixStart);
if (searchMethod == search_rowHash) {
- ZSTD_row_fillHashCache(ms, base, rowLog,
- MIN(ms->cParams.minMatch, 6 /* mls caps out at 6 */),
- ms->nextToUpdate, ilimit);
+ ZSTD_row_fillHashCache(ms, base, rowLog, mls, ms->nextToUpdate, ilimit);
}
/* Match Loop */
@@ -1955,7 +1969,16 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
}
if (matchLength < 4) {
- ip += ((ip-anchor) >> kSearchStrength) + 1; /* jump faster over incompressible sections */
+ size_t const step = ((size_t)(ip-anchor) >> kSearchStrength);
+ ip += step + 1; /* jump faster over incompressible sections */
+ /* Enter the lazy skipping mode once we are skipping more than 8 bytes at a time.
+ * In this mode we stop inserting every position into our tables, and only insert
+             * positions that we search, which is one in every `step` positions.
+ * The exact cutoff is flexible, I've just chosen a number that is reasonably high,
+ * so we minimize the compression ratio loss in "normal" scenarios. This mode gets
+ * triggered once we've gone 2KB without finding any matches.
+ */
+ ms->lazySkipping = step > kLazySkippingStep;
continue;
}
@@ -2041,6 +2064,13 @@ _storeSequence:
ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offBase, matchLength);
anchor = ip = start + matchLength;
}
+ if (ms->lazySkipping) {
+ /* We've found a match, disable lazy skipping mode, and refill the hash cache. */
+ if (searchMethod == search_rowHash) {
+ ZSTD_row_fillHashCache(ms, base, rowLog, mls, ms->nextToUpdate, ilimit);
+ }
+ ms->lazySkipping = 0;
+ }
/* check immediate repcode */
while (ip <= ilimit) {
diff --git a/contrib/libs/zstd/lib/compress/zstd_opt.c b/contrib/libs/zstd/lib/compress/zstd_opt.c
index fdd7f9d8b5..f02a760946 100644
--- a/contrib/libs/zstd/lib/compress/zstd_opt.c
+++ b/contrib/libs/zstd/lib/compress/zstd_opt.c
@@ -1086,6 +1086,8 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
ZSTD_optimal_t lastSequence;
ZSTD_optLdm_t optLdm;
+ ZSTD_memset(&lastSequence, 0, sizeof(ZSTD_optimal_t));
+
optLdm.seqStore = ms->ldmSeqStore ? *ms->ldmSeqStore : kNullRawSeqStore;
optLdm.endPosInBlock = optLdm.startPosInBlock = optLdm.offset = 0;
ZSTD_opt_getNextMatchAndUpdateSeqStore(&optLdm, (U32)(ip-istart), (U32)(iend-ip));
diff --git a/contrib/libs/zstd/lib/compress/zstdmt_compress.c b/contrib/libs/zstd/lib/compress/zstdmt_compress.c
index 7a2c71720a..6786075569 100644
--- a/contrib/libs/zstd/lib/compress/zstdmt_compress.c
+++ b/contrib/libs/zstd/lib/compress/zstdmt_compress.c
@@ -20,6 +20,7 @@
/* ====== Dependencies ====== */
+#include "../common/allocations.h" /* ZSTD_customMalloc, ZSTD_customCalloc, ZSTD_customFree */
#include "../common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memset, INT_MAX, UINT_MAX */
#include "../common/mem.h" /* MEM_STATIC */
#include "../common/pool.h" /* threadpool */
@@ -719,7 +720,7 @@ static void ZSTDMT_compressionJob(void* jobDescription)
ZSTDMT_serialState_update(job->serial, cctx, rawSeqStore, job->src, job->jobID);
if (!job->firstJob) { /* flush and overwrite frame header when it's not first job */
- size_t const hSize = ZSTD_compressContinue(cctx, dstBuff.start, dstBuff.capacity, job->src.start, 0);
+ size_t const hSize = ZSTD_compressContinue_public(cctx, dstBuff.start, dstBuff.capacity, job->src.start, 0);
if (ZSTD_isError(hSize)) JOB_ERROR(hSize);
DEBUGLOG(5, "ZSTDMT_compressionJob: flush and overwrite %u bytes of frame header (not first job)", (U32)hSize);
ZSTD_invalidateRepCodes(cctx);
@@ -737,7 +738,7 @@ static void ZSTDMT_compressionJob(void* jobDescription)
DEBUGLOG(5, "ZSTDMT_compressionJob: compress %u bytes in %i blocks", (U32)job->src.size, nbChunks);
assert(job->cSize == 0);
for (chunkNb = 1; chunkNb < nbChunks; chunkNb++) {
- size_t const cSize = ZSTD_compressContinue(cctx, op, oend-op, ip, chunkSize);
+ size_t const cSize = ZSTD_compressContinue_public(cctx, op, oend-op, ip, chunkSize);
if (ZSTD_isError(cSize)) JOB_ERROR(cSize);
ip += chunkSize;
op += cSize; assert(op < oend);
@@ -757,8 +758,8 @@ static void ZSTDMT_compressionJob(void* jobDescription)
size_t const lastBlockSize1 = job->src.size & (chunkSize-1);
size_t const lastBlockSize = ((lastBlockSize1==0) & (job->src.size>=chunkSize)) ? chunkSize : lastBlockSize1;
size_t const cSize = (job->lastJob) ?
- ZSTD_compressEnd (cctx, op, oend-op, ip, lastBlockSize) :
- ZSTD_compressContinue(cctx, op, oend-op, ip, lastBlockSize);
+ ZSTD_compressEnd_public(cctx, op, oend-op, ip, lastBlockSize) :
+ ZSTD_compressContinue_public(cctx, op, oend-op, ip, lastBlockSize);
if (ZSTD_isError(cSize)) JOB_ERROR(cSize);
lastCBlockSize = cSize;
} }
diff --git a/contrib/libs/zstd/lib/decompress/huf_decompress.c b/contrib/libs/zstd/lib/decompress/huf_decompress.c
index c2d1f633a4..5b217ac586 100644
--- a/contrib/libs/zstd/lib/decompress/huf_decompress.c
+++ b/contrib/libs/zstd/lib/decompress/huf_decompress.c
@@ -696,7 +696,7 @@ void HUF_decompress4X1_usingDTable_internal_fast_c_loop(HUF_DecompressFastArgs*
/* Copy the arguments to local variables */
ZSTD_memcpy(&bits, &args->bits, sizeof(bits));
- ZSTD_memcpy(&ip, &args->ip, sizeof(ip));
+ ZSTD_memcpy((void*)(&ip), &args->ip, sizeof(ip));
ZSTD_memcpy(&op, &args->op, sizeof(op));
assert(MEM_isLittleEndian());
@@ -779,7 +779,7 @@ _out:
/* Save the final values of each of the state variables back to args. */
ZSTD_memcpy(&args->bits, &bits, sizeof(bits));
- ZSTD_memcpy(&args->ip, &ip, sizeof(ip));
+ ZSTD_memcpy((void*)(&args->ip), &ip, sizeof(ip));
ZSTD_memcpy(&args->op, &op, sizeof(op));
}
@@ -1476,7 +1476,7 @@ void HUF_decompress4X2_usingDTable_internal_fast_c_loop(HUF_DecompressFastArgs*
/* Copy the arguments to local registers. */
ZSTD_memcpy(&bits, &args->bits, sizeof(bits));
- ZSTD_memcpy(&ip, &args->ip, sizeof(ip));
+ ZSTD_memcpy((void*)(&ip), &args->ip, sizeof(ip));
ZSTD_memcpy(&op, &args->op, sizeof(op));
oend[0] = op[1];
@@ -1599,7 +1599,7 @@ _out:
/* Save the final values of each of the state variables back to args. */
ZSTD_memcpy(&args->bits, &bits, sizeof(bits));
- ZSTD_memcpy(&args->ip, &ip, sizeof(ip));
+ ZSTD_memcpy((void*)(&args->ip), &ip, sizeof(ip));
ZSTD_memcpy(&args->op, &op, sizeof(op));
}
diff --git a/contrib/libs/zstd/lib/decompress/zstd_ddict.c b/contrib/libs/zstd/lib/decompress/zstd_ddict.c
index ad5c34a7fc..309ec0d036 100644
--- a/contrib/libs/zstd/lib/decompress/zstd_ddict.c
+++ b/contrib/libs/zstd/lib/decompress/zstd_ddict.c
@@ -14,6 +14,7 @@
/*-*******************************************************
* Dependencies
*********************************************************/
+#include "../common/allocations.h" /* ZSTD_customMalloc, ZSTD_customFree */
#include "../common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */
#include "../common/cpu.h" /* bmi2 */
#include "../common/mem.h" /* low level memory routines */
diff --git a/contrib/libs/zstd/lib/decompress/zstd_decompress.c b/contrib/libs/zstd/lib/decompress/zstd_decompress.c
index 093a32716d..02e254386d 100644
--- a/contrib/libs/zstd/lib/decompress/zstd_decompress.c
+++ b/contrib/libs/zstd/lib/decompress/zstd_decompress.c
@@ -55,6 +55,7 @@
/*-*******************************************************
* Dependencies
*********************************************************/
+#include "../common/allocations.h" /* ZSTD_customMalloc, ZSTD_customCalloc, ZSTD_customFree */
#include "../common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */
#include "../common/mem.h" /* low level memory routines */
#define FSE_STATIC_LINKING_ONLY
@@ -588,49 +589,52 @@ static size_t readSkippableFrameSize(void const* src, size_t srcSize)
sizeU32 = MEM_readLE32((BYTE const*)src + ZSTD_FRAMEIDSIZE);
RETURN_ERROR_IF((U32)(sizeU32 + ZSTD_SKIPPABLEHEADERSIZE) < sizeU32,
frameParameter_unsupported, "");
- {
- size_t const skippableSize = skippableHeaderSize + sizeU32;
+ { size_t const skippableSize = skippableHeaderSize + sizeU32;
RETURN_ERROR_IF(skippableSize > srcSize, srcSize_wrong, "");
return skippableSize;
}
}
/*! ZSTD_readSkippableFrame() :
- * Retrieves a zstd skippable frame containing data given by src, and writes it to dst buffer.
+ * Retrieves content of a skippable frame, and writes it to dst buffer.
*
* The parameter magicVariant will receive the magicVariant that was supplied when the frame was written,
* i.e. magicNumber - ZSTD_MAGIC_SKIPPABLE_START. This can be NULL if the caller is not interested
* in the magicVariant.
*
- * Returns an error if destination buffer is not large enough, or if the frame is not skippable.
+ * Returns an error if destination buffer is not large enough, or if this is not a valid skippable frame.
*
* @return : number of bytes written or a ZSTD error.
*/
-ZSTDLIB_API size_t ZSTD_readSkippableFrame(void* dst, size_t dstCapacity, unsigned* magicVariant,
- const void* src, size_t srcSize)
+size_t ZSTD_readSkippableFrame(void* dst, size_t dstCapacity,
+ unsigned* magicVariant, /* optional, can be NULL */
+ const void* src, size_t srcSize)
{
- U32 const magicNumber = MEM_readLE32(src);
- size_t skippableFrameSize = readSkippableFrameSize(src, srcSize);
- size_t skippableContentSize = skippableFrameSize - ZSTD_SKIPPABLEHEADERSIZE;
-
- /* check input validity */
- RETURN_ERROR_IF(!ZSTD_isSkippableFrame(src, srcSize), frameParameter_unsupported, "");
- RETURN_ERROR_IF(skippableFrameSize < ZSTD_SKIPPABLEHEADERSIZE || skippableFrameSize > srcSize, srcSize_wrong, "");
- RETURN_ERROR_IF(skippableContentSize > dstCapacity, dstSize_tooSmall, "");
+ RETURN_ERROR_IF(srcSize < ZSTD_SKIPPABLEHEADERSIZE, srcSize_wrong, "");
- /* deliver payload */
- if (skippableContentSize > 0 && dst != NULL)
- ZSTD_memcpy(dst, (const BYTE *)src + ZSTD_SKIPPABLEHEADERSIZE, skippableContentSize);
- if (magicVariant != NULL)
- *magicVariant = magicNumber - ZSTD_MAGIC_SKIPPABLE_START;
- return skippableContentSize;
+ { U32 const magicNumber = MEM_readLE32(src);
+ size_t skippableFrameSize = readSkippableFrameSize(src, srcSize);
+ size_t skippableContentSize = skippableFrameSize - ZSTD_SKIPPABLEHEADERSIZE;
+
+ /* check input validity */
+ RETURN_ERROR_IF(!ZSTD_isSkippableFrame(src, srcSize), frameParameter_unsupported, "");
+ RETURN_ERROR_IF(skippableFrameSize < ZSTD_SKIPPABLEHEADERSIZE || skippableFrameSize > srcSize, srcSize_wrong, "");
+ RETURN_ERROR_IF(skippableContentSize > dstCapacity, dstSize_tooSmall, "");
+
+ /* deliver payload */
+ if (skippableContentSize > 0 && dst != NULL)
+ ZSTD_memcpy(dst, (const BYTE *)src + ZSTD_SKIPPABLEHEADERSIZE, skippableContentSize);
+ if (magicVariant != NULL)
+ *magicVariant = magicNumber - ZSTD_MAGIC_SKIPPABLE_START;
+ return skippableContentSize;
+ }
}
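A hedged usage sketch of the public entry point (error handling trimmed to the essentials; read_skippable is a hypothetical helper):

    #include <zstd.h>
    #include <stddef.h>

    /* Extract the payload of a skippable frame, if one starts at `src`.
     * Returns the number of payload bytes written to dst, or 0. */
    static size_t read_skippable(void* dst, size_t dstCapacity,
                                 const void* src, size_t srcSize)
    {
        unsigned magicVariant = 0;
        if (!ZSTD_isSkippableFrame(src, srcSize)) return 0;
        {   size_t const r = ZSTD_readSkippableFrame(dst, dstCapacity,
                                                     &magicVariant, src, srcSize);
            return ZSTD_isError(r) ? 0 : r;
        }
    }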
/** ZSTD_findDecompressedSize() :
- * compatible with legacy mode
* `srcSize` must be the exact length of some number of ZSTD compressed and/or
* skippable frames
- * @return : decompressed size of the frames contained */
+ * note: compatible with legacy mode
+ * @return : decompressed size of the frames contained */
unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize)
{
unsigned long long totalDstSize = 0;
@@ -640,9 +644,7 @@ unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize)
if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {
size_t const skippableSize = readSkippableFrameSize(src, srcSize);
- if (ZSTD_isError(skippableSize)) {
- return ZSTD_CONTENTSIZE_ERROR;
- }
+ if (ZSTD_isError(skippableSize)) return ZSTD_CONTENTSIZE_ERROR;
assert(skippableSize <= srcSize);
src = (const BYTE *)src + skippableSize;
@@ -650,17 +652,17 @@ unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize)
continue;
}
- { unsigned long long const ret = ZSTD_getFrameContentSize(src, srcSize);
- if (ret >= ZSTD_CONTENTSIZE_ERROR) return ret;
+ { unsigned long long const fcs = ZSTD_getFrameContentSize(src, srcSize);
+ if (fcs >= ZSTD_CONTENTSIZE_ERROR) return fcs;
- /* check for overflow */
- if (totalDstSize + ret < totalDstSize) return ZSTD_CONTENTSIZE_ERROR;
- totalDstSize += ret;
+ if (totalDstSize + fcs < totalDstSize)
+ return ZSTD_CONTENTSIZE_ERROR; /* check for overflow */
+ totalDstSize += fcs;
}
+ /* skip to next frame */
{ size_t const frameSrcSize = ZSTD_findFrameCompressedSize(src, srcSize);
- if (ZSTD_isError(frameSrcSize)) {
- return ZSTD_CONTENTSIZE_ERROR;
- }
+ if (ZSTD_isError(frameSrcSize)) return ZSTD_CONTENTSIZE_ERROR;
+ assert(frameSrcSize <= srcSize);
src = (const BYTE *)src + frameSrcSize;
srcSize -= frameSrcSize;
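The `totalDstSize + fcs < totalDstSize` test is the usual unsigned-overflow idiom: unsigned addition wraps, so the sum is smaller than either operand exactly when it wrapped. A minimal standalone check:

    #include <assert.h>

    int main(void)
    {
        unsigned long long const a = ~0ULL - 5;
        assert(a + 10 < a);        /* wrapped: the sum would not fit */
        assert(!(a + 5 < a));      /* fits exactly at the maximum, no wrap */
        return 0;
    }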
@@ -1090,17 +1092,18 @@ static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx,
}
#endif
- { U32 const magicNumber = MEM_readLE32(src);
- DEBUGLOG(4, "reading magic number %08X (expecting %08X)",
- (unsigned)magicNumber, ZSTD_MAGICNUMBER);
+ if (srcSize >= 4) {
+ U32 const magicNumber = MEM_readLE32(src);
+ DEBUGLOG(5, "reading magic number %08X", (unsigned)magicNumber);
if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {
+ /* skippable frame detected : skip it */
size_t const skippableSize = readSkippableFrameSize(src, srcSize);
- FORWARD_IF_ERROR(skippableSize, "readSkippableFrameSize failed");
+ FORWARD_IF_ERROR(skippableSize, "invalid skippable frame");
assert(skippableSize <= srcSize);
src = (const BYTE *)src + skippableSize;
srcSize -= skippableSize;
- continue;
+ continue; /* check next frame */
} }
if (ddict) {
diff --git a/contrib/libs/zstd/lib/decompress/zstd_decompress_block.c b/contrib/libs/zstd/lib/decompress/zstd_decompress_block.c
index 0a06a021e1..09896a931e 100644
--- a/contrib/libs/zstd/lib/decompress/zstd_decompress_block.c
+++ b/contrib/libs/zstd/lib/decompress/zstd_decompress_block.c
@@ -1232,11 +1232,7 @@ ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets)
/* sequence */
{ size_t offset;
- #if defined(__clang__)
- if (LIKELY(ofBits > 1)) {
- #else
if (ofBits > 1) {
- #endif
ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1);
ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 5);
ZSTD_STATIC_ASSERT(STREAM_ACCUMULATOR_MIN_32 > LONG_OFFSETS_MAX_EXTRA_BITS_32);
@@ -1273,11 +1269,7 @@ ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets)
seq.offset = offset;
}
- #if defined(__clang__)
- if (UNLIKELY(mlBits > 0))
- #else
if (mlBits > 0)
- #endif
seq.matchLength += BIT_readBitsFast(&seqState->DStream, mlBits/*>0*/);
if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32))
@@ -1287,11 +1279,7 @@ ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets)
/* Ensure there are enough bits to read the rest of data in 64-bit mode. */
ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64);
- #if defined(__clang__)
- if (UNLIKELY(llBits > 0))
- #else
if (llBits > 0)
- #endif
seq.litLength += BIT_readBitsFast(&seqState->DStream, llBits/*>0*/);
if (MEM_32bits())
@@ -1987,7 +1975,7 @@ ZSTD_decompressSequencesLong(ZSTD_DCtx* dctx,
/**
- * @returns The total size of the history referencable by zstd, including
+ * @returns The total size of the history referenceable by zstd, including
* both the prefix and the extDict. At @p op any offset larger than this
* is invalid.
*/
@@ -2124,7 +2112,9 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
ip += seqHSize;
srcSize -= seqHSize;
- RETURN_ERROR_IF(dst == NULL && nbSeq > 0, dstSize_tooSmall, "NULL not handled");
+ RETURN_ERROR_IF((dst == NULL || dstCapacity == 0) && nbSeq > 0, dstSize_tooSmall, "NULL not handled");
+ RETURN_ERROR_IF(MEM_64bits() && sizeof(size_t) == sizeof(void*) && (size_t)(-1) - (size_t)dst < (size_t)(1 << 20), dstSize_tooSmall,
+ "invalid dst");
/* If we could potentially have long offsets, or we might want to use the prefetch decoder,
* compute information about the share of long offsets, and the maximum nbAdditionalBits.
@@ -2181,9 +2171,9 @@ void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst, size_t dstSize)
}
-size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx,
- void* dst, size_t dstCapacity,
- const void* src, size_t srcSize)
+size_t ZSTD_decompressBlock_deprecated(ZSTD_DCtx* dctx,
+ void* dst, size_t dstCapacity,
+ const void* src, size_t srcSize)
{
size_t dSize;
ZSTD_checkContinuity(dctx, dst, dstCapacity);
@@ -2191,3 +2181,12 @@ size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx,
dctx->previousDstEnd = (char*)dst + dSize;
return dSize;
}
+
+
+/* NOTE: Must just wrap ZSTD_decompressBlock_deprecated() */
+size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx,
+ void* dst, size_t dstCapacity,
+ const void* src, size_t srcSize)
+{
+ return ZSTD_decompressBlock_deprecated(dctx, dst, dstCapacity, src, srcSize);
+}
diff --git a/contrib/libs/zstd/lib/decompress/zstd_decompress_block.h b/contrib/libs/zstd/lib/decompress/zstd_decompress_block.h
index 67791dbc3a..9d1318882d 100644
--- a/contrib/libs/zstd/lib/decompress/zstd_decompress_block.h
+++ b/contrib/libs/zstd/lib/decompress/zstd_decompress_block.h
@@ -64,5 +64,10 @@ void ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
unsigned tableLog, void* wksp, size_t wkspSize,
int bmi2);
+/* Internal definition of ZSTD_decompressBlock() to avoid deprecation warnings. */
+size_t ZSTD_decompressBlock_deprecated(ZSTD_DCtx* dctx,
+ void* dst, size_t dstCapacity,
+ const void* src, size_t srcSize);
+
#endif /* ZSTD_DEC_BLOCK_H */
diff --git a/contrib/libs/zstd/lib/dictBuilder/zdict.c b/contrib/libs/zstd/lib/dictBuilder/zdict.c
index 140a0f909c..604223c772 100644
--- a/contrib/libs/zstd/lib/dictBuilder/zdict.c
+++ b/contrib/libs/zstd/lib/dictBuilder/zdict.c
@@ -566,11 +566,11 @@ static void ZDICT_countEStats(EStats_ress_t esr, const ZSTD_parameters* params,
size_t cSize;
if (srcSize > blockSizeMax) srcSize = blockSizeMax; /* protection vs large samples */
- { size_t const errorCode = ZSTD_compressBegin_usingCDict(esr.zc, esr.dict);
+ { size_t const errorCode = ZSTD_compressBegin_usingCDict_deprecated(esr.zc, esr.dict);
if (ZSTD_isError(errorCode)) { DISPLAYLEVEL(1, "warning : ZSTD_compressBegin_usingCDict failed \n"); return; }
}
- cSize = ZSTD_compressBlock(esr.zc, esr.workPlace, ZSTD_BLOCKSIZE_MAX, src, srcSize);
+ cSize = ZSTD_compressBlock_deprecated(esr.zc, esr.workPlace, ZSTD_BLOCKSIZE_MAX, src, srcSize);
if (ZSTD_isError(cSize)) { DISPLAYLEVEL(3, "warning : could not compress sample size %u \n", (unsigned)srcSize); return; }
if (cSize) { /* if == 0; block is not compressible */
diff --git a/contrib/libs/zstd/lib/zstd.h b/contrib/libs/zstd/lib/zstd.h
index 95aac07370..e5c3f8b68b 100644
--- a/contrib/libs/zstd/lib/zstd.h
+++ b/contrib/libs/zstd/lib/zstd.h
@@ -106,7 +106,7 @@ extern "C" {
/*------ Version ------*/
#define ZSTD_VERSION_MAJOR 1
#define ZSTD_VERSION_MINOR 5
-#define ZSTD_VERSION_RELEASE 4
+#define ZSTD_VERSION_RELEASE 5
#define ZSTD_VERSION_NUMBER (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE)
/*! ZSTD_versionNumber() :
@@ -148,7 +148,8 @@ ZSTDLIB_API const char* ZSTD_versionString(void);
***************************************/
/*! ZSTD_compress() :
* Compresses `src` content as a single zstd compressed frame into already allocated `dst`.
- * Hint : compression runs faster if `dstCapacity` >= `ZSTD_compressBound(srcSize)`.
+ * NOTE: Providing `dstCapacity >= ZSTD_compressBound(srcSize)` guarantees that zstd will have
+ * enough space to successfully compress the data.
 * @return : compressed size written into `dst` (<= `dstCapacity`),
* or an error code if it fails (which can be tested using ZSTD_isError()). */
ZSTDLIB_API size_t ZSTD_compress( void* dst, size_t dstCapacity,
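A minimal sketch of relying on that guarantee (compress_copy is a hypothetical helper; level 3 is an arbitrary choice):

    #include <stdlib.h>
    #include <zstd.h>

    /* Compress src into a freshly allocated buffer sized by ZSTD_compressBound(). */
    static void* compress_copy(const void* src, size_t srcSize, size_t* cSizePtr)
    {
        size_t const bound = ZSTD_compressBound(srcSize);  /* worst-case dst size */
        void* const dst = malloc(bound);
        if (dst == NULL) return NULL;
        {   size_t const cSize = ZSTD_compress(dst, bound, src, srcSize, 3);
            if (ZSTD_isError(cSize)) { free(dst); return NULL; }
            *cSizePtr = cSize;
            return dst;
        }
    }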
@@ -578,7 +579,8 @@ ZSTDLIB_API size_t ZSTD_CCtx_reset(ZSTD_CCtx* cctx, ZSTD_ResetDirective reset);
* Should cctx hold data from a previously unfinished frame, everything about it is forgotten.
* - Compression parameters are pushed into CCtx before starting compression, using ZSTD_CCtx_set*()
* - The function is always blocking, returns when compression is completed.
- * Hint : compression runs faster if `dstCapacity` >= `ZSTD_compressBound(srcSize)`.
+ * NOTE: Providing `dstCapacity >= ZSTD_compressBound(srcSize)` guarantees that zstd will have
+ * enough space to successfully compress the data, though it is possible it fails for other reasons.
 * @return : compressed size written into `dst` (<= `dstCapacity`),
* or an error code if it fails (which can be tested using ZSTD_isError()).
*/
@@ -1018,9 +1020,11 @@ ZSTDLIB_API unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize);
* Advanced dictionary and prefix API (Requires v1.4.0+)
*
* This API allows dictionaries to be used with ZSTD_compress2(),
- * ZSTD_compressStream2(), and ZSTD_decompressDCtx(). Dictionaries are sticky, and
- * only reset with the context is reset with ZSTD_reset_parameters or
- * ZSTD_reset_session_and_parameters. Prefixes are single-use.
+ * ZSTD_compressStream2(), and ZSTD_decompressDCtx().
+ * Dictionaries are sticky: they remain valid when the same context is re-used,
+ * and they only reset when the context is reset
+ * with ZSTD_reset_parameters or ZSTD_reset_session_and_parameters.
+ * In contrast, prefixes are single-use.
******************************************************************************/
@@ -1041,7 +1045,11 @@ ZSTDLIB_API unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize);
* Use experimental ZSTD_CCtx_loadDictionary_byReference() to reference content instead.
* In such a case, dictionary buffer must outlive its users.
* Note 4 : Use ZSTD_CCtx_loadDictionary_advanced()
- * to precisely select how dictionary content must be interpreted. */
+ * to precisely select how dictionary content must be interpreted.
+ * Note 5 : This method does not benefit from LDM (long distance mode).
+ * If you want to employ LDM on some large dictionary content,
+ * prefer employing ZSTD_CCtx_refPrefix() described below.
+ */
ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary(ZSTD_CCtx* cctx, const void* dict, size_t dictSize);
/*! ZSTD_CCtx_refCDict() : Requires v1.4.0+
@@ -1064,6 +1072,7 @@ ZSTDLIB_API size_t ZSTD_CCtx_refCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict);
* Decompression will need same prefix to properly regenerate data.
* Compressing with a prefix is similar in outcome as performing a diff and compressing it,
* but performs much faster, especially during decompression (compression speed is tunable with compression level).
+ * This method is compatible with LDM (long distance mode).
* @result : 0, or an error code (which can be tested with ZSTD_isError()).
* Special: Adding any prefix (including NULL) invalidates any previous prefix or dictionary
* Note 1 : Prefix buffer is referenced. It **must** outlive compression.
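A hedged sketch of prefix usage (compress_with_prefix is a hypothetical helper; the identical prefix must be referenced again on the decompression side with ZSTD_DCtx_refPrefix()):

    #include <zstd.h>

    static size_t compress_with_prefix(ZSTD_CCtx* cctx,
                                       void* dst, size_t dstCapacity,
                                       const void* src, size_t srcSize,
                                       const void* prefix, size_t prefixSize)
    {
        ZSTD_CCtx_reset(cctx, ZSTD_reset_session_only);
        {   size_t const r = ZSTD_CCtx_refPrefix(cctx, prefix, prefixSize);
            if (ZSTD_isError(r)) return r;     /* prefix is single-use */
        }
        return ZSTD_compress2(cctx, dst, dstCapacity, src, srcSize);
    }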
@@ -1387,7 +1396,7 @@ typedef enum {
} ZSTD_paramSwitch_e;
/***************************************
-* Frame size functions
+* Frame header and size functions
***************************************/
/*! ZSTD_findDecompressedSize() :
@@ -1434,6 +1443,30 @@ ZSTDLIB_STATIC_API unsigned long long ZSTD_decompressBound(const void* src, size
* or an error code (if srcSize is too small) */
ZSTDLIB_STATIC_API size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize);
+typedef enum { ZSTD_frame, ZSTD_skippableFrame } ZSTD_frameType_e;
+typedef struct {
+ unsigned long long frameContentSize; /* if == ZSTD_CONTENTSIZE_UNKNOWN, it means this field is not available. 0 means "empty" */
+    unsigned long long windowSize;       /* can be very large, up to frameContentSize */
+ unsigned blockSizeMax;
+ ZSTD_frameType_e frameType; /* if == ZSTD_skippableFrame, frameContentSize is the size of skippable content */
+ unsigned headerSize;
+ unsigned dictID;
+ unsigned checksumFlag;
+ unsigned _reserved1;
+ unsigned _reserved2;
+} ZSTD_frameHeader;
+
+/*! ZSTD_getFrameHeader() :
+ * decode Frame Header, or indicate that a larger `srcSize` is required.
+ * @return : 0, `zfhPtr` is correctly filled,
+ *          >0, `srcSize` is too small, the return value is the wanted `srcSize` amount,
+ * or an error code, which can be tested using ZSTD_isError() */
+ZSTDLIB_STATIC_API size_t ZSTD_getFrameHeader(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize); /**< doesn't consume input */
+/*! ZSTD_getFrameHeader_advanced() :
+ * same as ZSTD_getFrameHeader(),
+ * with added capability to select a format (like ZSTD_f_zstd1_magicless) */
+ZSTDLIB_STATIC_API size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize, ZSTD_format_e format);
+
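A hedged usage sketch (these are static-linking declarations, so ZSTD_STATIC_LINKING_ONLY must be defined before including zstd.h; peek_frame_header is a hypothetical helper):

    #define ZSTD_STATIC_LINKING_ONLY
    #include <zstd.h>

    /* Inspect a frame header without consuming any input.
     * Returns 0 on success, a positive byte count if more input is needed,
     * or -1 on a malformed header. */
    static long peek_frame_header(const void* src, size_t srcSize)
    {
        ZSTD_frameHeader zfh;
        size_t const r = ZSTD_getFrameHeader(&zfh, src, srcSize);
        if (ZSTD_isError(r)) return -1;
        if (r > 0) return (long)r;    /* feed at least r bytes and retry */
        /* zfh.frameType, zfh.windowSize, zfh.dictID etc. are now valid */
        return 0;
    }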
/*! ZSTD_decompressionMargin() :
* Zstd supports in-place decompression, where the input and output buffers overlap.
* In this case, the output buffer must be at least (Margin + Output_Size) bytes large,
@@ -1803,12 +1836,26 @@ ZSTDLIB_STATIC_API size_t ZSTD_checkCParams(ZSTD_compressionParameters params);
ZSTDLIB_STATIC_API ZSTD_compressionParameters ZSTD_adjustCParams(ZSTD_compressionParameters cPar, unsigned long long srcSize, size_t dictSize);
/*! ZSTD_CCtx_setCParams() :
- * Set all parameters provided within @cparams into the working @cctx.
+ * Set all parameters provided within @p cparams into the working @p cctx.
* Note : if modifying parameters during compression (MT mode only),
* note that changes to the .windowLog parameter will be ignored.
- * @return 0 on success, or an error code (can be checked with ZSTD_isError()) */
+ * @return 0 on success, or an error code (can be checked with ZSTD_isError()).
+ * On failure, no parameters are updated.
+ */
ZSTDLIB_STATIC_API size_t ZSTD_CCtx_setCParams(ZSTD_CCtx* cctx, ZSTD_compressionParameters cparams);
+/*! ZSTD_CCtx_setFParams() :
+ * Set all parameters provided within @p fparams into the working @p cctx.
+ * @return 0 on success, or an error code (can be checked with ZSTD_isError()).
+ */
+ZSTDLIB_STATIC_API size_t ZSTD_CCtx_setFParams(ZSTD_CCtx* cctx, ZSTD_frameParameters fparams);
+
+/*! ZSTD_CCtx_setParams() :
+ * Set all parameters provided within @p params into the working @p cctx.
+ * @return 0 on success, or an error code (can be checked with ZSTD_isError()).
+ */
+ZSTDLIB_STATIC_API size_t ZSTD_CCtx_setParams(ZSTD_CCtx* cctx, ZSTD_parameters params);
+
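A hedged sketch combining the new setter with ZSTD_getCParams() (compress_with_cparams is a hypothetical helper; requires ZSTD_STATIC_LINKING_ONLY):

    #define ZSTD_STATIC_LINKING_ONLY
    #include <zstd.h>

    static size_t compress_with_cparams(ZSTD_CCtx* cctx, int level,
                                        void* dst, size_t dstCapacity,
                                        const void* src, size_t srcSize)
    {
        ZSTD_compressionParameters const cp =
            ZSTD_getCParams(level, (unsigned long long)srcSize, 0 /* no dict */);
        size_t const r = ZSTD_CCtx_setCParams(cctx, cp);
        if (ZSTD_isError(r)) return r;   /* on failure, nothing was updated */
        return ZSTD_compress2(cctx, dst, dstCapacity, src, srcSize);
    }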
/*! ZSTD_compress_advanced() :
* Note : this function is now DEPRECATED.
* It can be replaced by ZSTD_compress2(), in combination with ZSTD_CCtx_setParameter() and other parameter setters.
@@ -2134,7 +2181,7 @@ ZSTDLIB_STATIC_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const vo
* This parameter can be used to set an upper bound on the blocksize
* that overrides the default ZSTD_BLOCKSIZE_MAX. It cannot be used to set upper
* bounds greater than ZSTD_BLOCKSIZE_MAX or bounds lower than 1KB (will make
- * compressBound() innacurate). Only currently meant to be used for testing.
+ * compressBound() inaccurate). Only currently meant to be used for testing.
*
*/
#define ZSTD_c_maxBlockSize ZSTD_c_experimentalParam18
@@ -2452,12 +2499,9 @@ size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs,
int compressionLevel);
/*! ZSTD_initCStream_advanced() :
- * This function is DEPRECATED, and is approximately equivalent to:
+ * This function is DEPRECATED, and is equivalent to:
* ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
- * // Pseudocode: Set each zstd parameter and leave the rest as-is.
- * for ((param, value) : params) {
- * ZSTD_CCtx_setParameter(zcs, param, value);
- * }
+ * ZSTD_CCtx_setParams(zcs, params);
* ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize);
* ZSTD_CCtx_loadDictionary(zcs, dict, dictSize);
*
@@ -2486,12 +2530,9 @@ ZSTDLIB_STATIC_API
size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict);
/*! ZSTD_initCStream_usingCDict_advanced() :
- * This function is DEPRECATED, and is approximately equivalent to:
+ * This function is DEPRECATED, and is equivalent to:
* ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
- * // Pseudocode: Set each zstd frame parameter and leave the rest as-is.
- * for ((fParam, value) : fParams) {
- * ZSTD_CCtx_setParameter(zcs, fParam, value);
- * }
+ * ZSTD_CCtx_setFParams(zcs, fParams);
* ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize);
* ZSTD_CCtx_refCDict(zcs, cdict);
*
@@ -2598,12 +2639,180 @@ ZSTD_DEPRECATED("use ZSTD_DCtx_reset, see zstd.h for detailed instructions")
ZSTDLIB_STATIC_API size_t ZSTD_resetDStream(ZSTD_DStream* zds);
+/* ********************* BLOCK-LEVEL SEQUENCE PRODUCER API *********************
+ *
+ * *** OVERVIEW ***
+ * The Block-Level Sequence Producer API allows users to provide their own custom
+ * sequence producer which libzstd invokes to process each block. The produced list
+ * of sequences (literals and matches) is then post-processed by libzstd to produce
+ * valid compressed blocks.
+ *
+ * This block-level offload API is a more granular complement of the existing
+ * frame-level offload API compressSequences() (introduced in v1.5.1). It offers
+ * an easier migration story for applications already integrated with libzstd: the
+ * user application continues to invoke the same compression functions
+ * ZSTD_compress2() or ZSTD_compressStream2() as usual, and transparently benefits
+ * from the specific advantages of the external sequence producer. For example,
+ * the sequence producer could be tuned to take advantage of known characteristics
+ * of the input, to offer better speed / ratio, or could leverage hardware
+ * acceleration not available within libzstd itself.
+ *
+ * See contrib/externalSequenceProducer for an example program employing the
+ * Block-Level Sequence Producer API.
+ *
+ * *** USAGE ***
+ * The user is responsible for implementing a function of type
+ * ZSTD_sequenceProducer_F. For each block, zstd will pass the following
+ * arguments to the user-provided function:
+ *
+ * - sequenceProducerState: a pointer to a user-managed state for the sequence
+ * producer.
+ *
+ * - outSeqs, outSeqsCapacity: an output buffer for the sequence producer.
+ * outSeqsCapacity is guaranteed >= ZSTD_sequenceBound(srcSize). The memory
+ * backing outSeqs is managed by the CCtx.
+ *
+ * - src, srcSize: an input buffer for the sequence producer to parse.
+ * srcSize is guaranteed to be <= ZSTD_BLOCKSIZE_MAX.
+ *
+ * - dict, dictSize: a history buffer, which may be empty, which the sequence
+ * producer may reference as it parses the src buffer. Currently, zstd will
+ * always pass dictSize == 0 into external sequence producers, but this will
+ * change in the future.
+ *
+ * - compressionLevel: a signed integer representing the zstd compression level
+ * set by the user for the current operation. The sequence producer may choose
+ * to use this information to change its compression strategy and speed/ratio
+ * tradeoff. Note: the compression level does not reflect zstd parameters set
+ * through the advanced API.
+ *
+ * - windowSize: a size_t representing the maximum allowed offset for external
+ * sequences. Note that sequence offsets are sometimes allowed to exceed the
+ * windowSize if a dictionary is present, see doc/zstd_compression_format.md
+ * for details.
+ *
+ * The user-provided function shall return a size_t representing the number of
+ * sequences written to outSeqs. Any value greater than outSeqsCapacity will be
+ * treated as an error code; the ZSTD_SEQUENCE_PRODUCER_ERROR macro is provided
+ * for convenience. The return value must be non-zero if srcSize is non-zero.
+ *
+ * If the user-provided function does not return an error code, the sequences
+ * written to outSeqs must be a valid parse of the src buffer. Data corruption may
+ * occur if the parse is not valid. A parse is defined to be valid if the
+ * following conditions hold:
+ * - The sum of matchLengths and literalLengths must equal srcSize.
+ * - All sequences in the parse, except for the final sequence, must have
+ * matchLength >= ZSTD_MINMATCH_MIN. The final sequence must have
+ * matchLength >= ZSTD_MINMATCH_MIN or matchLength == 0.
+ * - All offsets must respect the windowSize parameter as specified in
+ * doc/zstd_compression_format.md.
+ * - If the final sequence has matchLength == 0, it must also have offset == 0.
+ *
+ * zstd will only validate these conditions (and fail compression if they do not
+ * hold) if the ZSTD_c_validateSequences cParam is enabled. Note that sequence
+ * validation has a performance cost.
+ *
+ * If the user-provided function returns an error, zstd will either fall back
+ * to an internal sequence producer or fail the compression operation. The user can
+ * choose between the two behaviors by setting the ZSTD_c_enableSeqProducerFallback
+ * cParam. Fallback compression will follow any other cParam settings, such as
+ * compression level, the same as in a normal compression operation.
+ *
+ * The user shall instruct zstd to use a particular ZSTD_sequenceProducer_F
+ * function by calling
+ * ZSTD_registerSequenceProducer(cctx,
+ * sequenceProducerState,
+ * sequenceProducer)
+ * This setting will persist until the next parameter reset of the CCtx.
+ *
+ * The sequenceProducerState must be initialized by the user before calling
+ * ZSTD_registerSequenceProducer(). The user is responsible for destroying the
+ * sequenceProducerState.
+ *
+ * *** LIMITATIONS ***
+ * This API is compatible with all zstd compression APIs which respect advanced parameters.
+ * However, there are three limitations:
+ *
+ * First, the ZSTD_c_enableLongDistanceMatching cParam is not currently supported.
+ * COMPRESSION WILL FAIL if it is enabled and the user tries to compress with a block-level
+ * external sequence producer.
+ * - Note that ZSTD_c_enableLongDistanceMatching is auto-enabled by default in some
+ * cases (see its documentation for details). Users must explicitly set
+ * ZSTD_c_enableLongDistanceMatching to ZSTD_ps_disable in such cases if an external
+ * sequence producer is registered.
+ * - As of this writing, ZSTD_c_enableLongDistanceMatching is disabled by default
+ *   whenever the window size (derived from ZSTD_c_windowLog) is below 128MB, but that's subject to change. Users should
+ * check the docs on ZSTD_c_enableLongDistanceMatching whenever the Block-Level Sequence
+ * Producer API is used in conjunction with advanced settings (like ZSTD_c_windowLog).
+ *
+ * Second, history buffers are not currently supported. Concretely, zstd will always pass
+ * dictSize == 0 to the external sequence producer (for now). This has two implications:
+ * - Dictionaries are not currently supported. Compression will *not* fail if the user
+ * references a dictionary, but the dictionary won't have any effect.
+ * - Stream history is not currently supported. All advanced compression APIs, including
+ * streaming APIs, work with external sequence producers, but each block is treated as
+ * an independent chunk without history from previous blocks.
+ *
+ * Third, multi-threading within a single compression is not currently supported. In other words,
+ * COMPRESSION WILL FAIL if ZSTD_c_nbWorkers > 0 and an external sequence producer is registered.
+ * Multi-threading across compressions is fine: simply create one CCtx per thread.
+ *
+ * Long-term, we plan to overcome all three limitations. There is no technical blocker to
+ * overcoming them. It is purely a question of engineering effort.
+ */
+
+#define ZSTD_SEQUENCE_PRODUCER_ERROR ((size_t)(-1))
+
+typedef size_t ZSTD_sequenceProducer_F (
+ void* sequenceProducerState,
+ ZSTD_Sequence* outSeqs, size_t outSeqsCapacity,
+ const void* src, size_t srcSize,
+ const void* dict, size_t dictSize,
+ int compressionLevel,
+ size_t windowSize
+);
+
+/*! ZSTD_registerSequenceProducer() :
+ * Instruct zstd to use a block-level external sequence producer function.
+ *
+ * The sequenceProducerState must be initialized by the caller, and the caller is
+ * responsible for managing its lifetime. This parameter is sticky across
+ * compressions. It will remain set until the user explicitly resets compression
+ * parameters.
+ *
+ * Sequence producer registration is considered to be an "advanced parameter",
+ * part of the "advanced API". This means it will only have an effect on compression
+ * APIs which respect advanced parameters, such as compress2() and compressStream2().
+ * Older compression APIs such as compressCCtx(), which predate the introduction of
+ * "advanced parameters", will ignore any external sequence producer setting.
+ *
+ * The sequence producer can be "cleared" by registering a NULL function pointer. This
+ * removes all limitations described above in the "LIMITATIONS" section of the API docs.
+ *
+ * The user is strongly encouraged to read the full API documentation (above) before
+ * calling this function. */
+ZSTDLIB_STATIC_API void
+ZSTD_registerSequenceProducer(
+ ZSTD_CCtx* cctx,
+ void* sequenceProducerState,
+ ZSTD_sequenceProducer_F* sequenceProducer
+);
+
+
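A hedged end-to-end sketch (allLiterals_producer is a hypothetical example, deliberately trivial): emitting the whole block as a single all-literals sequence is a valid parse, since litLength sums to srcSize and the final sequence has matchLength == 0 and offset == 0.

    #define ZSTD_STATIC_LINKING_ONLY
    #include <zstd.h>

    /* Trivial producer: one sequence describing the whole block as literals. */
    static size_t allLiterals_producer(void* sequenceProducerState,
        ZSTD_Sequence* outSeqs, size_t outSeqsCapacity,
        const void* src, size_t srcSize,
        const void* dict, size_t dictSize,
        int compressionLevel, size_t windowSize)
    {
        (void)sequenceProducerState; (void)src; (void)dict; (void)dictSize;
        (void)compressionLevel; (void)windowSize;
        if (outSeqsCapacity < 1) return ZSTD_SEQUENCE_PRODUCER_ERROR;
        outSeqs[0].offset      = 0;
        outSeqs[0].litLength   = (unsigned)srcSize;  /* srcSize <= ZSTD_BLOCKSIZE_MAX */
        outSeqs[0].matchLength = 0;
        outSeqs[0].rep         = 0;
        return 1;
    }

    /* Registration is sticky until the next parameter reset:
     *   ZSTD_registerSequenceProducer(cctx, NULL, allLiterals_producer); */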
/*********************************************************************
-* Buffer-less and synchronous inner streaming functions
+* Buffer-less and synchronous inner streaming functions (DEPRECATED)
*
-* This is an advanced API, giving full control over buffer management, for users which need direct control over memory.
-* But it's also a complex one, with several restrictions, documented below.
-* Prefer normal streaming API for an easier experience.
+* This API is deprecated, and will be removed in a future version.
+* It allows streaming (de)compression with user allocated buffers.
+* However, it is hard to use, and not as well tested as the rest of
+* our API.
+*
+* Please use the normal streaming API instead: ZSTD_compressStream2,
+* and ZSTD_decompressStream.
+* If you need functionality that it does not provide,
+* please open an issue on our GitHub.
********************************************************************* */
/**
@@ -2636,15 +2845,20 @@ ZSTDLIB_STATIC_API size_t ZSTD_resetDStream(ZSTD_DStream* zds);
*/
/*===== Buffer-less streaming compression functions =====*/
+ZSTD_DEPRECATED("The buffer-less API is deprecated in favor of the normal streaming API. See docs.")
ZSTDLIB_STATIC_API size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel);
+ZSTD_DEPRECATED("The buffer-less API is deprecated in favor of the normal streaming API. See docs.")
ZSTDLIB_STATIC_API size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel);
+ZSTD_DEPRECATED("The buffer-less API is deprecated in favor of the normal streaming API. See docs.")
ZSTDLIB_STATIC_API size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict); /**< note: fails if cdict==NULL */
ZSTD_DEPRECATED("This function will likely be removed in a future release. It is misleading and has very limited utility.")
ZSTDLIB_STATIC_API
size_t ZSTD_copyCCtx(ZSTD_CCtx* cctx, const ZSTD_CCtx* preparedCCtx, unsigned long long pledgedSrcSize); /**< note: if pledgedSrcSize is not known, use ZSTD_CONTENTSIZE_UNKNOWN */
+ZSTD_DEPRECATED("The buffer-less API is deprecated in favor of the normal streaming API. See docs.")
ZSTDLIB_STATIC_API size_t ZSTD_compressContinue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
+ZSTD_DEPRECATED("The buffer-less API is deprecated in favor of the normal streaming API. See docs.")
ZSTDLIB_STATIC_API size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
/* The ZSTD_compressBegin_advanced() and ZSTD_compressBegin_usingCDict_advanced() are now DEPRECATED and will generate a compiler warning */
@@ -2728,29 +2942,7 @@ size_t ZSTD_compressBegin_usingCDict_advanced(ZSTD_CCtx* const cctx, const ZSTD_
*/
/*===== Buffer-less streaming decompression functions =====*/
-typedef enum { ZSTD_frame, ZSTD_skippableFrame } ZSTD_frameType_e;
-typedef struct {
- unsigned long long frameContentSize; /* if == ZSTD_CONTENTSIZE_UNKNOWN, it means this field is not available. 0 means "empty" */
- unsigned long long windowSize; /* can be very large, up to <= frameContentSize */
- unsigned blockSizeMax;
- ZSTD_frameType_e frameType; /* if == ZSTD_skippableFrame, frameContentSize is the size of skippable content */
- unsigned headerSize;
- unsigned dictID;
- unsigned checksumFlag;
- unsigned _reserved1;
- unsigned _reserved2;
-} ZSTD_frameHeader;
-/*! ZSTD_getFrameHeader() :
- * decode Frame Header, or requires larger `srcSize`.
- * @return : 0, `zfhPtr` is correctly filled,
- * >0, `srcSize` is too small, value is wanted `srcSize` amount,
- * or an error code, which can be tested using ZSTD_isError() */
-ZSTDLIB_STATIC_API size_t ZSTD_getFrameHeader(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize); /**< doesn't consume input */
-/*! ZSTD_getFrameHeader_advanced() :
- * same as ZSTD_getFrameHeader(),
- * with added capability to select a format (like ZSTD_f_zstd1_magicless) */
-ZSTDLIB_STATIC_API size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize, ZSTD_format_e format);
ZSTDLIB_STATIC_API size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long long frameContentSize); /**< when frame content size is not known, pass in frameContentSize == ZSTD_CONTENTSIZE_UNKNOWN */
ZSTDLIB_STATIC_API size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx);
@@ -2769,11 +2961,23 @@ ZSTDLIB_STATIC_API ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx);
-/* ============================ */
-/** Block level API */
-/* ============================ */
+/* ========================================= */
+/** Block level API (DEPRECATED) */
+/* ========================================= */
/*!
+
+ This API is deprecated in favor of the regular compression API.
+ You can get the frame header down to 2 bytes by setting:
+ - ZSTD_c_format = ZSTD_f_zstd1_magicless
+ - ZSTD_c_contentSizeFlag = 0
+ - ZSTD_c_checksumFlag = 0
+ - ZSTD_c_dictIDFlag = 0
+
+ This API is not as well tested as our normal API, so we recommend not using it.
+ We will be removing it in a future version. If the normal API doesn't provide
+ the functionality you need, please open a GitHub issue.
+
Block functions produce and decode raw zstd blocks, without frame metadata.
Frame metadata cost is typically ~12 bytes, which can be non-negligible for very small blocks (< 100 bytes).
 But users must then manage the metadata needed to regenerate the data, such as compressed and content sizes.
@@ -2800,173 +3004,15 @@ ZSTDLIB_STATIC_API ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx);
*/
/*===== Raw zstd block functions =====*/
+ZSTD_DEPRECATED("The block API is deprecated in favor of the normal compression API. See docs.")
ZSTDLIB_STATIC_API size_t ZSTD_getBlockSize (const ZSTD_CCtx* cctx);
+ZSTD_DEPRECATED("The block API is deprecated in favor of the normal compression API. See docs.")
ZSTDLIB_STATIC_API size_t ZSTD_compressBlock (ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
+ZSTD_DEPRECATED("The block API is deprecated in favor of the normal compression API. See docs.")
ZSTDLIB_STATIC_API size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
+ZSTD_DEPRECATED("The block API is deprecated in favor of the normal compression API. See docs.")
ZSTDLIB_STATIC_API size_t ZSTD_insertBlock (ZSTD_DCtx* dctx, const void* blockStart, size_t blockSize); /**< insert uncompressed block into `dctx` history. Useful for multi-blocks decompression. */
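
As a concrete sketch of the migration described above: the regular compression API, with the four parameters set as listed, shrinks the frame header to ~2 bytes (cctx/dctx creation and the dst/src buffers are assumed to exist in the caller):

    /* compression side: minimal frame header instead of raw blocks */
    ZSTD_CCtx_setParameter(cctx, ZSTD_c_format, ZSTD_f_zstd1_magicless);
    ZSTD_CCtx_setParameter(cctx, ZSTD_c_contentSizeFlag, 0);
    ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 0);
    ZSTD_CCtx_setParameter(cctx, ZSTD_c_dictIDFlag, 0);
    {   size_t const cSize = ZSTD_compress2(cctx, dst, dstCapacity, src, srcSize);
        if (ZSTD_isError(cSize)) { /* handle error */ }
    }
    /* decompression side must select the same magicless format */
    ZSTD_DCtx_setParameter(dctx, ZSTD_d_format, ZSTD_f_zstd1_magicless);
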
-
-/* ********************* BLOCK-LEVEL SEQUENCE PRODUCER API *********************
- *
- * *** OVERVIEW ***
- * The Block-Level Sequence Producer API allows users to provide their own custom
- * sequence producer which libzstd invokes to process each block. The produced list
- * of sequences (literals and matches) is then post-processed by libzstd to produce
- * valid compressed blocks.
- *
- * This block-level offload API is a more granular complement of the existing
- * frame-level offload API compressSequences() (introduced in v1.5.1). It offers
- * an easier migration story for applications already integrated with libzstd: the
- * user application continues to invoke the same compression functions
- * ZSTD_compress2() or ZSTD_compressStream2() as usual, and transparently benefits
- * from the specific advantages of the external sequence producer. For example,
- * the sequence producer could be tuned to take advantage of known characteristics
- * of the input, to offer better speed / ratio, or could leverage hardware
- * acceleration not available within libzstd itself.
- *
- * See contrib/externalSequenceProducer for an example program employing the
- * Block-Level Sequence Producer API.
- *
- * *** USAGE ***
- * The user is responsible for implementing a function of type
- * ZSTD_sequenceProducer_F. For each block, zstd will pass the following
- * arguments to the user-provided function:
- *
- * - sequenceProducerState: a pointer to a user-managed state for the sequence
- * producer.
- *
- * - outSeqs, outSeqsCapacity: an output buffer for the sequence producer.
- * outSeqsCapacity is guaranteed >= ZSTD_sequenceBound(srcSize). The memory
- * backing outSeqs is managed by the CCtx.
- *
- * - src, srcSize: an input buffer for the sequence producer to parse.
- * srcSize is guaranteed to be <= ZSTD_BLOCKSIZE_MAX.
- *
- * - dict, dictSize: a history buffer, which may be empty, which the sequence
- * producer may reference as it parses the src buffer. Currently, zstd will
- * always pass dictSize == 0 into external sequence producers, but this will
- * change in the future.
- *
- * - compressionLevel: a signed integer representing the zstd compression level
- * set by the user for the current operation. The sequence producer may choose
- * to use this information to change its compression strategy and speed/ratio
- * tradeoff. Note: the compression level does not reflect zstd parameters set
- * through the advanced API.
- *
- * - windowSize: a size_t representing the maximum allowed offset for external
- * sequences. Note that sequence offsets are sometimes allowed to exceed the
- * windowSize if a dictionary is present, see doc/zstd_compression_format.md
- * for details.
- *
- * The user-provided function shall return a size_t representing the number of
- * sequences written to outSeqs. This return value will be treated as an error
- * code if it is greater than outSeqsCapacity. The return value must be non-zero
- * if srcSize is non-zero. The ZSTD_SEQUENCE_PRODUCER_ERROR macro is provided
- * for convenience, but any value greater than outSeqsCapacity will be treated as
- * an error code.
- *
- * If the user-provided function does not return an error code, the sequences
- * written to outSeqs must be a valid parse of the src buffer. Data corruption may
- * occur if the parse is not valid. A parse is defined to be valid if the
- * following conditions hold:
- * - The sum of matchLengths and literalLengths must equal srcSize.
- * - All sequences in the parse, except for the final sequence, must have
- * matchLength >= ZSTD_MINMATCH_MIN. The final sequence must have
- * matchLength >= ZSTD_MINMATCH_MIN or matchLength == 0.
- * - All offsets must respect the windowSize parameter as specified in
- * doc/zstd_compression_format.md.
- * - If the final sequence has matchLength == 0, it must also have offset == 0.
- *
- * zstd will only validate these conditions (and fail compression if they do not
- * hold) if the ZSTD_c_validateSequences cParam is enabled. Note that sequence
- * validation has a performance cost.
- *
- * If the user-provided function returns an error, zstd will either fall back
- * to an internal sequence producer or fail the compression operation. The user can
- * choose between the two behaviors by setting the ZSTD_c_enableSeqProducerFallback
- * cParam. Fallback compression will follow any other cParam settings, such as
- * compression level, the same as in a normal compression operation.
- *
- * The user shall instruct zstd to use a particular ZSTD_sequenceProducer_F
- * function by calling
- * ZSTD_registerSequenceProducer(cctx,
- * sequenceProducerState,
- * sequenceProducer)
- * This setting will persist until the next parameter reset of the CCtx.
- *
- * The sequenceProducerState must be initialized by the user before calling
- * ZSTD_registerSequenceProducer(). The user is responsible for destroying the
- * sequenceProducerState.
- *
- * *** LIMITATIONS ***
- * This API is compatible with all zstd compression APIs which respect advanced parameters.
- * However, there are three limitations:
- *
- * First, the ZSTD_c_enableLongDistanceMatching cParam is not currently supported.
- * COMPRESSION WILL FAIL if it is enabled and the user tries to compress with a block-level
- * external sequence producer.
- * - Note that ZSTD_c_enableLongDistanceMatching is auto-enabled by default in some
- * cases (see its documentation for details). Users must explicitly set
- * ZSTD_c_enableLongDistanceMatching to ZSTD_ps_disable in such cases if an external
- * sequence producer is registered.
- * - As of this writing, ZSTD_c_enableLongDistanceMatching is disabled by default
- * whenever ZSTD_c_windowLog < 128MB, but that's subject to change. Users should
- * check the docs on ZSTD_c_enableLongDistanceMatching whenever the Block-Level Sequence
- * Producer API is used in conjunction with advanced settings (like ZSTD_c_windowLog).
- *
- * Second, history buffers are not currently supported. Concretely, zstd will always pass
- * dictSize == 0 to the external sequence producer (for now). This has two implications:
- * - Dictionaries are not currently supported. Compression will *not* fail if the user
- * references a dictionary, but the dictionary won't have any effect.
- * - Stream history is not currently supported. All advanced compression APIs, including
- * streaming APIs, work with external sequence producers, but each block is treated as
- * an independent chunk without history from previous blocks.
- *
- * Third, multi-threading within a single compression is not currently supported. In other words,
- * COMPRESSION WILL FAIL if ZSTD_c_nbWorkers > 0 and an external sequence producer is registered.
- * Multi-threading across compressions is fine: simply create one CCtx per thread.
- *
- * Long-term, we plan to overcome all three limitations. There is no technical blocker to
- * overcoming them. It is purely a question of engineering effort.
- */
-
-#define ZSTD_SEQUENCE_PRODUCER_ERROR ((size_t)(-1))
-
-typedef size_t ZSTD_sequenceProducer_F (
- void* sequenceProducerState,
- ZSTD_Sequence* outSeqs, size_t outSeqsCapacity,
- const void* src, size_t srcSize,
- const void* dict, size_t dictSize,
- int compressionLevel,
- size_t windowSize
-);
-
-/*! ZSTD_registerSequenceProducer() :
- * Instruct zstd to use a block-level external sequence producer function.
- *
- * The sequenceProducerState must be initialized by the caller, and the caller is
- * responsible for managing its lifetime. This parameter is sticky across
- * compressions. It will remain set until the user explicitly resets compression
- * parameters.
- *
- * Sequence producer registration is considered to be an "advanced parameter",
- * part of the "advanced API". This means it will only have an effect on compression
- * APIs which respect advanced parameters, such as compress2() and compressStream2().
- * Older compression APIs such as compressCCtx(), which predate the introduction of
- * "advanced parameters", will ignore any external sequence producer setting.
- *
- * The sequence producer can be "cleared" by registering a NULL function pointer. This
- * removes all limitations described above in the "LIMITATIONS" section of the API docs.
- *
- * The user is strongly encouraged to read the full API documentation (above) before
- * calling this function. */
-ZSTDLIB_STATIC_API void
-ZSTD_registerSequenceProducer(
- ZSTD_CCtx* cctx,
- void* sequenceProducerState,
- ZSTD_sequenceProducer_F* sequenceProducer
-);
-
#endif /* ZSTD_H_ZSTD_STATIC_LINKING_ONLY */
#if defined (__cplusplus)
diff --git a/contrib/libs/zstd/programs/benchzstd.c b/contrib/libs/zstd/programs/benchzstd.c
index 63ecd99d51..1c809086db 100644
--- a/contrib/libs/zstd/programs/benchzstd.c
+++ b/contrib/libs/zstd/programs/benchzstd.c
@@ -697,9 +697,9 @@ static BMK_benchOutcome_t BMK_benchCLevel(const void* srcBuffer, size_t benchedS
displayLevel, displayName, adv);
}
-BMK_benchOutcome_t BMK_syntheticTest(int cLevel, double compressibility,
- const ZSTD_compressionParameters* compressionParams,
- int displayLevel, const BMK_advancedParams_t* adv)
+int BMK_syntheticTest(int cLevel, double compressibility,
+ const ZSTD_compressionParameters* compressionParams,
+ int displayLevel, const BMK_advancedParams_t* adv)
{
char name[20] = {0};
size_t const benchedSize = 10000000;
@@ -707,12 +707,16 @@ BMK_benchOutcome_t BMK_syntheticTest(int cLevel, double compressibility,
BMK_benchOutcome_t res;
if (cLevel > ZSTD_maxCLevel()) {
- RETURN_ERROR(15, BMK_benchOutcome_t, "Invalid Compression Level");
+ DISPLAYLEVEL(1, "Invalid Compression Level");
+ return 15;
}
/* Memory allocation */
srcBuffer = malloc(benchedSize);
- if (!srcBuffer) RETURN_ERROR(21, BMK_benchOutcome_t, "not enough memory");
+ if (!srcBuffer) {
+ DISPLAYLEVEL(1, "allocation error : not enough memory");
+ return 16;
+ }
/* Fill input buffer */
RDG_genBuffer(srcBuffer, benchedSize, compressibility, 0.0, 0);
@@ -728,7 +732,7 @@ BMK_benchOutcome_t BMK_syntheticTest(int cLevel, double compressibility,
/* clean up */
free(srcBuffer);
- return res;
+ return !BMK_isSuccessful_benchOutcome(res);
}
@@ -790,7 +794,7 @@ static int BMK_loadFiles(void* buffer, size_t bufferSize,
return 0;
}
-BMK_benchOutcome_t BMK_benchFilesAdvanced(
+int BMK_benchFilesAdvanced(
const char* const * fileNamesTable, unsigned nbFiles,
const char* dictFileName, int cLevel,
const ZSTD_compressionParameters* compressionParams,
@@ -805,19 +809,25 @@ BMK_benchOutcome_t BMK_benchFilesAdvanced(
U64 const totalSizeToLoad = UTIL_getTotalFileSize(fileNamesTable, nbFiles);
if (!nbFiles) {
- RETURN_ERROR(14, BMK_benchOutcome_t, "No Files to Benchmark");
+ DISPLAYLEVEL(1, "No Files to Benchmark");
+ return 13;
}
if (cLevel > ZSTD_maxCLevel()) {
- RETURN_ERROR(15, BMK_benchOutcome_t, "Invalid Compression Level");
+ DISPLAYLEVEL(1, "Invalid Compression Level");
+ return 14;
}
if (totalSizeToLoad == UTIL_FILESIZE_UNKNOWN) {
- RETURN_ERROR(9, BMK_benchOutcome_t, "Error loading files");
+ DISPLAYLEVEL(1, "Error loading files");
+ return 15;
}
fileSizes = (size_t*)calloc(nbFiles, sizeof(size_t));
- if (!fileSizes) RETURN_ERROR(12, BMK_benchOutcome_t, "not enough memory for fileSizes");
+ if (!fileSizes) {
+ DISPLAYLEVEL(1, "not enough memory for fileSizes");
+ return 16;
+ }
/* Load dictionary */
if (dictFileName != NULL) {
@@ -825,18 +835,21 @@ BMK_benchOutcome_t BMK_benchFilesAdvanced(
if (dictFileSize == UTIL_FILESIZE_UNKNOWN) {
DISPLAYLEVEL(1, "error loading %s : %s \n", dictFileName, strerror(errno));
free(fileSizes);
- RETURN_ERROR(9, BMK_benchOutcome_t, "benchmark aborted");
+ DISPLAYLEVEL(1, "benchmark aborted");
+ return 17;
}
if (dictFileSize > 64 MB) {
free(fileSizes);
- RETURN_ERROR(10, BMK_benchOutcome_t, "dictionary file %s too large", dictFileName);
+ DISPLAYLEVEL(1, "dictionary file %s too large", dictFileName);
+ return 18;
}
dictBufferSize = (size_t)dictFileSize;
dictBuffer = malloc(dictBufferSize);
if (dictBuffer==NULL) {
free(fileSizes);
- RETURN_ERROR(11, BMK_benchOutcome_t, "not enough memory for dictionary (%u bytes)",
+ DISPLAYLEVEL(1, "not enough memory for dictionary (%u bytes)",
(unsigned)dictBufferSize);
+ return 19;
}
{ int const errorCode = BMK_loadFiles(dictBuffer, dictBufferSize,
@@ -858,7 +871,8 @@ BMK_benchOutcome_t BMK_benchFilesAdvanced(
if (!srcBuffer) {
free(dictBuffer);
free(fileSizes);
- RETURN_ERROR(12, BMK_benchOutcome_t, "not enough memory");
+ DISPLAYLEVEL(1, "not enough memory for srcBuffer");
+ return 20;
}
/* Load input buffer */
@@ -886,12 +900,11 @@ _cleanUp:
free(srcBuffer);
free(dictBuffer);
free(fileSizes);
- return res;
+ return !BMK_isSuccessful_benchOutcome(res);
}
-BMK_benchOutcome_t BMK_benchFiles(
- const char* const * fileNamesTable, unsigned nbFiles,
+int BMK_benchFiles(const char* const * fileNamesTable, unsigned nbFiles,
const char* dictFileName,
int cLevel, const ZSTD_compressionParameters* compressionParams,
int displayLevel)
diff --git a/contrib/libs/zstd/programs/benchzstd.h b/contrib/libs/zstd/programs/benchzstd.h
index aa683dfc25..f14a681925 100644
--- a/contrib/libs/zstd/programs/benchzstd.h
+++ b/contrib/libs/zstd/programs/benchzstd.h
@@ -81,21 +81,13 @@ BMK_benchResult_t BMK_extract_benchResult(BMK_benchOutcome_t outcome);
* 2 : + result + interaction + warnings;
* 3 : + information;
* 4 : + debug
- * @return:
- * a variant, which expresses either an error, or a valid result.
- * Use BMK_isSuccessful_benchOutcome() to check if function was successful.
- * If yes, extract the valid result with BMK_extract_benchResult(),
- * it will contain :
- * .cSpeed: compression speed in bytes per second,
- * .dSpeed: decompression speed in bytes per second,
- * .cSize : compressed size, in bytes
- * .cMem : memory budget required for the compression context
+ * @return: 0 on success, !0 on error
*/
-BMK_benchOutcome_t BMK_benchFiles(
- const char* const * fileNamesTable, unsigned nbFiles,
- const char* dictFileName,
- int cLevel, const ZSTD_compressionParameters* compressionParams,
- int displayLevel);
+int BMK_benchFiles(
+ const char* const * fileNamesTable, unsigned nbFiles,
+ const char* dictFileName,
+ int cLevel, const ZSTD_compressionParameters* compressionParams,
+ int displayLevel);
typedef enum {
@@ -126,11 +118,11 @@ BMK_advancedParams_t BMK_initAdvancedParams(void);
/*! BMK_benchFilesAdvanced():
* Same as BMK_benchFiles(),
* with more controls, provided through advancedParams_t structure */
-BMK_benchOutcome_t BMK_benchFilesAdvanced(
- const char* const * fileNamesTable, unsigned nbFiles,
- const char* dictFileName,
- int cLevel, const ZSTD_compressionParameters* compressionParams,
- int displayLevel, const BMK_advancedParams_t* adv);
+int BMK_benchFilesAdvanced(
+ const char* const * fileNamesTable, unsigned nbFiles,
+ const char* dictFileName,
+ int cLevel, const ZSTD_compressionParameters* compressionParams,
+ int displayLevel, const BMK_advancedParams_t* adv);
/*! BMK_syntheticTest() -- called from zstdcli */
/* Generates a sample with datagen, using compressibility argument */
@@ -139,20 +131,11 @@ BMK_benchOutcome_t BMK_benchFilesAdvanced(
* compressionParams - basic compression Parameters
* displayLevel - see benchFiles
 * adv - see BMK_advancedParams_t
- * @return:
- * a variant, which expresses either an error, or a valid result.
- * Use BMK_isSuccessful_benchOutcome() to check if function was successful.
- * If yes, extract the valid result with BMK_extract_benchResult(),
- * it will contain :
- * .cSpeed: compression speed in bytes per second,
- * .dSpeed: decompression speed in bytes per second,
- * .cSize : compressed size, in bytes
- * .cMem : memory budget required for the compression context
+ * @return: 0 on success, !0 on error
*/
-BMK_benchOutcome_t BMK_syntheticTest(
- int cLevel, double compressibility,
- const ZSTD_compressionParameters* compressionParams,
- int displayLevel, const BMK_advancedParams_t* adv);
+int BMK_syntheticTest(int cLevel, double compressibility,
+ const ZSTD_compressionParameters* compressionParams,
+ int displayLevel, const BMK_advancedParams_t* adv);
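
With the new signatures, call sites branch directly on the int result; a hypothetical caller (cParams being a caller-provided ZSTD_compressionParameters):

    {   BMK_advancedParams_t const adv = BMK_initAdvancedParams();
        if (BMK_syntheticTest(3 /* cLevel */, 0.5 /* compressibility */,
                              &cParams, 2 /* displayLevel */, &adv)) {
            /* non-zero: benchmark failed; diagnostic already printed at level 1 */
        }
    }
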
@@ -190,8 +173,8 @@ BMK_benchOutcome_t BMK_benchMem(const void* srcBuffer, size_t srcSize,
int displayLevel, const char* displayName);
-/* BMK_benchMemAdvanced() : same as BMK_benchMem()
- * with following additional options :
+/* BMK_benchMemAdvanced() : used by Paramgrill
+ * same as BMK_benchMem() with the following additional options :
* dstBuffer - destination buffer to write compressed output in, NULL if none provided.
* dstCapacity - capacity of destination buffer, give 0 if dstBuffer = NULL
* adv = see advancedParams_t
diff --git a/contrib/libs/zstd/programs/fileio.c b/contrib/libs/zstd/programs/fileio.c
index 3b885bc65f..546fd35622 100644
--- a/contrib/libs/zstd/programs/fileio.c
+++ b/contrib/libs/zstd/programs/fileio.c
@@ -485,6 +485,11 @@ void FIO_setPassThroughFlag(FIO_prefs_t* const prefs, int value) {
prefs->passThrough = (value != 0);
}
+void FIO_setMMapDict(FIO_prefs_t* const prefs, ZSTD_paramSwitch_e value)
+{
+ prefs->mmapDict = value;
+}
+
/* FIO_ctx_t functions */
void FIO_setHasStdoutOutput(FIO_ctx_t* const fCtx, int value) {
@@ -576,6 +581,8 @@ FIO_openDstFile(FIO_ctx_t* fCtx, FIO_prefs_t* const prefs,
const char* srcFileName, const char* dstFileName,
const int mode)
{
+ int isDstRegFile;
+
if (prefs->testMode) return NULL; /* do not open file in test mode */
assert(dstFileName != NULL);
@@ -595,11 +602,16 @@ FIO_openDstFile(FIO_ctx_t* fCtx, FIO_prefs_t* const prefs,
return NULL;
}
+ isDstRegFile = UTIL_isRegularFile(dstFileName); /* invoke once */
if (prefs->sparseFileSupport == 1) {
prefs->sparseFileSupport = ZSTD_SPARSE_DEFAULT;
+ if (!isDstRegFile) {
+ prefs->sparseFileSupport = 0;
+ DISPLAYLEVEL(4, "Sparse File Support is disabled when output is not a file \n");
+ }
}
- if (UTIL_isRegularFile(dstFileName)) {
+ if (isDstRegFile) {
/* Check if destination file already exists */
#if !defined(_WIN32)
/* this test does not work on Windows :
@@ -644,32 +656,55 @@ FIO_openDstFile(FIO_ctx_t* fCtx, FIO_prefs_t* const prefs,
#endif
if (f == NULL) {
DISPLAYLEVEL(1, "zstd: %s: %s\n", dstFileName, strerror(errno));
+ } else {
+ /* An increased buffer size can provide a significant performance
+ * boost on some platforms. Note that providing a NULL buf with a
+ * size that's not 0 is not defined in ANSI C, but is defined in an
+ * extension. There are three possibilities here:
+ * 1. Libc supports the extended version and everything is good.
+ * 2. Libc ignores the size when buf is NULL, in which case
+ * everything will continue as if we didn't call `setvbuf()`.
+ * 3. We fail the call and execution continues but a warning
+ * message might be shown.
+ * In all cases, execution continues. For now, I believe that
+ * this is a more cost-effective solution than managing the buffer
+ * allocations ourselves (which would require an API change).
+ */
+ if (setvbuf(f, NULL, _IOFBF, 1 MB)) {
+ DISPLAYLEVEL(2, "Warning: setvbuf failed for %s\n", dstFileName);
+ }
}
- /* An increased buffer size can provide a significant performance boost on some platforms.
- * Note that providing a NULL buf with a size that's not 0 is not defined in ANSI C, but is defined
- * in an extension. There are three possibilities here -
- * 1. Libc supports the extended version and everything is good.
- * 2. Libc ignores the size when buf is NULL, in which case everything will continue as if we didn't
- * call `setvbuf`.
- * 3. We fail the call and execution continues but a warning message might be shown.
- * In all cases due execution continues. For now, I believe that this is a more cost-effective
- * solution than managing the buffers allocations ourselves (will require an API change). */
- if(setvbuf(f, NULL, _IOFBF, 1 MB))
- DISPLAYLEVEL(2, "Warning: setvbuf failed for %s\n", dstFileName);
return f;
}
}
-/*! FIO_createDictBuffer() :
- * creates a buffer, pointed by `*bufferPtr`,
+
+/* FIO_getDictFileStat() :
+ * checks that `fileName` designates a regular file, and fills `dictFileStat`.
+ * Does nothing if fileName==NULL.
+ */
+static void FIO_getDictFileStat(const char* fileName, stat_t* dictFileStat) {
+ assert(dictFileStat != NULL);
+ if (fileName == NULL) return;
+
+ if (!UTIL_stat(fileName, dictFileStat)) {
+ EXM_THROW(31, "Stat failed on dictionary file %s: %s", fileName, strerror(errno));
+ }
+
+ if (!UTIL_isRegularFileStat(dictFileStat)) {
+ EXM_THROW(32, "Dictionary %s must be a regular file.", fileName);
+ }
+}
+
+/* FIO_setDictBufferMalloc() :
+ * allocates a buffer, pointed to by `dict->dictBuffer`,
* loads `filename` content into it, up to DICTSIZE_MAX bytes.
* @return : loaded size
* if fileName==NULL, returns 0 and a NULL pointer
*/
-static size_t FIO_createDictBuffer(void** bufferPtr, const char* fileName, FIO_prefs_t* const prefs, stat_t* dictFileStat)
+static size_t FIO_setDictBufferMalloc(FIO_Dict_t* dict, const char* fileName, FIO_prefs_t* const prefs, stat_t* dictFileStat)
{
FILE* fileHandle;
U64 fileSize;
+ void** bufferPtr = &dict->dictBuffer;
assert(bufferPtr != NULL);
assert(dictFileStat != NULL);
@@ -678,14 +713,6 @@ static size_t FIO_createDictBuffer(void** bufferPtr, const char* fileName, FIO_p
DISPLAYLEVEL(4,"Loading %s as dictionary \n", fileName);
- if (!UTIL_stat(fileName, dictFileStat)) {
- EXM_THROW(31, "Stat failed on dictionary file %s: %s", fileName, strerror(errno));
- }
-
- if (!UTIL_isRegularFileStat(dictFileStat)) {
- EXM_THROW(32, "Dictionary %s must be a regular file.", fileName);
- }
-
fileHandle = fopen(fileName, "rb");
if (fileHandle == NULL) {
@@ -712,6 +739,130 @@ static size_t FIO_createDictBuffer(void** bufferPtr, const char* fileName, FIO_p
return (size_t)fileSize;
}
+#if (PLATFORM_POSIX_VERSION > 0)
+#include <sys/mman.h>
+static void FIO_munmap(FIO_Dict_t* dict)
+{
+ munmap(dict->dictBuffer, dict->dictBufferSize);
+ dict->dictBuffer = NULL;
+ dict->dictBufferSize = 0;
+}
+static size_t FIO_setDictBufferMMap(FIO_Dict_t* dict, const char* fileName, FIO_prefs_t* const prefs, stat_t* dictFileStat)
+{
+ int fileHandle;
+ U64 fileSize;
+ void** bufferPtr = &dict->dictBuffer;
+
+ assert(bufferPtr != NULL);
+ assert(dictFileStat != NULL);
+ *bufferPtr = NULL;
+ if (fileName == NULL) return 0;
+
+ DISPLAYLEVEL(4,"Loading %s as dictionary \n", fileName);
+
+ fileHandle = open(fileName, O_RDONLY);
+
+ if (fileHandle == -1) {
+ EXM_THROW(33, "Couldn't open dictionary %s: %s", fileName, strerror(errno));
+ }
+
+ fileSize = UTIL_getFileSizeStat(dictFileStat);
+ {
+ size_t const dictSizeMax = prefs->patchFromMode ? prefs->memLimit : DICTSIZE_MAX;
+ if (fileSize > dictSizeMax) {
+ EXM_THROW(34, "Dictionary file %s is too large (> %u bytes)",
+ fileName, (unsigned)dictSizeMax); /* avoid extreme cases */
+ }
+ }
+
+ *bufferPtr = mmap(NULL, (size_t)fileSize, PROT_READ, MAP_PRIVATE, fileHandle, 0);
+ if (*bufferPtr==MAP_FAILED) EXM_THROW(34, "%s", strerror(errno)); /* mmap() signals failure with MAP_FAILED, not NULL */
+
+ close(fileHandle);
+ return (size_t)fileSize;
+}
+#elif defined(_MSC_VER) || defined(_WIN32)
+#include <windows.h>
+static void FIO_munmap(FIO_Dict_t* dict)
+{
+ UnmapViewOfFile(dict->dictBuffer);
+ CloseHandle(dict->dictHandle);
+ dict->dictBuffer = NULL;
+ dict->dictBufferSize = 0;
+}
+static size_t FIO_setDictBufferMMap(FIO_Dict_t* dict, const char* fileName, FIO_prefs_t* const prefs, stat_t* dictFileStat)
+{
+ HANDLE fileHandle, mapping;
+ U64 fileSize;
+ void** bufferPtr = &dict->dictBuffer;
+
+ assert(bufferPtr != NULL);
+ assert(dictFileStat != NULL);
+ *bufferPtr = NULL;
+ if (fileName == NULL) return 0;
+
+ DISPLAYLEVEL(4,"Loading %s as dictionary \n", fileName);
+
+ fileHandle = CreateFileA(fileName, GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_READONLY, NULL);
+
+ if (fileHandle == INVALID_HANDLE_VALUE) {
+ EXM_THROW(33, "Couldn't open dictionary %s: %s", fileName, strerror(errno));
+ }
+
+ fileSize = UTIL_getFileSizeStat(dictFileStat);
+ {
+ size_t const dictSizeMax = prefs->patchFromMode ? prefs->memLimit : DICTSIZE_MAX;
+ if (fileSize > dictSizeMax) {
+ EXM_THROW(34, "Dictionary file %s is too large (> %u bytes)",
+ fileName, (unsigned)dictSizeMax); /* avoid extreme cases */
+ }
+ }
+
+ mapping = CreateFileMapping(fileHandle, NULL, PAGE_READONLY, 0, 0, NULL);
+ if (mapping == NULL) {
+ EXM_THROW(35, "Couldn't map dictionary %s: %s", fileName, strerror(errno));
+ }
+
+ *bufferPtr = MapViewOfFile(mapping, FILE_MAP_READ, 0, 0, (DWORD)fileSize); /* we can only cast to DWORD here because dictSize <= 2GB */
+ if (*bufferPtr==NULL) EXM_THROW(36, "%s", strerror(errno));
+
+ dict->dictHandle = fileHandle;
+ return (size_t)fileSize;
+}
+#else
+static size_t FIO_setDictBufferMMap(FIO_Dict_t* dict, const char* fileName, FIO_prefs_t* const prefs, stat_t* dictFileStat)
+{
+ return FIO_setDictBufferMalloc(dict, fileName, prefs, dictFileStat);
+}
+static void FIO_munmap(FIO_Dict_t* dict) {
+ free(dict->dictBuffer);
+ dict->dictBuffer = NULL;
+ dict->dictBufferSize = 0;
+}
+#endif
+
+static void FIO_freeDict(FIO_Dict_t* dict) {
+ if (dict->dictBufferType == FIO_mallocDict) {
+ free(dict->dictBuffer);
+ dict->dictBuffer = NULL;
+ dict->dictBufferSize = 0;
+ } else if (dict->dictBufferType == FIO_mmapDict) {
+ FIO_munmap(dict);
+ } else {
+ assert(0); /* Should not reach this case */
+ }
+}
+
+static void FIO_initDict(FIO_Dict_t* dict, const char* fileName, FIO_prefs_t* const prefs, stat_t* dictFileStat, FIO_dictBufferType_t dictBufferType) {
+ dict->dictBufferType = dictBufferType;
+ if (dict->dictBufferType == FIO_mallocDict) {
+ dict->dictBufferSize = FIO_setDictBufferMalloc(dict, fileName, prefs, dictFileStat);
+ } else if (dict->dictBufferType == FIO_mmapDict) {
+ dict->dictBufferSize = FIO_setDictBufferMMap(dict, fileName, prefs, dictFileStat);
+ } else {
+ assert(0); /* Should not reach this case */
+ }
+}
/* FIO_checkFilenameCollisions() :
@@ -914,8 +1065,7 @@ static ZSTD_outBuffer setOutBuffer(void* buf, size_t s, size_t pos)
* Compression
************************************************************************/
typedef struct {
- void* dictBuffer;
- size_t dictBufferSize;
+ FIO_Dict_t dict;
const char* dictFileName;
stat_t dictFileStat;
ZSTD_CStream* cctx;
@@ -961,6 +1111,9 @@ static void FIO_adjustParamsForPatchFromMode(FIO_prefs_t* const prefs,
static cRess_t FIO_createCResources(FIO_prefs_t* const prefs,
const char* dictFileName, unsigned long long const maxSrcFileSize,
int cLevel, ZSTD_compressionParameters comprParams) {
+ int useMMap = prefs->mmapDict == ZSTD_ps_enable;
+ int forceNoUseMMap = prefs->mmapDict == ZSTD_ps_disable;
+ FIO_dictBufferType_t dictBufferType;
cRess_t ress;
memset(&ress, 0, sizeof(ress));
@@ -970,19 +1123,25 @@ static cRess_t FIO_createCResources(FIO_prefs_t* const prefs,
EXM_THROW(30, "allocation error (%s): can't create ZSTD_CCtx",
strerror(errno));
+ FIO_getDictFileStat(dictFileName, &ress.dictFileStat);
+
/* need to update memLimit before calling createDictBuffer
* because of memLimit check inside it */
if (prefs->patchFromMode) {
+ U64 const dictSize = UTIL_getFileSizeStat(&ress.dictFileStat);
unsigned long long const ssSize = (unsigned long long)prefs->streamSrcSize;
- FIO_adjustParamsForPatchFromMode(prefs, &comprParams, UTIL_getFileSize(dictFileName), ssSize > 0 ? ssSize : maxSrcFileSize, cLevel);
+ useMMap |= dictSize > prefs->memLimit;
+ FIO_adjustParamsForPatchFromMode(prefs, &comprParams, dictSize, ssSize > 0 ? ssSize : maxSrcFileSize, cLevel);
}
- ress.dictBufferSize = FIO_createDictBuffer(&ress.dictBuffer, dictFileName, prefs, &ress.dictFileStat); /* works with dictFileName==NULL */
+
+ dictBufferType = (useMMap && !forceNoUseMMap) ? FIO_mmapDict : FIO_mallocDict;
+ FIO_initDict(&ress.dict, dictFileName, prefs, &ress.dictFileStat, dictBufferType); /* works with dictFileName==NULL */
ress.writeCtx = AIO_WritePool_create(prefs, ZSTD_CStreamOutSize());
ress.readCtx = AIO_ReadPool_create(prefs, ZSTD_CStreamInSize());
/* Advanced parameters, including dictionary */
- if (dictFileName && (ress.dictBuffer==NULL))
+ if (dictFileName && (ress.dict.dictBuffer==NULL))
EXM_THROW(32, "allocation error : can't create dictBuffer");
ress.dictFileName = dictFileName;
@@ -1032,17 +1191,17 @@ static cRess_t FIO_createCResources(FIO_prefs_t* const prefs,
#endif
/* dictionary */
if (prefs->patchFromMode) {
- CHECK( ZSTD_CCtx_refPrefix(ress.cctx, ress.dictBuffer, ress.dictBufferSize) );
+ CHECK( ZSTD_CCtx_refPrefix(ress.cctx, ress.dict.dictBuffer, ress.dict.dictBufferSize) );
} else {
- CHECK( ZSTD_CCtx_loadDictionary(ress.cctx, ress.dictBuffer, ress.dictBufferSize) );
+ CHECK( ZSTD_CCtx_loadDictionary_byReference(ress.cctx, ress.dict.dictBuffer, ress.dict.dictBufferSize) );
}
return ress;
}
-static void FIO_freeCResources(const cRess_t* const ress)
+static void FIO_freeCResources(cRess_t* const ress)
{
- free(ress->dictBuffer);
+ FIO_freeDict(&(ress->dict));
AIO_WritePool_free(ress->writeCtx);
AIO_ReadPool_free(ress->readCtx);
ZSTD_freeCStream(ress->cctx); /* never fails */
@@ -1173,8 +1332,8 @@ FIO_compressLzmaFrame(cRess_t* ress,
}
writeJob =AIO_WritePool_acquireJob(ress->writeCtx);
- strm.next_out = (Bytef*)writeJob->buffer;
- strm.avail_out = (uInt)writeJob->bufferSize;
+ strm.next_out = (BYTE*)writeJob->buffer;
+ strm.avail_out = writeJob->bufferSize;
strm.next_in = 0;
strm.avail_in = 0;
@@ -1201,7 +1360,7 @@ FIO_compressLzmaFrame(cRess_t* ress,
writeJob->usedBufferSize = compBytes;
AIO_WritePool_enqueueAndReacquireWriteJob(&writeJob);
outFileSize += compBytes;
- strm.next_out = (Bytef*)writeJob->buffer;
+ strm.next_out = (BYTE*)writeJob->buffer;
strm.avail_out = writeJob->bufferSize;
} }
if (srcFileSize == UTIL_FILESIZE_UNKNOWN)
@@ -1681,6 +1840,7 @@ static int FIO_compressFilename_dstFile(FIO_ctx_t* const fCtx,
int result;
int transferStat = 0;
FILE *dstFile;
+ int dstFd = -1;
assert(AIO_ReadPool_getFile(ress.readCtx) != NULL);
if (AIO_WritePool_getFile(ress.writeCtx) == NULL) {
@@ -1696,6 +1856,7 @@ static int FIO_compressFilename_dstFile(FIO_ctx_t* const fCtx,
DISPLAYLEVEL(6, "FIO_compressFilename_dstFile: opening dst: %s \n", dstFileName);
dstFile = FIO_openDstFile(fCtx, prefs, srcFileName, dstFileName, dstFileInitialPermissions);
if (dstFile==NULL) return 1; /* could not open dstFileName */
+ dstFd = fileno(dstFile);
AIO_WritePool_setFile(ress.writeCtx, dstFile);
/* Must only be added after FIO_openDstFile() succeeds.
* Otherwise we may delete the destination file if it already exists,
@@ -1709,14 +1870,20 @@ static int FIO_compressFilename_dstFile(FIO_ctx_t* const fCtx,
if (closeDstFile) {
clearHandler();
+ if (transferStat) {
+ UTIL_setFDStat(dstFd, dstFileName, srcFileStat);
+ }
+
DISPLAYLEVEL(6, "FIO_compressFilename_dstFile: closing dst: %s \n", dstFileName);
if (AIO_WritePool_closeFile(ress.writeCtx)) { /* error closing file */
DISPLAYLEVEL(1, "zstd: %s: %s \n", dstFileName, strerror(errno));
result=1;
}
+
if (transferStat) {
- UTIL_setFileStat(dstFileName, srcFileStat);
+ UTIL_utime(dstFileName, srcFileStat);
}
+
if ( (result != 0) /* operation failure */
&& strcmp(dstFileName, stdoutmark) /* special case : don't remove() stdout */
) {
@@ -1873,7 +2040,7 @@ int FIO_compressFilename(FIO_ctx_t* const fCtx, FIO_prefs_t* const prefs, const
const char* srcFileName, const char* dictFileName,
int compressionLevel, ZSTD_compressionParameters comprParams)
{
- cRess_t const ress = FIO_createCResources(prefs, dictFileName, UTIL_getFileSize(srcFileName), compressionLevel, comprParams);
+ cRess_t ress = FIO_createCResources(prefs, dictFileName, UTIL_getFileSize(srcFileName), compressionLevel, comprParams);
int const result = FIO_compressFilename_srcFile(fCtx, prefs, ress, dstFileName, srcFileName, compressionLevel);
#define DISPLAY_LEVEL_DEFAULT 2
@@ -2043,6 +2210,7 @@ int FIO_compressMultipleFilenames(FIO_ctx_t* const fCtx,
* Decompression
***************************************************************************/
typedef struct {
+ FIO_Dict_t dict;
ZSTD_DStream* dctx;
WritePoolCtx_t *writeCtx;
ReadPoolCtx_t *readCtx;
@@ -2050,11 +2218,19 @@ typedef struct {
static dRess_t FIO_createDResources(FIO_prefs_t* const prefs, const char* dictFileName)
{
+ int useMMap = prefs->mmapDict == ZSTD_ps_enable;
+ int forceNoUseMMap = prefs->mmapDict == ZSTD_ps_disable;
+ stat_t statbuf;
dRess_t ress;
memset(&ress, 0, sizeof(ress));
- if (prefs->patchFromMode)
- FIO_adjustMemLimitForPatchFromMode(prefs, UTIL_getFileSize(dictFileName), 0 /* just use the dict size */);
+ FIO_getDictFileStat(dictFileName, &statbuf);
+
+ if (prefs->patchFromMode){
+ U64 const dictSize = UTIL_getFileSizeStat(&statbuf);
+ useMMap |= dictSize > prefs->memLimit;
+ FIO_adjustMemLimitForPatchFromMode(prefs, dictSize, 0 /* just use the dict size */);
+ }
/* Allocation */
ress.dctx = ZSTD_createDStream();
@@ -2064,29 +2240,34 @@ static dRess_t FIO_createDResources(FIO_prefs_t* const prefs, const char* dictFi
CHECK( ZSTD_DCtx_setParameter(ress.dctx, ZSTD_d_forceIgnoreChecksum, !prefs->checksumFlag));
/* dictionary */
- { void* dictBuffer;
- stat_t statbuf;
- size_t const dictBufferSize = FIO_createDictBuffer(&dictBuffer, dictFileName, prefs, &statbuf);
- CHECK( ZSTD_DCtx_reset(ress.dctx, ZSTD_reset_session_only) );
- CHECK( ZSTD_DCtx_loadDictionary(ress.dctx, dictBuffer, dictBufferSize) );
- free(dictBuffer);
+ {
+ FIO_dictBufferType_t dictBufferType = (useMMap && !forceNoUseMMap) ? FIO_mmapDict : FIO_mallocDict;
+ FIO_initDict(&ress.dict, dictFileName, prefs, &statbuf, dictBufferType);
+
+ CHECK(ZSTD_DCtx_reset(ress.dctx, ZSTD_reset_session_only) );
+
+ if (prefs->patchFromMode){
+ CHECK(ZSTD_DCtx_refPrefix(ress.dctx, ress.dict.dictBuffer, ress.dict.dictBufferSize));
+ } else {
+ CHECK(ZSTD_DCtx_loadDictionary_byReference(ress.dctx, ress.dict.dictBuffer, ress.dict.dictBufferSize));
+ }
}
ress.writeCtx = AIO_WritePool_create(prefs, ZSTD_DStreamOutSize());
ress.readCtx = AIO_ReadPool_create(prefs, ZSTD_DStreamInSize());
-
return ress;
}
static void FIO_freeDResources(dRess_t ress)
{
+ FIO_freeDict(&(ress.dict));
CHECK( ZSTD_freeDStream(ress.dctx) );
AIO_WritePool_free(ress.writeCtx);
AIO_ReadPool_free(ress.readCtx);
}
-/** FIO_passThrough() : just copy input into output, for compatibility with gzip -df mode
- @return : 0 (no error) */
+/* FIO_passThrough() : just copy input into output, for compatibility with gzip -df mode
+ * @return : 0 (no error) */
static int FIO_passThrough(dRess_t *ress)
{
size_t const blockSize = MIN(MIN(64 KB, ZSTD_DStreamInSize()), ZSTD_DStreamOutSize());
@@ -2114,7 +2295,8 @@ static int FIO_passThrough(dRess_t *ress)
static void
FIO_zstdErrorHelp(const FIO_prefs_t* const prefs,
const dRess_t* ress,
- size_t err, const char* srcFileName)
+ size_t err,
+ const char* srcFileName)
{
ZSTD_frameHeader header;
@@ -2316,8 +2498,8 @@ FIO_decompressLzmaFrame(dRess_t* ress,
}
writeJob = AIO_WritePool_acquireJob(ress->writeCtx);
- strm.next_out = (Bytef*)writeJob->buffer;
- strm.avail_out = (uInt)writeJob->bufferSize;
+ strm.next_out = (BYTE*)writeJob->buffer;
+ strm.avail_out = writeJob->bufferSize;
strm.next_in = (BYTE const*)ress->readCtx->srcBuffer;
strm.avail_in = ress->readCtx->srcBufferLoaded;
@@ -2345,7 +2527,7 @@ FIO_decompressLzmaFrame(dRess_t* ress,
writeJob->usedBufferSize = decompBytes;
AIO_WritePool_enqueueAndReacquireWriteJob(&writeJob);
outFileSize += decompBytes;
- strm.next_out = (Bytef*)writeJob->buffer;
+ strm.next_out = (BYTE*)writeJob->buffer;
strm.avail_out = writeJob->bufferSize;
} }
if (ret == LZMA_STREAM_END) break;
@@ -2540,6 +2722,7 @@ static int FIO_decompressDstFile(FIO_ctx_t* const fCtx,
int result;
int releaseDstFile = 0;
int transferStat = 0;
+ int dstFd = 0;
if ((AIO_WritePool_getFile(ress.writeCtx) == NULL) && (prefs->testMode == 0)) {
FILE *dstFile;
@@ -2555,6 +2738,7 @@ static int FIO_decompressDstFile(FIO_ctx_t* const fCtx,
dstFile = FIO_openDstFile(fCtx, prefs, srcFileName, dstFileName, dstFilePermissions);
if (dstFile==NULL) return 1;
+ dstFd = fileno(dstFile);
AIO_WritePool_setFile(ress.writeCtx, dstFile);
/* Must only be added after FIO_openDstFile() succeeds.
@@ -2568,13 +2752,18 @@ static int FIO_decompressDstFile(FIO_ctx_t* const fCtx,
if (releaseDstFile) {
clearHandler();
+
+ if (transferStat) {
+ UTIL_setFDStat(dstFd, dstFileName, srcFileStat);
+ }
+
if (AIO_WritePool_closeFile(ress.writeCtx)) {
DISPLAYLEVEL(1, "zstd: %s: %s \n", dstFileName, strerror(errno));
result = 1;
}
if (transferStat) {
- UTIL_setFileStat(dstFileName, srcFileStat);
+ UTIL_utime(dstFileName, srcFileStat);
}
if ( (result != 0) /* operation failure */
@@ -2655,6 +2844,8 @@ int FIO_decompressFilename(FIO_ctx_t* const fCtx, FIO_prefs_t* const prefs,
int const decodingError = FIO_decompressSrcFile(fCtx, prefs, ress, dstFileName, srcFileName);
+
+
FIO_freeDResources(ress);
return decodingError;
}
diff --git a/contrib/libs/zstd/programs/fileio.h b/contrib/libs/zstd/programs/fileio.h
index 291d4d4145..224d89525d 100644
--- a/contrib/libs/zstd/programs/fileio.h
+++ b/contrib/libs/zstd/programs/fileio.h
@@ -106,6 +106,7 @@ void FIO_setContentSize(FIO_prefs_t* const prefs, int value);
void FIO_displayCompressionParameters(const FIO_prefs_t* prefs);
void FIO_setAsyncIOFlag(FIO_prefs_t* const prefs, int value);
void FIO_setPassThroughFlag(FIO_prefs_t* const prefs, int value);
+void FIO_setMMapDict(FIO_prefs_t* const prefs, ZSTD_paramSwitch_e value);
/* FIO_ctx_t functions */
void FIO_setNbFilesTotal(FIO_ctx_t* const fCtx, int value);
diff --git a/contrib/libs/zstd/programs/fileio_types.h b/contrib/libs/zstd/programs/fileio_types.h
index c1f42f1ad0..2994a60929 100644
--- a/contrib/libs/zstd/programs/fileio_types.h
+++ b/contrib/libs/zstd/programs/fileio_types.h
@@ -69,6 +69,18 @@ typedef struct FIO_prefs_s {
int contentSize;
int allowBlockDevices;
int passThrough;
+ ZSTD_paramSwitch_e mmapDict;
} FIO_prefs_t;
+typedef enum {FIO_mallocDict, FIO_mmapDict} FIO_dictBufferType_t;
+
+typedef struct {
+ void* dictBuffer;
+ size_t dictBufferSize;
+ FIO_dictBufferType_t dictBufferType;
+#if defined(_MSC_VER) || defined(_WIN32)
+ HANDLE dictHandle;
+#endif
+} FIO_Dict_t;
+
#endif /* FILEIO_TYPES_HEADER */
diff --git a/contrib/libs/zstd/programs/timefn.c b/contrib/libs/zstd/programs/timefn.c
index f941e57e61..4f045226b8 100644
--- a/contrib/libs/zstd/programs/timefn.c
+++ b/contrib/libs/zstd/programs/timefn.c
@@ -88,7 +88,7 @@ UTIL_time_t UTIL_getTime(void)
/* C11 requires support of timespec_get().
* However, FreeBSD 11 claims C11 compliance while lacking timespec_get().
* Double confirm timespec_get() support by checking the definition of TIME_UTC.
- * However, some versions of Android manage to simultanously define TIME_UTC
+ * However, some versions of Android manage to simultaneously define TIME_UTC
* and lack timespec_get() support... */
#elif (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) /* C11 */) \
&& defined(TIME_UTC) && !defined(__ANDROID__)
diff --git a/contrib/libs/zstd/programs/util.c b/contrib/libs/zstd/programs/util.c
index e017772ef6..c9031e91d3 100644
--- a/contrib/libs/zstd/programs/util.c
+++ b/contrib/libs/zstd/programs/util.c
@@ -102,6 +102,17 @@ UTIL_STATIC void* UTIL_realloc(void *ptr, size_t size)
#define chmod _chmod
#endif
+#ifndef ZSTD_HAVE_FCHMOD
+#if PLATFORM_POSIX_VERSION >= 199309L
+#define ZSTD_HAVE_FCHMOD
+#endif
+#endif
+
+#ifndef ZSTD_HAVE_FCHOWN
+#if PLATFORM_POSIX_VERSION >= 200809L
+#define ZSTD_HAVE_FCHOWN
+#endif
+#endif
/*-****************************************
* Console log
@@ -147,21 +158,38 @@ void UTIL_traceFileStat(void)
g_traceFileStat = 1;
}
-int UTIL_stat(const char* filename, stat_t* statbuf)
+int UTIL_fstat(const int fd, const char* filename, stat_t* statbuf)
{
int ret;
- UTIL_TRACE_CALL("UTIL_stat(%s)", filename);
+ UTIL_TRACE_CALL("UTIL_stat(%d, %s)", fd, filename);
#if defined(_MSC_VER)
- ret = !_stat64(filename, statbuf);
+ if (fd >= 0) {
+ ret = !_fstat64(fd, statbuf);
+ } else {
+ ret = !_stat64(filename, statbuf);
+ }
#elif defined(__MINGW32__) && defined (__MSVCRT__)
- ret = !_stati64(filename, statbuf);
+ if (fd >= 0) {
+ ret = !_fstati64(fd, statbuf);
+ } else {
+ ret = !_stati64(filename, statbuf);
+ }
#else
- ret = !stat(filename, statbuf);
+ if (fd >= 0) {
+ ret = !fstat(fd, statbuf);
+ } else {
+ ret = !stat(filename, statbuf);
+ }
#endif
UTIL_TRACE_RET(ret);
return ret;
}
+int UTIL_stat(const char* filename, stat_t* statbuf)
+{
+ return UTIL_fstat(-1, filename, statbuf);
+}
+
int UTIL_isRegularFile(const char* infilename)
{
stat_t statbuf;
@@ -184,10 +212,15 @@ int UTIL_isRegularFileStat(const stat_t* statbuf)
/* like chmod, but avoid changing permission of /dev/null */
int UTIL_chmod(char const* filename, const stat_t* statbuf, mode_t permissions)
{
+ return UTIL_fchmod(-1, filename, statbuf, permissions);
+}
+
+int UTIL_fchmod(const int fd, char const* filename, const stat_t* statbuf, mode_t permissions)
+{
stat_t localStatBuf;
UTIL_TRACE_CALL("UTIL_chmod(%s, %#4o)", filename, (unsigned)permissions);
if (statbuf == NULL) {
- if (!UTIL_stat(filename, &localStatBuf)) {
+ if (!UTIL_fstat(fd, filename, &localStatBuf)) {
UTIL_TRACE_RET(0);
return 0;
}
@@ -197,9 +230,20 @@ int UTIL_chmod(char const* filename, const stat_t* statbuf, mode_t permissions)
UTIL_TRACE_RET(0);
return 0; /* pretend success, but don't change anything */
}
- UTIL_TRACE_CALL("chmod");
+#ifdef ZSTD_HAVE_FCHMOD
+ if (fd >= 0) {
+ int ret;
+ UTIL_TRACE_CALL("fchmod");
+ ret = fchmod(fd, permissions);
+ UTIL_TRACE_RET(ret);
+ return ret;
+ } else
+#endif
{
- int const ret = chmod(filename, permissions);
+ int ret;
+ UTIL_TRACE_CALL("chmod");
+ ret = chmod(filename, permissions);
 UTIL_TRACE_RET(ret);
return ret;
@@ -237,18 +281,20 @@ int UTIL_utime(const char* filename, const stat_t *statbuf)
int UTIL_setFileStat(const char *filename, const stat_t *statbuf)
{
+ return UTIL_setFDStat(-1, filename, statbuf);
+}
+
+int UTIL_setFDStat(const int fd, const char *filename, const stat_t *statbuf)
+{
int res = 0;
stat_t curStatBuf;
- UTIL_TRACE_CALL("UTIL_setFileStat(%s)", filename);
+ UTIL_TRACE_CALL("UTIL_setFileStat(%d, %s)", fd, filename);
- if (!UTIL_stat(filename, &curStatBuf) || !UTIL_isRegularFileStat(&curStatBuf)) {
+ if (!UTIL_fstat(fd, filename, &curStatBuf) || !UTIL_isRegularFileStat(&curStatBuf)) {
UTIL_TRACE_RET(-1);
return -1;
}
- /* set access and modification times */
- res += UTIL_utime(filename, statbuf);
-
/* Mimic gzip's behavior:
*
* "Change the group first, then the permissions, then the owner.
@@ -258,13 +304,27 @@ int UTIL_setFileStat(const char *filename, const stat_t *statbuf)
* setgid bits." */
#if !defined(_WIN32)
- res += chown(filename, -1, statbuf->st_gid); /* Apply group ownership */
+#ifdef ZSTD_HAVE_FCHOWN
+ if (fd >= 0) {
+ res += fchown(fd, -1, statbuf->st_gid); /* Apply group ownership */
+ } else
+#endif
+ {
+ res += chown(filename, -1, statbuf->st_gid); /* Apply group ownership */
+ }
#endif
- res += UTIL_chmod(filename, &curStatBuf, statbuf->st_mode & 0777); /* Copy file permissions */
+ res += UTIL_fchmod(fd, filename, &curStatBuf, statbuf->st_mode & 0777); /* Copy file permissions */
#if !defined(_WIN32)
- res += chown(filename, statbuf->st_uid, -1); /* Apply user ownership */
+#ifdef ZSTD_HAVE_FCHOWN
+ if (fd >= 0) {
+ res += fchown(fd, statbuf->st_uid, -1); /* Apply user ownership */
+ } else
+#endif
+ {
+ res += chown(filename, statbuf->st_uid, -1); /* Apply user ownership */
+ }
#endif
errno = 0;
diff --git a/contrib/libs/zstd/programs/util.h b/contrib/libs/zstd/programs/util.h
index 4ec54137dd..8234646bf3 100644
--- a/contrib/libs/zstd/programs/util.h
+++ b/contrib/libs/zstd/programs/util.h
@@ -126,15 +126,25 @@ int UTIL_requireUserConfirmation(const char* prompt, const char* abortMsg, const
/**
* Calls platform's equivalent of stat() on filename and writes info to statbuf.
* Returns success (1) or failure (0).
+ *
+ * UTIL_fstat() is like UTIL_stat() but takes an optional fd that refers to the
+ * file in question. It turns out that this can be meaningfully faster. If fd is
+ * -1, behaves just like UTIL_stat() (i.e., falls back to using the filename).
*/
int UTIL_stat(const char* filename, stat_t* statbuf);
+int UTIL_fstat(const int fd, const char* filename, stat_t* statbuf);
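
A hypothetical caller illustrating the fd-preferring pattern (saves a second path lookup when the file is already open; `filename` is assumed to be a caller variable):

    FILE* const f = fopen(filename, "rb");
    stat_t st;
    if (f != NULL && UTIL_fstat(fileno(f), filename, &st)) {
        /* stat info obtained through the already-open descriptor */
    }
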
/**
* Instead of getting a file's stats, this updates them with the info in the
* provided stat_t. Currently sets owner, group, atime, and mtime. Will only
* update this info for regular files.
+ *
+ * UTIL_setFDStat() also takes an fd, and will preferentially use that to
+ * indicate which file to modify. If fd is -1, it will fall back to using the
+ * filename.
*/
int UTIL_setFileStat(const char* filename, const stat_t* statbuf);
+int UTIL_setFDStat(const int fd, const char* filename, const stat_t* statbuf);
/**
* Set atime to now and mtime to the st_mtim in statbuf.
@@ -159,8 +169,11 @@ U64 UTIL_getFileSizeStat(const stat_t* statbuf);
* Like chmod(), but only modifies regular files. Provided statbuf may be NULL,
* in which case this function will stat() the file internally, in order to
* check whether it should be modified.
+ *
+ * If fd is -1, fd is ignored and the filename is used.
*/
int UTIL_chmod(char const* filename, const stat_t* statbuf, mode_t permissions);
+int UTIL_fchmod(const int fd, char const* filename, const stat_t* statbuf, mode_t permissions);
/*
* In the absence of a pre-existing stat result on the file in question, these
diff --git a/contrib/libs/zstd/programs/zstdcli.c b/contrib/libs/zstd/programs/zstdcli.c
index 93f75e21d9..d2465456b5 100644
--- a/contrib/libs/zstd/programs/zstdcli.c
+++ b/contrib/libs/zstd/programs/zstdcli.c
@@ -37,7 +37,7 @@
#include "fileio.h" /* stdinmark, stdoutmark, ZSTD_EXTENSION */
#ifndef ZSTD_NOBENCH
-# include "benchzstd.h" /* BMK_benchFiles */
+# include "benchzstd.h" /* BMK_benchFilesAdvanced */
#endif
#ifndef ZSTD_NODICT
# include "dibio.h" /* ZDICT_cover_params_t, DiB_trainFromFiles() */
@@ -165,7 +165,7 @@ static void usage(FILE* f, const char* programName)
#endif
DISPLAY_F(f, " -D DICT Use DICT as the dictionary for compression or decompression.\n\n");
DISPLAY_F(f, " -f, --force Disable input and output checks. Allows overwriting existing files,\n");
- DISPLAY_F(f, " receiving input from the console, printing ouput to STDOUT, and\n");
+ DISPLAY_F(f, " receiving input from the console, printing output to STDOUT, and\n");
DISPLAY_F(f, " operating on links, block devices, etc. Unrecognized formats will be\n");
DISPLAY_F(f, " passed-through through as-is.\n\n");
@@ -254,6 +254,7 @@ static void usage_advanced(const char* programName)
DISPLAYOUT("\n");
DISPLAYOUT(" --format=zstd Compress files to the `.zst` format. [Default]\n");
+ DISPLAYOUT(" --mmap-dict Memory-map dictionary file rather than mallocing and loading all at once");
#ifdef ZSTD_GZCOMPRESS
DISPLAYOUT(" --format=gzip Compress files to the `.gz` format.\n");
#endif
@@ -851,6 +852,7 @@ int main(int argCount, const char* argv[])
ultra=0,
contentSize=1,
removeSrcFile=0;
+ ZSTD_paramSwitch_e mmapDict=ZSTD_ps_auto;
ZSTD_paramSwitch_e useRowMatchFinder = ZSTD_ps_auto;
FIO_compressionType_t cType = FIO_zstdCompression;
unsigned nbWorkers = 0;
@@ -984,6 +986,8 @@ int main(int argCount, const char* argv[])
if (longCommandWArg(&argument, "--adapt=")) { adapt = 1; if (!parseAdaptParameters(argument, &adaptMin, &adaptMax)) { badusage(programName); CLEAN_RETURN(1); } continue; }
if (!strcmp(argument, "--single-thread")) { nbWorkers = 0; singleThread = 1; continue; }
if (!strcmp(argument, "--format=zstd")) { suffix = ZSTD_EXTENSION; cType = FIO_zstdCompression; continue; }
+ if (!strcmp(argument, "--mmap-dict")) { mmapDict = ZSTD_ps_enable; continue; }
+ if (!strcmp(argument, "--no-mmap-dict")) { mmapDict = ZSTD_ps_disable; continue; }
#ifdef ZSTD_GZCOMPRESS
if (!strcmp(argument, "--format=gzip")) { suffix = GZ_EXTENSION; cType = FIO_gzipCompression; continue; }
if (exeNameMatch(programName, ZSTD_GZ)) { /* behave like gzip */
@@ -1391,18 +1395,15 @@ int main(int argCount, const char* argv[])
int c;
DISPLAYLEVEL(3, "Benchmarking %s \n", filenames->fileNames[i]);
for(c = cLevel; c <= cLevelLast; c++) {
- BMK_benchOutcome_t const bo = BMK_benchFilesAdvanced(&filenames->fileNames[i], 1, dictFileName, c, &compressionParams, g_displayLevel, &benchParams);
- if (!BMK_isSuccessful_benchOutcome(bo)) return 1;
+ operationResult = BMK_benchFilesAdvanced(&filenames->fileNames[i], 1, dictFileName, c, &compressionParams, g_displayLevel, &benchParams);
} }
} else {
for(; cLevel <= cLevelLast; cLevel++) {
- BMK_benchOutcome_t const bo = BMK_benchFilesAdvanced(filenames->fileNames, (unsigned)filenames->tableSize, dictFileName, cLevel, &compressionParams, g_displayLevel, &benchParams);
- if (!BMK_isSuccessful_benchOutcome(bo)) return 1;
+ operationResult = BMK_benchFilesAdvanced(filenames->fileNames, (unsigned)filenames->tableSize, dictFileName, cLevel, &compressionParams, g_displayLevel, &benchParams);
} }
} else {
for(; cLevel <= cLevelLast; cLevel++) {
- BMK_benchOutcome_t const bo = BMK_syntheticTest(cLevel, compressibility, &compressionParams, g_displayLevel, &benchParams);
- if (!BMK_isSuccessful_benchOutcome(bo)) return 1;
+ operationResult = BMK_syntheticTest(cLevel, compressibility, &compressionParams, g_displayLevel, &benchParams);
} }
#else
@@ -1526,6 +1527,7 @@ int main(int argCount, const char* argv[])
FIO_setNotificationLevel(g_displayLevel);
FIO_setAllowBlockDevices(prefs, allowBlockDevices);
FIO_setPatchFromMode(prefs, patchFromDictFileName != NULL);
+ FIO_setMMapDict(prefs, mmapDict);
if (memLimit == 0) {
if (compressionParams.windowLog == 0) {
memLimit = (U32)1 << g_defaultMaxWindowLog;