author | thegeorg <thegeorg@yandex-team.com> | 2023-02-27 12:38:58 +0300 |
committer | thegeorg <thegeorg@yandex-team.com> | 2023-02-27 12:38:58 +0300 |
commit | e82ffade6959fe8feaf41fe12a57d9f56873be40 (patch) | |
tree | 784bf61d9ca6cab4beabc995186d9b41e762ae60 /contrib/libs/zstd/lib/common | |
parent | e58cceed352de42c1526ab4eecdfeee158b1ede3 (diff) | |
download | ydb-e82ffade6959fe8feaf41fe12a57d9f56873be40.tar.gz | |
Update contrib/libs/zstd to 1.5.4
Diffstat (limited to 'contrib/libs/zstd/lib/common')
22 files changed, 506 insertions, 648 deletions
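One notable change in the diff below is the new header `lib/common/bits.h`, which centralizes the bit-scan helpers (`ZSTD_countTrailingZeros32`, `ZSTD_countLeadingZeros32`, `ZSTD_highbit32`) that replace the per-file `BIT_highbit32` and `FSE_ctz` implementations removed from `bitstream.h` and `entropy_common.c`. As a quick sanity check of the De Bruijn fallback path that `bits.h` uses when no compiler intrinsic is available, here is a standalone sketch — not part of the commit — that mirrors `ZSTD_countTrailingZeros32_fallback` with plain `stdint.h` types and compares it against a naive loop; everything outside the zstd identifiers named above is illustrative only.

```c
/* Standalone sketch: mirrors the De Bruijn trailing-zeros fallback from
 * the new bits.h and verifies it against a naive loop. Uses plain C types
 * (uint32_t) in place of zstd's U32. */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

static unsigned ctz32_debruijn(uint32_t val)
{
    /* Same lookup table as ZSTD_countTrailingZeros32_fallback in bits.h. */
    static const int DeBruijnBytePos[32] = {0, 1, 28, 2, 29, 14, 24, 3,
                                            30, 22, 20, 15, 25, 17, 4, 8,
                                            31, 27, 13, 23, 21, 19, 16, 7,
                                            26, 12, 18, 6, 11, 5, 10, 9};
    assert(val != 0);
    /* val & (0u - val) isolates the lowest set bit; multiplying by the
     * De Bruijn constant puts a unique 5-bit index into the top bits. */
    return DeBruijnBytePos[((uint32_t)((val & (0u - val)) * 0x077CB531U)) >> 27];
}

static unsigned ctz32_naive(uint32_t val)
{
    unsigned n = 0;
    while ((val & 1) == 0) { val >>= 1; ++n; }
    return n;
}

int main(void)
{
    uint32_t v;
    for (v = 1; v != 0; v <<= 1)   /* every single-bit input */
        assert(ctz32_debruijn(v) == ctz32_naive(v));
    assert(ctz32_debruijn(0x80000001u) == 0);
    assert(ctz32_debruijn(0x00F0u) == 4);
    printf("De Bruijn ctz fallback matches naive loop\n");
    return 0;
}
```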
diff --git a/contrib/libs/zstd/lib/common/bits.h b/contrib/libs/zstd/lib/common/bits.h new file mode 100644 index 0000000000..7939f3d0f3 --- /dev/null +++ b/contrib/libs/zstd/lib/common/bits.h @@ -0,0 +1,175 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef ZSTD_BITS_H +#define ZSTD_BITS_H + +#include "mem.h" + +MEM_STATIC unsigned ZSTD_countTrailingZeros32_fallback(U32 val) +{ + assert(val != 0); + { + static const int DeBruijnBytePos[32] = {0, 1, 28, 2, 29, 14, 24, 3, + 30, 22, 20, 15, 25, 17, 4, 8, + 31, 27, 13, 23, 21, 19, 16, 7, + 26, 12, 18, 6, 11, 5, 10, 9}; + return DeBruijnBytePos[((U32) ((val & -(S32) val) * 0x077CB531U)) >> 27]; + } +} + +MEM_STATIC unsigned ZSTD_countTrailingZeros32(U32 val) +{ + assert(val != 0); +# if defined(_MSC_VER) +# if STATIC_BMI2 == 1 + return _tzcnt_u32(val); +# else + if (val != 0) { + unsigned long r; + _BitScanForward(&r, val); + return (unsigned)r; + } else { + /* Should not reach this code path */ + __assume(0); + } +# endif +# elif defined(__GNUC__) && (__GNUC__ >= 4) + return (unsigned)__builtin_ctz(val); +# else + return ZSTD_countTrailingZeros32_fallback(val); +# endif +} + +MEM_STATIC unsigned ZSTD_countLeadingZeros32_fallback(U32 val) { + assert(val != 0); + { + static const U32 DeBruijnClz[32] = {0, 9, 1, 10, 13, 21, 2, 29, + 11, 14, 16, 18, 22, 25, 3, 30, + 8, 12, 20, 28, 15, 17, 24, 7, + 19, 27, 23, 6, 26, 5, 4, 31}; + val |= val >> 1; + val |= val >> 2; + val |= val >> 4; + val |= val >> 8; + val |= val >> 16; + return 31 - DeBruijnClz[(val * 0x07C4ACDDU) >> 27]; + } +} + +MEM_STATIC unsigned ZSTD_countLeadingZeros32(U32 val) +{ + assert(val != 0); +# if defined(_MSC_VER) +# if STATIC_BMI2 == 1 + return _lzcnt_u32(val); +# else + if (val != 0) { + unsigned long r; + _BitScanReverse(&r, val); + return (unsigned)(31 - r); + } else { + /* Should not reach this code path */ + __assume(0); + } +# endif +# elif defined(__GNUC__) && (__GNUC__ >= 4) + return (unsigned)__builtin_clz(val); +# else + return ZSTD_countLeadingZeros32_fallback(val); +# endif +} + +MEM_STATIC unsigned ZSTD_countTrailingZeros64(U64 val) +{ + assert(val != 0); +# if defined(_MSC_VER) && defined(_WIN64) +# if STATIC_BMI2 == 1 + return _tzcnt_u64(val); +# else + if (val != 0) { + unsigned long r; + _BitScanForward64(&r, val); + return (unsigned)r; + } else { + /* Should not reach this code path */ + __assume(0); + } +# endif +# elif defined(__GNUC__) && (__GNUC__ >= 4) && defined(__LP64__) + return (unsigned)__builtin_ctzll(val); +# else + { + U32 mostSignificantWord = (U32)(val >> 32); + U32 leastSignificantWord = (U32)val; + if (leastSignificantWord == 0) { + return 32 + ZSTD_countTrailingZeros32(mostSignificantWord); + } else { + return ZSTD_countTrailingZeros32(leastSignificantWord); + } + } +# endif +} + +MEM_STATIC unsigned ZSTD_countLeadingZeros64(U64 val) +{ + assert(val != 0); +# if defined(_MSC_VER) && defined(_WIN64) +# if STATIC_BMI2 == 1 + return _lzcnt_u64(val); +# else + if (val != 0) { + unsigned long r; + _BitScanReverse64(&r, val); + return (unsigned)(63 - r); + } else { + /* Should not reach this code path */ + __assume(0); + } +# endif +# elif defined(__GNUC__) && (__GNUC__ >= 4) + return 
(unsigned)(__builtin_clzll(val)); +# else + { + U32 mostSignificantWord = (U32)(val >> 32); + U32 leastSignificantWord = (U32)val; + if (mostSignificantWord == 0) { + return 32 + ZSTD_countLeadingZeros32(leastSignificantWord); + } else { + return ZSTD_countLeadingZeros32(mostSignificantWord); + } + } +# endif +} + +MEM_STATIC unsigned ZSTD_NbCommonBytes(size_t val) +{ + if (MEM_isLittleEndian()) { + if (MEM_64bits()) { + return ZSTD_countTrailingZeros64((U64)val) >> 3; + } else { + return ZSTD_countTrailingZeros32((U32)val) >> 3; + } + } else { /* Big Endian CPU */ + if (MEM_64bits()) { + return ZSTD_countLeadingZeros64((U64)val) >> 3; + } else { + return ZSTD_countLeadingZeros32((U32)val) >> 3; + } + } +} + +MEM_STATIC unsigned ZSTD_highbit32(U32 val) /* compress, dictBuilder, decodeCorpus */ +{ + assert(val != 0); + return 31 - ZSTD_countLeadingZeros32(val); +} + +#endif /* ZSTD_BITS_H */ diff --git a/contrib/libs/zstd/lib/common/bitstream.h b/contrib/libs/zstd/lib/common/bitstream.h index 84b6062ff3..db1b4cf136 100644 --- a/contrib/libs/zstd/lib/common/bitstream.h +++ b/contrib/libs/zstd/lib/common/bitstream.h @@ -1,7 +1,7 @@ /* ****************************************************************** * bitstream * Part of FSE library - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * * You can contact the author at : * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy @@ -30,14 +30,15 @@ extern "C" { #include "compiler.h" /* UNLIKELY() */ #include "debug.h" /* assert(), DEBUGLOG(), RAWLOG() */ #include "error_private.h" /* error codes and messages */ +#include "bits.h" /* ZSTD_highbit32 */ /*========================================= * Target specific =========================================*/ #ifndef ZSTD_NO_INTRINSICS -# if defined(__BMI__) && defined(__GNUC__) -# include <immintrin.h> /* support for bextr (experimental) */ +# if (defined(__BMI__) || defined(__BMI2__)) && defined(__GNUC__) +# include <immintrin.h> /* support for bextr (experimental)/bzhi */ # elif defined(__ICCARM__) # include <intrinsics.h> # endif @@ -132,48 +133,6 @@ MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC); MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits); /* faster, but works only if nbBits >= 1 */ - - -/*-************************************************************** -* Internal functions -****************************************************************/ -MEM_STATIC unsigned BIT_highbit32 (U32 val) -{ - assert(val != 0); - { -# if defined(_MSC_VER) /* Visual */ -# if STATIC_BMI2 == 1 - return _lzcnt_u32(val) ^ 31; -# else - if (val != 0) { - unsigned long r; - _BitScanReverse(&r, val); - return (unsigned)r; - } else { - /* Should not reach this code path */ - __assume(0); - } -# endif -# elif defined(__GNUC__) && (__GNUC__ >= 3) /* Use GCC Intrinsic */ - return __builtin_clz (val) ^ 31; -# elif defined(__ICCARM__) /* IAR Intrinsic */ - return 31 - __CLZ(val); -# else /* Software version */ - static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, - 11, 14, 16, 18, 22, 25, 3, 30, - 8, 12, 20, 28, 15, 17, 24, 7, - 19, 27, 23, 6, 26, 5, 4, 31 }; - U32 v = val; - v |= v >> 1; - v |= v >> 2; - v |= v >> 4; - v |= v >> 8; - v |= v >> 16; - return DeBruijnClz[ (U32) (v * 0x07C4ACDDU) >> 27]; -# endif - } -} - /*===== Local Constants =====*/ static const unsigned BIT_mask[] = { 0, 1, 3, 7, 0xF, 0x1F, @@ -203,6 +162,16 @@ MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC, return 0; } +MEM_STATIC 
FORCE_INLINE_ATTR size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits) +{ +#if defined(STATIC_BMI2) && STATIC_BMI2 == 1 && !defined(ZSTD_NO_INTRINSICS) + return _bzhi_u64(bitContainer, nbBits); +#else + assert(nbBits < BIT_MASK_SIZE); + return bitContainer & BIT_mask[nbBits]; +#endif +} + /*! BIT_addBits() : * can add up to 31 bits into `bitC`. * Note : does not check for register overflow ! */ @@ -212,7 +181,7 @@ MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC, DEBUG_STATIC_ASSERT(BIT_MASK_SIZE == 32); assert(nbBits < BIT_MASK_SIZE); assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8); - bitC->bitContainer |= (value & BIT_mask[nbBits]) << bitC->bitPos; + bitC->bitContainer |= BIT_getLowerBits(value, nbBits) << bitC->bitPos; bitC->bitPos += nbBits; } @@ -291,7 +260,7 @@ MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, si bitD->ptr = (const char*)srcBuffer + srcSize - sizeof(bitD->bitContainer); bitD->bitContainer = MEM_readLEST(bitD->ptr); { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1]; - bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0; /* ensures bitsConsumed is always set */ + bitD->bitsConsumed = lastByte ? 8 - ZSTD_highbit32(lastByte) : 0; /* ensures bitsConsumed is always set */ if (lastByte == 0) return ERROR(GENERIC); /* endMark not present */ } } else { bitD->ptr = bitD->start; @@ -319,7 +288,7 @@ MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, si default: break; } { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1]; - bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0; + bitD->bitsConsumed = lastByte ? 8 - ZSTD_highbit32(lastByte) : 0; if (lastByte == 0) return ERROR(corruption_detected); /* endMark not present */ } bitD->bitsConsumed += (U32)(sizeof(bitD->bitContainer) - srcSize)*8; @@ -350,16 +319,6 @@ MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getMiddleBits(size_t bitContainer, U32 c #endif } -MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits) -{ -#if defined(STATIC_BMI2) && STATIC_BMI2 == 1 - return _bzhi_u64(bitContainer, nbBits); -#else - assert(nbBits < BIT_MASK_SIZE); - return bitContainer & BIT_mask[nbBits]; -#endif -} - /*! BIT_lookBits() : * Provides next n bits from local register. * local register is not modified. @@ -406,7 +365,7 @@ MEM_STATIC FORCE_INLINE_ATTR size_t BIT_readBits(BIT_DStream_t* bitD, unsigned n } /*! BIT_readBitsFast() : - * unsafe version; only works only if nbBits >= 1 */ + * unsafe version; only works if nbBits >= 1 */ MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits) { size_t const value = BIT_lookBitsFast(bitD, nbBits); diff --git a/contrib/libs/zstd/lib/common/compiler.h b/contrib/libs/zstd/lib/common/compiler.h index 516930c01e..d4f2f285d7 100644 --- a/contrib/libs/zstd/lib/common/compiler.h +++ b/contrib/libs/zstd/lib/common/compiler.h @@ -1,5 +1,5 @@ /* - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. 
* * This source code is licensed under both the BSD-style license (found in the @@ -165,6 +165,12 @@ #define UNLIKELY(x) (x) #endif +#if __has_builtin(__builtin_unreachable) || (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5))) +# define ZSTD_UNREACHABLE { assert(0), __builtin_unreachable(); } +#else +# define ZSTD_UNREACHABLE { assert(0); } +#endif + /* disable warnings */ #ifdef _MSC_VER /* Visual Studio */ # include <intrin.h> /* For Visual 2005 */ @@ -181,6 +187,8 @@ # ifdef __AVX2__ //MSVC does not have a BMI2 specific flag, but every CPU that supports AVX2 also supports BMI2 # define STATIC_BMI2 1 # endif +# elif defined(__BMI2__) && defined(__x86_64__) && defined(__GNUC__) +# define STATIC_BMI2 1 # endif #endif @@ -273,7 +281,18 @@ * Sanitizer *****************************************************************/ -#if ZSTD_MEMORY_SANITIZER +/* Issue #3240 reports an ASAN failure on an llvm-mingw build. Out of an + * abundance of caution, disable our custom poisoning on mingw. */ +#ifdef __MINGW32__ +#ifndef ZSTD_ASAN_DONT_POISON_WORKSPACE +#define ZSTD_ASAN_DONT_POISON_WORKSPACE 1 +#endif +#ifndef ZSTD_MSAN_DONT_POISON_WORKSPACE +#define ZSTD_MSAN_DONT_POISON_WORKSPACE 1 +#endif +#endif + +#if ZSTD_MEMORY_SANITIZER && !defined(ZSTD_MSAN_DONT_POISON_WORKSPACE) /* Not all platforms that support msan provide sanitizers/msan_interface.h. * We therefore declare the functions we need ourselves, rather than trying to * include the header file... */ @@ -294,7 +313,7 @@ void __msan_poison(const volatile void *a, size_t size); intptr_t __msan_test_shadow(const volatile void *x, size_t size); #endif -#if ZSTD_ADDRESS_SANITIZER +#if ZSTD_ADDRESS_SANITIZER && !defined(ZSTD_ASAN_DONT_POISON_WORKSPACE) /* Not all platforms that support asan provide sanitizers/asan_interface.h. * We therefore declare the functions we need ourselves, rather than trying to * include the header file... */ diff --git a/contrib/libs/zstd/lib/common/cpu.h b/contrib/libs/zstd/lib/common/cpu.h index 8acd33be3c..8bc34a36da 100644 --- a/contrib/libs/zstd/lib/common/cpu.h +++ b/contrib/libs/zstd/lib/common/cpu.h @@ -1,5 +1,5 @@ /* - * Copyright (c) Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/contrib/libs/zstd/lib/common/debug.c b/contrib/libs/zstd/lib/common/debug.c index bb863c9ea6..ebf7bfccfa 100644 --- a/contrib/libs/zstd/lib/common/debug.c +++ b/contrib/libs/zstd/lib/common/debug.c @@ -1,7 +1,7 @@ /* ****************************************************************** * debug * Part of FSE library - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * * You can contact the author at : * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy diff --git a/contrib/libs/zstd/lib/common/debug.h b/contrib/libs/zstd/lib/common/debug.h index 3b2a320a18..0e9817ea6d 100644 --- a/contrib/libs/zstd/lib/common/debug.h +++ b/contrib/libs/zstd/lib/common/debug.h @@ -1,7 +1,7 @@ /* ****************************************************************** * debug * Part of FSE library - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. 
* * You can contact the author at : * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy diff --git a/contrib/libs/zstd/lib/common/entropy_common.c b/contrib/libs/zstd/lib/common/entropy_common.c index 4229b40c5e..e2173afb0a 100644 --- a/contrib/libs/zstd/lib/common/entropy_common.c +++ b/contrib/libs/zstd/lib/common/entropy_common.c @@ -1,6 +1,6 @@ /* ****************************************************************** * Common functions of New Generation Entropy library - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * * You can contact the author at : * - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy @@ -19,8 +19,8 @@ #include "error_private.h" /* ERR_*, ERROR */ #define FSE_STATIC_LINKING_ONLY /* FSE_MIN_TABLELOG */ #include "fse.h" -#define HUF_STATIC_LINKING_ONLY /* HUF_TABLELOG_ABSOLUTEMAX */ #include "huf.h" +#include "bits.h" /* ZSDT_highbit32, ZSTD_countTrailingZeros32 */ /*=== Version ===*/ @@ -38,34 +38,6 @@ const char* HUF_getErrorName(size_t code) { return ERR_getErrorName(code); } /*-************************************************************** * FSE NCount encoding-decoding ****************************************************************/ -static U32 FSE_ctz(U32 val) -{ - assert(val != 0); - { -# if defined(_MSC_VER) /* Visual */ - if (val != 0) { - unsigned long r; - _BitScanForward(&r, val); - return (unsigned)r; - } else { - /* Should not reach this code path */ - __assume(0); - } -# elif defined(__GNUC__) && (__GNUC__ >= 3) /* GCC Intrinsic */ - return __builtin_ctz(val); -# elif defined(__ICCARM__) /* IAR Intrinsic */ - return __CTZ(val); -# else /* Software version */ - U32 count = 0; - while ((val & 1) == 0) { - val >>= 1; - ++count; - } - return count; -# endif - } -} - FORCE_INLINE_TEMPLATE size_t FSE_readNCount_body(short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr, const void* headerBuffer, size_t hbSize) @@ -113,7 +85,7 @@ size_t FSE_readNCount_body(short* normalizedCounter, unsigned* maxSVPtr, unsigne * repeat. * Avoid UB by setting the high bit to 1. */ - int repeats = FSE_ctz(~bitStream | 0x80000000) >> 1; + int repeats = ZSTD_countTrailingZeros32(~bitStream | 0x80000000) >> 1; while (repeats >= 12) { charnum += 3 * 12; if (LIKELY(ip <= iend-7)) { @@ -124,7 +96,7 @@ size_t FSE_readNCount_body(short* normalizedCounter, unsigned* maxSVPtr, unsigne ip = iend - 4; } bitStream = MEM_readLE32(ip) >> bitCount; - repeats = FSE_ctz(~bitStream | 0x80000000) >> 1; + repeats = ZSTD_countTrailingZeros32(~bitStream | 0x80000000) >> 1; } charnum += 3 * repeats; bitStream >>= 2 * repeats; @@ -189,7 +161,7 @@ size_t FSE_readNCount_body(short* normalizedCounter, unsigned* maxSVPtr, unsigne * know that threshold > 1. 
*/ if (remaining <= 1) break; - nbBits = BIT_highbit32(remaining) + 1; + nbBits = ZSTD_highbit32(remaining) + 1; threshold = 1 << (nbBits - 1); } if (charnum >= maxSV1) break; @@ -264,7 +236,7 @@ size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats, const void* src, size_t srcSize) { U32 wksp[HUF_READ_STATS_WORKSPACE_SIZE_U32]; - return HUF_readStats_wksp(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, wksp, sizeof(wksp), /* bmi2 */ 0); + return HUF_readStats_wksp(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, wksp, sizeof(wksp), /* flags */ 0); } FORCE_INLINE_TEMPLATE size_t @@ -312,14 +284,14 @@ HUF_readStats_body(BYTE* huffWeight, size_t hwSize, U32* rankStats, if (weightTotal == 0) return ERROR(corruption_detected); /* get last non-null symbol weight (implied, total must be 2^n) */ - { U32 const tableLog = BIT_highbit32(weightTotal) + 1; + { U32 const tableLog = ZSTD_highbit32(weightTotal) + 1; if (tableLog > HUF_TABLELOG_MAX) return ERROR(corruption_detected); *tableLogPtr = tableLog; /* determine last weight */ { U32 const total = 1 << tableLog; U32 const rest = total - weightTotal; - U32 const verif = 1 << BIT_highbit32(rest); - U32 const lastWeight = BIT_highbit32(rest) + 1; + U32 const verif = 1 << ZSTD_highbit32(rest); + U32 const lastWeight = ZSTD_highbit32(rest) + 1; if (verif != rest) return ERROR(corruption_detected); /* last value must be a clean power of 2 */ huffWeight[oSize] = (BYTE)lastWeight; rankStats[lastWeight]++; @@ -356,13 +328,13 @@ size_t HUF_readStats_wksp(BYTE* huffWeight, size_t hwSize, U32* rankStats, U32* nbSymbolsPtr, U32* tableLogPtr, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, - int bmi2) + int flags) { #if DYNAMIC_BMI2 - if (bmi2) { + if (flags & HUF_flags_bmi2) { return HUF_readStats_body_bmi2(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize); } #endif - (void)bmi2; + (void)flags; return HUF_readStats_body_default(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize); } diff --git a/contrib/libs/zstd/lib/common/error_private.c b/contrib/libs/zstd/lib/common/error_private.c index 6d1135f8c3..075fc5ef42 100644 --- a/contrib/libs/zstd/lib/common/error_private.c +++ b/contrib/libs/zstd/lib/common/error_private.c @@ -1,5 +1,5 @@ /* - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. 
* * This source code is licensed under both the BSD-style license (found in the @@ -27,9 +27,11 @@ const char* ERR_getErrorString(ERR_enum code) case PREFIX(version_unsupported): return "Version not supported"; case PREFIX(frameParameter_unsupported): return "Unsupported frame parameter"; case PREFIX(frameParameter_windowTooLarge): return "Frame requires too much memory for decoding"; - case PREFIX(corruption_detected): return "Corrupted block detected"; + case PREFIX(corruption_detected): return "Data corruption detected"; case PREFIX(checksum_wrong): return "Restored data doesn't match checksum"; + case PREFIX(literals_headerWrong): return "Header of Literals' block doesn't respect format specification"; case PREFIX(parameter_unsupported): return "Unsupported parameter"; + case PREFIX(parameter_combination_unsupported): return "Unsupported combination of parameters"; case PREFIX(parameter_outOfBound): return "Parameter is out of bound"; case PREFIX(init_missing): return "Context should be init first"; case PREFIX(memory_allocation): return "Allocation error : not enough memory"; @@ -38,17 +40,22 @@ const char* ERR_getErrorString(ERR_enum code) case PREFIX(tableLog_tooLarge): return "tableLog requires too much memory : unsupported"; case PREFIX(maxSymbolValue_tooLarge): return "Unsupported max Symbol Value : too large"; case PREFIX(maxSymbolValue_tooSmall): return "Specified maxSymbolValue is too small"; + case PREFIX(stabilityCondition_notRespected): return "pledged buffer stability condition is not respected"; case PREFIX(dictionary_corrupted): return "Dictionary is corrupted"; case PREFIX(dictionary_wrong): return "Dictionary mismatch"; case PREFIX(dictionaryCreation_failed): return "Cannot create Dictionary from provided samples"; case PREFIX(dstSize_tooSmall): return "Destination buffer is too small"; case PREFIX(srcSize_wrong): return "Src size is incorrect"; case PREFIX(dstBuffer_null): return "Operation on NULL destination buffer"; + case PREFIX(noForwardProgress_destFull): return "Operation made no progress over multiple calls, due to output buffer being full"; + case PREFIX(noForwardProgress_inputEmpty): return "Operation made no progress over multiple calls, due to input being empty"; /* following error codes are not stable and may be removed or changed in a future version */ case PREFIX(frameIndex_tooLarge): return "Frame index is too large"; case PREFIX(seekableIO): return "An I/O error occurred when reading/seeking"; case PREFIX(dstBuffer_wrong): return "Destination buffer is wrong"; case PREFIX(srcBuffer_wrong): return "Source buffer is wrong"; + case PREFIX(sequenceProducer_failed): return "Block-level external sequence producer returned an error code"; + case PREFIX(externalSequences_invalid): return "External sequences are not valid"; case PREFIX(maxCode): default: return notErrorCode; } diff --git a/contrib/libs/zstd/lib/common/error_private.h b/contrib/libs/zstd/lib/common/error_private.h index 007d81066a..325daad404 100644 --- a/contrib/libs/zstd/lib/common/error_private.h +++ b/contrib/libs/zstd/lib/common/error_private.h @@ -1,5 +1,5 @@ /* - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. 
* * This source code is licensed under both the BSD-style license (found in the diff --git a/contrib/libs/zstd/lib/common/fse.h b/contrib/libs/zstd/lib/common/fse.h index 714bfd3e7f..02a1f0bc53 100644 --- a/contrib/libs/zstd/lib/common/fse.h +++ b/contrib/libs/zstd/lib/common/fse.h @@ -1,7 +1,7 @@ /* ****************************************************************** * FSE : Finite State Entropy codec * Public Prototypes declaration - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * * You can contact the author at : * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy @@ -53,34 +53,6 @@ extern "C" { FSE_PUBLIC_API unsigned FSE_versionNumber(void); /**< library version number; to be used when checking dll version */ -/*-**************************************** -* FSE simple functions -******************************************/ -/*! FSE_compress() : - Compress content of buffer 'src', of size 'srcSize', into destination buffer 'dst'. - 'dst' buffer must be already allocated. Compression runs faster is dstCapacity >= FSE_compressBound(srcSize). - @return : size of compressed data (<= dstCapacity). - Special values : if return == 0, srcData is not compressible => Nothing is stored within dst !!! - if return == 1, srcData is a single byte symbol * srcSize times. Use RLE compression instead. - if FSE_isError(return), compression failed (more details using FSE_getErrorName()) -*/ -FSE_PUBLIC_API size_t FSE_compress(void* dst, size_t dstCapacity, - const void* src, size_t srcSize); - -/*! FSE_decompress(): - Decompress FSE data from buffer 'cSrc', of size 'cSrcSize', - into already allocated destination buffer 'dst', of size 'dstCapacity'. - @return : size of regenerated data (<= maxDstSize), - or an error code, which can be tested using FSE_isError() . - - ** Important ** : FSE_decompress() does not decompress non-compressible nor RLE data !!! - Why ? : making this distinction requires a header. - Header management is intentionally delegated to the user layer, which can better manage special cases. -*/ -FSE_PUBLIC_API size_t FSE_decompress(void* dst, size_t dstCapacity, - const void* cSrc, size_t cSrcSize); - - /*-***************************************** * Tool functions ******************************************/ @@ -92,20 +64,6 @@ FSE_PUBLIC_API const char* FSE_getErrorName(size_t code); /* provides error co /*-***************************************** -* FSE advanced functions -******************************************/ -/*! FSE_compress2() : - Same as FSE_compress(), but allows the selection of 'maxSymbolValue' and 'tableLog' - Both parameters can be defined as '0' to mean : use default value - @return : size of compressed data - Special values : if return == 0, srcData is not compressible => Nothing is stored within cSrc !!! - if return == 1, srcData is a single byte symbol * srcSize times. Use RLE compression. - if FSE_isError(return), it's an error code. -*/ -FSE_PUBLIC_API size_t FSE_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog); - - -/*-***************************************** * FSE detailed API ******************************************/ /*! @@ -164,8 +122,6 @@ FSE_PUBLIC_API size_t FSE_writeNCount (void* buffer, size_t bufferSize, /*! Constructor and Destructor of FSE_CTable. Note that FSE_CTable size depends on 'tableLog' and 'maxSymbolValue' */ typedef unsigned FSE_CTable; /* don't allocate that. 
It's only meant to be more restrictive than void* */ -FSE_PUBLIC_API FSE_CTable* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog); -FSE_PUBLIC_API void FSE_freeCTable (FSE_CTable* ct); /*! FSE_buildCTable(): Builds `ct`, which must be already allocated, using FSE_createCTable(). @@ -241,23 +197,7 @@ FSE_PUBLIC_API size_t FSE_readNCount_bmi2(short* normalizedCounter, unsigned* maxSymbolValuePtr, unsigned* tableLogPtr, const void* rBuffer, size_t rBuffSize, int bmi2); -/*! Constructor and Destructor of FSE_DTable. - Note that its size depends on 'tableLog' */ typedef unsigned FSE_DTable; /* don't allocate that. It's just a way to be more restrictive than void* */ -FSE_PUBLIC_API FSE_DTable* FSE_createDTable(unsigned tableLog); -FSE_PUBLIC_API void FSE_freeDTable(FSE_DTable* dt); - -/*! FSE_buildDTable(): - Builds 'dt', which must be already allocated, using FSE_createDTable(). - return : 0, or an errorCode, which can be tested using FSE_isError() */ -FSE_PUBLIC_API size_t FSE_buildDTable (FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog); - -/*! FSE_decompress_usingDTable(): - Decompress compressed source `cSrc` of size `cSrcSize` using `dt` - into `dst` which must be already allocated. - @return : size of regenerated data (necessarily <= `dstCapacity`), - or an errorCode, which can be tested using FSE_isError() */ -FSE_PUBLIC_API size_t FSE_decompress_usingDTable(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, const FSE_DTable* dt); /*! Tutorial : @@ -320,16 +260,6 @@ If there is an error, the function will return an error code, which can be teste unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus); /**< same as FSE_optimalTableLog(), which used `minus==2` */ -/* FSE_compress_wksp() : - * Same as FSE_compress2(), but using an externally allocated scratch buffer (`workSpace`). - * FSE_COMPRESS_WKSP_SIZE_U32() provides the minimum size required for `workSpace` as a table of FSE_CTable. - */ -#define FSE_COMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) ( FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) + ((maxTableLog > 12) ? 
(1 << (maxTableLog - 2)) : 1024) ) -size_t FSE_compress_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize); - -size_t FSE_buildCTable_raw (FSE_CTable* ct, unsigned nbBits); -/**< build a fake FSE_CTable, designed for a flat distribution, where each symbol uses nbBits */ - size_t FSE_buildCTable_rle (FSE_CTable* ct, unsigned char symbolValue); /**< build a fake FSE_CTable, designed to compress always the same symbolValue */ @@ -347,19 +277,11 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct, const short* normalizedCounter, unsi FSE_PUBLIC_API size_t FSE_buildDTable_wksp(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize); /**< Same as FSE_buildDTable(), using an externally allocated `workspace` produced with `FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxSymbolValue)` */ -size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits); -/**< build a fake FSE_DTable, designed to read a flat distribution where each symbol uses nbBits */ - -size_t FSE_buildDTable_rle (FSE_DTable* dt, unsigned char symbolValue); -/**< build a fake FSE_DTable, designed to always generate the same symbolValue */ - -#define FSE_DECOMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) (FSE_DTABLE_SIZE_U32(maxTableLog) + FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) + (FSE_MAX_SYMBOL_VALUE + 1) / 2 + 1) +#define FSE_DECOMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) (FSE_DTABLE_SIZE_U32(maxTableLog) + 1 + FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) + (FSE_MAX_SYMBOL_VALUE + 1) / 2 + 1) #define FSE_DECOMPRESS_WKSP_SIZE(maxTableLog, maxSymbolValue) (FSE_DECOMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) * sizeof(unsigned)) -size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize); -/**< same as FSE_decompress(), using an externally allocated `workSpace` produced with `FSE_DECOMPRESS_WKSP_SIZE_U32(maxLog, maxSymbolValue)` */ - size_t FSE_decompress_wksp_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize, int bmi2); -/**< Same as FSE_decompress_wksp() but with dynamic BMI2 support. Pass 1 if your CPU supports BMI2 or 0 if it doesn't. */ +/**< same as FSE_decompress(), using an externally allocated `workSpace` produced with `FSE_DECOMPRESS_WKSP_SIZE_U32(maxLog, maxSymbolValue)`. + * Set bmi2 to 1 if your CPU supports BMI2 or 0 if it doesn't */ typedef enum { FSE_repeat_none, /**< Cannot use the previous table */ @@ -555,7 +477,7 @@ MEM_STATIC void FSE_flushCState(BIT_CStream_t* bitC, const FSE_CState_t* statePt /* FSE_getMaxNbBits() : * Approximate maximum cost of a symbol, in bits. - * Fractional get rounded up (i.e : a symbol with a normalized frequency of 3 gives the same result as a frequency of 2) + * Fractional get rounded up (i.e. 
a symbol with a normalized frequency of 3 gives the same result as a frequency of 2) * note 1 : assume symbolValue is valid (<= maxSymbolValue) * note 2 : if freq[symbolValue]==0, @return a fake cost of tableLog+1 bits */ MEM_STATIC U32 FSE_getMaxNbBits(const void* symbolTTPtr, U32 symbolValue) diff --git a/contrib/libs/zstd/lib/common/fse_decompress.c b/contrib/libs/zstd/lib/common/fse_decompress.c index a5a358015f..1e1c9f92d6 100644 --- a/contrib/libs/zstd/lib/common/fse_decompress.c +++ b/contrib/libs/zstd/lib/common/fse_decompress.c @@ -1,6 +1,6 @@ /* ****************************************************************** * FSE : Finite State Entropy decoder - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * * You can contact the author at : * - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy @@ -24,6 +24,7 @@ #include "error_private.h" #define ZSTD_DEPS_NEED_MALLOC #include "zstd_deps.h" +#include "bits.h" /* ZSTD_highbit32 */ /* ************************************************************** @@ -55,19 +56,6 @@ #define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y) #define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y) - -/* Function templates */ -FSE_DTable* FSE_createDTable (unsigned tableLog) -{ - if (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) tableLog = FSE_TABLELOG_ABSOLUTE_MAX; - return (FSE_DTable*)ZSTD_malloc( FSE_DTABLE_SIZE_U32(tableLog) * sizeof (U32) ); -} - -void FSE_freeDTable (FSE_DTable* dt) -{ - ZSTD_free(dt); -} - static size_t FSE_buildDTable_internal(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize) { void* const tdPtr = dt+1; /* because *dt is unsigned, 32-bits aligned on 32-bits */ @@ -127,10 +115,10 @@ static size_t FSE_buildDTable_internal(FSE_DTable* dt, const short* normalizedCo } } /* Now we spread those positions across the table. - * The benefit of doing it in two stages is that we avoid the the + * The benefit of doing it in two stages is that we avoid the * variable size inner loop, which caused lots of branch misses. * Now we can run through all the positions without any branch misses. - * We unroll the loop twice, since that is what emperically worked best. + * We unroll the loop twice, since that is what empirically worked best. 
*/ { size_t position = 0; @@ -166,7 +154,7 @@ static size_t FSE_buildDTable_internal(FSE_DTable* dt, const short* normalizedCo for (u=0; u<tableSize; u++) { FSE_FUNCTION_TYPE const symbol = (FSE_FUNCTION_TYPE)(tableDecode[u].symbol); U32 const nextState = symbolNext[symbol]++; - tableDecode[u].nbBits = (BYTE) (tableLog - BIT_highbit32(nextState) ); + tableDecode[u].nbBits = (BYTE) (tableLog - ZSTD_highbit32(nextState) ); tableDecode[u].newState = (U16) ( (nextState << tableDecode[u].nbBits) - tableSize); } } @@ -184,49 +172,6 @@ size_t FSE_buildDTable_wksp(FSE_DTable* dt, const short* normalizedCounter, unsi /*-******************************************************* * Decompression (Byte symbols) *********************************************************/ -size_t FSE_buildDTable_rle (FSE_DTable* dt, BYTE symbolValue) -{ - void* ptr = dt; - FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr; - void* dPtr = dt + 1; - FSE_decode_t* const cell = (FSE_decode_t*)dPtr; - - DTableH->tableLog = 0; - DTableH->fastMode = 0; - - cell->newState = 0; - cell->symbol = symbolValue; - cell->nbBits = 0; - - return 0; -} - - -size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits) -{ - void* ptr = dt; - FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr; - void* dPtr = dt + 1; - FSE_decode_t* const dinfo = (FSE_decode_t*)dPtr; - const unsigned tableSize = 1 << nbBits; - const unsigned tableMask = tableSize - 1; - const unsigned maxSV1 = tableMask+1; - unsigned s; - - /* Sanity checks */ - if (nbBits < 1) return ERROR(GENERIC); /* min size */ - - /* Build Decoding Table */ - DTableH->tableLog = (U16)nbBits; - DTableH->fastMode = 1; - for (s=0; s<maxSV1; s++) { - dinfo[s].newState = 0; - dinfo[s].symbol = (BYTE)s; - dinfo[s].nbBits = (BYTE)nbBits; - } - - return 0; -} FORCE_INLINE_TEMPLATE size_t FSE_decompress_usingDTable_generic( void* dst, size_t maxDstSize, @@ -290,26 +235,6 @@ FORCE_INLINE_TEMPLATE size_t FSE_decompress_usingDTable_generic( return op-ostart; } - -size_t FSE_decompress_usingDTable(void* dst, size_t originalSize, - const void* cSrc, size_t cSrcSize, - const FSE_DTable* dt) -{ - const void* ptr = dt; - const FSE_DTableHeader* DTableH = (const FSE_DTableHeader*)ptr; - const U32 fastMode = DTableH->fastMode; - - /* select fast mode (static) */ - if (fastMode) return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 1); - return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 0); -} - - -size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize) -{ - return FSE_decompress_wksp_bmi2(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize, /* bmi2 */ 0); -} - typedef struct { short ncount[FSE_MAX_SYMBOL_VALUE + 1]; FSE_DTable dtable[1]; /* Dynamically sized */ @@ -342,7 +267,8 @@ FORCE_INLINE_TEMPLATE size_t FSE_decompress_wksp_body( } if (FSE_DECOMPRESS_WKSP_SIZE(tableLog, maxSymbolValue) > wkspSize) return ERROR(tableLog_tooLarge); - workSpace = wksp->dtable + FSE_DTABLE_SIZE_U32(tableLog); + assert(sizeof(*wksp) + FSE_DTABLE_SIZE(tableLog) <= wkspSize); + workSpace = (BYTE*)workSpace + sizeof(*wksp) + FSE_DTABLE_SIZE(tableLog); wkspSize -= sizeof(*wksp) + FSE_DTABLE_SIZE(tableLog); CHECK_F( FSE_buildDTable_internal(wksp->dtable, wksp->ncount, maxSymbolValue, tableLog, workSpace, wkspSize) ); @@ -382,22 +308,4 @@ size_t FSE_decompress_wksp_bmi2(void* dst, size_t dstCapacity, const void* cSrc, return FSE_decompress_wksp_body_default(dst, 
dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize); } - -typedef FSE_DTable DTable_max_t[FSE_DTABLE_SIZE_U32(FSE_MAX_TABLELOG)]; - -#ifndef ZSTD_NO_UNUSED_FUNCTIONS -size_t FSE_buildDTable(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog) { - U32 wksp[FSE_BUILD_DTABLE_WKSP_SIZE_U32(FSE_TABLELOG_ABSOLUTE_MAX, FSE_MAX_SYMBOL_VALUE)]; - return FSE_buildDTable_wksp(dt, normalizedCounter, maxSymbolValue, tableLog, wksp, sizeof(wksp)); -} - -size_t FSE_decompress(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize) -{ - /* Static analyzer seems unable to understand this table will be properly initialized later */ - U32 wksp[FSE_DECOMPRESS_WKSP_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)]; - return FSE_decompress_wksp(dst, dstCapacity, cSrc, cSrcSize, FSE_MAX_TABLELOG, wksp, sizeof(wksp)); -} -#endif - - #endif /* FSE_COMMONDEFS_ONLY */ diff --git a/contrib/libs/zstd/lib/common/huf.h b/contrib/libs/zstd/lib/common/huf.h index 85518481ec..73d1ee5654 100644 --- a/contrib/libs/zstd/lib/common/huf.h +++ b/contrib/libs/zstd/lib/common/huf.h @@ -1,7 +1,7 @@ /* ****************************************************************** * huff0 huffman codec, * part of Finite State Entropy library - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * * You can contact the author at : * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy @@ -21,99 +21,22 @@ extern "C" { /* *** Dependencies *** */ #include "zstd_deps.h" /* size_t */ - - -/* *** library symbols visibility *** */ -/* Note : when linking with -fvisibility=hidden on gcc, or by default on Visual, - * HUF symbols remain "private" (internal symbols for library only). - * Set macro FSE_DLL_EXPORT to 1 if you want HUF symbols visible on DLL interface */ -#if defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1) && defined(__GNUC__) && (__GNUC__ >= 4) -# define HUF_PUBLIC_API __attribute__ ((visibility ("default"))) -#elif defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1) /* Visual expected */ -# define HUF_PUBLIC_API __declspec(dllexport) -#elif defined(FSE_DLL_IMPORT) && (FSE_DLL_IMPORT==1) -# define HUF_PUBLIC_API __declspec(dllimport) /* not required, just to generate faster code (saves a function pointer load from IAT and an indirect jump) */ -#else -# define HUF_PUBLIC_API -#endif - - -/* ========================== */ -/* *** simple functions *** */ -/* ========================== */ - -/** HUF_compress() : - * Compress content from buffer 'src', of size 'srcSize', into buffer 'dst'. - * 'dst' buffer must be already allocated. - * Compression runs faster if `dstCapacity` >= HUF_compressBound(srcSize). - * `srcSize` must be <= `HUF_BLOCKSIZE_MAX` == 128 KB. - * @return : size of compressed data (<= `dstCapacity`). - * Special values : if return == 0, srcData is not compressible => Nothing is stored within dst !!! - * if HUF_isError(return), compression failed (more details using HUF_getErrorName()) - */ -HUF_PUBLIC_API size_t HUF_compress(void* dst, size_t dstCapacity, - const void* src, size_t srcSize); - -/** HUF_decompress() : - * Decompress HUF data from buffer 'cSrc', of size 'cSrcSize', - * into already allocated buffer 'dst', of minimum size 'dstSize'. - * `originalSize` : **must** be the ***exact*** size of original (uncompressed) data. - * Note : in contrast with FSE, HUF_decompress can regenerate - * RLE (cSrcSize==1) and uncompressed (cSrcSize==dstSize) data, - * because it knows size to regenerate (originalSize). 
- * @return : size of regenerated data (== originalSize), - * or an error code, which can be tested using HUF_isError() - */ -HUF_PUBLIC_API size_t HUF_decompress(void* dst, size_t originalSize, - const void* cSrc, size_t cSrcSize); +#include "mem.h" /* U32 */ +#define FSE_STATIC_LINKING_ONLY +#include "fse.h" /* *** Tool functions *** */ -#define HUF_BLOCKSIZE_MAX (128 * 1024) /**< maximum input size for a single block compressed with HUF_compress */ -HUF_PUBLIC_API size_t HUF_compressBound(size_t size); /**< maximum compressed size (worst case) */ +#define HUF_BLOCKSIZE_MAX (128 * 1024) /**< maximum input size for a single block compressed with HUF_compress */ +size_t HUF_compressBound(size_t size); /**< maximum compressed size (worst case) */ /* Error Management */ -HUF_PUBLIC_API unsigned HUF_isError(size_t code); /**< tells if a return value is an error code */ -HUF_PUBLIC_API const char* HUF_getErrorName(size_t code); /**< provides error code string (useful for debugging) */ +unsigned HUF_isError(size_t code); /**< tells if a return value is an error code */ +const char* HUF_getErrorName(size_t code); /**< provides error code string (useful for debugging) */ -/* *** Advanced function *** */ - -/** HUF_compress2() : - * Same as HUF_compress(), but offers control over `maxSymbolValue` and `tableLog`. - * `maxSymbolValue` must be <= HUF_SYMBOLVALUE_MAX . - * `tableLog` must be `<= HUF_TABLELOG_MAX` . */ -HUF_PUBLIC_API size_t HUF_compress2 (void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - unsigned maxSymbolValue, unsigned tableLog); - -/** HUF_compress4X_wksp() : - * Same as HUF_compress2(), but uses externally allocated `workSpace`. - * `workspace` must be at least as large as HUF_WORKSPACE_SIZE */ #define HUF_WORKSPACE_SIZE ((8 << 10) + 512 /* sorting scratch space */) #define HUF_WORKSPACE_SIZE_U64 (HUF_WORKSPACE_SIZE / sizeof(U64)) -HUF_PUBLIC_API size_t HUF_compress4X_wksp (void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - unsigned maxSymbolValue, unsigned tableLog, - void* workSpace, size_t wkspSize); - -#endif /* HUF_H_298734234 */ - -/* ****************************************************************** - * WARNING !! - * The following section contains advanced and experimental definitions - * which shall never be used in the context of a dynamic library, - * because they are not guaranteed to remain stable in the future. - * Only consider them in association with static linking. 
- * *****************************************************************/ -#if defined(HUF_STATIC_LINKING_ONLY) && !defined(HUF_H_HUF_STATIC_LINKING_ONLY) -#define HUF_H_HUF_STATIC_LINKING_ONLY - -/* *** Dependencies *** */ -#include "mem.h" /* U32 */ -#define FSE_STATIC_LINKING_ONLY -#include "fse.h" - /* *** Constants *** */ #define HUF_TABLELOG_MAX 12 /* max runtime value of tableLog (due to static allocation); can be modified up to HUF_TABLELOG_ABSOLUTEMAX */ @@ -154,25 +77,49 @@ typedef U32 HUF_DTable; /* **************************************** * Advanced decompression functions ******************************************/ -size_t HUF_decompress4X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */ -#ifndef HUF_FORCE_DECOMPRESS_X1 -size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */ -#endif -size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< decodes RLE and uncompressed */ -size_t HUF_decompress4X_hufOnly(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< considers RLE and uncompressed as errors */ -size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< considers RLE and uncompressed as errors */ -size_t HUF_decompress4X1_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */ -size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< single-symbol decoder */ -#ifndef HUF_FORCE_DECOMPRESS_X1 -size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */ -size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< double-symbols decoder */ -#endif +/** + * Huffman flags bitset. + * For all flags, 0 is the default value. + */ +typedef enum { + /** + * If compiled with DYNAMIC_BMI2: Set flag only if the CPU supports BMI2 at runtime. + * Otherwise: Ignored. + */ + HUF_flags_bmi2 = (1 << 0), + /** + * If set: Test possible table depths to find the one that produces the smallest header + encoded size. + * If unset: Use heuristic to find the table depth. + */ + HUF_flags_optimalDepth = (1 << 1), + /** + * If set: If the previous table can encode the input, always reuse the previous table. + * If unset: If the previous table can encode the input, reuse the previous table if it results in a smaller output. + */ + HUF_flags_preferRepeat = (1 << 2), + /** + * If set: Sample the input and check if the sample is uncompressible, if it is then don't attempt to compress. + * If unset: Always histogram the entire input. + */ + HUF_flags_suspectUncompressible = (1 << 3), + /** + * If set: Don't use assembly implementations + * If unset: Allow using assembly implementations + */ + HUF_flags_disableAsm = (1 << 4), + /** + * If set: Don't use the fast decoding loop, always use the fallback decoding loop. + * If unset: Use the fast decoding loop when possible. + */ + HUF_flags_disableFast = (1 << 5) +} HUF_flags_e; /* **************************************** * HUF detailed API * ****************************************/ +#define HUF_OPTIMAL_DEPTH_THRESHOLD ZSTD_btultra /*! 
HUF_compress() does the following: * 1. count symbol occurrence from source[] into table count[] using FSE_count() (exposed within "fse.h") @@ -185,12 +132,12 @@ size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, * For example, it's possible to compress several blocks using the same 'CTable', * or to save and regenerate 'CTable' using external methods. */ -unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue); -size_t HUF_buildCTable (HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue, unsigned maxNbBits); /* @return : maxNbBits; CTable and count can overlap. In which case, CTable will overwrite count content */ -size_t HUF_writeCTable (void* dst, size_t maxDstSize, const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog); +unsigned HUF_minTableLog(unsigned symbolCardinality); +unsigned HUF_cardinality(const unsigned* count, unsigned maxSymbolValue); +unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, void* workSpace, + size_t wkspSize, HUF_CElt* table, const unsigned* count, int flags); /* table is used as scratch space for building and testing tables, not a return value */ size_t HUF_writeCTable_wksp(void* dst, size_t maxDstSize, const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog, void* workspace, size_t workspaceSize); -size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable); -size_t HUF_compress4X_usingCTable_bmi2(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int bmi2); +size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int flags); size_t HUF_estimateCompressedSize(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue); int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue); @@ -199,6 +146,7 @@ typedef enum { HUF_repeat_check, /**< Can use the previous table but it must be checked. Note : The previous table must have been constructed by HUF_compress{1, 4}X_repeat */ HUF_repeat_valid /**< Can use the previous table and it is assumed to be valid */ } HUF_repeat; + /** HUF_compress4X_repeat() : * Same as HUF_compress4X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none. * If it uses hufTable it does not modify hufTable or repeat. @@ -209,13 +157,13 @@ size_t HUF_compress4X_repeat(void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize, /**< `workSpace` must be aligned on 4-bytes boundaries, `wkspSize` must be >= HUF_WORKSPACE_SIZE */ - HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2, unsigned suspectUncompressible); + HUF_CElt* hufTable, HUF_repeat* repeat, int flags); /** HUF_buildCTable_wksp() : * Same as HUF_buildCTable(), but using externally allocated scratch buffer. * `workSpace` must be aligned on 4-bytes boundaries, and its size must be >= HUF_CTABLE_WORKSPACE_SIZE. 
*/ -#define HUF_CTABLE_WORKSPACE_SIZE_U32 (2*HUF_SYMBOLVALUE_MAX +1 +1) +#define HUF_CTABLE_WORKSPACE_SIZE_U32 ((4 * (HUF_SYMBOLVALUE_MAX + 1)) + 192) #define HUF_CTABLE_WORKSPACE_SIZE (HUF_CTABLE_WORKSPACE_SIZE_U32 * sizeof(unsigned)) size_t HUF_buildCTable_wksp (HUF_CElt* tree, const unsigned* count, U32 maxSymbolValue, U32 maxNbBits, @@ -241,7 +189,7 @@ size_t HUF_readStats_wksp(BYTE* huffWeight, size_t hwSize, U32* rankStats, U32* nbSymbolsPtr, U32* tableLogPtr, const void* src, size_t srcSize, void* workspace, size_t wkspSize, - int bmi2); + int flags); /** HUF_readCTable() : * Loading a CTable saved with HUF_writeCTable() */ @@ -279,32 +227,12 @@ U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize); #define HUF_DECOMPRESS_WORKSPACE_SIZE ((2 << 10) + (1 << 9)) #define HUF_DECOMPRESS_WORKSPACE_SIZE_U32 (HUF_DECOMPRESS_WORKSPACE_SIZE / sizeof(U32)) -#ifndef HUF_FORCE_DECOMPRESS_X2 -size_t HUF_readDTableX1 (HUF_DTable* DTable, const void* src, size_t srcSize); -size_t HUF_readDTableX1_wksp (HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize); -#endif -#ifndef HUF_FORCE_DECOMPRESS_X1 -size_t HUF_readDTableX2 (HUF_DTable* DTable, const void* src, size_t srcSize); -size_t HUF_readDTableX2_wksp (HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize); -#endif - -size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); -#ifndef HUF_FORCE_DECOMPRESS_X2 -size_t HUF_decompress4X1_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); -#endif -#ifndef HUF_FORCE_DECOMPRESS_X1 -size_t HUF_decompress4X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); -#endif - /* ====================== */ /* single stream variants */ /* ====================== */ -size_t HUF_compress1X (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog); -size_t HUF_compress1X_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize); /**< `workSpace` must be a table of at least HUF_WORKSPACE_SIZE_U64 U64 */ -size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable); -size_t HUF_compress1X_usingCTable_bmi2(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int bmi2); +size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int flags); /** HUF_compress1X_repeat() : * Same as HUF_compress1X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none. * If it uses hufTable it does not modify hufTable or repeat. 
@@ -315,49 +243,30 @@ size_t HUF_compress1X_repeat(void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize, /**< `workSpace` must be aligned on 4-bytes boundaries, `wkspSize` must be >= HUF_WORKSPACE_SIZE */ - HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2, unsigned suspectUncompressible); - -size_t HUF_decompress1X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* single-symbol decoder */ -#ifndef HUF_FORCE_DECOMPRESS_X1 -size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* double-symbol decoder */ -#endif - -size_t HUF_decompress1X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); -size_t HUF_decompress1X_DCtx_wksp (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); -#ifndef HUF_FORCE_DECOMPRESS_X2 -size_t HUF_decompress1X1_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */ -size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< single-symbol decoder */ -#endif -#ifndef HUF_FORCE_DECOMPRESS_X1 -size_t HUF_decompress1X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */ -size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< double-symbols decoder */ -#endif + HUF_CElt* hufTable, HUF_repeat* repeat, int flags); -size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); /**< automatic selection of sing or double symbol decoder, based on DTable */ -#ifndef HUF_FORCE_DECOMPRESS_X2 -size_t HUF_decompress1X1_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); -#endif +size_t HUF_decompress1X_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int flags); #ifndef HUF_FORCE_DECOMPRESS_X1 -size_t HUF_decompress1X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); +size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int flags); /**< double-symbols decoder */ #endif /* BMI2 variants. * If the CPU has BMI2 support, pass bmi2=1, otherwise pass bmi2=0. 
*/ -size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2); +size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int flags); #ifndef HUF_FORCE_DECOMPRESS_X2 -size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2); +size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int flags); #endif -size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2); -size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2); +size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int flags); +size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int flags); #ifndef HUF_FORCE_DECOMPRESS_X2 -size_t HUF_readDTableX1_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int bmi2); +size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int flags); #endif #ifndef HUF_FORCE_DECOMPRESS_X1 -size_t HUF_readDTableX2_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int bmi2); +size_t HUF_readDTableX2_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int flags); #endif -#endif /* HUF_STATIC_LINKING_ONLY */ +#endif /* HUF_H_298734234 */ #if defined (__cplusplus) } diff --git a/contrib/libs/zstd/lib/common/mem.h b/contrib/libs/zstd/lib/common/mem.h index 85581c3847..98dd47a047 100644 --- a/contrib/libs/zstd/lib/common/mem.h +++ b/contrib/libs/zstd/lib/common/mem.h @@ -1,5 +1,5 @@ /* - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -133,21 +133,15 @@ MEM_STATIC size_t MEM_swapST(size_t in); /*-************************************************************** * Memory I/O Implementation *****************************************************************/ -/* MEM_FORCE_MEMORY_ACCESS : - * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable. - * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal. - * The below switch allow to select different access method for improved performance. - * Method 0 (default) : use `memcpy()`. Safe and portable. - * Method 1 : `__packed` statement. It depends on compiler extension (i.e., not portable). - * This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`. +/* MEM_FORCE_MEMORY_ACCESS : For accessing unaligned memory: + * Method 0 : always use `memcpy()`. Safe and portable. + * Method 1 : Use compiler extension to set unaligned access. * Method 2 : direct access. This method is portable but violate C standard. * It can generate buggy code on targets depending on alignment. 
- * In some circumstances, it's the only known way to get the most performance (i.e. GCC + ARMv6) - * See http://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details. - * Prefer these methods in priority order (0 > 1 > 2) + * Default : method 1 if supported, else method 0 */ #ifndef MEM_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */ -# if defined(__INTEL_COMPILER) || defined(__GNUC__) || defined(__ICCARM__) +# ifdef __GNUC__ # define MEM_FORCE_MEMORY_ACCESS 1 # endif #endif @@ -190,30 +184,19 @@ MEM_STATIC void MEM_write64(void* memPtr, U64 value) { *(U64*)memPtr = value; } #elif defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==1) -/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */ -/* currently only defined for gcc and icc */ -#if defined(_MSC_VER) || (defined(__INTEL_COMPILER) && defined(WIN32)) - __pragma( pack(push, 1) ) - typedef struct { U16 v; } unalign16; - typedef struct { U32 v; } unalign32; - typedef struct { U64 v; } unalign64; - typedef struct { size_t v; } unalignArch; - __pragma( pack(pop) ) -#else - typedef struct { U16 v; } __attribute__((packed)) unalign16; - typedef struct { U32 v; } __attribute__((packed)) unalign32; - typedef struct { U64 v; } __attribute__((packed)) unalign64; - typedef struct { size_t v; } __attribute__((packed)) unalignArch; -#endif +typedef __attribute__((aligned(1))) U16 unalign16; +typedef __attribute__((aligned(1))) U32 unalign32; +typedef __attribute__((aligned(1))) U64 unalign64; +typedef __attribute__((aligned(1))) size_t unalignArch; -MEM_STATIC U16 MEM_read16(const void* ptr) { return ((const unalign16*)ptr)->v; } -MEM_STATIC U32 MEM_read32(const void* ptr) { return ((const unalign32*)ptr)->v; } -MEM_STATIC U64 MEM_read64(const void* ptr) { return ((const unalign64*)ptr)->v; } -MEM_STATIC size_t MEM_readST(const void* ptr) { return ((const unalignArch*)ptr)->v; } +MEM_STATIC U16 MEM_read16(const void* ptr) { return *(const unalign16*)ptr; } +MEM_STATIC U32 MEM_read32(const void* ptr) { return *(const unalign32*)ptr; } +MEM_STATIC U64 MEM_read64(const void* ptr) { return *(const unalign64*)ptr; } +MEM_STATIC size_t MEM_readST(const void* ptr) { return *(const unalignArch*)ptr; } -MEM_STATIC void MEM_write16(void* memPtr, U16 value) { ((unalign16*)memPtr)->v = value; } -MEM_STATIC void MEM_write32(void* memPtr, U32 value) { ((unalign32*)memPtr)->v = value; } -MEM_STATIC void MEM_write64(void* memPtr, U64 value) { ((unalign64*)memPtr)->v = value; } +MEM_STATIC void MEM_write16(void* memPtr, U16 value) { *(unalign16*)memPtr = value; } +MEM_STATIC void MEM_write32(void* memPtr, U32 value) { *(unalign32*)memPtr = value; } +MEM_STATIC void MEM_write64(void* memPtr, U64 value) { *(unalign64*)memPtr = value; } #else @@ -257,6 +240,14 @@ MEM_STATIC void MEM_write64(void* memPtr, U64 value) #endif /* MEM_FORCE_MEMORY_ACCESS */ +MEM_STATIC U32 MEM_swap32_fallback(U32 in) +{ + return ((in << 24) & 0xff000000 ) | + ((in << 8) & 0x00ff0000 ) | + ((in >> 8) & 0x0000ff00 ) | + ((in >> 24) & 0x000000ff ); +} + MEM_STATIC U32 MEM_swap32(U32 in) { #if defined(_MSC_VER) /* Visual Studio */ @@ -265,22 +256,13 @@ MEM_STATIC U32 MEM_swap32(U32 in) || (defined(__clang__) && __has_builtin(__builtin_bswap32)) return __builtin_bswap32(in); #else - return ((in << 24) & 0xff000000 ) | - ((in << 8) & 0x00ff0000 ) | - ((in >> 8) & 0x0000ff00 ) | - ((in >> 24) & 0x000000ff ); + return MEM_swap32_fallback(in); #endif } -MEM_STATIC U64 
MEM_swap64(U64 in) +MEM_STATIC U64 MEM_swap64_fallback(U64 in) { -#if defined(_MSC_VER) /* Visual Studio */ - return _byteswap_uint64(in); -#elif (defined (__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__ >= 403)) \ - || (defined(__clang__) && __has_builtin(__builtin_bswap64)) - return __builtin_bswap64(in); -#else - return ((in << 56) & 0xff00000000000000ULL) | + return ((in << 56) & 0xff00000000000000ULL) | ((in << 40) & 0x00ff000000000000ULL) | ((in << 24) & 0x0000ff0000000000ULL) | ((in << 8) & 0x000000ff00000000ULL) | @@ -288,6 +270,17 @@ MEM_STATIC U64 MEM_swap64(U64 in) ((in >> 24) & 0x0000000000ff0000ULL) | ((in >> 40) & 0x000000000000ff00ULL) | ((in >> 56) & 0x00000000000000ffULL); +} + +MEM_STATIC U64 MEM_swap64(U64 in) +{ +#if defined(_MSC_VER) /* Visual Studio */ + return _byteswap_uint64(in); +#elif (defined (__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__ >= 403)) \ + || (defined(__clang__) && __has_builtin(__builtin_bswap64)) + return __builtin_bswap64(in); +#else + return MEM_swap64_fallback(in); #endif } diff --git a/contrib/libs/zstd/lib/common/pool.c b/contrib/libs/zstd/lib/common/pool.c index 2e37cdd73c..f3d9d08547 100644 --- a/contrib/libs/zstd/lib/common/pool.c +++ b/contrib/libs/zstd/lib/common/pool.c @@ -1,5 +1,5 @@ /* - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -12,7 +12,7 @@ /* ====== Dependencies ======= */ #include "zstd_deps.h" /* size_t */ #include "debug.h" /* assert */ -#include "zstd_internal.h" /* ZSTD_customMalloc, ZSTD_customFree */ +#include "zstd_internal.h" /* ZSTD_customCalloc, ZSTD_customFree */ #include "pool.h" /* ====== Compiler specifics ====== */ @@ -96,9 +96,7 @@ static void* POOL_thread(void* opaque) { /* If the intended queue size was 0, signal after finishing job */ ZSTD_pthread_mutex_lock(&ctx->queueMutex); ctx->numThreadsBusy--; - if (ctx->queueSize == 1) { - ZSTD_pthread_cond_signal(&ctx->queuePushCond); - } + ZSTD_pthread_cond_signal(&ctx->queuePushCond); ZSTD_pthread_mutex_unlock(&ctx->queueMutex); } } /* for (;;) */ @@ -128,7 +126,7 @@ POOL_ctx* POOL_create_advanced(size_t numThreads, size_t queueSize, * empty and full queues. */ ctx->queueSize = queueSize + 1; - ctx->queue = (POOL_job*)ZSTD_customMalloc(ctx->queueSize * sizeof(POOL_job), customMem); + ctx->queue = (POOL_job*)ZSTD_customCalloc(ctx->queueSize * sizeof(POOL_job), customMem); ctx->queueHead = 0; ctx->queueTail = 0; ctx->numThreadsBusy = 0; @@ -142,7 +140,7 @@ POOL_ctx* POOL_create_advanced(size_t numThreads, size_t queueSize, } ctx->shutdown = 0; /* Allocate space for the thread handles */ - ctx->threads = (ZSTD_pthread_t*)ZSTD_customMalloc(numThreads * sizeof(ZSTD_pthread_t), customMem); + ctx->threads = (ZSTD_pthread_t*)ZSTD_customCalloc(numThreads * sizeof(ZSTD_pthread_t), customMem); ctx->threadCapacity = 0; ctx->customMem = customMem; /* Check for errors */ @@ -175,7 +173,7 @@ static void POOL_join(POOL_ctx* ctx) { /* Join all of the threads */ { size_t i; for (i = 0; i < ctx->threadCapacity; ++i) { - ZSTD_pthread_join(ctx->threads[i], NULL); /* note : could fail */ + ZSTD_pthread_join(ctx->threads[i]); /* note : could fail */ } } } @@ -190,6 +188,17 @@ void POOL_free(POOL_ctx *ctx) { ZSTD_customFree(ctx, ctx->customMem); } +/*! POOL_joinJobs() : + * Waits for all queued jobs to finish executing. 
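
In the mem.h hunks above, the portable shift-and-mask byte swaps are split out as MEM_swap32_fallback / MEM_swap64_fallback so that MEM_swap32 / MEM_swap64 only dispatch between the compiler builtin and the fallback, and the packed-struct unalign types become aligned(1) typedefs gated on __GNUC__. The sketch below only illustrates the two underlying patterns, a memcpy-based unaligned load (method 0) and use of the new fallback; the names load32_any_alignment and read_le32_sketch are illustrative, not part of mem.h, which already provides MEM_read32 / MEM_readLE32 for real use.

    #include <string.h>   /* memcpy */
    #include "mem.h"      /* U32, MEM_swap32_fallback */

    /* "Method 0" from the comment above: copy bytes of unknown alignment
     * with memcpy; optimizing compilers typically lower this to one load. */
    static U32 load32_any_alignment(const void* p)
    {
        U32 v;
        memcpy(&v, p, sizeof(v));
        return v;
    }

    /* Combined with the new fallback swap to read a little-endian field on
     * any host; __BYTE_ORDER__ / __ORDER_BIG_ENDIAN__ are GCC/Clang
     * predefined macros. */
    static U32 read_le32_sketch(const void* p)
    {
        U32 v = load32_any_alignment(p);
    #if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
        v = MEM_swap32_fallback(v);
    #endif
        return v;
    }
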
+ */ +void POOL_joinJobs(POOL_ctx* ctx) { + ZSTD_pthread_mutex_lock(&ctx->queueMutex); + while(!ctx->queueEmpty || ctx->numThreadsBusy > 0) { + ZSTD_pthread_cond_wait(&ctx->queuePushCond, &ctx->queueMutex); + } + ZSTD_pthread_mutex_unlock(&ctx->queueMutex); +} + void ZSTD_freeThreadPool (ZSTD_threadPool* pool) { POOL_free (pool); } @@ -211,7 +220,7 @@ static int POOL_resize_internal(POOL_ctx* ctx, size_t numThreads) return 0; } /* numThreads > threadCapacity */ - { ZSTD_pthread_t* const threadPool = (ZSTD_pthread_t*)ZSTD_customMalloc(numThreads * sizeof(ZSTD_pthread_t), ctx->customMem); + { ZSTD_pthread_t* const threadPool = (ZSTD_pthread_t*)ZSTD_customCalloc(numThreads * sizeof(ZSTD_pthread_t), ctx->customMem); if (!threadPool) return 1; /* replace existing thread pool */ ZSTD_memcpy(threadPool, ctx->threads, ctx->threadCapacity * sizeof(*threadPool)); @@ -262,7 +271,9 @@ static int isQueueFull(POOL_ctx const* ctx) { static void POOL_add_internal(POOL_ctx* ctx, POOL_function function, void *opaque) { - POOL_job const job = {function, opaque}; + POOL_job job; + job.function = function; + job.opaque = opaque; assert(ctx != NULL); if (ctx->shutdown) return; @@ -330,6 +341,11 @@ void POOL_free(POOL_ctx* ctx) { (void)ctx; } +void POOL_joinJobs(POOL_ctx* ctx){ + assert(!ctx || ctx == &g_poolCtx); + (void)ctx; +} + int POOL_resize(POOL_ctx* ctx, size_t numThreads) { (void)ctx; (void)numThreads; return 0; diff --git a/contrib/libs/zstd/lib/common/pool.h b/contrib/libs/zstd/lib/common/pool.h index 0ebde1805d..eb22ff509f 100644 --- a/contrib/libs/zstd/lib/common/pool.h +++ b/contrib/libs/zstd/lib/common/pool.h @@ -1,5 +1,5 @@ /* - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -38,6 +38,12 @@ POOL_ctx* POOL_create_advanced(size_t numThreads, size_t queueSize, */ void POOL_free(POOL_ctx* ctx); + +/*! POOL_joinJobs() : + * Waits for all queued jobs to finish executing. + */ +void POOL_joinJobs(POOL_ctx* ctx); + /*! POOL_resize() : * Expands or shrinks pool's number of threads. * This is more efficient than releasing + creating a new context, diff --git a/contrib/libs/zstd/lib/common/portability_macros.h b/contrib/libs/zstd/lib/common/portability_macros.h index 2143817f57..8fd6ea82d1 100644 --- a/contrib/libs/zstd/lib/common/portability_macros.h +++ b/contrib/libs/zstd/lib/common/portability_macros.h @@ -1,5 +1,5 @@ /* - * Copyright (c) Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -12,7 +12,7 @@ #define ZSTD_PORTABILITY_MACROS_H /** - * This header file contains macro defintions to support portability. + * This header file contains macro definitions to support portability. * This header is shared between C and ASM code, so it MUST only * contain macro definitions. It MUST not contain any C code. * @@ -88,7 +88,7 @@ #endif /** - * Only enable assembly for GNUC comptabile compilers, + * Only enable assembly for GNUC compatible compilers, * because other platforms may not support GAS assembly syntax. * * Only enable assembly for Linux / MacOS, other platforms may @@ -134,4 +134,23 @@ # define ZSTD_ENABLE_ASM_X86_64_BMI2 0 #endif +/* + * For x86 ELF targets, add .note.gnu.property section for Intel CET in + * assembly sources when CET is enabled. 
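
POOL_joinJobs() above lets a caller drain the pool, blocking until the queue is empty and no worker is busy, without destroying it the way POOL_free() does; the single-threaded build gets a no-op stub. A hedged usage sketch follows; POOL_create() and POOL_add() are assumed to keep their existing pool.h signatures, which this diff only shows indirectly, and do_work is a placeholder job.

    #include "pool.h"

    static void do_work(void* opaque) { (void)opaque; /* job body */ }

    static void run_two_batches(void)
    {
        int i;
        POOL_ctx* const pool = POOL_create(4 /* threads */, 8 /* queue */);
        if (pool == NULL) return;
        for (i = 0; i < 64; i++) POOL_add(pool, do_work, NULL);
        POOL_joinJobs(pool);    /* wait for batch 1, keep the threads alive */
        for (i = 0; i < 64; i++) POOL_add(pool, do_work, NULL);
        POOL_joinJobs(pool);    /* drain batch 2 as well */
        POOL_free(pool);
    }
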
+ * + * Additionally, any function that may be called indirectly must begin + * with ZSTD_CET_ENDBRANCH. + */ +#if defined(__ELF__) && (defined(__x86_64__) || defined(__i386__)) \ + && defined(__has_include) +# if __has_include(<cet.h>) +# include <cet.h> +# define ZSTD_CET_ENDBRANCH _CET_ENDBR +# endif +#endif + +#ifndef ZSTD_CET_ENDBRANCH +# define ZSTD_CET_ENDBRANCH +#endif + #endif /* ZSTD_PORTABILITY_MACROS_H */ diff --git a/contrib/libs/zstd/lib/common/threading.c b/contrib/libs/zstd/lib/common/threading.c index 92cf57c195..f2341105a1 100644 --- a/contrib/libs/zstd/lib/common/threading.c +++ b/contrib/libs/zstd/lib/common/threading.c @@ -23,8 +23,7 @@ int g_ZSTD_threading_useless_symbol; #if defined(ZSTD_MULTITHREAD) && defined(_WIN32) /** - * Windows minimalist Pthread Wrapper, based on : - * http://www.cse.wustl.edu/~schmidt/win32-cv-1.html + * Windows minimalist Pthread Wrapper */ @@ -35,37 +34,92 @@ int g_ZSTD_threading_useless_symbol; /* === Implementation === */ +typedef struct { + void* (*start_routine)(void*); + void* arg; + int initialized; + ZSTD_pthread_cond_t initialized_cond; + ZSTD_pthread_mutex_t initialized_mutex; +} ZSTD_thread_params_t; + static unsigned __stdcall worker(void *arg) { - ZSTD_pthread_t* const thread = (ZSTD_pthread_t*) arg; - thread->arg = thread->start_routine(thread->arg); + void* (*start_routine)(void*); + void* thread_arg; + + /* Inialized thread_arg and start_routine and signal main thread that we don't need it + * to wait any longer. + */ + { + ZSTD_thread_params_t* thread_param = (ZSTD_thread_params_t*)arg; + thread_arg = thread_param->arg; + start_routine = thread_param->start_routine; + + /* Signal main thread that we are running and do not depend on its memory anymore */ + ZSTD_pthread_mutex_lock(&thread_param->initialized_mutex); + thread_param->initialized = 1; + ZSTD_pthread_cond_signal(&thread_param->initialized_cond); + ZSTD_pthread_mutex_unlock(&thread_param->initialized_mutex); + } + + start_routine(thread_arg); + return 0; } int ZSTD_pthread_create(ZSTD_pthread_t* thread, const void* unused, void* (*start_routine) (void*), void* arg) { + ZSTD_thread_params_t thread_param; (void)unused; - thread->arg = arg; - thread->start_routine = start_routine; - thread->handle = (HANDLE) _beginthreadex(NULL, 0, worker, thread, 0, NULL); - if (!thread->handle) + thread_param.start_routine = start_routine; + thread_param.arg = arg; + thread_param.initialized = 0; + *thread = NULL; + + /* Setup thread initialization synchronization */ + if(ZSTD_pthread_cond_init(&thread_param.initialized_cond, NULL)) { + /* Should never happen on Windows */ + return -1; + } + if(ZSTD_pthread_mutex_init(&thread_param.initialized_mutex, NULL)) { + /* Should never happen on Windows */ + ZSTD_pthread_cond_destroy(&thread_param.initialized_cond); + return -1; + } + + /* Spawn thread */ + *thread = (HANDLE)_beginthreadex(NULL, 0, worker, &thread_param, 0, NULL); + if (!thread) { + ZSTD_pthread_mutex_destroy(&thread_param.initialized_mutex); + ZSTD_pthread_cond_destroy(&thread_param.initialized_cond); return errno; - else - return 0; + } + + /* Wait for thread to be initialized */ + ZSTD_pthread_mutex_lock(&thread_param.initialized_mutex); + while(!thread_param.initialized) { + ZSTD_pthread_cond_wait(&thread_param.initialized_cond, &thread_param.initialized_mutex); + } + ZSTD_pthread_mutex_unlock(&thread_param.initialized_mutex); + ZSTD_pthread_mutex_destroy(&thread_param.initialized_mutex); + ZSTD_pthread_cond_destroy(&thread_param.initialized_cond); + + return 0; } -int 
ZSTD_pthread_join(ZSTD_pthread_t thread, void **value_ptr) +int ZSTD_pthread_join(ZSTD_pthread_t thread) { DWORD result; - if (!thread.handle) return 0; + if (!thread) return 0; + + result = WaitForSingleObject(thread, INFINITE); + CloseHandle(thread); - result = WaitForSingleObject(thread.handle, INFINITE); switch (result) { case WAIT_OBJECT_0: - if (value_ptr) *value_ptr = thread.arg; return 0; case WAIT_ABANDONED: return EINVAL; diff --git a/contrib/libs/zstd/lib/common/threading.h b/contrib/libs/zstd/lib/common/threading.h index fd0060d5aa..fb5c1c8787 100644 --- a/contrib/libs/zstd/lib/common/threading.h +++ b/contrib/libs/zstd/lib/common/threading.h @@ -23,8 +23,7 @@ extern "C" { #if defined(ZSTD_MULTITHREAD) && defined(_WIN32) /** - * Windows minimalist Pthread Wrapper, based on : - * http://www.cse.wustl.edu/~schmidt/win32-cv-1.html + * Windows minimalist Pthread Wrapper */ #ifdef WINVER # undef WINVER @@ -62,16 +61,12 @@ extern "C" { #define ZSTD_pthread_cond_broadcast(a) WakeAllConditionVariable((a)) /* ZSTD_pthread_create() and ZSTD_pthread_join() */ -typedef struct { - HANDLE handle; - void* (*start_routine)(void*); - void* arg; -} ZSTD_pthread_t; +typedef HANDLE ZSTD_pthread_t; int ZSTD_pthread_create(ZSTD_pthread_t* thread, const void* unused, void* (*start_routine) (void*), void* arg); -int ZSTD_pthread_join(ZSTD_pthread_t thread, void** value_ptr); +int ZSTD_pthread_join(ZSTD_pthread_t thread); /** * add here more wrappers as required @@ -99,7 +94,7 @@ int ZSTD_pthread_join(ZSTD_pthread_t thread, void** value_ptr); #define ZSTD_pthread_t pthread_t #define ZSTD_pthread_create(a, b, c, d) pthread_create((a), (b), (c), (d)) -#define ZSTD_pthread_join(a, b) pthread_join((a),(b)) +#define ZSTD_pthread_join(a) pthread_join((a),NULL) #else /* DEBUGLEVEL >= 1 */ @@ -124,7 +119,7 @@ int ZSTD_pthread_cond_destroy(ZSTD_pthread_cond_t* cond); #define ZSTD_pthread_t pthread_t #define ZSTD_pthread_create(a, b, c, d) pthread_create((a), (b), (c), (d)) -#define ZSTD_pthread_join(a, b) pthread_join((a),(b)) +#define ZSTD_pthread_join(a) pthread_join((a),NULL) #endif diff --git a/contrib/libs/zstd/lib/common/zstd_common.c b/contrib/libs/zstd/lib/common/zstd_common.c index 3d7e35b309..3208552475 100644 --- a/contrib/libs/zstd/lib/common/zstd_common.c +++ b/contrib/libs/zstd/lib/common/zstd_common.c @@ -1,5 +1,5 @@ /* - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/contrib/libs/zstd/lib/common/zstd_deps.h b/contrib/libs/zstd/lib/common/zstd_deps.h index 14211344a0..4d767ae9b0 100644 --- a/contrib/libs/zstd/lib/common/zstd_deps.h +++ b/contrib/libs/zstd/lib/common/zstd_deps.h @@ -1,5 +1,5 @@ /* - * Copyright (c) Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/contrib/libs/zstd/lib/common/zstd_internal.h b/contrib/libs/zstd/lib/common/zstd_internal.h index 1dee37cdbe..54792712fc 100644 --- a/contrib/libs/zstd/lib/common/zstd_internal.h +++ b/contrib/libs/zstd/lib/common/zstd_internal.h @@ -1,5 +1,5 @@ /* - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. 
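
The threading.c / threading.h hunks above make ZSTD_pthread_t a bare HANDLE on Windows and drop the value_ptr argument from ZSTD_pthread_join on every platform. Because _beginthreadex now receives a pointer to a stack-local ZSTD_thread_params_t, ZSTD_pthread_create must block on the initialized condition until the worker has copied start_routine and arg out of it; that is what the extra mutex/cond pair is for. A minimal sketch of the wrapper as used after this change (worker_fn and spawn_and_wait are illustrative names):

    #include "threading.h"

    static void* worker_fn(void* opaque)
    {
        (void)opaque;   /* thread body */
        return NULL;    /* the return value is no longer observable via join */
    }

    static int spawn_and_wait(void)
    {
        ZSTD_pthread_t t;
        if (ZSTD_pthread_create(&t, NULL, worker_fn, NULL) != 0)
            return -1;                 /* creation failed */
        return ZSTD_pthread_join(t);   /* 0 on success, nonzero on error */
    }
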
* * This source code is licensed under both the BSD-style license (found in the @@ -28,7 +28,6 @@ #include "../zstd.h" #define FSE_STATIC_LINKING_ONLY #include "fse.h" -#define HUF_STATIC_LINKING_ONLY #include "huf.h" #ifndef XXH_STATIC_LINKING_ONLY # define XXH_STATIC_LINKING_ONLY /* XXH64_state_t */ @@ -93,9 +92,9 @@ typedef enum { bt_raw, bt_rle, bt_compressed, bt_reserved } blockType_e; #define ZSTD_FRAMECHECKSUMSIZE 4 #define MIN_SEQUENCES_SIZE 1 /* nbSeq==0 */ -#define MIN_CBLOCK_SIZE (1 /*litCSize*/ + 1 /* RLE or RAW */ + MIN_SEQUENCES_SIZE /* nbSeq==0 */) /* for a non-null block */ +#define MIN_CBLOCK_SIZE (1 /*litCSize*/ + 1 /* RLE or RAW */) /* for a non-null block */ +#define MIN_LITERALS_FOR_4_STREAMS 6 -#define HufLog 12 typedef enum { set_basic, set_rle, set_compressed, set_repeat } symbolEncodingType_e; #define LONGNBSEQ 0x7F00 @@ -103,6 +102,7 @@ typedef enum { set_basic, set_rle, set_compressed, set_repeat } symbolEncodingTy #define MINMATCH 3 #define Litbits 8 +#define LitHufLog 11 #define MaxLit ((1<<Litbits) - 1) #define MaxML 52 #define MaxLL 35 @@ -113,6 +113,8 @@ typedef enum { set_basic, set_rle, set_compressed, set_repeat } symbolEncodingTy #define LLFSELog 9 #define OffFSELog 8 #define MaxFSELog MAX(MAX(MLFSELog, LLFSELog), OffFSELog) +#define MaxMLBits 16 +#define MaxLLBits 16 #define ZSTD_MAX_HUF_HEADER_SIZE 128 /* header + <= 127 byte tree description */ /* Each table cannot take more than #symbols * FSELog bits */ @@ -235,12 +237,6 @@ void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e * one COPY16() in the first call. Then, do two calls per loop since * at that point it is more likely to have a high trip count. */ -#ifdef __aarch64__ - do { - COPY16(op, ip); - } - while (op < oend); -#else ZSTD_copy16(op, ip); if (16 >= length) return; op += 16; @@ -250,7 +246,6 @@ void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e COPY16(op, ip); } while (op < oend); -#endif } } @@ -299,11 +294,11 @@ typedef enum { typedef struct { seqDef* sequencesStart; seqDef* sequences; /* ptr to end of sequences */ - BYTE* litStart; - BYTE* lit; /* ptr to end of literals */ - BYTE* llCode; - BYTE* mlCode; - BYTE* ofCode; + BYTE* litStart; + BYTE* lit; /* ptr to end of literals */ + BYTE* llCode; + BYTE* mlCode; + BYTE* ofCode; size_t maxNbSeq; size_t maxNbLit; @@ -311,8 +306,8 @@ typedef struct { * in the seqStore that has a value larger than U16 (if it exists). To do so, we increment * the existing value of the litLength or matchLength by 0x10000. 
*/ - ZSTD_longLengthType_e longLengthType; - U32 longLengthPos; /* Index of the sequence to apply long length modification to */ + ZSTD_longLengthType_e longLengthType; + U32 longLengthPos; /* Index of the sequence to apply long length modification to */ } seqStore_t; typedef struct { @@ -331,10 +326,10 @@ MEM_STATIC ZSTD_sequenceLength ZSTD_getSequenceLength(seqStore_t const* seqStore seqLen.matchLength = seq->mlBase + MINMATCH; if (seqStore->longLengthPos == (U32)(seq - seqStore->sequencesStart)) { if (seqStore->longLengthType == ZSTD_llt_literalLength) { - seqLen.litLength += 0xFFFF; + seqLen.litLength += 0x10000; } if (seqStore->longLengthType == ZSTD_llt_matchLength) { - seqLen.matchLength += 0xFFFF; + seqLen.matchLength += 0x10000; } } return seqLen; @@ -347,12 +342,13 @@ MEM_STATIC ZSTD_sequenceLength ZSTD_getSequenceLength(seqStore_t const* seqStore * `decompressedBound != ZSTD_CONTENTSIZE_ERROR` */ typedef struct { + size_t nbBlocks; size_t compressedSize; unsigned long long decompressedBound; } ZSTD_frameSizeInfo; /* decompress & legacy */ const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx); /* compress & dictBuilder */ -void ZSTD_seqToCodes(const seqStore_t* seqStorePtr); /* compress, dictBuilder, decodeCorpus (shouldn't get its definition from here) */ +int ZSTD_seqToCodes(const seqStore_t* seqStorePtr); /* compress, dictBuilder, decodeCorpus (shouldn't get its definition from here) */ /* custom memory allocation functions */ void* ZSTD_customMalloc(size_t size, ZSTD_customMem customMem); @@ -360,98 +356,6 @@ void* ZSTD_customCalloc(size_t size, ZSTD_customMem customMem); void ZSTD_customFree(void* ptr, ZSTD_customMem customMem); -MEM_STATIC U32 ZSTD_highbit32(U32 val) /* compress, dictBuilder, decodeCorpus */ -{ - assert(val != 0); - { -# if defined(_MSC_VER) /* Visual */ -# if STATIC_BMI2 == 1 - return _lzcnt_u32(val)^31; -# else - if (val != 0) { - unsigned long r; - _BitScanReverse(&r, val); - return (unsigned)r; - } else { - /* Should not reach this code path */ - __assume(0); - } -# endif -# elif defined(__GNUC__) && (__GNUC__ >= 3) /* GCC Intrinsic */ - return __builtin_clz (val) ^ 31; -# elif defined(__ICCARM__) /* IAR Intrinsic */ - return 31 - __CLZ(val); -# else /* Software version */ - static const U32 DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 }; - U32 v = val; - v |= v >> 1; - v |= v >> 2; - v |= v >> 4; - v |= v >> 8; - v |= v >> 16; - return DeBruijnClz[(v * 0x07C4ACDDU) >> 27]; -# endif - } -} - -/** - * Counts the number of trailing zeros of a `size_t`. - * Most compilers should support CTZ as a builtin. A backup - * implementation is provided if the builtin isn't supported, but - * it may not be terribly efficient. 
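
The ZSTD_getSequenceLength change above brings the reader in line with the seqStore comment: over-long lengths are reconstructed by adding 0x10000 to the stored U16, so the old +0xFFFF was one short. A worked example, with an illustrative length just over the U16 range:

    /* true litLength            = 70000 (0x11170)
     * stored U16 field          = 70000 - 0x10000 = 4464
     * reconstructed, new code   = 4464 + 0x10000  = 70000  (correct)
     * reconstructed, old code   = 4464 + 0xFFFF   = 69999  (one short)
     */
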
- */ -MEM_STATIC unsigned ZSTD_countTrailingZeros(size_t val) -{ - if (MEM_64bits()) { -# if defined(_MSC_VER) && defined(_WIN64) -# if STATIC_BMI2 - return _tzcnt_u64(val); -# else - if (val != 0) { - unsigned long r; - _BitScanForward64(&r, (U64)val); - return (unsigned)r; - } else { - /* Should not reach this code path */ - __assume(0); - } -# endif -# elif defined(__GNUC__) && (__GNUC__ >= 4) - return __builtin_ctzll((U64)val); -# else - static const int DeBruijnBytePos[64] = { 0, 1, 2, 7, 3, 13, 8, 19, - 4, 25, 14, 28, 9, 34, 20, 56, - 5, 17, 26, 54, 15, 41, 29, 43, - 10, 31, 38, 35, 21, 45, 49, 57, - 63, 6, 12, 18, 24, 27, 33, 55, - 16, 53, 40, 42, 30, 37, 44, 48, - 62, 11, 23, 32, 52, 39, 36, 47, - 61, 22, 51, 46, 60, 50, 59, 58 }; - return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58]; -# endif - } else { /* 32 bits */ -# if defined(_MSC_VER) - if (val != 0) { - unsigned long r; - _BitScanForward(&r, (U32)val); - return (unsigned)r; - } else { - /* Should not reach this code path */ - __assume(0); - } -# elif defined(__GNUC__) && (__GNUC__ >= 3) - return __builtin_ctz((U32)val); -# else - static const int DeBruijnBytePos[32] = { 0, 1, 28, 2, 29, 14, 24, 3, - 30, 22, 20, 15, 25, 17, 4, 8, - 31, 27, 13, 23, 21, 19, 16, 7, - 26, 12, 18, 6, 11, 5, 10, 9 }; - return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27]; -# endif - } -} - - /* ZSTD_invalidateRepCodes() : * ensures next compression will not use repcodes from previous block. * Note : only works with regular variant; diff --git a/contrib/libs/zstd/lib/common/zstd_trace.h b/contrib/libs/zstd/lib/common/zstd_trace.h index f9121f7d8e..da20534ebd 100644 --- a/contrib/libs/zstd/lib/common/zstd_trace.h +++ b/contrib/libs/zstd/lib/common/zstd_trace.h @@ -1,5 +1,5 @@ /* - * Copyright (c) Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -21,13 +21,13 @@ extern "C" { * For now, enable conservatively: * - Only GNUC * - Only ELF - * - Only x86-64 and i386 + * - Only x86-64, i386 and aarch64 * Also, explicitly disable on platforms known not to work so they aren't * forgotten in the future. */ #if !defined(ZSTD_HAVE_WEAK_SYMBOLS) && \ defined(__GNUC__) && defined(__ELF__) && \ - (defined(__x86_64__) || defined(_M_X64) || defined(__i386__) || defined(_M_IX86)) && \ + (defined(__x86_64__) || defined(_M_X64) || defined(__i386__) || defined(_M_IX86) || defined(__aarch64__)) && \ !defined(__APPLE__) && !defined(_WIN32) && !defined(__MINGW32__) && \ !defined(__CYGWIN__) && !defined(_AIX) # define ZSTD_HAVE_WEAK_SYMBOLS 1 |
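
The zstd_trace.h hunk above widens ZSTD_HAVE_WEAK_SYMBOLS to aarch64 ELF targets. For readers unfamiliar with the mechanism it gates: a weakly referenced hook resolves to NULL unless the application links a definition, so the library can probe for it at runtime. A generic GCC/Clang sketch; my_trace_hook and maybe_trace are illustrative names, not the actual zstd_trace.h API.

    #include <stddef.h>   /* NULL */

    /* Weak reference: resolves to NULL at link time if nothing defines it. */
    extern void my_trace_hook(int event) __attribute__((weak));

    static void maybe_trace(int event)
    {
        if (my_trace_hook != NULL)   /* call the hook only if one was linked in */
            my_trace_hook(event);
    }
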