diff options
| author | uzhas <[email protected]> | 2024-10-03 16:34:24 +0300 |
|---|---|---|
| committer | uzhas <[email protected]> | 2024-10-03 16:44:27 +0300 |
| commit | d7525e0eec8e7242a5cedd5fbdaf3bdaaeea02c7 (patch) | |
| tree | ce9f81f6cf7c08a4dd02d982b956b3b7c7637a3f /contrib/libs/simdjson/include | |
| parent | 58e24167b404650b5a636bfa016b291d24d9830d (diff) | |
ydblib: add simdjson
commit_hash:a86d76c6047d0b7407a8f1e013ccecf0e3c886eb
Diffstat (limited to 'contrib/libs/simdjson/include')
96 files changed, 8538 insertions, 0 deletions
diff --git a/contrib/libs/simdjson/include/simdjson/arm64.h b/contrib/libs/simdjson/include/simdjson/arm64.h new file mode 100644 index 00000000000..1493c3562a5 --- /dev/null +++ b/contrib/libs/simdjson/include/simdjson/arm64.h @@ -0,0 +1,8 @@ +#ifndef SIMDJSON_ARM64_H +#define SIMDJSON_ARM64_H + +#include "simdjson/arm64/begin.h" +#include "simdjson/generic/amalgamated.h" +#include "simdjson/arm64/end.h" + +#endif // SIMDJSON_ARM64_H
\ No newline at end of file diff --git a/contrib/libs/simdjson/include/simdjson/arm64/base.h b/contrib/libs/simdjson/include/simdjson/arm64/base.h new file mode 100644 index 00000000000..8f23d18d80e --- /dev/null +++ b/contrib/libs/simdjson/include/simdjson/arm64/base.h @@ -0,0 +1,26 @@ +#ifndef SIMDJSON_ARM64_BASE_H +#define SIMDJSON_ARM64_BASE_H + +#ifndef SIMDJSON_CONDITIONAL_INCLUDE +#include "simdjson/base.h" +#endif // SIMDJSON_CONDITIONAL_INCLUDE + +namespace simdjson { +/** + * Implementation for NEON (ARMv8). + */ +namespace arm64 { + +class implementation; + +namespace { +namespace simd { +template <typename T> struct simd8; +template <typename T> struct simd8x64; +} // namespace simd +} // unnamed namespace + +} // namespace arm64 +} // namespace simdjson + +#endif // SIMDJSON_ARM64_BASE_H diff --git a/contrib/libs/simdjson/include/simdjson/arm64/begin.h b/contrib/libs/simdjson/include/simdjson/arm64/begin.h new file mode 100644 index 00000000000..ee48cec0515 --- /dev/null +++ b/contrib/libs/simdjson/include/simdjson/arm64/begin.h @@ -0,0 +1,10 @@ +#define SIMDJSON_IMPLEMENTATION arm64 +#include "simdjson/arm64/base.h" +#include "simdjson/arm64/intrinsics.h" +#include "simdjson/arm64/bitmanipulation.h" +#include "simdjson/arm64/bitmask.h" +#include "simdjson/arm64/numberparsing_defs.h" +#include "simdjson/arm64/simd.h" +#include "simdjson/arm64/stringparsing_defs.h" + +#define SIMDJSON_SKIP_BACKSLASH_SHORT_CIRCUIT 1
\ No newline at end of file diff --git a/contrib/libs/simdjson/include/simdjson/arm64/bitmanipulation.h b/contrib/libs/simdjson/include/simdjson/arm64/bitmanipulation.h new file mode 100644 index 00000000000..019869b1b0b --- /dev/null +++ b/contrib/libs/simdjson/include/simdjson/arm64/bitmanipulation.h @@ -0,0 +1,106 @@ +#ifndef SIMDJSON_ARM64_BITMANIPULATION_H +#define SIMDJSON_ARM64_BITMANIPULATION_H + +#ifndef SIMDJSON_CONDITIONAL_INCLUDE +#include "simdjson/arm64/base.h" +#include "simdjson/arm64/intrinsics.h" +#endif // SIMDJSON_CONDITIONAL_INCLUDE + +namespace simdjson { +namespace arm64 { +namespace { + +// We sometimes call trailing_zero on inputs that are zero, +// but the algorithms do not end up using the returned value. +// Sadly, sanitizers are not smart enough to figure it out. +SIMDJSON_NO_SANITIZE_UNDEFINED +// This function can be used safely even if not all bytes have been +// initialized. +// See issue https://github.com/simdjson/simdjson/issues/1965 +SIMDJSON_NO_SANITIZE_MEMORY +simdjson_inline int trailing_zeroes(uint64_t input_num) { +#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO + unsigned long ret; + // Search the mask data from least significant bit (LSB) + // to the most significant bit (MSB) for a set bit (1). + _BitScanForward64(&ret, input_num); + return (int)ret; +#else // SIMDJSON_REGULAR_VISUAL_STUDIO + return __builtin_ctzll(input_num); +#endif // SIMDJSON_REGULAR_VISUAL_STUDIO +} + +/* result might be undefined when input_num is zero */ +simdjson_inline uint64_t clear_lowest_bit(uint64_t input_num) { + return input_num & (input_num-1); +} + +/* result might be undefined when input_num is zero */ +simdjson_inline int leading_zeroes(uint64_t input_num) { +#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO + unsigned long leading_zero = 0; + // Search the mask data from most significant bit (MSB) + // to least significant bit (LSB) for a set bit (1). + if (_BitScanReverse64(&leading_zero, input_num)) + return (int)(63 - leading_zero); + else + return 64; +#else + return __builtin_clzll(input_num); +#endif// SIMDJSON_REGULAR_VISUAL_STUDIO +} + +/* result might be undefined when input_num is zero */ +simdjson_inline int count_ones(uint64_t input_num) { + return vaddv_u8(vcnt_u8(vcreate_u8(input_num))); +} + + +#if defined(__GNUC__) // catches clang and gcc +/** + * ARM has a fast 64-bit "bit reversal function" that is handy. However, + * it is not generally available as an intrinsic function under Visual + * Studio (though this might be changing). Even under clang/gcc, we + * apparently need to invoke inline assembly. + */ +/* + * We use SIMDJSON_PREFER_REVERSE_BITS as a hint that algorithms that + * work well with bit reversal may use it. + */ +#define SIMDJSON_PREFER_REVERSE_BITS 1 + +/* reverse the bits */ +simdjson_inline uint64_t reverse_bits(uint64_t input_num) { + uint64_t rev_bits; + __asm("rbit %0, %1" : "=r"(rev_bits) : "r"(input_num)); + return rev_bits; +} + +/** + * Flips bit at index 63 - lz. Thus if you have 'leading_zeroes' leading zeroes, + * then this will set to zero the leading bit. It is possible for leading_zeroes to be + * greating or equal to 63 in which case we trigger undefined behavior, but the output + * of such undefined behavior is never used. + **/ +SIMDJSON_NO_SANITIZE_UNDEFINED +simdjson_inline uint64_t zero_leading_bit(uint64_t rev_bits, int leading_zeroes) { + return rev_bits ^ (uint64_t(0x8000000000000000) >> leading_zeroes); +} + +#endif + +simdjson_inline bool add_overflow(uint64_t value1, uint64_t value2, uint64_t *result) { +#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO + *result = value1 + value2; + return *result < value1; +#else + return __builtin_uaddll_overflow(value1, value2, + reinterpret_cast<unsigned long long *>(result)); +#endif +} + +} // unnamed namespace +} // namespace arm64 +} // namespace simdjson + +#endif // SIMDJSON_ARM64_BITMANIPULATION_H diff --git a/contrib/libs/simdjson/include/simdjson/arm64/bitmask.h b/contrib/libs/simdjson/include/simdjson/arm64/bitmask.h new file mode 100644 index 00000000000..5d6121bcc7b --- /dev/null +++ b/contrib/libs/simdjson/include/simdjson/arm64/bitmask.h @@ -0,0 +1,44 @@ +#ifndef SIMDJSON_ARM64_BITMASK_H +#define SIMDJSON_ARM64_BITMASK_H + +#ifndef SIMDJSON_CONDITIONAL_INCLUDE +#include "simdjson/arm64/base.h" +#endif // SIMDJSON_CONDITIONAL_INCLUDE + +namespace simdjson { +namespace arm64 { +namespace { + +// +// Perform a "cumulative bitwise xor," flipping bits each time a 1 is encountered. +// +// For example, prefix_xor(00100100) == 00011100 +// +simdjson_inline uint64_t prefix_xor(uint64_t bitmask) { + ///////////// + // We could do this with PMULL, but it is apparently slow. + // + //#ifdef __ARM_FEATURE_CRYPTO // some ARM processors lack this extension + //return vmull_p64(-1ULL, bitmask); + //#else + // Analysis by @sebpop: + // When diffing the assembly for src/stage1_find_marks.cpp I see that the eors are all spread out + // in between other vector code, so effectively the extra cycles of the sequence do not matter + // because the GPR units are idle otherwise and the critical path is on the FP side. + // Also the PMULL requires two extra fmovs: GPR->FP (3 cycles in N1, 5 cycles in A72 ) + // and FP->GPR (2 cycles on N1 and 5 cycles on A72.) + /////////// + bitmask ^= bitmask << 1; + bitmask ^= bitmask << 2; + bitmask ^= bitmask << 4; + bitmask ^= bitmask << 8; + bitmask ^= bitmask << 16; + bitmask ^= bitmask << 32; + return bitmask; +} + +} // unnamed namespace +} // namespace arm64 +} // namespace simdjson + +#endif diff --git a/contrib/libs/simdjson/include/simdjson/arm64/end.h b/contrib/libs/simdjson/include/simdjson/arm64/end.h new file mode 100644 index 00000000000..b92378df97f --- /dev/null +++ b/contrib/libs/simdjson/include/simdjson/arm64/end.h @@ -0,0 +1,6 @@ +#ifndef SIMDJSON_CONDITIONAL_INCLUDE +#include "simdjson/arm64/base.h" +#endif // SIMDJSON_CONDITIONAL_INCLUDE + +#undef SIMDJSON_SKIP_BACKSLASH_SHORT_CIRCUIT +#undef SIMDJSON_IMPLEMENTATION diff --git a/contrib/libs/simdjson/include/simdjson/arm64/implementation.h b/contrib/libs/simdjson/include/simdjson/arm64/implementation.h new file mode 100644 index 00000000000..c9b7dd75350 --- /dev/null +++ b/contrib/libs/simdjson/include/simdjson/arm64/implementation.h @@ -0,0 +1,31 @@ +#ifndef SIMDJSON_ARM64_IMPLEMENTATION_H +#define SIMDJSON_ARM64_IMPLEMENTATION_H + +#ifndef SIMDJSON_CONDITIONAL_INCLUDE +#include "simdjson/base.h" +#include "simdjson/implementation.h" +#include "simdjson/internal/instruction_set.h" +#endif // SIMDJSON_CONDITIONAL_INCLUDE + +namespace simdjson { +namespace arm64 { + +/** + * @private + */ +class implementation final : public simdjson::implementation { +public: + simdjson_inline implementation() : simdjson::implementation("arm64", "ARM NEON", internal::instruction_set::NEON) {} + simdjson_warn_unused error_code create_dom_parser_implementation( + size_t capacity, + size_t max_length, + std::unique_ptr<internal::dom_parser_implementation>& dst + ) const noexcept final; + simdjson_warn_unused error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept final; + simdjson_warn_unused bool validate_utf8(const char *buf, size_t len) const noexcept final; +}; + +} // namespace arm64 +} // namespace simdjson + +#endif // SIMDJSON_ARM64_IMPLEMENTATION_H diff --git a/contrib/libs/simdjson/include/simdjson/arm64/intrinsics.h b/contrib/libs/simdjson/include/simdjson/arm64/intrinsics.h new file mode 100644 index 00000000000..049d9986102 --- /dev/null +++ b/contrib/libs/simdjson/include/simdjson/arm64/intrinsics.h @@ -0,0 +1,14 @@ +#ifndef SIMDJSON_ARM64_INTRINSICS_H +#define SIMDJSON_ARM64_INTRINSICS_H + +#ifndef SIMDJSON_CONDITIONAL_INCLUDE +#include "simdjson/arm64/base.h" +#endif // SIMDJSON_CONDITIONAL_INCLUDE + +// This should be the correct header whether +// you use visual studio or other compilers. +#include <arm_neon.h> + +static_assert(sizeof(uint8x16_t) <= simdjson::SIMDJSON_PADDING, "insufficient padding for arm64"); + +#endif // SIMDJSON_ARM64_INTRINSICS_H diff --git a/contrib/libs/simdjson/include/simdjson/arm64/numberparsing_defs.h b/contrib/libs/simdjson/include/simdjson/arm64/numberparsing_defs.h new file mode 100644 index 00000000000..f5e85b74779 --- /dev/null +++ b/contrib/libs/simdjson/include/simdjson/arm64/numberparsing_defs.h @@ -0,0 +1,56 @@ +#ifndef SIMDJSON_ARM64_NUMBERPARSING_DEFS_H +#define SIMDJSON_ARM64_NUMBERPARSING_DEFS_H + +#ifndef SIMDJSON_CONDITIONAL_INCLUDE +#include "simdjson/arm64/base.h" +#include "simdjson/arm64/intrinsics.h" +#include "simdjson/internal/numberparsing_tables.h" +#endif // SIMDJSON_CONDITIONAL_INCLUDE + +#include <cstring> + +#if SIMDJSON_REGULAR_VISUAL_STUDIO && SIMDJSON_IS_ARM64 +// __umulh requires intrin.h +#include <intrin.h> +#endif // SIMDJSON_REGULAR_VISUAL_STUDIO && SIMDJSON_IS_ARM64 + +namespace simdjson { +namespace arm64 { +namespace numberparsing { + +// we don't have SSE, so let us use a scalar function +// credit: https://johnnylee-sde.github.io/Fast-numeric-string-to-int/ +/** @private */ +static simdjson_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) { + uint64_t val; + std::memcpy(&val, chars, sizeof(uint64_t)); + val = (val & 0x0F0F0F0F0F0F0F0F) * 2561 >> 8; + val = (val & 0x00FF00FF00FF00FF) * 6553601 >> 16; + return uint32_t((val & 0x0000FFFF0000FFFF) * 42949672960001 >> 32); +} + +simdjson_inline internal::value128 full_multiplication(uint64_t value1, uint64_t value2) { + internal::value128 answer; +#if SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS +#if SIMDJSON_IS_ARM64 + // ARM64 has native support for 64-bit multiplications, no need to emultate + answer.high = __umulh(value1, value2); + answer.low = value1 * value2; +#else + answer.low = _umul128(value1, value2, &answer.high); // _umul128 not available on ARM64 +#endif // SIMDJSON_IS_ARM64 +#else // SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS + __uint128_t r = (static_cast<__uint128_t>(value1)) * value2; + answer.low = uint64_t(r); + answer.high = uint64_t(r >> 64); +#endif + return answer; +} + +} // namespace numberparsing +} // namespace arm64 +} // namespace simdjson + +#define SIMDJSON_SWAR_NUMBER_PARSING 1 + +#endif // SIMDJSON_ARM64_NUMBERPARSING_DEFS_H diff --git a/contrib/libs/simdjson/include/simdjson/arm64/simd.h b/contrib/libs/simdjson/include/simdjson/arm64/simd.h new file mode 100644 index 00000000000..3b0fa844f49 --- /dev/null +++ b/contrib/libs/simdjson/include/simdjson/arm64/simd.h @@ -0,0 +1,497 @@ +#ifndef SIMDJSON_ARM64_SIMD_H +#define SIMDJSON_ARM64_SIMD_H + +#ifndef SIMDJSON_CONDITIONAL_INCLUDE +#include "simdjson/arm64/base.h" +#include "simdjson/arm64/bitmanipulation.h" +#include "simdjson/internal/simdprune_tables.h" +#endif // SIMDJSON_CONDITIONAL_INCLUDE + +namespace simdjson { +namespace arm64 { +namespace { +namespace simd { + +#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO +namespace { +// Start of private section with Visual Studio workaround + + +#ifndef simdjson_make_uint8x16_t +#define simdjson_make_uint8x16_t(x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, \ + x13, x14, x15, x16) \ + ([=]() { \ + uint8_t array[16] = {x1, x2, x3, x4, x5, x6, x7, x8, \ + x9, x10, x11, x12, x13, x14, x15, x16}; \ + return vld1q_u8(array); \ + }()) +#endif +#ifndef simdjson_make_int8x16_t +#define simdjson_make_int8x16_t(x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, \ + x13, x14, x15, x16) \ + ([=]() { \ + int8_t array[16] = {x1, x2, x3, x4, x5, x6, x7, x8, \ + x9, x10, x11, x12, x13, x14, x15, x16}; \ + return vld1q_s8(array); \ + }()) +#endif + +#ifndef simdjson_make_uint8x8_t +#define simdjson_make_uint8x8_t(x1, x2, x3, x4, x5, x6, x7, x8) \ + ([=]() { \ + uint8_t array[8] = {x1, x2, x3, x4, x5, x6, x7, x8}; \ + return vld1_u8(array); \ + }()) +#endif +#ifndef simdjson_make_int8x8_t +#define simdjson_make_int8x8_t(x1, x2, x3, x4, x5, x6, x7, x8) \ + ([=]() { \ + int8_t array[8] = {x1, x2, x3, x4, x5, x6, x7, x8}; \ + return vld1_s8(array); \ + }()) +#endif +#ifndef simdjson_make_uint16x8_t +#define simdjson_make_uint16x8_t(x1, x2, x3, x4, x5, x6, x7, x8) \ + ([=]() { \ + uint16_t array[8] = {x1, x2, x3, x4, x5, x6, x7, x8}; \ + return vld1q_u16(array); \ + }()) +#endif +#ifndef simdjson_make_int16x8_t +#define simdjson_make_int16x8_t(x1, x2, x3, x4, x5, x6, x7, x8) \ + ([=]() { \ + int16_t array[8] = {x1, x2, x3, x4, x5, x6, x7, x8}; \ + return vld1q_s16(array); \ + }()) +#endif + +// End of private section with Visual Studio workaround +} // namespace +#endif // SIMDJSON_REGULAR_VISUAL_STUDIO + + + template<typename T> + struct simd8; + + // + // Base class of simd8<uint8_t> and simd8<bool>, both of which use uint8x16_t internally. + // + template<typename T, typename Mask=simd8<bool>> + struct base_u8 { + uint8x16_t value; + static const int SIZE = sizeof(value); + + // Conversion from/to SIMD register + simdjson_inline base_u8(const uint8x16_t _value) : value(_value) {} + simdjson_inline operator const uint8x16_t&() const { return this->value; } + simdjson_inline operator uint8x16_t&() { return this->value; } + + // Bit operations + simdjson_inline simd8<T> operator|(const simd8<T> other) const { return vorrq_u8(*this, other); } + simdjson_inline simd8<T> operator&(const simd8<T> other) const { return vandq_u8(*this, other); } + simdjson_inline simd8<T> operator^(const simd8<T> other) const { return veorq_u8(*this, other); } + simdjson_inline simd8<T> bit_andnot(const simd8<T> other) const { return vbicq_u8(*this, other); } + simdjson_inline simd8<T> operator~() const { return *this ^ 0xFFu; } + simdjson_inline simd8<T>& operator|=(const simd8<T> other) { auto this_cast = static_cast<simd8<T>*>(this); *this_cast = *this_cast | other; return *this_cast; } + simdjson_inline simd8<T>& operator&=(const simd8<T> other) { auto this_cast = static_cast<simd8<T>*>(this); *this_cast = *this_cast & other; return *this_cast; } + simdjson_inline simd8<T>& operator^=(const simd8<T> other) { auto this_cast = static_cast<simd8<T>*>(this); *this_cast = *this_cast ^ other; return *this_cast; } + + friend simdjson_inline Mask operator==(const simd8<T> lhs, const simd8<T> rhs) { return vceqq_u8(lhs, rhs); } + + template<int N=1> + simdjson_inline simd8<T> prev(const simd8<T> prev_chunk) const { + return vextq_u8(prev_chunk, *this, 16 - N); + } + }; + + // SIMD byte mask type (returned by things like eq and gt) + template<> + struct simd8<bool>: base_u8<bool> { + typedef uint16_t bitmask_t; + typedef uint32_t bitmask2_t; + + static simdjson_inline simd8<bool> splat(bool _value) { return vmovq_n_u8(uint8_t(-(!!_value))); } + + simdjson_inline simd8(const uint8x16_t _value) : base_u8<bool>(_value) {} + // False constructor + simdjson_inline simd8() : simd8(vdupq_n_u8(0)) {} + // Splat constructor + simdjson_inline simd8(bool _value) : simd8(splat(_value)) {} + + // We return uint32_t instead of uint16_t because that seems to be more efficient for most + // purposes (cutting it down to uint16_t costs performance in some compilers). + simdjson_inline uint32_t to_bitmask() const { +#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO + const uint8x16_t bit_mask = simdjson_make_uint8x16_t(0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, + 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80); +#else + const uint8x16_t bit_mask = {0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, + 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80}; +#endif + auto minput = *this & bit_mask; + uint8x16_t tmp = vpaddq_u8(minput, minput); + tmp = vpaddq_u8(tmp, tmp); + tmp = vpaddq_u8(tmp, tmp); + return vgetq_lane_u16(vreinterpretq_u16_u8(tmp), 0); + } + simdjson_inline bool any() const { return vmaxvq_u32(vreinterpretq_u32_u8(*this)) != 0; } + }; + + // Unsigned bytes + template<> + struct simd8<uint8_t>: base_u8<uint8_t> { + static simdjson_inline uint8x16_t splat(uint8_t _value) { return vmovq_n_u8(_value); } + static simdjson_inline uint8x16_t zero() { return vdupq_n_u8(0); } + static simdjson_inline uint8x16_t load(const uint8_t* values) { return vld1q_u8(values); } + + simdjson_inline simd8(const uint8x16_t _value) : base_u8<uint8_t>(_value) {} + // Zero constructor + simdjson_inline simd8() : simd8(zero()) {} + // Array constructor + simdjson_inline simd8(const uint8_t values[16]) : simd8(load(values)) {} + // Splat constructor + simdjson_inline simd8(uint8_t _value) : simd8(splat(_value)) {} + // Member-by-member initialization +#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO + simdjson_inline simd8( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 + ) : simd8(simdjson_make_uint8x16_t( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + )) {} +#else + simdjson_inline simd8( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 + ) : simd8(uint8x16_t{ + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + }) {} +#endif + + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8<uint8_t> repeat_16( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 + ) { + return simd8<uint8_t>( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + // Store to array + simdjson_inline void store(uint8_t dst[16]) const { return vst1q_u8(dst, *this); } + + // Saturated math + simdjson_inline simd8<uint8_t> saturating_add(const simd8<uint8_t> other) const { return vqaddq_u8(*this, other); } + simdjson_inline simd8<uint8_t> saturating_sub(const simd8<uint8_t> other) const { return vqsubq_u8(*this, other); } + + // Addition/subtraction are the same for signed and unsigned + simdjson_inline simd8<uint8_t> operator+(const simd8<uint8_t> other) const { return vaddq_u8(*this, other); } + simdjson_inline simd8<uint8_t> operator-(const simd8<uint8_t> other) const { return vsubq_u8(*this, other); } + simdjson_inline simd8<uint8_t>& operator+=(const simd8<uint8_t> other) { *this = *this + other; return *this; } + simdjson_inline simd8<uint8_t>& operator-=(const simd8<uint8_t> other) { *this = *this - other; return *this; } + + // Order-specific operations + simdjson_inline uint8_t max_val() const { return vmaxvq_u8(*this); } + simdjson_inline uint8_t min_val() const { return vminvq_u8(*this); } + simdjson_inline simd8<uint8_t> max_val(const simd8<uint8_t> other) const { return vmaxq_u8(*this, other); } + simdjson_inline simd8<uint8_t> min_val(const simd8<uint8_t> other) const { return vminq_u8(*this, other); } + simdjson_inline simd8<bool> operator<=(const simd8<uint8_t> other) const { return vcleq_u8(*this, other); } + simdjson_inline simd8<bool> operator>=(const simd8<uint8_t> other) const { return vcgeq_u8(*this, other); } + simdjson_inline simd8<bool> operator<(const simd8<uint8_t> other) const { return vcltq_u8(*this, other); } + simdjson_inline simd8<bool> operator>(const simd8<uint8_t> other) const { return vcgtq_u8(*this, other); } + // Same as >, but instead of guaranteeing all 1's == true, false = 0 and true = nonzero. For ARM, returns all 1's. + simdjson_inline simd8<uint8_t> gt_bits(const simd8<uint8_t> other) const { return simd8<uint8_t>(*this > other); } + // Same as <, but instead of guaranteeing all 1's == true, false = 0 and true = nonzero. For ARM, returns all 1's. + simdjson_inline simd8<uint8_t> lt_bits(const simd8<uint8_t> other) const { return simd8<uint8_t>(*this < other); } + + // Bit-specific operations + simdjson_inline simd8<bool> any_bits_set(simd8<uint8_t> bits) const { return vtstq_u8(*this, bits); } + simdjson_inline bool any_bits_set_anywhere() const { return this->max_val() != 0; } + simdjson_inline bool any_bits_set_anywhere(simd8<uint8_t> bits) const { return (*this & bits).any_bits_set_anywhere(); } + template<int N> + simdjson_inline simd8<uint8_t> shr() const { return vshrq_n_u8(*this, N); } + template<int N> + simdjson_inline simd8<uint8_t> shl() const { return vshlq_n_u8(*this, N); } + + // Perform a lookup assuming the value is between 0 and 16 (undefined behavior for out of range values) + template<typename L> + simdjson_inline simd8<L> lookup_16(simd8<L> lookup_table) const { + return lookup_table.apply_lookup_16_to(*this); + } + + + // Copies to 'output" all bytes corresponding to a 0 in the mask (interpreted as a bitset). + // Passing a 0 value for mask would be equivalent to writing out every byte to output. + // Only the first 16 - count_ones(mask) bytes of the result are significant but 16 bytes + // get written. + // Design consideration: it seems like a function with the + // signature simd8<L> compress(uint16_t mask) would be + // sensible, but the AVX ISA makes this kind of approach difficult. + template<typename L> + simdjson_inline void compress(uint16_t mask, L * output) const { + using internal::thintable_epi8; + using internal::BitsSetTable256mul2; + using internal::pshufb_combine_table; + // this particular implementation was inspired by work done by @animetosho + // we do it in two steps, first 8 bytes and then second 8 bytes + uint8_t mask1 = uint8_t(mask); // least significant 8 bits + uint8_t mask2 = uint8_t(mask >> 8); // most significant 8 bits + // next line just loads the 64-bit values thintable_epi8[mask1] and + // thintable_epi8[mask2] into a 128-bit register, using only + // two instructions on most compilers. + uint64x2_t shufmask64 = {thintable_epi8[mask1], thintable_epi8[mask2]}; + uint8x16_t shufmask = vreinterpretq_u8_u64(shufmask64); + // we increment by 0x08 the second half of the mask +#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO + uint8x16_t inc = simdjson_make_uint8x16_t(0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08); +#else + uint8x16_t inc = {0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08}; +#endif + shufmask = vaddq_u8(shufmask, inc); + // this is the version "nearly pruned" + uint8x16_t pruned = vqtbl1q_u8(*this, shufmask); + // we still need to put the two halves together. + // we compute the popcount of the first half: + int pop1 = BitsSetTable256mul2[mask1]; + // then load the corresponding mask, what it does is to write + // only the first pop1 bytes from the first 8 bytes, and then + // it fills in with the bytes from the second 8 bytes + some filling + // at the end. + uint8x16_t compactmask = vld1q_u8(reinterpret_cast<const uint8_t *>(pshufb_combine_table + pop1 * 8)); + uint8x16_t answer = vqtbl1q_u8(pruned, compactmask); + vst1q_u8(reinterpret_cast<uint8_t*>(output), answer); + } + + // Copies all bytes corresponding to a 0 in the low half of the mask (interpreted as a + // bitset) to output1, then those corresponding to a 0 in the high half to output2. + template<typename L> + simdjson_inline void compress_halves(uint16_t mask, L *output1, L *output2) const { + using internal::thintable_epi8; + uint8_t mask1 = uint8_t(mask); // least significant 8 bits + uint8_t mask2 = uint8_t(mask >> 8); // most significant 8 bits + uint8x8_t compactmask1 = vcreate_u8(thintable_epi8[mask1]); + uint8x8_t compactmask2 = vcreate_u8(thintable_epi8[mask2]); + // we increment by 0x08 the second half of the mask +#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO + uint8x8_t inc = simdjson_make_uint8x8_t(0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08); +#else + uint8x8_t inc = {0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08}; +#endif + compactmask2 = vadd_u8(compactmask2, inc); + // store each result (with the second store possibly overlapping the first) + vst1_u8((uint8_t*)output1, vqtbl1_u8(*this, compactmask1)); + vst1_u8((uint8_t*)output2, vqtbl1_u8(*this, compactmask2)); + } + + template<typename L> + simdjson_inline simd8<L> lookup_16( + L replace0, L replace1, L replace2, L replace3, + L replace4, L replace5, L replace6, L replace7, + L replace8, L replace9, L replace10, L replace11, + L replace12, L replace13, L replace14, L replace15) const { + return lookup_16(simd8<L>::repeat_16( + replace0, replace1, replace2, replace3, + replace4, replace5, replace6, replace7, + replace8, replace9, replace10, replace11, + replace12, replace13, replace14, replace15 + )); + } + + template<typename T> + simdjson_inline simd8<uint8_t> apply_lookup_16_to(const simd8<T> original) { + return vqtbl1q_u8(*this, simd8<uint8_t>(original)); + } + }; + + // Signed bytes + template<> + struct simd8<int8_t> { + int8x16_t value; + + static simdjson_inline simd8<int8_t> splat(int8_t _value) { return vmovq_n_s8(_value); } + static simdjson_inline simd8<int8_t> zero() { return vdupq_n_s8(0); } + static simdjson_inline simd8<int8_t> load(const int8_t values[16]) { return vld1q_s8(values); } + + // Conversion from/to SIMD register + simdjson_inline simd8(const int8x16_t _value) : value{_value} {} + simdjson_inline operator const int8x16_t&() const { return this->value; } + simdjson_inline operator int8x16_t&() { return this->value; } + + // Zero constructor + simdjson_inline simd8() : simd8(zero()) {} + // Splat constructor + simdjson_inline simd8(int8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const int8_t* values) : simd8(load(values)) {} + // Member-by-member initialization +#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO + simdjson_inline simd8( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 + ) : simd8(simdjson_make_int8x16_t( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + )) {} +#else + simdjson_inline simd8( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 + ) : simd8(int8x16_t{ + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + }) {} +#endif + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8<int8_t> repeat_16( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 + ) { + return simd8<int8_t>( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + // Store to array + simdjson_inline void store(int8_t dst[16]) const { return vst1q_s8(dst, *this); } + + // Explicit conversion to/from unsigned + // + // Under Visual Studio/ARM64 uint8x16_t and int8x16_t are apparently the same type. + // In theory, we could check this occurrence with std::same_as and std::enabled_if but it is C++14 + // and relatively ugly and hard to read. +#ifndef SIMDJSON_REGULAR_VISUAL_STUDIO + simdjson_inline explicit simd8(const uint8x16_t other): simd8(vreinterpretq_s8_u8(other)) {} +#endif + simdjson_inline explicit operator simd8<uint8_t>() const { return vreinterpretq_u8_s8(this->value); } + + // Math + simdjson_inline simd8<int8_t> operator+(const simd8<int8_t> other) const { return vaddq_s8(*this, other); } + simdjson_inline simd8<int8_t> operator-(const simd8<int8_t> other) const { return vsubq_s8(*this, other); } + simdjson_inline simd8<int8_t>& operator+=(const simd8<int8_t> other) { *this = *this + other; return *this; } + simdjson_inline simd8<int8_t>& operator-=(const simd8<int8_t> other) { *this = *this - other; return *this; } + + // Order-sensitive comparisons + simdjson_inline simd8<int8_t> max_val(const simd8<int8_t> other) const { return vmaxq_s8(*this, other); } + simdjson_inline simd8<int8_t> min_val(const simd8<int8_t> other) const { return vminq_s8(*this, other); } + simdjson_inline simd8<bool> operator>(const simd8<int8_t> other) const { return vcgtq_s8(*this, other); } + simdjson_inline simd8<bool> operator<(const simd8<int8_t> other) const { return vcltq_s8(*this, other); } + simdjson_inline simd8<bool> operator==(const simd8<int8_t> other) const { return vceqq_s8(*this, other); } + + template<int N=1> + simdjson_inline simd8<int8_t> prev(const simd8<int8_t> prev_chunk) const { + return vextq_s8(prev_chunk, *this, 16 - N); + } + + // Perform a lookup assuming no value is larger than 16 + template<typename L> + simdjson_inline simd8<L> lookup_16(simd8<L> lookup_table) const { + return lookup_table.apply_lookup_16_to(*this); + } + template<typename L> + simdjson_inline simd8<L> lookup_16( + L replace0, L replace1, L replace2, L replace3, + L replace4, L replace5, L replace6, L replace7, + L replace8, L replace9, L replace10, L replace11, + L replace12, L replace13, L replace14, L replace15) const { + return lookup_16(simd8<L>::repeat_16( + replace0, replace1, replace2, replace3, + replace4, replace5, replace6, replace7, + replace8, replace9, replace10, replace11, + replace12, replace13, replace14, replace15 + )); + } + + template<typename T> + simdjson_inline simd8<int8_t> apply_lookup_16_to(const simd8<T> original) { + return vqtbl1q_s8(*this, simd8<uint8_t>(original)); + } + }; + + template<typename T> + struct simd8x64 { + static constexpr int NUM_CHUNKS = 64 / sizeof(simd8<T>); + static_assert(NUM_CHUNKS == 4, "ARM kernel should use four registers per 64-byte block."); + const simd8<T> chunks[NUM_CHUNKS]; + + simd8x64(const simd8x64<T>& o) = delete; // no copy allowed + simd8x64<T>& operator=(const simd8<T>& other) = delete; // no assignment allowed + simd8x64() = delete; // no default constructor allowed + + simdjson_inline simd8x64(const simd8<T> chunk0, const simd8<T> chunk1, const simd8<T> chunk2, const simd8<T> chunk3) : chunks{chunk0, chunk1, chunk2, chunk3} {} + simdjson_inline simd8x64(const T ptr[64]) : chunks{simd8<T>::load(ptr), simd8<T>::load(ptr+16), simd8<T>::load(ptr+32), simd8<T>::load(ptr+48)} {} + + simdjson_inline void store(T ptr[64]) const { + this->chunks[0].store(ptr+sizeof(simd8<T>)*0); + this->chunks[1].store(ptr+sizeof(simd8<T>)*1); + this->chunks[2].store(ptr+sizeof(simd8<T>)*2); + this->chunks[3].store(ptr+sizeof(simd8<T>)*3); + } + + simdjson_inline simd8<T> reduce_or() const { + return (this->chunks[0] | this->chunks[1]) | (this->chunks[2] | this->chunks[3]); + } + + + simdjson_inline uint64_t compress(uint64_t mask, T * output) const { + uint64_t popcounts = vget_lane_u64(vreinterpret_u64_u8(vcnt_u8(vcreate_u8(~mask))), 0); + // compute the prefix sum of the popcounts of each byte + uint64_t offsets = popcounts * 0x0101010101010101; + this->chunks[0].compress_halves(uint16_t(mask), output, &output[popcounts & 0xFF]); + this->chunks[1].compress_halves(uint16_t(mask >> 16), &output[(offsets >> 8) & 0xFF], &output[(offsets >> 16) & 0xFF]); + this->chunks[2].compress_halves(uint16_t(mask >> 32), &output[(offsets >> 24) & 0xFF], &output[(offsets >> 32) & 0xFF]); + this->chunks[3].compress_halves(uint16_t(mask >> 48), &output[(offsets >> 40) & 0xFF], &output[(offsets >> 48) & 0xFF]); + return offsets >> 56; + } + + simdjson_inline uint64_t to_bitmask() const { +#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO + const uint8x16_t bit_mask = simdjson_make_uint8x16_t( + 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, + 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80 + ); +#else + const uint8x16_t bit_mask = { + 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, + 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80 + }; +#endif + // Add each of the elements next to each other, successively, to stuff each 8 byte mask into one. + uint8x16_t sum0 = vpaddq_u8(this->chunks[0] & bit_mask, this->chunks[1] & bit_mask); + uint8x16_t sum1 = vpaddq_u8(this->chunks[2] & bit_mask, this->chunks[3] & bit_mask); + sum0 = vpaddq_u8(sum0, sum1); + sum0 = vpaddq_u8(sum0, sum0); + return vgetq_lane_u64(vreinterpretq_u64_u8(sum0), 0); + } + + simdjson_inline uint64_t eq(const T m) const { + const simd8<T> mask = simd8<T>::splat(m); + return simd8x64<bool>( + this->chunks[0] == mask, + this->chunks[1] == mask, + this->chunks[2] == mask, + this->chunks[3] == mask + ).to_bitmask(); + } + + simdjson_inline uint64_t lteq(const T m) const { + const simd8<T> mask = simd8<T>::splat(m); + return simd8x64<bool>( + this->chunks[0] <= mask, + this->chunks[1] <= mask, + this->chunks[2] <= mask, + this->chunks[3] <= mask + ).to_bitmask(); + } + }; // struct simd8x64<T> + +} // namespace simd +} // unnamed namespace +} // namespace arm64 +} // namespace simdjson + +#endif // SIMDJSON_ARM64_SIMD_H diff --git a/contrib/libs/simdjson/include/simdjson/arm64/stringparsing_defs.h b/contrib/libs/simdjson/include/simdjson/arm64/stringparsing_defs.h new file mode 100644 index 00000000000..30d02faff3f --- /dev/null +++ b/contrib/libs/simdjson/include/simdjson/arm64/stringparsing_defs.h @@ -0,0 +1,53 @@ +#ifndef SIMDJSON_ARM64_STRINGPARSING_DEFS_H +#define SIMDJSON_ARM64_STRINGPARSING_DEFS_H + +#ifndef SIMDJSON_CONDITIONAL_INCLUDE +#include "simdjson/arm64/base.h" +#include "simdjson/arm64/simd.h" +#include "simdjson/arm64/bitmanipulation.h" +#endif // SIMDJSON_CONDITIONAL_INCLUDE + +namespace simdjson { +namespace arm64 { +namespace { + +using namespace simd; + +// Holds backslashes and quotes locations. +struct backslash_and_quote { +public: + static constexpr uint32_t BYTES_PROCESSED = 32; + simdjson_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); + + simdjson_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; } + simdjson_inline bool has_backslash() { return bs_bits != 0; } + simdjson_inline int quote_index() { return trailing_zeroes(quote_bits); } + simdjson_inline int backslash_index() { return trailing_zeroes(bs_bits); } + + uint32_t bs_bits; + uint32_t quote_bits; +}; // struct backslash_and_quote + +simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { + // this can read up to 31 bytes beyond the buffer size, but we require + // SIMDJSON_PADDING of padding + static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "backslash and quote finder must process fewer than SIMDJSON_PADDING bytes"); + simd8<uint8_t> v0(src); + simd8<uint8_t> v1(src + sizeof(v0)); + v0.store(dst); + v1.store(dst + sizeof(v0)); + + // Getting a 64-bit bitmask is much cheaper than multiple 16-bit bitmasks on ARM; therefore, we + // smash them together into a 64-byte mask and get the bitmask from there. + uint64_t bs_and_quote = simd8x64<bool>(v0 == '\\', v1 == '\\', v0 == '"', v1 == '"').to_bitmask(); + return { + uint32_t(bs_and_quote), // bs_bits + uint32_t(bs_and_quote >> 32) // quote_bits + }; +} + +} // unnamed namespace +} // namespace arm64 +} // namespace simdjson + +#endif // SIMDJSON_ARM64_STRINGPARSING_DEFS_H diff --git a/contrib/libs/simdjson/include/simdjson/base.h b/contrib/libs/simdjson/include/simdjson/base.h new file mode 100644 index 00000000000..a73b84bd082 --- /dev/null +++ b/contrib/libs/simdjson/include/simdjson/base.h @@ -0,0 +1,60 @@ +/** + * @file Base declarations for all simdjson headers + * @private + */ +#ifndef SIMDJSON_BASE_H +#define SIMDJSON_BASE_H + +#include "simdjson/common_defs.h" +#include "simdjson/compiler_check.h" +#include "simdjson/error.h" +#include "simdjson/portability.h" + +/** + * @brief The top level simdjson namespace, containing everything the library provides. + */ +namespace simdjson { + +SIMDJSON_PUSH_DISABLE_UNUSED_WARNINGS + +/** The maximum document size supported by simdjson. */ +constexpr size_t SIMDJSON_MAXSIZE_BYTES = 0xFFFFFFFF; + +/** + * The amount of padding needed in a buffer to parse JSON. + * + * The input buf should be readable up to buf + SIMDJSON_PADDING + * this is a stopgap; there should be a better description of the + * main loop and its behavior that abstracts over this + * See https://github.com/simdjson/simdjson/issues/174 + */ +constexpr size_t SIMDJSON_PADDING = 64; + +/** + * By default, simdjson supports this many nested objects and arrays. + * + * This is the default for parser::max_depth(). + */ +constexpr size_t DEFAULT_MAX_DEPTH = 1024; + +SIMDJSON_POP_DISABLE_UNUSED_WARNINGS + +class implementation; +struct padded_string; +class padded_string_view; +enum class stage1_mode; + +namespace internal { + +template<typename T> +class atomic_ptr; +class dom_parser_implementation; +class escape_json_string; +class tape_ref; +struct value128; +enum class tape_type; + +} // namespace internal +} // namespace simdjson + +#endif // SIMDJSON_BASE_H diff --git a/contrib/libs/simdjson/include/simdjson/common_defs.h b/contrib/libs/simdjson/include/simdjson/common_defs.h new file mode 100644 index 00000000000..d0ae083ecba --- /dev/null +++ b/contrib/libs/simdjson/include/simdjson/common_defs.h @@ -0,0 +1,347 @@ +#ifndef SIMDJSON_COMMON_DEFS_H +#define SIMDJSON_COMMON_DEFS_H + +#include <cassert> +#include "simdjson/compiler_check.h" +#include "simdjson/portability.h" + +namespace simdjson { +namespace internal { +/** + * @private + * Our own implementation of the C++17 to_chars function. + * Defined in src/to_chars + */ +char *to_chars(char *first, const char *last, double value); +/** + * @private + * A number parsing routine. + * Defined in src/from_chars + */ +double from_chars(const char *first) noexcept; +double from_chars(const char *first, const char* end) noexcept; +} + +#ifndef SIMDJSON_EXCEPTIONS +#if __cpp_exceptions +#define SIMDJSON_EXCEPTIONS 1 +#else +#define SIMDJSON_EXCEPTIONS 0 +#endif +#endif + +} // namespace simdjson + +#if defined(__GNUC__) + // Marks a block with a name so that MCA analysis can see it. + #define SIMDJSON_BEGIN_DEBUG_BLOCK(name) __asm volatile("# LLVM-MCA-BEGIN " #name); + #define SIMDJSON_END_DEBUG_BLOCK(name) __asm volatile("# LLVM-MCA-END " #name); + #define SIMDJSON_DEBUG_BLOCK(name, block) BEGIN_DEBUG_BLOCK(name); block; END_DEBUG_BLOCK(name); +#else + #define SIMDJSON_BEGIN_DEBUG_BLOCK(name) + #define SIMDJSON_END_DEBUG_BLOCK(name) + #define SIMDJSON_DEBUG_BLOCK(name, block) +#endif + +// Align to N-byte boundary +#define SIMDJSON_ROUNDUP_N(a, n) (((a) + ((n)-1)) & ~((n)-1)) +#define SIMDJSON_ROUNDDOWN_N(a, n) ((a) & ~((n)-1)) + +#define SIMDJSON_ISALIGNED_N(ptr, n) (((uintptr_t)(ptr) & ((n)-1)) == 0) + +#if SIMDJSON_REGULAR_VISUAL_STUDIO + // We could use [[deprecated]] but it requires C++14 + #define simdjson_deprecated __declspec(deprecated) + + #define simdjson_really_inline __forceinline + #define simdjson_never_inline __declspec(noinline) + + #define simdjson_unused + #define simdjson_warn_unused + + #ifndef simdjson_likely + #define simdjson_likely(x) x + #endif + #ifndef simdjson_unlikely + #define simdjson_unlikely(x) x + #endif + + #define SIMDJSON_PUSH_DISABLE_WARNINGS __pragma(warning( push )) + #define SIMDJSON_PUSH_DISABLE_ALL_WARNINGS __pragma(warning( push, 0 )) + #define SIMDJSON_DISABLE_VS_WARNING(WARNING_NUMBER) __pragma(warning( disable : WARNING_NUMBER )) + // Get rid of Intellisense-only warnings (Code Analysis) + // Though __has_include is C++17, it is supported in Visual Studio 2017 or better (_MSC_VER>=1910). + #ifdef __has_include + #if __has_include(<CppCoreCheck\Warnings.h>) + #error #include <CppCoreCheck\Warnings.h> + #define SIMDJSON_DISABLE_UNDESIRED_WARNINGS SIMDJSON_DISABLE_VS_WARNING(ALL_CPPCORECHECK_WARNINGS) + #endif + #endif + + #ifndef SIMDJSON_DISABLE_UNDESIRED_WARNINGS + #define SIMDJSON_DISABLE_UNDESIRED_WARNINGS + #endif + + #define SIMDJSON_DISABLE_DEPRECATED_WARNING SIMDJSON_DISABLE_VS_WARNING(4996) + #define SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING + #define SIMDJSON_POP_DISABLE_WARNINGS __pragma(warning( pop )) + + #define SIMDJSON_PUSH_DISABLE_UNUSED_WARNINGS + #define SIMDJSON_POP_DISABLE_UNUSED_WARNINGS + +#else // SIMDJSON_REGULAR_VISUAL_STUDIO + // We could use [[deprecated]] but it requires C++14 + #define simdjson_deprecated __attribute__((deprecated)) + + #define simdjson_really_inline inline __attribute__((always_inline)) + #define simdjson_never_inline inline __attribute__((noinline)) + + #define simdjson_unused __attribute__((unused)) + #define simdjson_warn_unused __attribute__((warn_unused_result)) + + #ifndef simdjson_likely + #define simdjson_likely(x) __builtin_expect(!!(x), 1) + #endif + #ifndef simdjson_unlikely + #define simdjson_unlikely(x) __builtin_expect(!!(x), 0) + #endif + + #define SIMDJSON_PUSH_DISABLE_WARNINGS _Pragma("GCC diagnostic push") + // gcc doesn't seem to disable all warnings with all and extra, add warnings here as necessary + // We do it separately for clang since it has different warnings. + #ifdef __clang__ + // clang is missing -Wmaybe-uninitialized. + #define SIMDJSON_PUSH_DISABLE_ALL_WARNINGS SIMDJSON_PUSH_DISABLE_WARNINGS \ + SIMDJSON_DISABLE_GCC_WARNING(-Weffc++) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wall) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wconversion) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wextra) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wattributes) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wimplicit-fallthrough) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wnon-virtual-dtor) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wreturn-type) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wshadow) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wunused-parameter) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wunused-variable) + #else // __clang__ + #define SIMDJSON_PUSH_DISABLE_ALL_WARNINGS SIMDJSON_PUSH_DISABLE_WARNINGS \ + SIMDJSON_DISABLE_GCC_WARNING(-Weffc++) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wall) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wconversion) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wextra) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wattributes) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wimplicit-fallthrough) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wnon-virtual-dtor) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wreturn-type) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wshadow) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wunused-parameter) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wunused-variable) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wmaybe-uninitialized) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wformat-security) + #endif // __clang__ + + #define SIMDJSON_PRAGMA(P) _Pragma(#P) + #define SIMDJSON_DISABLE_GCC_WARNING(WARNING) SIMDJSON_PRAGMA(GCC diagnostic ignored #WARNING) + #if SIMDJSON_CLANG_VISUAL_STUDIO + #define SIMDJSON_DISABLE_UNDESIRED_WARNINGS SIMDJSON_DISABLE_GCC_WARNING(-Wmicrosoft-include) + #else + #define SIMDJSON_DISABLE_UNDESIRED_WARNINGS + #endif + #define SIMDJSON_DISABLE_DEPRECATED_WARNING SIMDJSON_DISABLE_GCC_WARNING(-Wdeprecated-declarations) + #define SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING SIMDJSON_DISABLE_GCC_WARNING(-Wstrict-overflow) + #define SIMDJSON_POP_DISABLE_WARNINGS _Pragma("GCC diagnostic pop") + + #define SIMDJSON_PUSH_DISABLE_UNUSED_WARNINGS SIMDJSON_PUSH_DISABLE_WARNINGS \ + SIMDJSON_DISABLE_GCC_WARNING(-Wunused) + #define SIMDJSON_POP_DISABLE_UNUSED_WARNINGS SIMDJSON_POP_DISABLE_WARNINGS + + + +#endif // MSC_VER + +#if defined(simdjson_inline) + // Prefer the user's definition of simdjson_inline; don't define it ourselves. +#elif defined(__GNUC__) && !defined(__OPTIMIZE__) + // If optimizations are disabled, forcing inlining can lead to significant + // code bloat and high compile times. Don't use simdjson_really_inline for + // unoptimized builds. + #define simdjson_inline inline +#else + // Force inlining for most simdjson functions. + #define simdjson_inline simdjson_really_inline +#endif + +#if SIMDJSON_VISUAL_STUDIO + /** + * Windows users need to do some extra work when building + * or using a dynamic library (DLL). When building, we need + * to set SIMDJSON_DLLIMPORTEXPORT to __declspec(dllexport). + * When *using* the DLL, the user needs to set + * SIMDJSON_DLLIMPORTEXPORT __declspec(dllimport). + * + * Static libraries not need require such work. + * + * It does not matter here whether you are using + * the regular visual studio or clang under visual + * studio, you still need to handle these issues. + * + * Non-Windows systems do not have this complexity. + */ + #if SIMDJSON_BUILDING_WINDOWS_DYNAMIC_LIBRARY + // We set SIMDJSON_BUILDING_WINDOWS_DYNAMIC_LIBRARY when we build a DLL under Windows. + // It should never happen that both SIMDJSON_BUILDING_WINDOWS_DYNAMIC_LIBRARY and + // SIMDJSON_USING_WINDOWS_DYNAMIC_LIBRARY are set. + #define SIMDJSON_DLLIMPORTEXPORT __declspec(dllexport) + #elif SIMDJSON_USING_WINDOWS_DYNAMIC_LIBRARY + // Windows user who call a dynamic library should set SIMDJSON_USING_WINDOWS_DYNAMIC_LIBRARY to 1. + #define SIMDJSON_DLLIMPORTEXPORT __declspec(dllimport) + #else + // We assume by default static linkage + #define SIMDJSON_DLLIMPORTEXPORT + #endif + +/** + * Workaround for the vcpkg package manager. Only vcpkg should + * ever touch the next line. The SIMDJSON_USING_LIBRARY macro is otherwise unused. + */ +#if SIMDJSON_USING_LIBRARY +#define SIMDJSON_DLLIMPORTEXPORT __declspec(dllimport) +#endif +/** + * End of workaround for the vcpkg package manager. + */ +#else + #define SIMDJSON_DLLIMPORTEXPORT +#endif + +// C++17 requires string_view. +#if SIMDJSON_CPLUSPLUS17 +#define SIMDJSON_HAS_STRING_VIEW +#include <string_view> // by the standard, this has to be safe. +#endif + +// This macro (__cpp_lib_string_view) has to be defined +// for C++17 and better, but if it is otherwise defined, +// we are going to assume that string_view is available +// even if we do not have C++17 support. +#ifdef __cpp_lib_string_view +#define SIMDJSON_HAS_STRING_VIEW +#endif + +// Some systems have string_view even if we do not have C++17 support, +// and even if __cpp_lib_string_view is undefined, it is the case +// with Apple clang version 11. +// We must handle it. *This is important.* +#ifndef SIMDJSON_HAS_STRING_VIEW +#if defined __has_include +// do not combine the next #if with the previous one (unsafe) +#if __has_include (<string_view>) +// now it is safe to trigger the include +#include <string_view> // though the file is there, it does not follow that we got the implementation +#if defined(_LIBCPP_STRING_VIEW) +// Ah! So we under libc++ which under its Library Fundamentals Technical Specification, which preceded C++17, +// included string_view. +// This means that we have string_view *even though* we may not have C++17. +#define SIMDJSON_HAS_STRING_VIEW +#endif // _LIBCPP_STRING_VIEW +#endif // __has_include (<string_view>) +#endif // defined __has_include +#endif // def SIMDJSON_HAS_STRING_VIEW +// end of complicated but important routine to try to detect string_view. + +// +// Backfill std::string_view using nonstd::string_view on systems where +// we expect that string_view is missing. Important: if we get this wrong, +// we will end up with two string_view definitions and potential trouble. +// That is why we work so hard above to avoid it. +// +#ifndef SIMDJSON_HAS_STRING_VIEW +SIMDJSON_PUSH_DISABLE_ALL_WARNINGS +#error #include "simdjson/nonstd/string_view.hpp" +SIMDJSON_POP_DISABLE_WARNINGS + +namespace std { + using string_view = nonstd::string_view; +} +#endif // SIMDJSON_HAS_STRING_VIEW +#undef SIMDJSON_HAS_STRING_VIEW // We are not going to need this macro anymore. + +/// If EXPR is an error, returns it. +#define SIMDJSON_TRY(EXPR) { auto _err = (EXPR); if (_err) { return _err; } } + +// Unless the programmer has already set SIMDJSON_DEVELOPMENT_CHECKS, +// we want to set it under debug builds. We detect a debug build +// under Visual Studio when the _DEBUG macro is set. Under the other +// compilers, we use the fact that they define __OPTIMIZE__ whenever +// they allow optimizations. +// It is possible that this could miss some cases where SIMDJSON_DEVELOPMENT_CHECKS +// is helpful, but the programmer can set the macro SIMDJSON_DEVELOPMENT_CHECKS. +// It could also wrongly set SIMDJSON_DEVELOPMENT_CHECKS (e.g., if the programmer +// sets _DEBUG in a release build under Visual Studio, or if some compiler fails to +// set the __OPTIMIZE__ macro). +#ifndef SIMDJSON_DEVELOPMENT_CHECKS +#ifdef _MSC_VER +// Visual Studio seems to set _DEBUG for debug builds. +#ifdef _DEBUG +#define SIMDJSON_DEVELOPMENT_CHECKS 1 +#endif // _DEBUG +#else // _MSC_VER +// All other compilers appear to set __OPTIMIZE__ to a positive integer +// when the compiler is optimizing. +#ifndef __OPTIMIZE__ +#define SIMDJSON_DEVELOPMENT_CHECKS 1 +#endif // __OPTIMIZE__ +#endif // _MSC_VER +#endif // SIMDJSON_DEVELOPMENT_CHECKS + +// The SIMDJSON_CHECK_EOF macro is a feature flag for the "don't require padding" +// feature. + +#if SIMDJSON_CPLUSPLUS17 +// if we have C++, then fallthrough is a default attribute +# define simdjson_fallthrough [[fallthrough]] +// check if we have __attribute__ support +#elif defined(__has_attribute) +// check if we have the __fallthrough__ attribute +#if __has_attribute(__fallthrough__) +// we are good to go: +# define simdjson_fallthrough __attribute__((__fallthrough__)) +#endif // __has_attribute(__fallthrough__) +#endif // SIMDJSON_CPLUSPLUS17 +// on some systems, we simply do not have support for fallthrough, so use a default: +#ifndef simdjson_fallthrough +# define simdjson_fallthrough do {} while (0) /* fallthrough */ +#endif // simdjson_fallthrough + +#if SIMDJSON_DEVELOPMENT_CHECKS +#define SIMDJSON_DEVELOPMENT_ASSERT(expr) do { assert ((expr)); } while (0) +#else +#define SIMDJSON_DEVELOPMENT_ASSERT(expr) do { } while (0) +#endif + +#ifndef SIMDJSON_UTF8VALIDATION +#define SIMDJSON_UTF8VALIDATION 1 +#endif + +#ifdef __has_include +// How do we detect that a compiler supports vbmi2? +// For sure if the following header is found, we are ok? +#if __has_include(<avx512vbmi2intrin.h>) +#define SIMDJSON_COMPILER_SUPPORTS_VBMI2 1 +#endif +#endif + +#ifdef _MSC_VER +#if _MSC_VER >= 1920 +// Visual Studio 2019 and up support VBMI2 under x64 even if the header +// avx512vbmi2intrin.h is not found. +#define SIMDJSON_COMPILER_SUPPORTS_VBMI2 1 +#endif +#endif + +// By default, we allow AVX512. +#ifndef SIMDJSON_AVX512_ALLOWED +#define SIMDJSON_AVX512_ALLOWED 1 +#endif + +#endif // SIMDJSON_COMMON_DEFS_H diff --git a/contrib/libs/simdjson/include/simdjson/compiler_check.h b/contrib/libs/simdjson/include/simdjson/compiler_check.h new file mode 100644 index 00000000000..1d0d03d1545 --- /dev/null +++ b/contrib/libs/simdjson/include/simdjson/compiler_check.h @@ -0,0 +1,53 @@ +#ifndef SIMDJSON_COMPILER_CHECK_H +#define SIMDJSON_COMPILER_CHECK_H + +#ifndef __cplusplus +#error simdjson requires a C++ compiler +#endif + +#ifndef SIMDJSON_CPLUSPLUS +#if defined(_MSVC_LANG) && !defined(__clang__) +#define SIMDJSON_CPLUSPLUS (_MSC_VER == 1900 ? 201103L : _MSVC_LANG) +#else +#define SIMDJSON_CPLUSPLUS __cplusplus +#endif +#endif + +// C++ 23 +#if !defined(SIMDJSON_CPLUSPLUS23) && (SIMDJSON_CPLUSPLUS >= 202302L) +#define SIMDJSON_CPLUSPLUS23 1 +#endif + +// C++ 20 +#if !defined(SIMDJSON_CPLUSPLUS20) && (SIMDJSON_CPLUSPLUS >= 202002L) +#define SIMDJSON_CPLUSPLUS20 1 +#endif + +// C++ 17 +#if !defined(SIMDJSON_CPLUSPLUS17) && (SIMDJSON_CPLUSPLUS >= 201703L) +#define SIMDJSON_CPLUSPLUS17 1 +#endif + +// C++ 14 +#if !defined(SIMDJSON_CPLUSPLUS14) && (SIMDJSON_CPLUSPLUS >= 201402L) +#define SIMDJSON_CPLUSPLUS14 1 +#endif + +// C++ 11 +#if !defined(SIMDJSON_CPLUSPLUS11) && (SIMDJSON_CPLUSPLUS >= 201103L) +#define SIMDJSON_CPLUSPLUS11 1 +#endif + +#ifndef SIMDJSON_CPLUSPLUS11 +#error simdjson requires a compiler compliant with the C++11 standard +#endif + +#ifndef SIMDJSON_IF_CONSTEXPR +#if SIMDJSON_CPLUSPLUS17 +#define SIMDJSON_IF_CONSTEXPR if constexpr +#else +#define SIMDJSON_IF_CONSTEXPR if +#endif +#endif + +#endif // SIMDJSON_COMPILER_CHECK_H diff --git a/contrib/libs/simdjson/include/simdjson/dom/base.h b/contrib/libs/simdjson/include/simdjson/dom/base.h new file mode 100644 index 00000000000..b862277a211 --- /dev/null +++ b/contrib/libs/simdjson/include/simdjson/dom/base.h @@ -0,0 +1,54 @@ +#ifndef SIMDJSON_DOM_BASE_H +#define SIMDJSON_DOM_BASE_H + +#include "simdjson/base.h" + +namespace simdjson { + +/** + * @brief A DOM API on top of the simdjson parser. + */ +namespace dom { + +/** The default batch size for parser.parse_many() and parser.load_many() */ +static constexpr size_t DEFAULT_BATCH_SIZE = 1000000; +/** + * Some adversary might try to set the batch size to 0 or 1, which might cause problems. + * We set a minimum of 32B since anything else is highly likely to be an error. In practice, + * most users will want a much larger batch size. + * + * All non-negative MINIMAL_BATCH_SIZE values should be 'safe' except that, obviously, no JSON + * document can ever span 0 or 1 byte and that very large values would create memory allocation issues. + */ +static constexpr size_t MINIMAL_BATCH_SIZE = 32; + +/** + * It is wasteful to allocate memory for tiny documents (e.g., 4 bytes). + */ +static constexpr size_t MINIMAL_DOCUMENT_CAPACITY = 32; + +class array; +class document; +class document_stream; +class element; +class key_value_pair; +class object; +class parser; + +#ifdef SIMDJSON_THREADS_ENABLED +struct stage1_worker; +#endif // SIMDJSON_THREADS_ENABLED + +} // namespace dom + +namespace internal { + +template<typename T> +class string_builder; +class tape_ref; + +} // namespace internal + +} // namespace simdjson + +#endif // SIMDJSON_DOM_BASE_H
\ No newline at end of file diff --git a/contrib/libs/simdjson/include/simdjson/dom/document.h b/contrib/libs/simdjson/include/simdjson/dom/document.h new file mode 100644 index 00000000000..6c5e284bb47 --- /dev/null +++ b/contrib/libs/simdjson/include/simdjson/dom/document.h @@ -0,0 +1,91 @@ +#ifndef SIMDJSON_DOM_DOCUMENT_H +#define SIMDJSON_DOM_DOCUMENT_H + +#include "simdjson/dom/base.h" + +#include <memory> + +namespace simdjson { +namespace dom { + +/** + * A parsed JSON document. + * + * This class cannot be copied, only moved, to avoid unintended allocations. + */ +class document { +public: + /** + * Create a document container with zero capacity. + * + * The parser will allocate capacity as needed. + */ + document() noexcept = default; + ~document() noexcept = default; + + /** + * Take another document's buffers. + * + * @param other The document to take. Its capacity is zeroed and it is invalidated. + */ + document(document &&other) noexcept = default; + /** @private */ + document(const document &) = delete; // Disallow copying + /** + * Take another document's buffers. + * + * @param other The document to take. Its capacity is zeroed. + */ + document &operator=(document &&other) noexcept = default; + /** @private */ + document &operator=(const document &) = delete; // Disallow copying + + /** + * Get the root element of this document as a JSON array. + */ + element root() const noexcept; + + /** + * @private Dump the raw tape for debugging. + * + * @param os the stream to output to. + * @return false if the tape is likely wrong (e.g., you did not parse a valid JSON). + */ + bool dump_raw_tape(std::ostream &os) const noexcept; + + /** @private Structural values. */ + std::unique_ptr<uint64_t[]> tape{}; + + /** @private String values. + * + * Should be at least byte_capacity. + */ + std::unique_ptr<uint8_t[]> string_buf{}; + /** @private Allocate memory to support + * input JSON documents of up to len bytes. + * + * When calling this function, you lose + * all the data. + * + * The memory allocation is strict: you + * can you use this function to increase + * or lower the amount of allocated memory. + * Passsing zero clears the memory. + */ + error_code allocate(size_t len) noexcept; + /** @private Capacity in bytes, in terms + * of how many bytes of input JSON we can + * support. + */ + size_t capacity() const noexcept; + + +private: + size_t allocated_capacity{0}; + friend class parser; +}; // class document + +} // namespace dom +} // namespace simdjson + +#endif // SIMDJSON_DOM_DOCUMENT_H diff --git a/contrib/libs/simdjson/include/simdjson/error-inl.h b/contrib/libs/simdjson/include/simdjson/error-inl.h new file mode 100644 index 00000000000..5cf61d9c307 --- /dev/null +++ b/contrib/libs/simdjson/include/simdjson/error-inl.h @@ -0,0 +1,184 @@ +#ifndef SIMDJSON_ERROR_INL_H +#define SIMDJSON_ERROR_INL_H + +#include "simdjson/error.h" + +#include <iostream> + +namespace simdjson { +namespace internal { + // We store the error code so we can validate the error message is associated with the right code + struct error_code_info { + error_code code; + const char* message; // do not use a fancy std::string where a simple C string will do (no alloc, no destructor) + }; + // These MUST match the codes in error_code. We check this constraint in basictests. + extern SIMDJSON_DLLIMPORTEXPORT const error_code_info error_codes[]; +} // namespace internal + + +inline const char *error_message(error_code error) noexcept { + // If you're using error_code, we're trusting you got it from the enum. + return internal::error_codes[int(error)].message; +} + +// deprecated function +#ifndef SIMDJSON_DISABLE_DEPRECATED_API +inline const std::string error_message(int error) noexcept { + if (error < 0 || error >= error_code::NUM_ERROR_CODES) { + return internal::error_codes[UNEXPECTED_ERROR].message; + } + return internal::error_codes[error].message; +} +#endif // SIMDJSON_DISABLE_DEPRECATED_API + +inline std::ostream& operator<<(std::ostream& out, error_code error) noexcept { + return out << error_message(error); +} + +namespace internal { + +// +// internal::simdjson_result_base<T> inline implementation +// + +template<typename T> +simdjson_inline void simdjson_result_base<T>::tie(T &value, error_code &error) && noexcept { + error = this->second; + if (!error) { + value = std::forward<simdjson_result_base<T>>(*this).first; + } +} + +template<typename T> +simdjson_warn_unused simdjson_inline error_code simdjson_result_base<T>::get(T &value) && noexcept { + error_code error; + std::forward<simdjson_result_base<T>>(*this).tie(value, error); + return error; +} + +template<typename T> +simdjson_inline error_code simdjson_result_base<T>::error() const noexcept { + return this->second; +} + +#if SIMDJSON_EXCEPTIONS + +template<typename T> +simdjson_inline T& simdjson_result_base<T>::value() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return this->first; +} + +template<typename T> +simdjson_inline T&& simdjson_result_base<T>::value() && noexcept(false) { + return std::forward<simdjson_result_base<T>>(*this).take_value(); +} + +template<typename T> +simdjson_inline T&& simdjson_result_base<T>::take_value() && noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return std::forward<T>(this->first); +} + +template<typename T> +simdjson_inline simdjson_result_base<T>::operator T&&() && noexcept(false) { + return std::forward<simdjson_result_base<T>>(*this).take_value(); +} + +#endif // SIMDJSON_EXCEPTIONS + +template<typename T> +simdjson_inline const T& simdjson_result_base<T>::value_unsafe() const& noexcept { + return this->first; +} + +template<typename T> +simdjson_inline T&& simdjson_result_base<T>::value_unsafe() && noexcept { + return std::forward<T>(this->first); +} + +template<typename T> +simdjson_inline simdjson_result_base<T>::simdjson_result_base(T &&value, error_code error) noexcept + : std::pair<T, error_code>(std::forward<T>(value), error) {} +template<typename T> +simdjson_inline simdjson_result_base<T>::simdjson_result_base(error_code error) noexcept + : simdjson_result_base(T{}, error) {} +template<typename T> +simdjson_inline simdjson_result_base<T>::simdjson_result_base(T &&value) noexcept + : simdjson_result_base(std::forward<T>(value), SUCCESS) {} +template<typename T> +simdjson_inline simdjson_result_base<T>::simdjson_result_base() noexcept + : simdjson_result_base(T{}, UNINITIALIZED) {} + +} // namespace internal + +/// +/// simdjson_result<T> inline implementation +/// + +template<typename T> +simdjson_inline void simdjson_result<T>::tie(T &value, error_code &error) && noexcept { + std::forward<internal::simdjson_result_base<T>>(*this).tie(value, error); +} + +template<typename T> +simdjson_warn_unused simdjson_inline error_code simdjson_result<T>::get(T &value) && noexcept { + return std::forward<internal::simdjson_result_base<T>>(*this).get(value); +} + +template<typename T> +simdjson_inline error_code simdjson_result<T>::error() const noexcept { + return internal::simdjson_result_base<T>::error(); +} + +#if SIMDJSON_EXCEPTIONS + +template<typename T> +simdjson_inline T& simdjson_result<T>::value() & noexcept(false) { + return internal::simdjson_result_base<T>::value(); +} + +template<typename T> +simdjson_inline T&& simdjson_result<T>::value() && noexcept(false) { + return std::forward<internal::simdjson_result_base<T>>(*this).value(); +} + +template<typename T> +simdjson_inline T&& simdjson_result<T>::take_value() && noexcept(false) { + return std::forward<internal::simdjson_result_base<T>>(*this).take_value(); +} + +template<typename T> +simdjson_inline simdjson_result<T>::operator T&&() && noexcept(false) { + return std::forward<internal::simdjson_result_base<T>>(*this).take_value(); +} + +#endif // SIMDJSON_EXCEPTIONS + +template<typename T> +simdjson_inline const T& simdjson_result<T>::value_unsafe() const& noexcept { + return internal::simdjson_result_base<T>::value_unsafe(); +} + +template<typename T> +simdjson_inline T&& simdjson_result<T>::value_unsafe() && noexcept { + return std::forward<internal::simdjson_result_base<T>>(*this).value_unsafe(); +} + +template<typename T> +simdjson_inline simdjson_result<T>::simdjson_result(T &&value, error_code error) noexcept + : internal::simdjson_result_base<T>(std::forward<T>(value), error) {} +template<typename T> +simdjson_inline simdjson_result<T>::simdjson_result(error_code error) noexcept + : internal::simdjson_result_base<T>(error) {} +template<typename T> +simdjson_inline simdjson_result<T>::simdjson_result(T &&value) noexcept + : internal::simdjson_result_base<T>(std::forward<T>(value)) {} +template<typename T> +simdjson_inline simdjson_result<T>::simdjson_result() noexcept + : internal::simdjson_result_base<T>() {} + +} // namespace simdjson + +#endif // SIMDJSON_ERROR_INL_H diff --git a/contrib/libs/simdjson/include/simdjson/error.h b/contrib/libs/simdjson/include/simdjson/error.h new file mode 100644 index 00000000000..4848f6ef912 --- /dev/null +++ b/contrib/libs/simdjson/include/simdjson/error.h @@ -0,0 +1,318 @@ +#ifndef SIMDJSON_ERROR_H +#define SIMDJSON_ERROR_H + +#include "simdjson/base.h" + +#include <string> +#include <ostream> + +namespace simdjson { + +/** + * All possible errors returned by simdjson. These error codes are subject to change + * and not all simdjson kernel returns the same error code given the same input: it is not + * well defined which error a given input should produce. + * + * Only SUCCESS evaluates to false as a Boolean. All other error codes will evaluate + * to true as a Boolean. + */ +enum error_code { + SUCCESS = 0, ///< No error + CAPACITY, ///< This parser can't support a document that big + MEMALLOC, ///< Error allocating memory, most likely out of memory + TAPE_ERROR, ///< Something went wrong, this is a generic error + DEPTH_ERROR, ///< Your document exceeds the user-specified depth limitation + STRING_ERROR, ///< Problem while parsing a string + T_ATOM_ERROR, ///< Problem while parsing an atom starting with the letter 't' + F_ATOM_ERROR, ///< Problem while parsing an atom starting with the letter 'f' + N_ATOM_ERROR, ///< Problem while parsing an atom starting with the letter 'n' + NUMBER_ERROR, ///< Problem while parsing a number + BIGINT_ERROR, ///< The integer value exceeds 64 bits + UTF8_ERROR, ///< the input is not valid UTF-8 + UNINITIALIZED, ///< unknown error, or uninitialized document + EMPTY, ///< no structural element found + UNESCAPED_CHARS, ///< found unescaped characters in a string. + UNCLOSED_STRING, ///< missing quote at the end + UNSUPPORTED_ARCHITECTURE, ///< unsupported architecture + INCORRECT_TYPE, ///< JSON element has a different type than user expected + NUMBER_OUT_OF_RANGE, ///< JSON number does not fit in 64 bits + INDEX_OUT_OF_BOUNDS, ///< JSON array index too large + NO_SUCH_FIELD, ///< JSON field not found in object + IO_ERROR, ///< Error reading a file + INVALID_JSON_POINTER, ///< Invalid JSON pointer syntax + INVALID_URI_FRAGMENT, ///< Invalid URI fragment + UNEXPECTED_ERROR, ///< indicative of a bug in simdjson + PARSER_IN_USE, ///< parser is already in use. + OUT_OF_ORDER_ITERATION, ///< tried to iterate an array or object out of order (checked when SIMDJSON_DEVELOPMENT_CHECKS=1) + INSUFFICIENT_PADDING, ///< The JSON doesn't have enough padding for simdjson to safely parse it. + INCOMPLETE_ARRAY_OR_OBJECT, ///< The document ends early. + SCALAR_DOCUMENT_AS_VALUE, ///< A scalar document is treated as a value. + OUT_OF_BOUNDS, ///< Attempted to access location outside of document. + TRAILING_CONTENT, ///< Unexpected trailing content in the JSON input + NUM_ERROR_CODES +}; + +/** + * It is the convention throughout the code that the macro SIMDJSON_DEVELOPMENT_CHECKS determines whether + * we check for OUT_OF_ORDER_ITERATION. The logic behind it is that these errors only occurs when the code + * that was written while breaking some simdjson::ondemand requirement. They should not occur in released + * code after these issues were fixed. + */ + +/** + * Get the error message for the given error code. + * + * dom::parser parser; + * dom::element doc; + * auto error = parser.parse("foo",3).get(doc); + * if (error) { printf("Error: %s\n", error_message(error)); } + * + * @return The error message. + */ +inline const char *error_message(error_code error) noexcept; + +/** + * Write the error message to the output stream + */ +inline std::ostream& operator<<(std::ostream& out, error_code error) noexcept; + +/** + * Exception thrown when an exception-supporting simdjson method is called + */ +struct simdjson_error : public std::exception { + /** + * Create an exception from a simdjson error code. + * @param error The error code + */ + simdjson_error(error_code error) noexcept : _error{error} { } + /** The error message */ + const char *what() const noexcept { return error_message(error()); } + /** The error code */ + error_code error() const noexcept { return _error; } +private: + /** The error code that was used */ + error_code _error; +}; + +namespace internal { + +/** + * The result of a simdjson operation that could fail. + * + * Gives the option of reading error codes, or throwing an exception by casting to the desired result. + * + * This is a base class for implementations that want to add functions to the result type for + * chaining. + * + * Override like: + * + * struct simdjson_result<T> : public internal::simdjson_result_base<T> { + * simdjson_result() noexcept : internal::simdjson_result_base<T>() {} + * simdjson_result(error_code error) noexcept : internal::simdjson_result_base<T>(error) {} + * simdjson_result(T &&value) noexcept : internal::simdjson_result_base<T>(std::forward(value)) {} + * simdjson_result(T &&value, error_code error) noexcept : internal::simdjson_result_base<T>(value, error) {} + * // Your extra methods here + * } + * + * Then any method returning simdjson_result<T> will be chainable with your methods. + */ +template<typename T> +struct simdjson_result_base : protected std::pair<T, error_code> { + + /** + * Create a new empty result with error = UNINITIALIZED. + */ + simdjson_inline simdjson_result_base() noexcept; + + /** + * Create a new error result. + */ + simdjson_inline simdjson_result_base(error_code error) noexcept; + + /** + * Create a new successful result. + */ + simdjson_inline simdjson_result_base(T &&value) noexcept; + + /** + * Create a new result with both things (use if you don't want to branch when creating the result). + */ + simdjson_inline simdjson_result_base(T &&value, error_code error) noexcept; + + /** + * Move the value and the error to the provided variables. + * + * @param value The variable to assign the value to. May not be set if there is an error. + * @param error The variable to assign the error to. Set to SUCCESS if there is no error. + */ + simdjson_inline void tie(T &value, error_code &error) && noexcept; + + /** + * Move the value to the provided variable. + * + * @param value The variable to assign the value to. May not be set if there is an error. + */ + simdjson_inline error_code get(T &value) && noexcept; + + /** + * The error. + */ + simdjson_inline error_code error() const noexcept; + +#if SIMDJSON_EXCEPTIONS + + /** + * Get the result value. + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T& value() & noexcept(false); + + /** + * Take the result value (move it). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T&& value() && noexcept(false); + + /** + * Take the result value (move it). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T&& take_value() && noexcept(false); + + /** + * Cast to the value (will throw on error). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline operator T&&() && noexcept(false); +#endif // SIMDJSON_EXCEPTIONS + + /** + * Get the result value. This function is safe if and only + * the error() method returns a value that evaluates to false. + */ + simdjson_inline const T& value_unsafe() const& noexcept; + + /** + * Take the result value (move it). This function is safe if and only + * the error() method returns a value that evaluates to false. + */ + simdjson_inline T&& value_unsafe() && noexcept; + +}; // struct simdjson_result_base + +} // namespace internal + +/** + * The result of a simdjson operation that could fail. + * + * Gives the option of reading error codes, or throwing an exception by casting to the desired result. + */ +template<typename T> +struct simdjson_result : public internal::simdjson_result_base<T> { + /** + * @private Create a new empty result with error = UNINITIALIZED. + */ + simdjson_inline simdjson_result() noexcept; + /** + * @private Create a new successful result. + */ + simdjson_inline simdjson_result(T &&value) noexcept; + /** + * @private Create a new error result. + */ + simdjson_inline simdjson_result(error_code error_code) noexcept; + /** + * @private Create a new result with both things (use if you don't want to branch when creating the result). + */ + simdjson_inline simdjson_result(T &&value, error_code error) noexcept; + + /** + * Move the value and the error to the provided variables. + * + * @param value The variable to assign the value to. May not be set if there is an error. + * @param error The variable to assign the error to. Set to SUCCESS if there is no error. + */ + simdjson_inline void tie(T &value, error_code &error) && noexcept; + + /** + * Move the value to the provided variable. + * + * @param value The variable to assign the value to. May not be set if there is an error. + */ + simdjson_warn_unused simdjson_inline error_code get(T &value) && noexcept; + + /** + * The error. + */ + simdjson_inline error_code error() const noexcept; + +#if SIMDJSON_EXCEPTIONS + + /** + * Get the result value. + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T& value() & noexcept(false); + + /** + * Take the result value (move it). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T&& value() && noexcept(false); + + /** + * Take the result value (move it). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T&& take_value() && noexcept(false); + + /** + * Cast to the value (will throw on error). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline operator T&&() && noexcept(false); +#endif // SIMDJSON_EXCEPTIONS + + /** + * Get the result value. This function is safe if and only + * the error() method returns a value that evaluates to false. + */ + simdjson_inline const T& value_unsafe() const& noexcept; + + /** + * Take the result value (move it). This function is safe if and only + * the error() method returns a value that evaluates to false. + */ + simdjson_inline T&& value_unsafe() && noexcept; + +}; // struct simdjson_result + +#if SIMDJSON_EXCEPTIONS + +template<typename T> +inline std::ostream& operator<<(std::ostream& out, simdjson_result<T> value) { return out << value.value(); } +#endif // SIMDJSON_EXCEPTIONS + +#ifndef SIMDJSON_DISABLE_DEPRECATED_API +/** + * @deprecated This is an alias and will be removed, use error_code instead + */ +using ErrorValues [[deprecated("This is an alias and will be removed, use error_code instead")]] = error_code; + +/** + * @deprecated Error codes should be stored and returned as `error_code`, use `error_message()` instead. + */ +[[deprecated("Error codes should be stored and returned as `error_code`, use `error_message()` instead.")]] +inline const std::string error_message(int error) noexcept; +#endif // SIMDJSON_DISABLE_DEPRECATED_API +} // namespace simdjson + +#endif // SIMDJSON_ERROR_H diff --git a/contrib/libs/simdjson/include/simdjson/fallback.h b/contrib/libs/simdjson/include/simdjson/fallback.h new file mode 100644 index 00000000000..4588cdc00f0 --- /dev/null +++ b/contrib/libs/simdjson/include/simdjson/fallback.h @@ -0,0 +1,8 @@ +#ifndef SIMDJSON_FALLBACK_H +#define SIMDJSON_FALLBACK_H + +#include "simdjson/fallback/begin.h" +#include "simdjson/generic/amalgamated.h" +#include "simdjson/fallback/end.h" + +#endif // SIMDJSON_FALLBACK_H
\ No newline at end of file diff --git a/contrib/libs/simdjson/include/simdjson/fallback/base.h b/contrib/libs/simdjson/include/simdjson/fallback/base.h new file mode 100644 index 00000000000..99cb37423d7 --- /dev/null +++ b/contrib/libs/simdjson/include/simdjson/fallback/base.h @@ -0,0 +1,19 @@ +#ifndef SIMDJSON_FALLBACK_BASE_H +#define SIMDJSON_FALLBACK_BASE_H + +#ifndef SIMDJSON_CONDITIONAL_INCLUDE +#include "simdjson/base.h" +#endif // SIMDJSON_CONDITIONAL_INCLUDE + +namespace simdjson { +/** + * Fallback implementation (runs on any machine). + */ +namespace fallback { + +class implementation; + +} // namespace fallback +} // namespace simdjson + +#endif // SIMDJSON_FALLBACK_BASE_H diff --git a/contrib/libs/simdjson/include/simdjson/fallback/begin.h b/contrib/libs/simdjson/include/simdjson/fallback/begin.h new file mode 100644 index 00000000000..74749f6d456 --- /dev/null +++ b/contrib/libs/simdjson/include/simdjson/fallback/begin.h @@ -0,0 +1,5 @@ +#define SIMDJSON_IMPLEMENTATION fallback +#include "simdjson/fallback/base.h" +#include "simdjson/fallback/bitmanipulation.h" +#include "simdjson/fallback/stringparsing_defs.h" +#include "simdjson/fallback/numberparsing_defs.h" diff --git a/contrib/libs/simdjson/include/simdjson/fallback/bitmanipulation.h b/contrib/libs/simdjson/include/simdjson/fallback/bitmanipulation.h new file mode 100644 index 00000000000..ba47dcccfbf --- /dev/null +++ b/contrib/libs/simdjson/include/simdjson/fallback/bitmanipulation.h @@ -0,0 +1,48 @@ +#ifndef SIMDJSON_FALLBACK_BITMANIPULATION_H +#define SIMDJSON_FALLBACK_BITMANIPULATION_H + +#ifndef SIMDJSON_CONDITIONAL_INCLUDE +#include "simdjson/fallback/base.h" +#endif // SIMDJSON_CONDITIONAL_INCLUDE + +namespace simdjson { +namespace fallback { +namespace { + +#if defined(_MSC_VER) && !defined(_M_ARM64) && !defined(_M_X64) +static inline unsigned char _BitScanForward64(unsigned long* ret, uint64_t x) { + unsigned long x0 = (unsigned long)x, top, bottom; + _BitScanForward(&top, (unsigned long)(x >> 32)); + _BitScanForward(&bottom, x0); + *ret = x0 ? bottom : 32 + top; + return x != 0; +} +static unsigned char _BitScanReverse64(unsigned long* ret, uint64_t x) { + unsigned long x1 = (unsigned long)(x >> 32), top, bottom; + _BitScanReverse(&top, x1); + _BitScanReverse(&bottom, (unsigned long)x); + *ret = x1 ? top + 32 : bottom; + return x != 0; +} +#endif + +/* result might be undefined when input_num is zero */ +simdjson_inline int leading_zeroes(uint64_t input_num) { +#ifdef _MSC_VER + unsigned long leading_zero = 0; + // Search the mask data from most significant bit (MSB) + // to least significant bit (LSB) for a set bit (1). + if (_BitScanReverse64(&leading_zero, input_num)) + return (int)(63 - leading_zero); + else + return 64; +#else + return __builtin_clzll(input_num); +#endif// _MSC_VER +} + +} // unnamed namespace +} // namespace fallback +} // namespace simdjson + +#endif // SIMDJSON_FALLBACK_BITMANIPULATION_H diff --git a/contrib/libs/simdjson/include/simdjson/fallback/end.h b/contrib/libs/simdjson/include/simdjson/fallback/end.h new file mode 100644 index 00000000000..fbd14132b13 --- /dev/null +++ b/contrib/libs/simdjson/include/simdjson/fallback/end.h @@ -0,0 +1,5 @@ +#ifndef SIMDJSON_CONDITIONAL_INCLUDE +#include "simdjson/fallback/base.h" +#endif // SIMDJSON_CONDITIONAL_INCLUDE + +#undef SIMDJSON_IMPLEMENTATION diff --git a/contrib/libs/simdjson/include/simdjson/fallback/implementation.h b/contrib/libs/simdjson/include/simdjson/fallback/implementation.h new file mode 100644 index 00000000000..523f06d2e92 --- /dev/null +++ b/contrib/libs/simdjson/include/simdjson/fallback/implementation.h @@ -0,0 +1,34 @@ +#ifndef SIMDJSON_FALLBACK_IMPLEMENTATION_H +#define SIMDJSON_FALLBACK_IMPLEMENTATION_H + +#ifndef SIMDJSON_CONDITIONAL_INCLUDE +#include "simdjson/fallback/base.h" +#include "simdjson/implementation.h" +#endif // SIMDJSON_CONDITIONAL_INCLUDE + +namespace simdjson { +namespace fallback { + +/** + * @private + */ +class implementation final : public simdjson::implementation { +public: + simdjson_inline implementation() : simdjson::implementation( + "fallback", + "Generic fallback implementation", + 0 + ) {} + simdjson_warn_unused error_code create_dom_parser_implementation( + size_t capacity, + size_t max_length, + std::unique_ptr<simdjson::internal::dom_parser_implementation>& dst + ) const noexcept final; + simdjson_warn_unused error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept final; + simdjson_warn_unused bool validate_utf8(const char *buf, size_t len) const noexcept final; +}; + +} // namespace fallback +} // namespace simdjson + +#endif // SIMDJSON_FALLBACK_IMPLEMENTATION_H diff --git a/contrib/libs/simdjson/include/simdjson/fallback/numberparsing_defs.h b/contrib/libs/simdjson/include/simdjson/fallback/numberparsing_defs.h new file mode 100644 index 00000000000..e807423e6f8 --- /dev/null +++ b/contrib/libs/simdjson/include/simdjson/fallback/numberparsing_defs.h @@ -0,0 +1,80 @@ +#ifndef SIMDJSON_FALLBACK_NUMBERPARSING_DEFS_H +#define SIMDJSON_FALLBACK_NUMBERPARSING_DEFS_H + +#ifndef SIMDJSON_CONDITIONAL_INCLUDE +#include "simdjson/fallback/base.h" +#include "simdjson/internal/numberparsing_tables.h" +#endif // SIMDJSON_CONDITIONAL_INCLUDE + +#include <cstring> + +#ifdef JSON_TEST_NUMBERS // for unit testing +void found_invalid_number(const uint8_t *buf); +void found_integer(int64_t result, const uint8_t *buf); +void found_unsigned_integer(uint64_t result, const uint8_t *buf); +void found_float(double result, const uint8_t *buf); +#endif + +namespace simdjson { +namespace fallback { +namespace numberparsing { + +// credit: https://johnnylee-sde.github.io/Fast-numeric-string-to-int/ +/** @private */ +static simdjson_inline uint32_t parse_eight_digits_unrolled(const char *chars) { + uint64_t val; + memcpy(&val, chars, sizeof(uint64_t)); + val = (val & 0x0F0F0F0F0F0F0F0F) * 2561 >> 8; + val = (val & 0x00FF00FF00FF00FF) * 6553601 >> 16; + return uint32_t((val & 0x0000FFFF0000FFFF) * 42949672960001 >> 32); +} + +/** @private */ +static simdjson_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) { + return parse_eight_digits_unrolled(reinterpret_cast<const char *>(chars)); +} + +#if SIMDJSON_IS_32BITS // _umul128 for x86, arm +// this is a slow emulation routine for 32-bit +// +static simdjson_inline uint64_t __emulu(uint32_t x, uint32_t y) { + return x * (uint64_t)y; +} +static simdjson_inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t *hi) { + uint64_t ad = __emulu((uint32_t)(ab >> 32), (uint32_t)cd); + uint64_t bd = __emulu((uint32_t)ab, (uint32_t)cd); + uint64_t adbc = ad + __emulu((uint32_t)ab, (uint32_t)(cd >> 32)); + uint64_t adbc_carry = !!(adbc < ad); + uint64_t lo = bd + (adbc << 32); + *hi = __emulu((uint32_t)(ab >> 32), (uint32_t)(cd >> 32)) + (adbc >> 32) + + (adbc_carry << 32) + !!(lo < bd); + return lo; +} +#endif + +/** @private */ +simdjson_inline internal::value128 full_multiplication(uint64_t value1, uint64_t value2) { + internal::value128 answer; +#if SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS +#if SIMDJSON_IS_ARM64 + // ARM64 has native support for 64-bit multiplications, no need to emultate + answer.high = __umulh(value1, value2); + answer.low = value1 * value2; +#else + answer.low = _umul128(value1, value2, &answer.high); // _umul128 not available on ARM64 +#endif // SIMDJSON_IS_ARM64 +#else // SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS + __uint128_t r = (static_cast<__uint128_t>(value1)) * value2; + answer.low = uint64_t(r); + answer.high = uint64_t(r >> 64); +#endif + return answer; +} + +} // namespace numberparsing +} // namespace fallback +} // namespace simdjson + +#define SIMDJSON_SWAR_NUMBER_PARSING 1 + +#endif // SIMDJSON_FALLBACK_NUMBERPARSING_DEFS_H diff --git a/contrib/libs/simdjson/include/simdjson/fallback/stringparsing_defs.h b/contrib/libs/simdjson/include/simdjson/fallback/stringparsing_defs.h new file mode 100644 index 00000000000..64f23c4b03d --- /dev/null +++ b/contrib/libs/simdjson/include/simdjson/fallback/stringparsing_defs.h @@ -0,0 +1,36 @@ +#ifndef SIMDJSON_FALLBACK_STRINGPARSING_DEFS_H +#define SIMDJSON_FALLBACK_STRINGPARSING_DEFS_H + +#ifndef SIMDJSON_CONDITIONAL_INCLUDE +#include "simdjson/fallback/base.h" +#endif // SIMDJSON_CONDITIONAL_INCLUDE + +namespace simdjson { +namespace fallback { +namespace { + +// Holds backslashes and quotes locations. +struct backslash_and_quote { +public: + static constexpr uint32_t BYTES_PROCESSED = 1; + simdjson_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); + + simdjson_inline bool has_quote_first() { return c == '"'; } + simdjson_inline bool has_backslash() { return c == '\\'; } + simdjson_inline int quote_index() { return c == '"' ? 0 : 1; } + simdjson_inline int backslash_index() { return c == '\\' ? 0 : 1; } + + uint8_t c; +}; // struct backslash_and_quote + +simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { + // store to dest unconditionally - we can overwrite the bits we don't like later + dst[0] = src[0]; + return { src[0] }; +} + +} // unnamed namespace +} // namespace fallback +} // namespace simdjson + +#endif // SIMDJSON_FALLBACK_STRINGPARSING_DEFS_H diff --git a/contrib/libs/simdjson/include/simdjson/generic/amalgamated.h b/contrib/libs/simdjson/include/simdjson/generic/amalgamated.h new file mode 100644 index 00000000000..4b235fa9f7b --- /dev/null +++ b/contrib/libs/simdjson/include/simdjson/generic/amalgamated.h @@ -0,0 +1,12 @@ +#if defined(SIMDJSON_CONDITIONAL_INCLUDE) && !defined(SIMDJSON_GENERIC_DEPENDENCIES_H) +#error simdjson/generic/dependencies.h must be included before simdjson/generic/amalgamated.h! +#endif + +#include "simdjson/generic/base.h" +#include "simdjson/generic/jsoncharutils.h" +#include "simdjson/generic/atomparsing.h" +#include "simdjson/generic/dom_parser_implementation.h" +#include "simdjson/generic/implementation_simdjson_result_base.h" +#include "simdjson/generic/numberparsing.h" + +#include "simdjson/generic/implementation_simdjson_result_base-inl.h" diff --git a/contrib/libs/simdjson/include/simdjson/generic/atomparsing.h b/contrib/libs/simdjson/include/simdjson/generic/atomparsing.h new file mode 100644 index 00000000000..2eeb7b81a26 --- /dev/null +++ b/contrib/libs/simdjson/include/simdjson/generic/atomparsing.h @@ -0,0 +1,77 @@ +#ifndef SIMDJSON_GENERIC_ATOMPARSING_H + +#ifndef SIMDJSON_CONDITIONAL_INCLUDE +#define SIMDJSON_GENERIC_ATOMPARSING_H +#include "simdjson/generic/base.h" +#include "simdjson/generic/jsoncharutils.h" +#endif // SIMDJSON_CONDITIONAL_INCLUDE + +#include <cstring> + +namespace simdjson { +namespace SIMDJSON_IMPLEMENTATION { +namespace { +/// @private +namespace atomparsing { + +// The string_to_uint32 is exclusively used to map literal strings to 32-bit values. +// We use memcpy instead of a pointer cast to avoid undefined behaviors since we cannot +// be certain that the character pointer will be properly aligned. +// You might think that using memcpy makes this function expensive, but you'd be wrong. +// All decent optimizing compilers (GCC, clang, Visual Studio) will compile string_to_uint32("false"); +// to the compile-time constant 1936482662. +simdjson_inline uint32_t string_to_uint32(const char* str) { uint32_t val; std::memcpy(&val, str, sizeof(uint32_t)); return val; } + + +// Again in str4ncmp we use a memcpy to avoid undefined behavior. The memcpy may appear expensive. +// Yet all decent optimizing compilers will compile memcpy to a single instruction, just about. +simdjson_warn_unused +simdjson_inline uint32_t str4ncmp(const uint8_t *src, const char* atom) { + uint32_t srcval; // we want to avoid unaligned 32-bit loads (undefined in C/C++) + static_assert(sizeof(uint32_t) <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be larger than 4 bytes"); + std::memcpy(&srcval, src, sizeof(uint32_t)); + return srcval ^ string_to_uint32(atom); +} + +simdjson_warn_unused +simdjson_inline bool is_valid_true_atom(const uint8_t *src) { + return (str4ncmp(src, "true") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; +} + +simdjson_warn_unused +simdjson_inline bool is_valid_true_atom(const uint8_t *src, size_t len) { + if (len > 4) { return is_valid_true_atom(src); } + else if (len == 4) { return !str4ncmp(src, "true"); } + else { return false; } +} + +simdjson_warn_unused +simdjson_inline bool is_valid_false_atom(const uint8_t *src) { + return (str4ncmp(src+1, "alse") | jsoncharutils::is_not_structural_or_whitespace(src[5])) == 0; +} + +simdjson_warn_unused +simdjson_inline bool is_valid_false_atom(const uint8_t *src, size_t len) { + if (len > 5) { return is_valid_false_atom(src); } + else if (len == 5) { return !str4ncmp(src+1, "alse"); } + else { return false; } +} + +simdjson_warn_unused +simdjson_inline bool is_valid_null_atom(const uint8_t *src) { + return (str4ncmp(src, "null") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; +} + +simdjson_warn_unused +simdjson_inline bool is_valid_null_atom(const uint8_t *src, size_t len) { + if (len > 4) { return is_valid_null_atom(src); } + else if (len == 4) { return !str4ncmp(src, "null"); } + else { return false; } +} + +} // namespace atomparsing +} // unnamed namespace +} // namespace SIMDJSON_IMPLEMENTATION +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ATOMPARSING_H
\ No newline at end of file diff --git a/contrib/libs/simdjson/include/simdjson/generic/base.h b/contrib/libs/simdjson/include/simdjson/generic/base.h new file mode 100644 index 00000000000..65180837229 --- /dev/null +++ b/contrib/libs/simdjson/include/simdjson/generic/base.h @@ -0,0 +1,51 @@ +#ifndef SIMDJSON_GENERIC_BASE_H + +#ifndef SIMDJSON_CONDITIONAL_INCLUDE +#define SIMDJSON_GENERIC_BASE_H +#include "simdjson/base.h" +// If we haven't got an implementation yet, we're in the editor, editing a generic file! Just +// use the most advanced one we can so the most possible stuff can be tested. +#ifndef SIMDJSON_IMPLEMENTATION +#include "simdjson/implementation_detection.h" +#if SIMDJSON_IMPLEMENTATION_ICELAKE +#include "simdjson/icelake/begin.h" +#elif SIMDJSON_IMPLEMENTATION_HASWELL +#include "simdjson/haswell/begin.h" +#elif SIMDJSON_IMPLEMENTATION_WESTMERE +#include "simdjson/westmere/begin.h" +#elif SIMDJSON_IMPLEMENTATION_ARM64 +#include "simdjson/arm64/begin.h" +#elif SIMDJSON_IMPLEMENTATION_PPC64 +#error #include "simdjson/ppc64/begin.h" +#elif SIMDJSON_IMPLEMENTATION_LSX +#include "simdjson/lsx/begin.h" +#elif SIMDJSON_IMPLEMENTATION_LASX +#include "simdjson/lasx/begin.h" +#elif SIMDJSON_IMPLEMENTATION_FALLBACK +#include "simdjson/fallback/begin.h" +#else +#error "All possible implementations (including fallback) have been disabled! simdjson will not run." +#endif +#endif // SIMDJSON_IMPLEMENTATION +#endif // SIMDJSON_CONDITIONAL_INCLUDE + +namespace simdjson { +namespace SIMDJSON_IMPLEMENTATION { + +struct open_container; +class dom_parser_implementation; + +/** + * The type of a JSON number + */ +enum class number_type { + floating_point_number=1, /// a binary64 number + signed_integer, /// a signed integer that fits in a 64-bit word using two's complement + unsigned_integer, /// a positive integer larger or equal to 1<<63 + big_integer /// a big integer that does not fit in a 64-bit word +}; + +} // namespace SIMDJSON_IMPLEMENTATION +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_BASE_H
\ No newline at end of file diff --git a/contrib/libs/simdjson/include/simdjson/generic/dependencies.h b/contrib/libs/simdjson/include/simdjson/generic/dependencies.h new file mode 100644 index 00000000000..28d9749f276 --- /dev/null +++ b/contrib/libs/simdjson/include/simdjson/generic/dependencies.h @@ -0,0 +1,20 @@ +#ifdef SIMDJSON_CONDITIONAL_INCLUDE +#error simdjson/generic/dependencies.h must be included before defining SIMDJSON_CONDITIONAL_INCLUDE! +#endif + +#ifndef SIMDJSON_GENERIC_DEPENDENCIES_H +#define SIMDJSON_GENERIC_DEPENDENCIES_H + +// Internal headers needed for generics. +// All includes referencing simdjson headers *not* under simdjson/generic must be here! +// Otherwise, amalgamation will fail. +#include "simdjson/base.h" +#include "simdjson/implementation.h" +#include "simdjson/implementation_detection.h" +#include "simdjson/internal/instruction_set.h" +#include "simdjson/internal/dom_parser_implementation.h" +#include "simdjson/internal/jsoncharutils_tables.h" +#include "simdjson/internal/numberparsing_tables.h" +#include "simdjson/internal/simdprune_tables.h" + +#endif // SIMDJSON_GENERIC_DEPENDENCIES_H
\ No newline at end of file diff --git a/contrib/libs/simdjson/include/simdjson/generic/dom_parser_implementation.h b/contrib/libs/simdjson/include/simdjson/generic/dom_parser_implementation.h new file mode 100644 index 00000000000..e51d2c1279b --- /dev/null +++ b/contrib/libs/simdjson/include/simdjson/generic/dom_parser_implementation.h @@ -0,0 +1,89 @@ +#ifndef SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H + +#ifndef SIMDJSON_CONDITIONAL_INCLUDE +#define SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H +#include "simdjson/generic/base.h" +#include "simdjson/internal/dom_parser_implementation.h" +#endif // SIMDJSON_CONDITIONAL_INCLUDE + +namespace simdjson { +namespace SIMDJSON_IMPLEMENTATION { + +// expectation: sizeof(open_container) = 64/8. +struct open_container { + uint32_t tape_index; // where, on the tape, does the scope ([,{) begins + uint32_t count; // how many elements in the scope +}; // struct open_container + +static_assert(sizeof(open_container) == 64/8, "Open container must be 64 bits"); + +class dom_parser_implementation final : public internal::dom_parser_implementation { +public: + /** Tape location of each open { or [ */ + std::unique_ptr<open_container[]> open_containers{}; + /** Whether each open container is a [ or { */ + std::unique_ptr<bool[]> is_array{}; + /** Buffer passed to stage 1 */ + const uint8_t *buf{}; + /** Length passed to stage 1 */ + size_t len{0}; + /** Document passed to stage 2 */ + dom::document *doc{}; + + inline dom_parser_implementation() noexcept; + inline dom_parser_implementation(dom_parser_implementation &&other) noexcept; + inline dom_parser_implementation &operator=(dom_parser_implementation &&other) noexcept; + dom_parser_implementation(const dom_parser_implementation &) = delete; + dom_parser_implementation &operator=(const dom_parser_implementation &) = delete; + + simdjson_warn_unused error_code parse(const uint8_t *buf, size_t len, dom::document &doc) noexcept final; + simdjson_warn_unused error_code stage1(const uint8_t *buf, size_t len, stage1_mode partial) noexcept final; + simdjson_warn_unused error_code stage2(dom::document &doc) noexcept final; + simdjson_warn_unused error_code stage2_next(dom::document &doc) noexcept final; + simdjson_warn_unused uint8_t *parse_string(const uint8_t *src, uint8_t *dst, bool allow_replacement) const noexcept final; + simdjson_warn_unused uint8_t *parse_wobbly_string(const uint8_t *src, uint8_t *dst) const noexcept final; + inline simdjson_warn_unused error_code set_capacity(size_t capacity) noexcept final; + inline simdjson_warn_unused error_code set_max_depth(size_t max_depth) noexcept final; +private: + simdjson_inline simdjson_warn_unused error_code set_capacity_stage1(size_t capacity); + +}; + +} // namespace SIMDJSON_IMPLEMENTATION +} // namespace simdjson + +namespace simdjson { +namespace SIMDJSON_IMPLEMENTATION { + +inline dom_parser_implementation::dom_parser_implementation() noexcept = default; +inline dom_parser_implementation::dom_parser_implementation(dom_parser_implementation &&other) noexcept = default; +inline dom_parser_implementation &dom_parser_implementation::operator=(dom_parser_implementation &&other) noexcept = default; + +// Leaving these here so they can be inlined if so desired +inline simdjson_warn_unused error_code dom_parser_implementation::set_capacity(size_t capacity) noexcept { + if(capacity > SIMDJSON_MAXSIZE_BYTES) { return CAPACITY; } + // Stage 1 index output + size_t max_structures = SIMDJSON_ROUNDUP_N(capacity, 64) + 2 + 7; + structural_indexes.reset( new (std::nothrow) uint32_t[max_structures] ); + if (!structural_indexes) { _capacity = 0; return MEMALLOC; } + structural_indexes[0] = 0; + n_structural_indexes = 0; + + _capacity = capacity; + return SUCCESS; +} + +inline simdjson_warn_unused error_code dom_parser_implementation::set_max_depth(size_t max_depth) noexcept { + // Stage 2 stacks + open_containers.reset(new (std::nothrow) open_container[max_depth]); + is_array.reset(new (std::nothrow) bool[max_depth]); + if (!is_array || !open_containers) { _max_depth = 0; return MEMALLOC; } + + _max_depth = max_depth; + return SUCCESS; +} + +} // namespace SIMDJSON_IMPLEMENTATION +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H
\ No newline at end of file diff --git a/contrib/libs/simdjson/include/simdjson/generic/implementation_simdjson_result_base-inl.h b/contrib/libs/simdjson/include/simdjson/generic/implementation_simdjson_result_base-inl.h new file mode 100644 index 00000000000..4990ad7ea15 --- /dev/null +++ b/contrib/libs/simdjson/include/simdjson/generic/implementation_simdjson_result_base-inl.h @@ -0,0 +1,90 @@ +#ifndef SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H + +#ifndef SIMDJSON_CONDITIONAL_INCLUDE +#define SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H +#include "simdjson/generic/base.h" +#include "simdjson/generic/implementation_simdjson_result_base.h" +#endif // SIMDJSON_CONDITIONAL_INCLUDE + +namespace simdjson { +namespace SIMDJSON_IMPLEMENTATION { + +// +// internal::implementation_simdjson_result_base<T> inline implementation +// + +template<typename T> +simdjson_inline void implementation_simdjson_result_base<T>::tie(T &value, error_code &error) && noexcept { + error = this->second; + if (!error) { + value = std::forward<implementation_simdjson_result_base<T>>(*this).first; + } +} + +template<typename T> +simdjson_warn_unused simdjson_inline error_code implementation_simdjson_result_base<T>::get(T &value) && noexcept { + error_code error; + std::forward<implementation_simdjson_result_base<T>>(*this).tie(value, error); + return error; +} + +template<typename T> +simdjson_inline error_code implementation_simdjson_result_base<T>::error() const noexcept { + return this->second; +} + +#if SIMDJSON_EXCEPTIONS + +template<typename T> +simdjson_inline T& implementation_simdjson_result_base<T>::value() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return this->first; +} + +template<typename T> +simdjson_inline T&& implementation_simdjson_result_base<T>::value() && noexcept(false) { + return std::forward<implementation_simdjson_result_base<T>>(*this).take_value(); +} + +template<typename T> +simdjson_inline T&& implementation_simdjson_result_base<T>::take_value() && noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return std::forward<T>(this->first); +} + +template<typename T> +simdjson_inline implementation_simdjson_result_base<T>::operator T&&() && noexcept(false) { + return std::forward<implementation_simdjson_result_base<T>>(*this).take_value(); +} + +#endif // SIMDJSON_EXCEPTIONS + +template<typename T> +simdjson_inline const T& implementation_simdjson_result_base<T>::value_unsafe() const& noexcept { + return this->first; +} + +template<typename T> +simdjson_inline T& implementation_simdjson_result_base<T>::value_unsafe() & noexcept { + return this->first; +} + +template<typename T> +simdjson_inline T&& implementation_simdjson_result_base<T>::value_unsafe() && noexcept { + return std::forward<T>(this->first); +} + +template<typename T> +simdjson_inline implementation_simdjson_result_base<T>::implementation_simdjson_result_base(T &&value, error_code error) noexcept + : first{std::forward<T>(value)}, second{error} {} +template<typename T> +simdjson_inline implementation_simdjson_result_base<T>::implementation_simdjson_result_base(error_code error) noexcept + : implementation_simdjson_result_base(T{}, error) {} +template<typename T> +simdjson_inline implementation_simdjson_result_base<T>::implementation_simdjson_result_base(T &&value) noexcept + : implementation_simdjson_result_base(std::forward<T>(value), SUCCESS) {} + +} // namespace SIMDJSON_IMPLEMENTATION +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H
\ No newline at end of file diff --git a/contrib/libs/simdjson/include/simdjson/generic/implementation_simdjson_result_base.h b/contrib/libs/simdjson/include/simdjson/generic/implementation_simdjson_result_base.h new file mode 100644 index 00000000000..aaf2bce1267 --- /dev/null +++ b/contrib/libs/simdjson/include/simdjson/generic/implementation_simdjson_result_base.h @@ -0,0 +1,134 @@ +#ifndef SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H + +#ifndef SIMDJSON_CONDITIONAL_INCLUDE +#define SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H +#include "simdjson/generic/base.h" +#endif // SIMDJSON_CONDITIONAL_INCLUDE + +namespace simdjson { +namespace SIMDJSON_IMPLEMENTATION { + +// This is a near copy of include/error.h's implementation_simdjson_result_base, except it doesn't use std::pair +// so we can avoid inlining errors +// TODO reconcile these! +/** + * The result of a simdjson operation that could fail. + * + * Gives the option of reading error codes, or throwing an exception by casting to the desired result. + * + * This is a base class for implementations that want to add functions to the result type for + * chaining. + * + * Override like: + * + * struct simdjson_result<T> : public internal::implementation_simdjson_result_base<T> { + * simdjson_result() noexcept : internal::implementation_simdjson_result_base<T>() {} + * simdjson_result(error_code error) noexcept : internal::implementation_simdjson_result_base<T>(error) {} + * simdjson_result(T &&value) noexcept : internal::implementation_simdjson_result_base<T>(std::forward(value)) {} + * simdjson_result(T &&value, error_code error) noexcept : internal::implementation_simdjson_result_base<T>(value, error) {} + * // Your extra methods here + * } + * + * Then any method returning simdjson_result<T> will be chainable with your methods. + */ +template<typename T> +struct implementation_simdjson_result_base { + + /** + * Create a new empty result with error = UNINITIALIZED. + */ + simdjson_inline implementation_simdjson_result_base() noexcept = default; + + /** + * Create a new error result. + */ + simdjson_inline implementation_simdjson_result_base(error_code error) noexcept; + + /** + * Create a new successful result. + */ + simdjson_inline implementation_simdjson_result_base(T &&value) noexcept; + + /** + * Create a new result with both things (use if you don't want to branch when creating the result). + */ + simdjson_inline implementation_simdjson_result_base(T &&value, error_code error) noexcept; + + /** + * Move the value and the error to the provided variables. + * + * @param value The variable to assign the value to. May not be set if there is an error. + * @param error The variable to assign the error to. Set to SUCCESS if there is no error. + */ + simdjson_inline void tie(T &value, error_code &error) && noexcept; + + /** + * Move the value to the provided variable. + * + * @param value The variable to assign the value to. May not be set if there is an error. + */ + simdjson_inline error_code get(T &value) && noexcept; + + /** + * The error. + */ + simdjson_inline error_code error() const noexcept; + +#if SIMDJSON_EXCEPTIONS + + /** + * Get the result value. + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T& value() & noexcept(false); + + /** + * Take the result value (move it). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T&& value() && noexcept(false); + + /** + * Take the result value (move it). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T&& take_value() && noexcept(false); + + /** + * Cast to the value (will throw on error). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline operator T&&() && noexcept(false); + + +#endif // SIMDJSON_EXCEPTIONS + + /** + * Get the result value. This function is safe if and only + * the error() method returns a value that evaluates to false. + */ + simdjson_inline const T& value_unsafe() const& noexcept; + /** + * Get the result value. This function is safe if and only + * the error() method returns a value that evaluates to false. + */ + simdjson_inline T& value_unsafe() & noexcept; + /** + * Take the result value (move it). This function is safe if and only + * the error() method returns a value that evaluates to false. + */ + simdjson_inline T&& value_unsafe() && noexcept; +protected: + /** users should never directly access first and second. **/ + T first{}; /** Users should never directly access 'first'. **/ + error_code second{UNINITIALIZED}; /** Users should never directly access 'second'. **/ +}; // struct implementation_simdjson_result_base + +} // namespace SIMDJSON_IMPLEMENTATION +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H
\ No newline at end of file diff --git a/contrib/libs/simdjson/include/simdjson/generic/jsoncharutils.h b/contrib/libs/simdjson/include/simdjson/generic/jsoncharutils.h new file mode 100644 index 00000000000..a79b1f8507d --- /dev/null +++ b/contrib/libs/simdjson/include/simdjson/generic/jsoncharutils.h @@ -0,0 +1,104 @@ +#ifndef SIMDJSON_GENERIC_JSONCHARUTILS_H + +#ifndef SIMDJSON_CONDITIONAL_INCLUDE +#define SIMDJSON_GENERIC_JSONCHARUTILS_H +#include "simdjson/generic/base.h" +#include "simdjson/internal/jsoncharutils_tables.h" +#include "simdjson/internal/numberparsing_tables.h" +#endif // SIMDJSON_CONDITIONAL_INCLUDE + +namespace simdjson { +namespace SIMDJSON_IMPLEMENTATION { +namespace { +namespace jsoncharutils { + +// return non-zero if not a structural or whitespace char +// zero otherwise +simdjson_inline uint32_t is_not_structural_or_whitespace(uint8_t c) { + return internal::structural_or_whitespace_negated[c]; +} + +simdjson_inline uint32_t is_structural_or_whitespace(uint8_t c) { + return internal::structural_or_whitespace[c]; +} + +// returns a value with the high 16 bits set if not valid +// otherwise returns the conversion of the 4 hex digits at src into the bottom +// 16 bits of the 32-bit return register +// +// see +// https://lemire.me/blog/2019/04/17/parsing-short-hexadecimal-strings-efficiently/ +static inline uint32_t hex_to_u32_nocheck( + const uint8_t *src) { // strictly speaking, static inline is a C-ism + uint32_t v1 = internal::digit_to_val32[630 + src[0]]; + uint32_t v2 = internal::digit_to_val32[420 + src[1]]; + uint32_t v3 = internal::digit_to_val32[210 + src[2]]; + uint32_t v4 = internal::digit_to_val32[0 + src[3]]; + return v1 | v2 | v3 | v4; +} + +// given a code point cp, writes to c +// the utf-8 code, outputting the length in +// bytes, if the length is zero, the code point +// is invalid +// +// This can possibly be made faster using pdep +// and clz and table lookups, but JSON documents +// have few escaped code points, and the following +// function looks cheap. +// +// Note: we assume that surrogates are treated separately +// +simdjson_inline size_t codepoint_to_utf8(uint32_t cp, uint8_t *c) { + if (cp <= 0x7F) { + c[0] = uint8_t(cp); + return 1; // ascii + } + if (cp <= 0x7FF) { + c[0] = uint8_t((cp >> 6) + 192); + c[1] = uint8_t((cp & 63) + 128); + return 2; // universal plane + // Surrogates are treated elsewhere... + //} //else if (0xd800 <= cp && cp <= 0xdfff) { + // return 0; // surrogates // could put assert here + } else if (cp <= 0xFFFF) { + c[0] = uint8_t((cp >> 12) + 224); + c[1] = uint8_t(((cp >> 6) & 63) + 128); + c[2] = uint8_t((cp & 63) + 128); + return 3; + } else if (cp <= 0x10FFFF) { // if you know you have a valid code point, this + // is not needed + c[0] = uint8_t((cp >> 18) + 240); + c[1] = uint8_t(((cp >> 12) & 63) + 128); + c[2] = uint8_t(((cp >> 6) & 63) + 128); + c[3] = uint8_t((cp & 63) + 128); + return 4; + } + // will return 0 when the code point was too large. + return 0; // bad r +} + +#if SIMDJSON_IS_32BITS // _umul128 for x86, arm +// this is a slow emulation routine for 32-bit +// +static simdjson_inline uint64_t __emulu(uint32_t x, uint32_t y) { + return x * (uint64_t)y; +} +static simdjson_inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t *hi) { + uint64_t ad = __emulu((uint32_t)(ab >> 32), (uint32_t)cd); + uint64_t bd = __emulu((uint32_t)ab, (uint32_t)cd); + uint64_t adbc = ad + __emulu((uint32_t)ab, (uint32_t)(cd >> 32)); + uint64_t adbc_carry = !!(adbc < ad); + uint64_t lo = bd + (adbc << 32); + *hi = __emulu((uint32_t)(ab >> 32), (uint32_t)(cd >> 32)) + (adbc >> 32) + + (adbc_carry << 32) + !!(lo < bd); + return lo; +} +#endif + +} // namespace jsoncharutils +} // unnamed namespace +} // namespace SIMDJSON_IMPLEMENTATION +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_JSONCHARUTILS_H
\ No newline at end of file diff --git a/contrib/libs/simdjson/include/simdjson/generic/numberparsing.h b/contrib/libs/simdjson/include/simdjson/generic/numberparsing.h new file mode 100644 index 00000000000..facc2acc6f9 --- /dev/null +++ b/contrib/libs/simdjson/include/simdjson/generic/numberparsing.h @@ -0,0 +1,1310 @@ +#ifndef SIMDJSON_GENERIC_NUMBERPARSING_H + +#ifndef SIMDJSON_CONDITIONAL_INCLUDE +#define SIMDJSON_GENERIC_NUMBERPARSING_H +#include "simdjson/generic/base.h" +#include "simdjson/generic/jsoncharutils.h" +#include "simdjson/internal/numberparsing_tables.h" +#endif // SIMDJSON_CONDITIONAL_INCLUDE + +#include <limits> +#include <ostream> +#include <cstring> + +namespace simdjson { +namespace SIMDJSON_IMPLEMENTATION { +namespace numberparsing { + +#ifdef JSON_TEST_NUMBERS +#define INVALID_NUMBER(SRC) (found_invalid_number((SRC)), NUMBER_ERROR) +#define WRITE_INTEGER(VALUE, SRC, WRITER) (found_integer((VALUE), (SRC)), (WRITER).append_s64((VALUE))) +#define WRITE_UNSIGNED(VALUE, SRC, WRITER) (found_unsigned_integer((VALUE), (SRC)), (WRITER).append_u64((VALUE))) +#define WRITE_DOUBLE(VALUE, SRC, WRITER) (found_float((VALUE), (SRC)), (WRITER).append_double((VALUE))) +#define BIGINT_NUMBER(SRC) (found_invalid_number((SRC)), BIGINT_ERROR) +#else +#define INVALID_NUMBER(SRC) (NUMBER_ERROR) +#define WRITE_INTEGER(VALUE, SRC, WRITER) (WRITER).append_s64((VALUE)) +#define WRITE_UNSIGNED(VALUE, SRC, WRITER) (WRITER).append_u64((VALUE)) +#define WRITE_DOUBLE(VALUE, SRC, WRITER) (WRITER).append_double((VALUE)) +#define BIGINT_NUMBER(SRC) (BIGINT_ERROR) +#endif + +namespace { + +// Convert a mantissa, an exponent and a sign bit into an ieee64 double. +// The real_exponent needs to be in [0, 2046] (technically real_exponent = 2047 would be acceptable). +// The mantissa should be in [0,1<<53). The bit at index (1ULL << 52) while be zeroed. +simdjson_inline double to_double(uint64_t mantissa, uint64_t real_exponent, bool negative) { + double d; + mantissa &= ~(1ULL << 52); + mantissa |= real_exponent << 52; + mantissa |= ((static_cast<uint64_t>(negative)) << 63); + std::memcpy(&d, &mantissa, sizeof(d)); + return d; +} + +// Attempts to compute i * 10^(power) exactly; and if "negative" is +// true, negate the result. +// This function will only work in some cases, when it does not work, success is +// set to false. This should work *most of the time* (like 99% of the time). +// We assume that power is in the [smallest_power, +// largest_power] interval: the caller is responsible for this check. +simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, double &d) { + // we start with a fast path + // It was described in + // Clinger WD. How to read floating point numbers accurately. + // ACM SIGPLAN Notices. 1990 +#ifndef FLT_EVAL_METHOD +#error "FLT_EVAL_METHOD should be defined, please include cfloat." +#endif +#if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0) + // We cannot be certain that x/y is rounded to nearest. + if (0 <= power && power <= 22 && i <= 9007199254740991) +#else + if (-22 <= power && power <= 22 && i <= 9007199254740991) +#endif + { + // convert the integer into a double. This is lossless since + // 0 <= i <= 2^53 - 1. + d = double(i); + // + // The general idea is as follows. + // If 0 <= s < 2^53 and if 10^0 <= p <= 10^22 then + // 1) Both s and p can be represented exactly as 64-bit floating-point + // values + // (binary64). + // 2) Because s and p can be represented exactly as floating-point values, + // then s * p + // and s / p will produce correctly rounded values. + // + if (power < 0) { + d = d / simdjson::internal::power_of_ten[-power]; + } else { + d = d * simdjson::internal::power_of_ten[power]; + } + if (negative) { + d = -d; + } + return true; + } + // When 22 < power && power < 22 + 16, we could + // hope for another, secondary fast path. It was + // described by David M. Gay in "Correctly rounded + // binary-decimal and decimal-binary conversions." (1990) + // If you need to compute i * 10^(22 + x) for x < 16, + // first compute i * 10^x, if you know that result is exact + // (e.g., when i * 10^x < 2^53), + // then you can still proceed and do (i * 10^x) * 10^22. + // Is this worth your time? + // You need 22 < power *and* power < 22 + 16 *and* (i * 10^(x-22) < 2^53) + // for this second fast path to work. + // If you you have 22 < power *and* power < 22 + 16, and then you + // optimistically compute "i * 10^(x-22)", there is still a chance that you + // have wasted your time if i * 10^(x-22) >= 2^53. It makes the use cases of + // this optimization maybe less common than we would like. Source: + // http://www.exploringbinary.com/fast-path-decimal-to-floating-point-conversion/ + // also used in RapidJSON: https://rapidjson.org/strtod_8h_source.html + + // The fast path has now failed, so we are failing back on the slower path. + + // In the slow path, we need to adjust i so that it is > 1<<63 which is always + // possible, except if i == 0, so we handle i == 0 separately. + if(i == 0) { + d = negative ? -0.0 : 0.0; + return true; + } + + + // The exponent is 1024 + 63 + power + // + floor(log(5**power)/log(2)). + // The 1024 comes from the ieee64 standard. + // The 63 comes from the fact that we use a 64-bit word. + // + // Computing floor(log(5**power)/log(2)) could be + // slow. Instead we use a fast function. + // + // For power in (-400,350), we have that + // (((152170 + 65536) * power ) >> 16); + // is equal to + // floor(log(5**power)/log(2)) + power when power >= 0 + // and it is equal to + // ceil(log(5**-power)/log(2)) + power when power < 0 + // + // The 65536 is (1<<16) and corresponds to + // (65536 * power) >> 16 ---> power + // + // ((152170 * power ) >> 16) is equal to + // floor(log(5**power)/log(2)) + // + // Note that this is not magic: 152170/(1<<16) is + // approximatively equal to log(5)/log(2). + // The 1<<16 value is a power of two; we could use a + // larger power of 2 if we wanted to. + // + int64_t exponent = (((152170 + 65536) * power) >> 16) + 1024 + 63; + + + // We want the most significant bit of i to be 1. Shift if needed. + int lz = leading_zeroes(i); + i <<= lz; + + + // We are going to need to do some 64-bit arithmetic to get a precise product. + // We use a table lookup approach. + // It is safe because + // power >= smallest_power + // and power <= largest_power + // We recover the mantissa of the power, it has a leading 1. It is always + // rounded down. + // + // We want the most significant 64 bits of the product. We know + // this will be non-zero because the most significant bit of i is + // 1. + const uint32_t index = 2 * uint32_t(power - simdjson::internal::smallest_power); + // Optimization: It may be that materializing the index as a variable might confuse some compilers and prevent effective complex-addressing loads. (Done for code clarity.) + // + // The full_multiplication function computes the 128-bit product of two 64-bit words + // with a returned value of type value128 with a "low component" corresponding to the + // 64-bit least significant bits of the product and with a "high component" corresponding + // to the 64-bit most significant bits of the product. + simdjson::internal::value128 firstproduct = full_multiplication(i, simdjson::internal::power_of_five_128[index]); + // Both i and power_of_five_128[index] have their most significant bit set to 1 which + // implies that the either the most or the second most significant bit of the product + // is 1. We pack values in this manner for efficiency reasons: it maximizes the use + // we make of the product. It also makes it easy to reason about the product: there + // is 0 or 1 leading zero in the product. + + // Unless the least significant 9 bits of the high (64-bit) part of the full + // product are all 1s, then we know that the most significant 55 bits are + // exact and no further work is needed. Having 55 bits is necessary because + // we need 53 bits for the mantissa but we have to have one rounding bit and + // we can waste a bit if the most significant bit of the product is zero. + if((firstproduct.high & 0x1FF) == 0x1FF) { + // We want to compute i * 5^q, but only care about the top 55 bits at most. + // Consider the scenario where q>=0. Then 5^q may not fit in 64-bits. Doing + // the full computation is wasteful. So we do what is called a "truncated + // multiplication". + // We take the most significant 64-bits, and we put them in + // power_of_five_128[index]. Usually, that's good enough to approximate i * 5^q + // to the desired approximation using one multiplication. Sometimes it does not suffice. + // Then we store the next most significant 64 bits in power_of_five_128[index + 1], and + // then we get a better approximation to i * 5^q. + // + // That's for when q>=0. The logic for q<0 is somewhat similar but it is somewhat + // more complicated. + // + // There is an extra layer of complexity in that we need more than 55 bits of + // accuracy in the round-to-even scenario. + // + // The full_multiplication function computes the 128-bit product of two 64-bit words + // with a returned value of type value128 with a "low component" corresponding to the + // 64-bit least significant bits of the product and with a "high component" corresponding + // to the 64-bit most significant bits of the product. + simdjson::internal::value128 secondproduct = full_multiplication(i, simdjson::internal::power_of_five_128[index + 1]); + firstproduct.low += secondproduct.high; + if(secondproduct.high > firstproduct.low) { firstproduct.high++; } + // As it has been proven by Noble Mushtak and Daniel Lemire in "Fast Number Parsing Without + // Fallback" (https://arxiv.org/abs/2212.06644), at this point we are sure that the product + // is sufficiently accurate, and more computation is not needed. + } + uint64_t lower = firstproduct.low; + uint64_t upper = firstproduct.high; + // The final mantissa should be 53 bits with a leading 1. + // We shift it so that it occupies 54 bits with a leading 1. + /////// + uint64_t upperbit = upper >> 63; + uint64_t mantissa = upper >> (upperbit + 9); + lz += int(1 ^ upperbit); + + // Here we have mantissa < (1<<54). + int64_t real_exponent = exponent - lz; + if (simdjson_unlikely(real_exponent <= 0)) { // we have a subnormal? + // Here have that real_exponent <= 0 so -real_exponent >= 0 + if(-real_exponent + 1 >= 64) { // if we have more than 64 bits below the minimum exponent, you have a zero for sure. + d = negative ? -0.0 : 0.0; + return true; + } + // next line is safe because -real_exponent + 1 < 0 + mantissa >>= -real_exponent + 1; + // Thankfully, we can't have both "round-to-even" and subnormals because + // "round-to-even" only occurs for powers close to 0. + mantissa += (mantissa & 1); // round up + mantissa >>= 1; + // There is a weird scenario where we don't have a subnormal but just. + // Suppose we start with 2.2250738585072013e-308, we end up + // with 0x3fffffffffffff x 2^-1023-53 which is technically subnormal + // whereas 0x40000000000000 x 2^-1023-53 is normal. Now, we need to round + // up 0x3fffffffffffff x 2^-1023-53 and once we do, we are no longer + // subnormal, but we can only know this after rounding. + // So we only declare a subnormal if we are smaller than the threshold. + real_exponent = (mantissa < (uint64_t(1) << 52)) ? 0 : 1; + d = to_double(mantissa, real_exponent, negative); + return true; + } + // We have to round to even. The "to even" part + // is only a problem when we are right in between two floats + // which we guard against. + // If we have lots of trailing zeros, we may fall right between two + // floating-point values. + // + // The round-to-even cases take the form of a number 2m+1 which is in (2^53,2^54] + // times a power of two. That is, it is right between a number with binary significand + // m and another number with binary significand m+1; and it must be the case + // that it cannot be represented by a float itself. + // + // We must have that w * 10 ^q == (2m+1) * 2^p for some power of two 2^p. + // Recall that 10^q = 5^q * 2^q. + // When q >= 0, we must have that (2m+1) is divible by 5^q, so 5^q <= 2^54. We have that + // 5^23 <= 2^54 and it is the last power of five to qualify, so q <= 23. + // When q<0, we have w >= (2m+1) x 5^{-q}. We must have that w<2^{64} so + // (2m+1) x 5^{-q} < 2^{64}. We have that 2m+1>2^{53}. Hence, we must have + // 2^{53} x 5^{-q} < 2^{64}. + // Hence we have 5^{-q} < 2^{11}$ or q>= -4. + // + // We require lower <= 1 and not lower == 0 because we could not prove that + // that lower == 0 is implied; but we could prove that lower <= 1 is a necessary and sufficient test. + if (simdjson_unlikely((lower <= 1) && (power >= -4) && (power <= 23) && ((mantissa & 3) == 1))) { + if((mantissa << (upperbit + 64 - 53 - 2)) == upper) { + mantissa &= ~1; // flip it so that we do not round up + } + } + + mantissa += mantissa & 1; + mantissa >>= 1; + + // Here we have mantissa < (1<<53), unless there was an overflow + if (mantissa >= (1ULL << 53)) { + ////////// + // This will happen when parsing values such as 7.2057594037927933e+16 + //////// + mantissa = (1ULL << 52); + real_exponent++; + } + mantissa &= ~(1ULL << 52); + // we have to check that real_exponent is in range, otherwise we bail out + if (simdjson_unlikely(real_exponent > 2046)) { + // We have an infinite value!!! We could actually throw an error here if we could. + return false; + } + d = to_double(mantissa, real_exponent, negative); + return true; +} + +// We call a fallback floating-point parser that might be slow. Note +// it will accept JSON numbers, but the JSON spec. is more restrictive so +// before you call parse_float_fallback, you need to have validated the input +// string with the JSON grammar. +// It will return an error (false) if the parsed number is infinite. +// The string parsing itself always succeeds. We know that there is at least +// one digit. +static bool parse_float_fallback(const uint8_t *ptr, double *outDouble) { + *outDouble = simdjson::internal::from_chars(reinterpret_cast<const char *>(ptr)); + // We do not accept infinite values. + + // Detecting finite values in a portable manner is ridiculously hard, ideally + // we would want to do: + // return !std::isfinite(*outDouble); + // but that mysteriously fails under legacy/old libc++ libraries, see + // https://github.com/simdjson/simdjson/issues/1286 + // + // Therefore, fall back to this solution (the extra parens are there + // to handle that max may be a macro on windows). + return !(*outDouble > (std::numeric_limits<double>::max)() || *outDouble < std::numeric_limits<double>::lowest()); +} + +static bool parse_float_fallback(const uint8_t *ptr, const uint8_t *end_ptr, double *outDouble) { + *outDouble = simdjson::internal::from_chars(reinterpret_cast<const char *>(ptr), reinterpret_cast<const char *>(end_ptr)); + // We do not accept infinite values. + + // Detecting finite values in a portable manner is ridiculously hard, ideally + // we would want to do: + // return !std::isfinite(*outDouble); + // but that mysteriously fails under legacy/old libc++ libraries, see + // https://github.com/simdjson/simdjson/issues/1286 + // + // Therefore, fall back to this solution (the extra parens are there + // to handle that max may be a macro on windows). + return !(*outDouble > (std::numeric_limits<double>::max)() || *outDouble < std::numeric_limits<double>::lowest()); +} + +// check quickly whether the next 8 chars are made of digits +// at a glance, it looks better than Mula's +// http://0x80.pl/articles/swar-digits-validate.html +simdjson_inline bool is_made_of_eight_digits_fast(const uint8_t *chars) { + uint64_t val; + // this can read up to 7 bytes beyond the buffer size, but we require + // SIMDJSON_PADDING of padding + static_assert(7 <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be bigger than 7"); + std::memcpy(&val, chars, 8); + // a branchy method might be faster: + // return (( val & 0xF0F0F0F0F0F0F0F0 ) == 0x3030303030303030) + // && (( (val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0 ) == + // 0x3030303030303030); + return (((val & 0xF0F0F0F0F0F0F0F0) | + (((val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0) >> 4)) == + 0x3333333333333333); +} + +template<typename I> +SIMDJSON_NO_SANITIZE_UNDEFINED // We deliberately allow overflow here and check later +simdjson_inline bool parse_digit(const uint8_t c, I &i) { + const uint8_t digit = static_cast<uint8_t>(c - '0'); + if (digit > 9) { + return false; + } + // PERF NOTE: multiplication by 10 is cheaper than arbitrary integer multiplication + i = 10 * i + digit; // might overflow, we will handle the overflow later + return true; +} + +simdjson_inline bool is_digit(const uint8_t c) { + return static_cast<uint8_t>(c - '0') <= 9; +} + +simdjson_inline error_code parse_decimal_after_separator(simdjson_unused const uint8_t *const src, const uint8_t *&p, uint64_t &i, int64_t &exponent) { + // we continue with the fiction that we have an integer. If the + // floating point number is representable as x * 10^z for some integer + // z that fits in 53 bits, then we will be able to convert back the + // the integer into a float in a lossless manner. + const uint8_t *const first_after_period = p; + +#ifdef SIMDJSON_SWAR_NUMBER_PARSING +#if SIMDJSON_SWAR_NUMBER_PARSING + // this helps if we have lots of decimals! + // this turns out to be frequent enough. + if (is_made_of_eight_digits_fast(p)) { + i = i * 100000000 + parse_eight_digits_unrolled(p); + p += 8; + } +#endif // SIMDJSON_SWAR_NUMBER_PARSING +#endif // #ifdef SIMDJSON_SWAR_NUMBER_PARSING + // Unrolling the first digit makes a small difference on some implementations (e.g. westmere) + if (parse_digit(*p, i)) { ++p; } + while (parse_digit(*p, i)) { p++; } + exponent = first_after_period - p; + // Decimal without digits (123.) is illegal + if (exponent == 0) { + return INVALID_NUMBER(src); + } + return SUCCESS; +} + +simdjson_inline error_code parse_exponent(simdjson_unused const uint8_t *const src, const uint8_t *&p, int64_t &exponent) { + // Exp Sign: -123.456e[-]78 + bool neg_exp = ('-' == *p); + if (neg_exp || '+' == *p) { p++; } // Skip + as well + + // Exponent: -123.456e-[78] + auto start_exp = p; + int64_t exp_number = 0; + while (parse_digit(*p, exp_number)) { ++p; } + // It is possible for parse_digit to overflow. + // In particular, it could overflow to INT64_MIN, and we cannot do - INT64_MIN. + // Thus we *must* check for possible overflow before we negate exp_number. + + // Performance notes: it may seem like combining the two "simdjson_unlikely checks" below into + // a single simdjson_unlikely path would be faster. The reasoning is sound, but the compiler may + // not oblige and may, in fact, generate two distinct paths in any case. It might be + // possible to do uint64_t(p - start_exp - 1) >= 18 but it could end up trading off + // instructions for a simdjson_likely branch, an unconclusive gain. + + // If there were no digits, it's an error. + if (simdjson_unlikely(p == start_exp)) { + return INVALID_NUMBER(src); + } + // We have a valid positive exponent in exp_number at this point, except that + // it may have overflowed. + + // If there were more than 18 digits, we may have overflowed the integer. We have to do + // something!!!! + if (simdjson_unlikely(p > start_exp+18)) { + // Skip leading zeroes: 1e000000000000000000001 is technically valid and does not overflow + while (*start_exp == '0') { start_exp++; } + // 19 digits could overflow int64_t and is kind of absurd anyway. We don't + // support exponents smaller than -999,999,999,999,999,999 and bigger + // than 999,999,999,999,999,999. + // We can truncate. + // Note that 999999999999999999 is assuredly too large. The maximal ieee64 value before + // infinity is ~1.8e308. The smallest subnormal is ~5e-324. So, actually, we could + // truncate at 324. + // Note that there is no reason to fail per se at this point in time. + // E.g., 0e999999999999999999999 is a fine number. + if (p > start_exp+18) { exp_number = 999999999999999999; } + } + // At this point, we know that exp_number is a sane, positive, signed integer. + // It is <= 999,999,999,999,999,999. As long as 'exponent' is in + // [-8223372036854775808, 8223372036854775808], we won't overflow. Because 'exponent' + // is bounded in magnitude by the size of the JSON input, we are fine in this universe. + // To sum it up: the next line should never overflow. + exponent += (neg_exp ? -exp_number : exp_number); + return SUCCESS; +} + +simdjson_inline bool check_if_integer(const uint8_t *const src, size_t max_length) { + const uint8_t *const srcend = src + max_length; + bool negative = (*src == '-'); // we can always read at least one character after the '-' + const uint8_t *p = src + uint8_t(negative); + if(p == srcend) { return false; } + if(*p == '0') { + ++p; + if(p == srcend) { return true; } + if(jsoncharutils::is_not_structural_or_whitespace(*p)) { return false; } + return true; + } + while(p != srcend && is_digit(*p)) { ++p; } + if(p == srcend) { return true; } + if(jsoncharutils::is_not_structural_or_whitespace(*p)) { return false; } + return true; +} + +simdjson_inline size_t significant_digits(const uint8_t * start_digits, size_t digit_count) { + // It is possible that the integer had an overflow. + // We have to handle the case where we have 0.0000somenumber. + const uint8_t *start = start_digits; + while ((*start == '0') || (*start == '.')) { ++start; } + // we over-decrement by one when there is a '.' + return digit_count - size_t(start - start_digits); +} + +} // unnamed namespace + +/** @private */ +static error_code slow_float_parsing(simdjson_unused const uint8_t * src, double* answer) { + if (parse_float_fallback(src, answer)) { + return SUCCESS; + } + return INVALID_NUMBER(src); +} + +/** @private */ +template<typename W> +simdjson_inline error_code write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer) { + // If we frequently had to deal with long strings of digits, + // we could extend our code by using a 128-bit integer instead + // of a 64-bit integer. However, this is uncommon in practice. + // + // 9999999999999999999 < 2**64 so we can accommodate 19 digits. + // If we have a decimal separator, then digit_count - 1 is the number of digits, but we + // may not have a decimal separator! + if (simdjson_unlikely(digit_count > 19 && significant_digits(start_digits, digit_count) > 19)) { + // Ok, chances are good that we had an overflow! + // this is almost never going to get called!!! + // we start anew, going slowly!!! + // This will happen in the following examples: + // 10000000000000000000000000000000000000000000e+308 + // 3.1415926535897932384626433832795028841971693993751 + // + // NOTE: We do not pass a reference to the to slow_float_parsing. If we passed our writer + // reference to it, it would force it to be stored in memory, preventing the compiler from + // picking it apart and putting into registers. i.e. if we pass it as reference, + // it gets slow. + double d; + error_code error = slow_float_parsing(src, &d); + writer.append_double(d); + return error; + } + // NOTE: it's weird that the simdjson_unlikely() only wraps half the if, but it seems to get slower any other + // way we've tried: https://github.com/simdjson/simdjson/pull/990#discussion_r448497331 + // To future reader: we'd love if someone found a better way, or at least could explain this result! + if (simdjson_unlikely(exponent < simdjson::internal::smallest_power) || (exponent > simdjson::internal::largest_power)) { + // + // Important: smallest_power is such that it leads to a zero value. + // Observe that 18446744073709551615e-343 == 0, i.e. (2**64 - 1) e -343 is zero + // so something x 10^-343 goes to zero, but not so with something x 10^-342. + static_assert(simdjson::internal::smallest_power <= -342, "smallest_power is not small enough"); + // + if((exponent < simdjson::internal::smallest_power) || (i == 0)) { + // E.g. Parse "-0.0e-999" into the same value as "-0.0". See https://en.wikipedia.org/wiki/Signed_zero + WRITE_DOUBLE(negative ? -0.0 : 0.0, src, writer); + return SUCCESS; + } else { // (exponent > largest_power) and (i != 0) + // We have, for sure, an infinite value and simdjson refuses to parse infinite values. + return INVALID_NUMBER(src); + } + } + double d; + if (!compute_float_64(exponent, i, negative, d)) { + // we are almost never going to get here. + if (!parse_float_fallback(src, &d)) { return INVALID_NUMBER(src); } + } + WRITE_DOUBLE(d, src, writer); + return SUCCESS; +} + +// parse the number at src +// define JSON_TEST_NUMBERS for unit testing +// +// It is assumed that the number is followed by a structural ({,},],[) character +// or a white space character. If that is not the case (e.g., when the JSON +// document is made of a single number), then it is necessary to copy the +// content and append a space before calling this function. +// +// Our objective is accurate parsing (ULP of 0) at high speed. +template<typename W> +simdjson_inline error_code parse_number(const uint8_t *const src, W &writer); + +// for performance analysis, it is sometimes useful to skip parsing +#ifdef SIMDJSON_SKIPNUMBERPARSING + +template<typename W> +simdjson_inline error_code parse_number(const uint8_t *const, W &writer) { + writer.append_s64(0); // always write zero + return SUCCESS; // always succeeds +} + +simdjson_unused simdjson_inline simdjson_result<uint64_t> parse_unsigned(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result<int64_t> parse_integer(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result<double> parse_double(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result<uint64_t> parse_unsigned_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result<int64_t> parse_integer_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result<double> parse_double_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept { return false; } +simdjson_unused simdjson_inline simdjson_result<bool> is_integer(const uint8_t * src) noexcept { return false; } +simdjson_unused simdjson_inline simdjson_result<number_type> get_number_type(const uint8_t * src) noexcept { return number_type::signed_integer; } +#else + +// parse the number at src +// define JSON_TEST_NUMBERS for unit testing +// +// It is assumed that the number is followed by a structural ({,},],[) character +// or a white space character. If that is not the case (e.g., when the JSON +// document is made of a single number), then it is necessary to copy the +// content and append a space before calling this function. +// +// Our objective is accurate parsing (ULP of 0) at high speed. +template<typename W> +simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) { + + // + // Check for minus sign + // + bool negative = (*src == '-'); + const uint8_t *p = src + uint8_t(negative); + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + if (digit_count == 0 || ('0' == *start_digits && digit_count > 1)) { return INVALID_NUMBER(src); } + + // + // Handle floats if there is a . or e (or both) + // + int64_t exponent = 0; + bool is_float = false; + if ('.' == *p) { + is_float = true; + ++p; + SIMDJSON_TRY( parse_decimal_after_separator(src, p, i, exponent) ); + digit_count = int(p - start_digits); // used later to guard against overflows + } + if (('e' == *p) || ('E' == *p)) { + is_float = true; + ++p; + SIMDJSON_TRY( parse_exponent(src, p, exponent) ); + } + if (is_float) { + const bool dirty_end = jsoncharutils::is_not_structural_or_whitespace(*p); + SIMDJSON_TRY( write_float(src, negative, i, start_digits, digit_count, exponent, writer) ); + if (dirty_end) { return INVALID_NUMBER(src); } + return SUCCESS; + } + + // The longest negative 64-bit number is 19 digits. + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + size_t longest_digit_count = negative ? 19 : 20; + if (digit_count > longest_digit_count) { return BIGINT_NUMBER(src); } + if (digit_count == longest_digit_count) { + if (negative) { + // Anything negative above INT64_MAX+1 is invalid + if (i > uint64_t(INT64_MAX)+1) { return BIGINT_NUMBER(src); } + WRITE_INTEGER(~i+1, src, writer); + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } + return SUCCESS; + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + } else if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INVALID_NUMBER(src); } + } + + // Write unsigned if it does not fit in a signed integer. + if (i > uint64_t(INT64_MAX)) { + WRITE_UNSIGNED(i, src, writer); + } else { + WRITE_INTEGER(negative ? (~i+1) : i, src, writer); + } + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } + return SUCCESS; +} + +// Inlineable functions +namespace { + +// This table can be used to characterize the final character of an integer +// string. For JSON structural character and allowable white space characters, +// we return SUCCESS. For 'e', '.' and 'E', we return INCORRECT_TYPE. Otherwise +// we return NUMBER_ERROR. +// Optimization note: we could easily reduce the size of the table by half (to 128) +// at the cost of an extra branch. +// Optimization note: we want the values to use at most 8 bits (not, e.g., 32 bits): +static_assert(error_code(uint8_t(NUMBER_ERROR))== NUMBER_ERROR, "bad NUMBER_ERROR cast"); +static_assert(error_code(uint8_t(SUCCESS))== SUCCESS, "bad NUMBER_ERROR cast"); +static_assert(error_code(uint8_t(INCORRECT_TYPE))== INCORRECT_TYPE, "bad NUMBER_ERROR cast"); + +const uint8_t integer_string_finisher[256] = { + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, + SUCCESS, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, + NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, INCORRECT_TYPE, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, SUCCESS, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + SUCCESS, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR}; + +// Parse any number from 0 to 18,446,744,073,709,551,615 +simdjson_unused simdjson_inline simdjson_result<uint64_t> parse_unsigned(const uint8_t * const src) noexcept { + const uint8_t *p = src; + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if (integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} + + +// Parse any number from 0 to 18,446,744,073,709,551,615 +// Never read at src_end or beyond +simdjson_unused simdjson_inline simdjson_result<uint64_t> parse_unsigned(const uint8_t * const src, const uint8_t * const src_end) noexcept { + const uint8_t *p = src; + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if ((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} + +// Parse any number from 0 to 18,446,744,073,709,551,615 +simdjson_unused simdjson_inline simdjson_result<uint64_t> parse_unsigned_in_string(const uint8_t * const src) noexcept { + const uint8_t *p = src + 1; + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if (*p != '"') { return NUMBER_ERROR; } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + // Note: we use src[1] and not src[0] because src[0] is the quote character in this + // instance. + if (src[1] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +simdjson_unused simdjson_inline simdjson_result<int64_t> parse_integer(const uint8_t *src) noexcept { + // + // Check for minus sign + // + bool negative = (*src == '-'); + const uint8_t *p = src + uint8_t(negative); + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if(integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. + // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +// Never read at src_end or beyond +simdjson_unused simdjson_inline simdjson_result<int64_t> parse_integer(const uint8_t * const src, const uint8_t * const src_end) noexcept { + // + // Check for minus sign + // + if(src == src_end) { return NUMBER_ERROR; } + bool negative = (*src == '-'); + const uint8_t *p = src + uint8_t(negative); + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. + // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +simdjson_unused simdjson_inline simdjson_result<int64_t> parse_integer_in_string(const uint8_t *src) noexcept { + // + // Check for minus sign + // + bool negative = (*(src + 1) == '-'); + src += uint8_t(negative) + 1; + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = src; + uint64_t i = 0; + while (parse_digit(*src, i)) { src++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(src - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*src)) { + // return (*src == '.' || *src == 'e' || *src == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if(*src != '"') { return NUMBER_ERROR; } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. + // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +simdjson_unused simdjson_inline simdjson_result<double> parse_double(const uint8_t * src) noexcept { + // + // Check for minus sign + // + bool negative = (*src == '-'); + src += uint8_t(negative); + + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while (parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely(*p == '.')) { + p++; + const uint8_t *start_decimal_digits = p; + if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while (parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. + overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = p-start_digits > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if (*p == 'e' || *p == 'E') { + p++; + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while (parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 0-exp : exp; + } + + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src - uint8_t(negative), &d)) { + return NUMBER_ERROR; + } + return d; +} + +simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept { + return (*src == '-'); +} + +simdjson_unused simdjson_inline simdjson_result<bool> is_integer(const uint8_t * src) noexcept { + bool negative = (*src == '-'); + src += uint8_t(negative); + const uint8_t *p = src; + while(static_cast<uint8_t>(*p - '0') <= 9) { p++; } + if ( p == src ) { return NUMBER_ERROR; } + if (jsoncharutils::is_structural_or_whitespace(*p)) { return true; } + return false; +} + +simdjson_unused simdjson_inline simdjson_result<number_type> get_number_type(const uint8_t * src) noexcept { + bool negative = (*src == '-'); + src += uint8_t(negative); + const uint8_t *p = src; + while(static_cast<uint8_t>(*p - '0') <= 9) { p++; } + size_t digit_count = size_t(p - src); + if ( p == src ) { return NUMBER_ERROR; } + if (jsoncharutils::is_structural_or_whitespace(*p)) { + static const uint8_t * smaller_big_integer = reinterpret_cast<const uint8_t *>("9223372036854775808"); + // We have an integer. + if(simdjson_unlikely(digit_count > 20)) { + return number_type::big_integer; + } + // If the number is negative and valid, it must be a signed integer. + if(negative) { + if (simdjson_unlikely(digit_count > 19)) return number_type::big_integer; + if (simdjson_unlikely(digit_count == 19 && memcmp(src, smaller_big_integer, 19) > 0)) { + return number_type::big_integer; + } + return number_type::signed_integer; + } + // Let us check if we have a big integer (>=2**64). + static const uint8_t * two_to_sixtyfour = reinterpret_cast<const uint8_t *>("18446744073709551616"); + if((digit_count > 20) || (digit_count == 20 && memcmp(src, two_to_sixtyfour, 20) >= 0)) { + return number_type::big_integer; + } + // The number is positive and smaller than 18446744073709551616 (or 2**64). + // We want values larger or equal to 9223372036854775808 to be unsigned + // integers, and the other values to be signed integers. + if((digit_count == 20) || (digit_count >= 19 && memcmp(src, smaller_big_integer, 19) >= 0)) { + return number_type::unsigned_integer; + } + return number_type::signed_integer; + } + // Hopefully, we have 'e' or 'E' or '.'. + return number_type::floating_point_number; +} + +// Never read at src_end or beyond +simdjson_unused simdjson_inline simdjson_result<double> parse_double(const uint8_t * src, const uint8_t * const src_end) noexcept { + if(src == src_end) { return NUMBER_ERROR; } + // + // Check for minus sign + // + bool negative = (*src == '-'); + src += uint8_t(negative); + + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + if(p == src_end) { return NUMBER_ERROR; } + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while ((p != src_end) && parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely((p != src_end) && (*p == '.'))) { + p++; + const uint8_t *start_decimal_digits = p; + if ((p == src_end) || !parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. + overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = start_digits-src > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if ((p != src_end) && (*p == 'e' || *p == 'E')) { + p++; + if(p == src_end) { return NUMBER_ERROR; } + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while ((p != src_end) && parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 0-exp : exp; + } + + if ((p != src_end) && jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src - uint8_t(negative), src_end, &d)) { + return NUMBER_ERROR; + } + return d; +} + +simdjson_unused simdjson_inline simdjson_result<double> parse_double_in_string(const uint8_t * src) noexcept { + // + // Check for minus sign + // + bool negative = (*(src + 1) == '-'); + src += uint8_t(negative) + 1; + + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while (parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely(*p == '.')) { + p++; + const uint8_t *start_decimal_digits = p; + if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while (parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. + overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = p-start_digits > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if (*p == 'e' || *p == 'E') { + p++; + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while (parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 0-exp : exp; + } + + if (*p != '"') { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src - uint8_t(negative), &d)) { + return NUMBER_ERROR; + } + return d; +} + +} // unnamed namespace +#endif // SIMDJSON_SKIPNUMBERPARSING + +} // namespace numberparsing + +inline std::ostream& operator<<(std::ostream& out, number_type type) noexcept { + switch (type) { + case number_type::signed_integer: out << "integer in [-9223372036854775808,9223372036854775808)"; break; + case number_type::unsigned_integer: out << "unsigned integer in [9223372036854775808,18446744073709551616)"; break; + case number_type::floating_point_number: out << "floating-point number (binary64)"; break; + case number_type::big_integer: out << "big integer"; break; + default: SIMDJSON_UNREACHABLE(); + } + return out; +} + +} // namespace SIMDJSON_IMPLEMENTATION +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_NUMBERPARSING_H diff --git a/contrib/libs/simdjson/include/simdjson/haswell.h b/contrib/libs/simdjson/include/simdjson/haswell.h new file mode 100644 index 00000000000..867b7a44911 --- /dev/null +++ b/contrib/libs/simdjson/include/simdjson/haswell.h @@ -0,0 +1,8 @@ +#ifndef SIMDJSON_HASWELL_H +#define SIMDJSON_HASWELL_H + +#include "simdjson/haswell/begin.h" +#include "simdjson/generic/amalgamated.h" +#include "simdjson/haswell/end.h" + +#endif // SIMDJSON_HASWELL_H
\ No newline at end of file diff --git a/contrib/libs/simdjson/include/simdjson/haswell/base.h b/contrib/libs/simdjson/include/simdjson/haswell/base.h new file mode 100644 index 00000000000..73cc3ef1b4a --- /dev/null +++ b/contrib/libs/simdjson/include/simdjson/haswell/base.h @@ -0,0 +1,27 @@ +#ifndef SIMDJSON_HASWELL_BASE_H +#define SIMDJSON_HASWELL_BASE_H + +#ifndef SIMDJSON_CONDITIONAL_INCLUDE +#include "simdjson/base.h" +#endif // SIMDJSON_CONDITIONAL_INCLUDE + +// The constructor may be executed on any host, so we take care not to use SIMDJSON_TARGET_HASWELL +namespace simdjson { +/** + * Implementation for Haswell (Intel AVX2). + */ +namespace haswell { + +class implementation; + +namespace { +namespace simd { +template <typename T> struct simd8; +template <typename T> struct simd8x64; +} // namespace simd +} // unnamed namespace + +} // namespace haswell +} // namespace simdjson + +#endif // SIMDJSON_HASWELL_BASE_H diff --git a/contrib/libs/simdjson/include/simdjson/haswell/begin.h b/contrib/libs/simdjson/include/simdjson/haswell/begin.h new file mode 100644 index 00000000000..1be8c192c57 --- /dev/null +++ b/contrib/libs/simdjson/include/simdjson/haswell/begin.h @@ -0,0 +1,14 @@ +#define SIMDJSON_IMPLEMENTATION haswell + +#include "simdjson/haswell/base.h" +#include "simdjson/haswell/intrinsics.h" + +#if !SIMDJSON_CAN_ALWAYS_RUN_HASWELL +SIMDJSON_TARGET_REGION("avx2,bmi,pclmul,lzcnt,popcnt") +#endif + +#include "simdjson/haswell/bitmanipulation.h" +#include "simdjson/haswell/bitmask.h" +#include "simdjson/haswell/numberparsing_defs.h" +#include "simdjson/haswell/simd.h" +#include "simdjson/haswell/stringparsing_defs.h" diff --git a/contrib/libs/simdjson/include/simdjson/haswell/bitmanipulation.h b/contrib/libs/simdjson/include/simdjson/haswell/bitmanipulation.h new file mode 100644 index 00000000000..9b0e59905c8 --- /dev/null +++ b/contrib/libs/simdjson/include/simdjson/haswell/bitmanipulation.h @@ -0,0 +1,71 @@ +#ifndef SIMDJSON_HASWELL_BITMANIPULATION_H +#define SIMDJSON_HASWELL_BITMANIPULATION_H + +#ifndef SIMDJSON_CONDITIONAL_INCLUDE +#include "simdjson/haswell/base.h" +#include "simdjson/haswell/intrinsics.h" +#include "simdjson/haswell/bitmask.h" +#endif // SIMDJSON_CONDITIONAL_INCLUDE + +namespace simdjson { +namespace haswell { +namespace { + +// We sometimes call trailing_zero on inputs that are zero, +// but the algorithms do not end up using the returned value. +// Sadly, sanitizers are not smart enough to figure it out. +SIMDJSON_NO_SANITIZE_UNDEFINED +// This function can be used safely even if not all bytes have been +// initialized. +// See issue https://github.com/simdjson/simdjson/issues/1965 +SIMDJSON_NO_SANITIZE_MEMORY +simdjson_inline int trailing_zeroes(uint64_t input_num) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + return (int)_tzcnt_u64(input_num); +#else // SIMDJSON_REGULAR_VISUAL_STUDIO + //////// + // You might expect the next line to be equivalent to + // return (int)_tzcnt_u64(input_num); + // but the generated code differs and might be less efficient? + //////// + return __builtin_ctzll(input_num); +#endif // SIMDJSON_REGULAR_VISUAL_STUDIO +} + +/* result might be undefined when input_num is zero */ +simdjson_inline uint64_t clear_lowest_bit(uint64_t input_num) { + return _blsr_u64(input_num); +} + +/* result might be undefined when input_num is zero */ +simdjson_inline int leading_zeroes(uint64_t input_num) { + return int(_lzcnt_u64(input_num)); +} + +#if SIMDJSON_REGULAR_VISUAL_STUDIO +simdjson_inline unsigned __int64 count_ones(uint64_t input_num) { + // note: we do not support legacy 32-bit Windows in this kernel + return __popcnt64(input_num);// Visual Studio wants two underscores +} +#else +simdjson_inline long long int count_ones(uint64_t input_num) { + return _popcnt64(input_num); +} +#endif + +simdjson_inline bool add_overflow(uint64_t value1, uint64_t value2, + uint64_t *result) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + return _addcarry_u64(0, value1, value2, + reinterpret_cast<unsigned __int64 *>(result)); +#else + return __builtin_uaddll_overflow(value1, value2, + reinterpret_cast<unsigned long long *>(result)); +#endif +} + +} // unnamed namespace +} // namespace haswell +} // namespace simdjson + +#endif // SIMDJSON_HASWELL_BITMANIPULATION_H diff --git a/contrib/libs/simdjson/include/simdjson/haswell/bitmask.h b/contrib/libs/simdjson/include/simdjson/haswell/bitmask.h new file mode 100644 index 00000000000..310c3280bec --- /dev/null +++ b/contrib/libs/simdjson/include/simdjson/haswell/bitmask.h @@ -0,0 +1,30 @@ +#ifndef SIMDJSON_HASWELL_BITMASK_H +#define SIMDJSON_HASWELL_BITMASK_H + +#ifndef SIMDJSON_CONDITIONAL_INCLUDE +#include "simdjson/haswell/base.h" +#include "simdjson/haswell/intrinsics.h" +#endif // SIMDJSON_CONDITIONAL_INCLUDE + +namespace simdjson { +namespace haswell { +namespace { + +// +// Perform a "cumulative bitwise xor," flipping bits each time a 1 is encountered. +// +// For example, prefix_xor(00100100) == 00011100 +// +simdjson_inline uint64_t prefix_xor(const uint64_t bitmask) { + // There should be no such thing with a processor supporting avx2 + // but not clmul. + __m128i all_ones = _mm_set1_epi8('\xFF'); + __m128i result = _mm_clmulepi64_si128(_mm_set_epi64x(0ULL, bitmask), all_ones, 0); + return _mm_cvtsi128_si64(result); +} + +} // unnamed namespace +} // namespace haswell +} // namespace simdjson + +#endif // SIMDJSON_HASWELL_BITMASK_H diff --git a/contrib/libs/simdjson/include/simdjson/haswell/end.h b/contrib/libs/simdjson/include/simdjson/haswell/end.h new file mode 100644 index 00000000000..421df3653c9 --- /dev/null +++ b/contrib/libs/simdjson/include/simdjson/haswell/end.h @@ -0,0 +1,9 @@ +#ifndef SIMDJSON_CONDITIONAL_INCLUDE +#include "simdjson/haswell/base.h" +#endif // SIMDJSON_CONDITIONAL_INCLUDE + +#if !SIMDJSON_CAN_ALWAYS_RUN_HASWELL +SIMDJSON_UNTARGET_REGION +#endif + +#undef SIMDJSON_IMPLEMENTATION diff --git a/contrib/libs/simdjson/include/simdjson/haswell/implementation.h b/contrib/libs/simdjson/include/simdjson/haswell/implementation.h new file mode 100644 index 00000000000..6861e42983c --- /dev/null +++ b/contrib/libs/simdjson/include/simdjson/haswell/implementation.h @@ -0,0 +1,36 @@ +#ifndef SIMDJSON_HASWELL_IMPLEMENTATION_H +#define SIMDJSON_HASWELL_IMPLEMENTATION_H + +#ifndef SIMDJSON_CONDITIONAL_INCLUDE +#include "simdjson/haswell/base.h" +#include "simdjson/implementation.h" +#include "simdjson/internal/instruction_set.h" +#endif // SIMDJSON_CONDITIONAL_INCLUDE + +// The constructor may be executed on any host, so we take care not to use SIMDJSON_TARGET_HASWELL +namespace simdjson { +namespace haswell { + +/** + * @private + */ +class implementation final : public simdjson::implementation { +public: + simdjson_inline implementation() : simdjson::implementation( + "haswell", + "Intel/AMD AVX2", + internal::instruction_set::AVX2 | internal::instruction_set::PCLMULQDQ | internal::instruction_set::BMI1 | internal::instruction_set::BMI2 + ) {} + simdjson_warn_unused error_code create_dom_parser_implementation( + size_t capacity, + size_t max_length, + std::unique_ptr<internal::dom_parser_implementation>& dst + ) const noexcept final; + simdjson_warn_unused error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept final; + simdjson_warn_unused bool validate_utf8(const char *buf, size_t len) const noexcept final; +}; + +} // namespace haswell +} // namespace simdjson + +#endif // SIMDJSON_HASWELL_IMPLEMENTATION_H diff --git a/contrib/libs/simdjson/include/simdjson/haswell/intrinsics.h b/contrib/libs/simdjson/include/simdjson/haswell/intrinsics.h new file mode 100644 index 00000000000..a4593dbb698 --- /dev/null +++ b/contrib/libs/simdjson/include/simdjson/haswell/intrinsics.h @@ -0,0 +1,52 @@ +#ifndef SIMDJSON_HASWELL_INTRINSICS_H +#define SIMDJSON_HASWELL_INTRINSICS_H + +#ifndef SIMDJSON_CONDITIONAL_INCLUDE +#include "simdjson/haswell/base.h" +#endif // SIMDJSON_CONDITIONAL_INCLUDE + +#if SIMDJSON_VISUAL_STUDIO +// under clang within visual studio, this will include <x86intrin.h> +#include <intrin.h> // visual studio or clang +#else +#include <x86intrin.h> // elsewhere +#endif // SIMDJSON_VISUAL_STUDIO + +#if SIMDJSON_CLANG_VISUAL_STUDIO +/** + * You are not supposed, normally, to include these + * headers directly. Instead you should either include intrin.h + * or x86intrin.h. However, when compiling with clang + * under Windows (i.e., when _MSC_VER is set), these headers + * only get included *if* the corresponding features are detected + * from macros: + * e.g., if __AVX2__ is set... in turn, we normally set these + * macros by compiling against the corresponding architecture + * (e.g., arch:AVX2, -mavx2, etc.) which compiles the whole + * software with these advanced instructions. In simdjson, we + * want to compile the whole program for a generic target, + * and only target our specific kernels. As a workaround, + * we directly include the needed headers. These headers would + * normally guard against such usage, but we carefully included + * <x86intrin.h> (or <intrin.h>) before, so the headers + * are fooled. + */ +#include <bmiintrin.h> // for _blsr_u64 +#include <lzcntintrin.h> // for __lzcnt64 +#include <immintrin.h> // for most things (AVX2, AVX512, _popcnt64) +#include <smmintrin.h> +#include <tmmintrin.h> +#include <avxintrin.h> +#include <avx2intrin.h> +#include <wmmintrin.h> // for _mm_clmulepi64_si128 +// unfortunately, we may not get _blsr_u64, but, thankfully, clang +// has it as a macro. +#ifndef _blsr_u64 +// we roll our own +#define _blsr_u64(n) ((n - 1) & n) +#endif // _blsr_u64 +#endif // SIMDJSON_CLANG_VISUAL_STUDIO + +static_assert(sizeof(__m256i) <= simdjson::SIMDJSON_PADDING, "insufficient padding for haswell kernel."); + +#endif // SIMDJSON_HASWELL_INTRINSICS_H diff --git a/contrib/libs/simdjson/include/simdjson/haswell/numberparsing_defs.h b/contrib/libs/simdjson/include/simdjson/haswell/numberparsing_defs.h new file mode 100644 index 00000000000..5673e5e4f81 --- /dev/null +++ b/contrib/libs/simdjson/include/simdjson/haswell/numberparsing_defs.h @@ -0,0 +1,61 @@ +#ifndef SIMDJSON_HASWELL_NUMBERPARSING_DEFS_H +#define SIMDJSON_HASWELL_NUMBERPARSING_DEFS_H + +#ifndef SIMDJSON_CONDITIONAL_INCLUDE +#include "simdjson/haswell/base.h" +#include "simdjson/haswell/intrinsics.h" +#endif // SIMDJSON_CONDITIONAL_INCLUDE + +#ifndef SIMDJSON_CONDITIONAL_INCLUDE +#include "simdjson/internal/numberparsing_tables.h" +#endif // SIMDJSON_CONDITIONAL_INCLUDE + +namespace simdjson { +namespace haswell { +namespace numberparsing { + +/** @private */ +static simdjson_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) { + // this actually computes *16* values so we are being wasteful. + const __m128i ascii0 = _mm_set1_epi8('0'); + const __m128i mul_1_10 = + _mm_setr_epi8(10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1); + const __m128i mul_1_100 = _mm_setr_epi16(100, 1, 100, 1, 100, 1, 100, 1); + const __m128i mul_1_10000 = + _mm_setr_epi16(10000, 1, 10000, 1, 10000, 1, 10000, 1); + const __m128i input = _mm_sub_epi8( + _mm_loadu_si128(reinterpret_cast<const __m128i *>(chars)), ascii0); + const __m128i t1 = _mm_maddubs_epi16(input, mul_1_10); + const __m128i t2 = _mm_madd_epi16(t1, mul_1_100); + const __m128i t3 = _mm_packus_epi32(t2, t2); + const __m128i t4 = _mm_madd_epi16(t3, mul_1_10000); + return _mm_cvtsi128_si32( + t4); // only captures the sum of the first 8 digits, drop the rest +} + +/** @private */ +simdjson_inline internal::value128 full_multiplication(uint64_t value1, uint64_t value2) { + internal::value128 answer; +#if SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS +#if SIMDJSON_IS_ARM64 + // ARM64 has native support for 64-bit multiplications, no need to emultate + answer.high = __umulh(value1, value2); + answer.low = value1 * value2; +#else + answer.low = _umul128(value1, value2, &answer.high); // _umul128 not available on ARM64 +#endif // SIMDJSON_IS_ARM64 +#else // SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS + __uint128_t r = (static_cast<__uint128_t>(value1)) * value2; + answer.low = uint64_t(r); + answer.high = uint64_t(r >> 64); +#endif + return answer; +} + +} // namespace numberparsing +} // namespace haswell +} // namespace simdjson + +#define SIMDJSON_SWAR_NUMBER_PARSING 1 + +#endif // SIMDJSON_HASWELL_NUMBERPARSING_DEFS_H diff --git a/contrib/libs/simdjson/include/simdjson/haswell/simd.h b/contrib/libs/simdjson/include/simdjson/haswell/simd.h new file mode 100644 index 00000000000..9c40f4f4fdd --- /dev/null +++ b/contrib/libs/simdjson/include/simdjson/haswell/simd.h @@ -0,0 +1,372 @@ +#ifndef SIMDJSON_HASWELL_SIMD_H +#define SIMDJSON_HASWELL_SIMD_H + +#ifndef SIMDJSON_CONDITIONAL_INCLUDE +#include "simdjson/haswell/base.h" +#include "simdjson/haswell/intrinsics.h" +#include "simdjson/haswell/bitmanipulation.h" +#include "simdjson/internal/simdprune_tables.h" +#endif // SIMDJSON_CONDITIONAL_INCLUDE + +namespace simdjson { +namespace haswell { +namespace { +namespace simd { + + // Forward-declared so they can be used by splat and friends. + template<typename Child> + struct base { + __m256i value; + + // Zero constructor + simdjson_inline base() : value{__m256i()} {} + + // Conversion from SIMD register + simdjson_inline base(const __m256i _value) : value(_value) {} + + // Conversion to SIMD register + simdjson_inline operator const __m256i&() const { return this->value; } + simdjson_inline operator __m256i&() { return this->value; } + + // Bit operations + simdjson_inline Child operator|(const Child other) const { return _mm256_or_si256(*this, other); } + simdjson_inline Child operator&(const Child other) const { return _mm256_and_si256(*this, other); } + simdjson_inline Child operator^(const Child other) const { return _mm256_xor_si256(*this, other); } + simdjson_inline Child bit_andnot(const Child other) const { return _mm256_andnot_si256(other, *this); } + simdjson_inline Child& operator|=(const Child other) { auto this_cast = static_cast<Child*>(this); *this_cast = *this_cast | other; return *this_cast; } + simdjson_inline Child& operator&=(const Child other) { auto this_cast = static_cast<Child*>(this); *this_cast = *this_cast & other; return *this_cast; } + simdjson_inline Child& operator^=(const Child other) { auto this_cast = static_cast<Child*>(this); *this_cast = *this_cast ^ other; return *this_cast; } + }; + + // Forward-declared so they can be used by splat and friends. + template<typename T> + struct simd8; + + template<typename T, typename Mask=simd8<bool>> + struct base8: base<simd8<T>> { + typedef uint32_t bitmask_t; + typedef uint64_t bitmask2_t; + + simdjson_inline base8() : base<simd8<T>>() {} + simdjson_inline base8(const __m256i _value) : base<simd8<T>>(_value) {} + + friend simdjson_really_inline Mask operator==(const simd8<T> lhs, const simd8<T> rhs) { return _mm256_cmpeq_epi8(lhs, rhs); } + + static const int SIZE = sizeof(base<T>::value); + + template<int N=1> + simdjson_inline simd8<T> prev(const simd8<T> prev_chunk) const { + return _mm256_alignr_epi8(*this, _mm256_permute2x128_si256(prev_chunk, *this, 0x21), 16 - N); + } + }; + + // SIMD byte mask type (returned by things like eq and gt) + template<> + struct simd8<bool>: base8<bool> { + static simdjson_inline simd8<bool> splat(bool _value) { return _mm256_set1_epi8(uint8_t(-(!!_value))); } + + simdjson_inline simd8() : base8() {} + simdjson_inline simd8(const __m256i _value) : base8<bool>(_value) {} + // Splat constructor + simdjson_inline simd8(bool _value) : base8<bool>(splat(_value)) {} + + simdjson_inline int to_bitmask() const { return _mm256_movemask_epi8(*this); } + simdjson_inline bool any() const { return !_mm256_testz_si256(*this, *this); } + simdjson_inline simd8<bool> operator~() const { return *this ^ true; } + }; + + template<typename T> + struct base8_numeric: base8<T> { + static simdjson_inline simd8<T> splat(T _value) { return _mm256_set1_epi8(_value); } + static simdjson_inline simd8<T> zero() { return _mm256_setzero_si256(); } + static simdjson_inline simd8<T> load(const T values[32]) { + return _mm256_loadu_si256(reinterpret_cast<const __m256i *>(values)); + } + // Repeat 16 values as many times as necessary (usually for lookup tables) + static simdjson_inline simd8<T> repeat_16( + T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, + T v8, T v9, T v10, T v11, T v12, T v13, T v14, T v15 + ) { + return simd8<T>( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + simdjson_inline base8_numeric() : base8<T>() {} + simdjson_inline base8_numeric(const __m256i _value) : base8<T>(_value) {} + + // Store to array + simdjson_inline void store(T dst[32]) const { return _mm256_storeu_si256(reinterpret_cast<__m256i *>(dst), *this); } + + // Addition/subtraction are the same for signed and unsigned + simdjson_inline simd8<T> operator+(const simd8<T> other) const { return _mm256_add_epi8(*this, other); } + simdjson_inline simd8<T> operator-(const simd8<T> other) const { return _mm256_sub_epi8(*this, other); } + simdjson_inline simd8<T>& operator+=(const simd8<T> other) { *this = *this + other; return *static_cast<simd8<T>*>(this); } + simdjson_inline simd8<T>& operator-=(const simd8<T> other) { *this = *this - other; return *static_cast<simd8<T>*>(this); } + + // Override to distinguish from bool version + simdjson_inline simd8<T> operator~() const { return *this ^ 0xFFu; } + + // Perform a lookup assuming the value is between 0 and 16 (undefined behavior for out of range values) + template<typename L> + simdjson_inline simd8<L> lookup_16(simd8<L> lookup_table) const { + return _mm256_shuffle_epi8(lookup_table, *this); + } + + // Copies to 'output" all bytes corresponding to a 0 in the mask (interpreted as a bitset). + // Passing a 0 value for mask would be equivalent to writing out every byte to output. + // Only the first 32 - count_ones(mask) bytes of the result are significant but 32 bytes + // get written. + // Design consideration: it seems like a function with the + // signature simd8<L> compress(uint32_t mask) would be + // sensible, but the AVX ISA makes this kind of approach difficult. + template<typename L> + simdjson_inline void compress(uint32_t mask, L * output) const { + using internal::thintable_epi8; + using internal::BitsSetTable256mul2; + using internal::pshufb_combine_table; + // this particular implementation was inspired by work done by @animetosho + // we do it in four steps, first 8 bytes and then second 8 bytes... + uint8_t mask1 = uint8_t(mask); // least significant 8 bits + uint8_t mask2 = uint8_t(mask >> 8); // second least significant 8 bits + uint8_t mask3 = uint8_t(mask >> 16); // ... + uint8_t mask4 = uint8_t(mask >> 24); // ... + // next line just loads the 64-bit values thintable_epi8[mask1] and + // thintable_epi8[mask2] into a 128-bit register, using only + // two instructions on most compilers. + __m256i shufmask = _mm256_set_epi64x(thintable_epi8[mask4], thintable_epi8[mask3], + thintable_epi8[mask2], thintable_epi8[mask1]); + // we increment by 0x08 the second half of the mask and so forth + shufmask = + _mm256_add_epi8(shufmask, _mm256_set_epi32(0x18181818, 0x18181818, + 0x10101010, 0x10101010, 0x08080808, 0x08080808, 0, 0)); + // this is the version "nearly pruned" + __m256i pruned = _mm256_shuffle_epi8(*this, shufmask); + // we still need to put the pieces back together. + // we compute the popcount of the first words: + int pop1 = BitsSetTable256mul2[mask1]; + int pop3 = BitsSetTable256mul2[mask3]; + + // then load the corresponding mask + // could be done with _mm256_loadu2_m128i but many standard libraries omit this intrinsic. + __m256i v256 = _mm256_castsi128_si256( + _mm_loadu_si128(reinterpret_cast<const __m128i *>(pshufb_combine_table + pop1 * 8))); + __m256i compactmask = _mm256_insertf128_si256(v256, + _mm_loadu_si128(reinterpret_cast<const __m128i *>(pshufb_combine_table + pop3 * 8)), 1); + __m256i almostthere = _mm256_shuffle_epi8(pruned, compactmask); + // We just need to write out the result. + // This is the tricky bit that is hard to do + // if we want to return a SIMD register, since there + // is no single-instruction approach to recombine + // the two 128-bit lanes with an offset. + __m128i v128; + v128 = _mm256_castsi256_si128(almostthere); + _mm_storeu_si128( reinterpret_cast<__m128i *>(output), v128); + v128 = _mm256_extractf128_si256(almostthere, 1); + _mm_storeu_si128( reinterpret_cast<__m128i *>(output + 16 - count_ones(mask & 0xFFFF)), v128); + } + + template<typename L> + simdjson_inline simd8<L> lookup_16( + L replace0, L replace1, L replace2, L replace3, + L replace4, L replace5, L replace6, L replace7, + L replace8, L replace9, L replace10, L replace11, + L replace12, L replace13, L replace14, L replace15) const { + return lookup_16(simd8<L>::repeat_16( + replace0, replace1, replace2, replace3, + replace4, replace5, replace6, replace7, + replace8, replace9, replace10, replace11, + replace12, replace13, replace14, replace15 + )); + } + }; + + // Signed bytes + template<> + struct simd8<int8_t> : base8_numeric<int8_t> { + simdjson_inline simd8() : base8_numeric<int8_t>() {} + simdjson_inline simd8(const __m256i _value) : base8_numeric<int8_t>(_value) {} + // Splat constructor + simdjson_inline simd8(int8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const int8_t values[32]) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15, + int8_t v16, int8_t v17, int8_t v18, int8_t v19, int8_t v20, int8_t v21, int8_t v22, int8_t v23, + int8_t v24, int8_t v25, int8_t v26, int8_t v27, int8_t v28, int8_t v29, int8_t v30, int8_t v31 + ) : simd8(_mm256_setr_epi8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v16,v17,v18,v19,v20,v21,v22,v23, + v24,v25,v26,v27,v28,v29,v30,v31 + )) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8<int8_t> repeat_16( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 + ) { + return simd8<int8_t>( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + // Order-sensitive comparisons + simdjson_inline simd8<int8_t> max_val(const simd8<int8_t> other) const { return _mm256_max_epi8(*this, other); } + simdjson_inline simd8<int8_t> min_val(const simd8<int8_t> other) const { return _mm256_min_epi8(*this, other); } + simdjson_inline simd8<bool> operator>(const simd8<int8_t> other) const { return _mm256_cmpgt_epi8(*this, other); } + simdjson_inline simd8<bool> operator<(const simd8<int8_t> other) const { return _mm256_cmpgt_epi8(other, *this); } + }; + + // Unsigned bytes + template<> + struct simd8<uint8_t>: base8_numeric<uint8_t> { + simdjson_inline simd8() : base8_numeric<uint8_t>() {} + simdjson_inline simd8(const __m256i _value) : base8_numeric<uint8_t>(_value) {} + // Splat constructor + simdjson_inline simd8(uint8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const uint8_t values[32]) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15, + uint8_t v16, uint8_t v17, uint8_t v18, uint8_t v19, uint8_t v20, uint8_t v21, uint8_t v22, uint8_t v23, + uint8_t v24, uint8_t v25, uint8_t v26, uint8_t v27, uint8_t v28, uint8_t v29, uint8_t v30, uint8_t v31 + ) : simd8(_mm256_setr_epi8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v16,v17,v18,v19,v20,v21,v22,v23, + v24,v25,v26,v27,v28,v29,v30,v31 + )) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8<uint8_t> repeat_16( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 + ) { + return simd8<uint8_t>( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + // Saturated math + simdjson_inline simd8<uint8_t> saturating_add(const simd8<uint8_t> other) const { return _mm256_adds_epu8(*this, other); } + simdjson_inline simd8<uint8_t> saturating_sub(const simd8<uint8_t> other) const { return _mm256_subs_epu8(*this, other); } + + // Order-specific operations + simdjson_inline simd8<uint8_t> max_val(const simd8<uint8_t> other) const { return _mm256_max_epu8(*this, other); } + simdjson_inline simd8<uint8_t> min_val(const simd8<uint8_t> other) const { return _mm256_min_epu8(other, *this); } + // Same as >, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8<uint8_t> gt_bits(const simd8<uint8_t> other) const { return this->saturating_sub(other); } + // Same as <, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8<uint8_t> lt_bits(const simd8<uint8_t> other) const { return other.saturating_sub(*this); } + simdjson_inline simd8<bool> operator<=(const simd8<uint8_t> other) const { return other.max_val(*this) == other; } + simdjson_inline simd8<bool> operator>=(const simd8<uint8_t> other) const { return other.min_val(*this) == other; } + simdjson_inline simd8<bool> operator>(const simd8<uint8_t> other) const { return this->gt_bits(other).any_bits_set(); } + simdjson_inline simd8<bool> operator<(const simd8<uint8_t> other) const { return this->lt_bits(other).any_bits_set(); } + + // Bit-specific operations + simdjson_inline simd8<bool> bits_not_set() const { return *this == uint8_t(0); } + simdjson_inline simd8<bool> bits_not_set(simd8<uint8_t> bits) const { return (*this & bits).bits_not_set(); } + simdjson_inline simd8<bool> any_bits_set() const { return ~this->bits_not_set(); } + simdjson_inline simd8<bool> any_bits_set(simd8<uint8_t> bits) const { return ~this->bits_not_set(bits); } + simdjson_inline bool is_ascii() const { return _mm256_movemask_epi8(*this) == 0; } + simdjson_inline bool bits_not_set_anywhere() const { return _mm256_testz_si256(*this, *this); } + simdjson_inline bool any_bits_set_anywhere() const { return !bits_not_set_anywhere(); } + simdjson_inline bool bits_not_set_anywhere(simd8<uint8_t> bits) const { return _mm256_testz_si256(*this, bits); } + simdjson_inline bool any_bits_set_anywhere(simd8<uint8_t> bits) const { return !bits_not_set_anywhere(bits); } + template<int N> + simdjson_inline simd8<uint8_t> shr() const { return simd8<uint8_t>(_mm256_srli_epi16(*this, N)) & uint8_t(0xFFu >> N); } + template<int N> + simdjson_inline simd8<uint8_t> shl() const { return simd8<uint8_t>(_mm256_slli_epi16(*this, N)) & uint8_t(0xFFu << N); } + // Get one of the bits and make a bitmask out of it. + // e.g. value.get_bit<7>() gets the high bit + template<int N> + simdjson_inline int get_bit() const { return _mm256_movemask_epi8(_mm256_slli_epi16(*this, 7-N)); } + }; + + template<typename T> + struct simd8x64 { + static constexpr int NUM_CHUNKS = 64 / sizeof(simd8<T>); + static_assert(NUM_CHUNKS == 2, "Haswell kernel should use two registers per 64-byte block."); + const simd8<T> chunks[NUM_CHUNKS]; + + simd8x64(const simd8x64<T>& o) = delete; // no copy allowed + simd8x64<T>& operator=(const simd8<T>& other) = delete; // no assignment allowed + simd8x64() = delete; // no default constructor allowed + + simdjson_inline simd8x64(const simd8<T> chunk0, const simd8<T> chunk1) : chunks{chunk0, chunk1} {} + simdjson_inline simd8x64(const T ptr[64]) : chunks{simd8<T>::load(ptr), simd8<T>::load(ptr+32)} {} + + simdjson_inline uint64_t compress(uint64_t mask, T * output) const { + uint32_t mask1 = uint32_t(mask); + uint32_t mask2 = uint32_t(mask >> 32); + this->chunks[0].compress(mask1, output); + this->chunks[1].compress(mask2, output + 32 - count_ones(mask1)); + return 64 - count_ones(mask); + } + + simdjson_inline void store(T ptr[64]) const { + this->chunks[0].store(ptr+sizeof(simd8<T>)*0); + this->chunks[1].store(ptr+sizeof(simd8<T>)*1); + } + + simdjson_inline uint64_t to_bitmask() const { + uint64_t r_lo = uint32_t(this->chunks[0].to_bitmask()); + uint64_t r_hi = this->chunks[1].to_bitmask(); + return r_lo | (r_hi << 32); + } + + simdjson_inline simd8<T> reduce_or() const { + return this->chunks[0] | this->chunks[1]; + } + + simdjson_inline simd8x64<T> bit_or(const T m) const { + const simd8<T> mask = simd8<T>::splat(m); + return simd8x64<T>( + this->chunks[0] | mask, + this->chunks[1] | mask + ); + } + + simdjson_inline uint64_t eq(const T m) const { + const simd8<T> mask = simd8<T>::splat(m); + return simd8x64<bool>( + this->chunks[0] == mask, + this->chunks[1] == mask + ).to_bitmask(); + } + + simdjson_inline uint64_t eq(const simd8x64<uint8_t> &other) const { + return simd8x64<bool>( + this->chunks[0] == other.chunks[0], + this->chunks[1] == other.chunks[1] + ).to_bitmask(); + } + + simdjson_inline uint64_t lteq(const T m) const { + const simd8<T> mask = simd8<T>::splat(m); + return simd8x64<bool>( + this->chunks[0] <= mask, + this->chunks[1] <= mask + ).to_bitmask(); + } + }; // struct simd8x64<T> + +} // namespace simd + +} // unnamed namespace +} // namespace haswell +} // namespace simdjson + +#endif // SIMDJSON_HASWELL_SIMD_H diff --git a/contrib/libs/simdjson/include/simdjson/haswell/stringparsing_defs.h b/contrib/libs/simdjson/include/simdjson/haswell/stringparsing_defs.h new file mode 100644 index 00000000000..f896a10e2c1 --- /dev/null +++ b/contrib/libs/simdjson/include/simdjson/haswell/stringparsing_defs.h @@ -0,0 +1,48 @@ +#ifndef SIMDJSON_HASWELL_STRINGPARSING_DEFS_H +#define SIMDJSON_HASWELL_STRINGPARSING_DEFS_H + +#ifndef SIMDJSON_CONDITIONAL_INCLUDE +#include "simdjson/haswell/base.h" +#include "simdjson/haswell/simd.h" +#include "simdjson/haswell/bitmanipulation.h" +#endif // SIMDJSON_CONDITIONAL_INCLUDE + +namespace simdjson { +namespace haswell { +namespace { + +using namespace simd; + +// Holds backslashes and quotes locations. +struct backslash_and_quote { +public: + static constexpr uint32_t BYTES_PROCESSED = 32; + simdjson_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); + + simdjson_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; } + simdjson_inline bool has_backslash() { return ((quote_bits - 1) & bs_bits) != 0; } + simdjson_inline int quote_index() { return trailing_zeroes(quote_bits); } + simdjson_inline int backslash_index() { return trailing_zeroes(bs_bits); } + + uint32_t bs_bits; + uint32_t quote_bits; +}; // struct backslash_and_quote + +simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { + // this can read up to 15 bytes beyond the buffer size, but we require + // SIMDJSON_PADDING of padding + static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "backslash and quote finder must process fewer than SIMDJSON_PADDING bytes"); + simd8<uint8_t> v(src); + // store to dest unconditionally - we can overwrite the bits we don't like later + v.store(dst); + return { + static_cast<uint32_t>((v == '\\').to_bitmask()), // bs_bits + static_cast<uint32_t>((v == '"').to_bitmask()), // quote_bits + }; +} + +} // unnamed namespace +} // namespace haswell +} // namespace simdjson + +#endif // SIMDJSON_HASWELL_STRINGPARSING_DEFS_H diff --git a/contrib/libs/simdjson/include/simdjson/icelake.h b/contrib/libs/simdjson/include/simdjson/icelake.h new file mode 100644 index 00000000000..96429603406 --- /dev/null +++ b/contrib/libs/simdjson/include/simdjson/icelake.h @@ -0,0 +1,8 @@ +#ifndef SIMDJSON_ICELAKE_H +#define SIMDJSON_ICELAKE_H + +#include "simdjson/icelake/begin.h" +#include "simdjson/generic/amalgamated.h" +#include "simdjson/icelake/end.h" + +#endif // SIMDJSON_ICELAKE_H
\ No newline at end of file diff --git a/contrib/libs/simdjson/include/simdjson/icelake/base.h b/contrib/libs/simdjson/include/simdjson/icelake/base.h new file mode 100644 index 00000000000..f44c0d32a1f --- /dev/null +++ b/contrib/libs/simdjson/include/simdjson/icelake/base.h @@ -0,0 +1,20 @@ +#ifndef SIMDJSON_ICELAKE_BASE_H +#define SIMDJSON_ICELAKE_BASE_H + +#ifndef SIMDJSON_CONDITIONAL_INCLUDE +#include "simdjson/base.h" +#endif // SIMDJSON_CONDITIONAL_INCLUDE + +// The constructor may be executed on any host, so we take care not to use SIMDJSON_TARGET_ICELAKE +namespace simdjson { +/** + * Implementation for Icelake (Intel AVX512). + */ +namespace icelake { + +class implementation; + +} // namespace icelake +} // namespace simdjson + +#endif // SIMDJSON_ICELAKE_BASE_H diff --git a/contrib/libs/simdjson/include/simdjson/icelake/begin.h b/contrib/libs/simdjson/include/simdjson/icelake/begin.h new file mode 100644 index 00000000000..60fe2cd6bf9 --- /dev/null +++ b/contrib/libs/simdjson/include/simdjson/icelake/begin.h @@ -0,0 +1,13 @@ +#define SIMDJSON_IMPLEMENTATION icelake +#include "simdjson/icelake/base.h" +#include "simdjson/icelake/intrinsics.h" + +#if !SIMDJSON_CAN_ALWAYS_RUN_ICELAKE +SIMDJSON_TARGET_REGION("avx512f,avx512dq,avx512cd,avx512bw,avx512vbmi,avx512vbmi2,avx512vl,avx2,bmi,pclmul,lzcnt,popcnt") +#endif + +#include "simdjson/icelake/bitmanipulation.h" +#include "simdjson/icelake/bitmask.h" +#include "simdjson/icelake/simd.h" +#include "simdjson/icelake/stringparsing_defs.h" +#include "simdjson/icelake/numberparsing_defs.h" diff --git a/contrib/libs/simdjson/include/simdjson/icelake/bitmanipulation.h b/contrib/libs/simdjson/include/simdjson/icelake/bitmanipulation.h new file mode 100644 index 00000000000..5bcf7116015 --- /dev/null +++ b/contrib/libs/simdjson/include/simdjson/icelake/bitmanipulation.h @@ -0,0 +1,70 @@ +#ifndef SIMDJSON_ICELAKE_BITMANIPULATION_H +#define SIMDJSON_ICELAKE_BITMANIPULATION_H + +#ifndef SIMDJSON_CONDITIONAL_INCLUDE +#include "simdjson/icelake/base.h" +#include "simdjson/icelake/intrinsics.h" +#endif // SIMDJSON_CONDITIONAL_INCLUDE + +namespace simdjson { +namespace icelake { +namespace { + +// We sometimes call trailing_zero on inputs that are zero, +// but the algorithms do not end up using the returned value. +// Sadly, sanitizers are not smart enough to figure it out. +SIMDJSON_NO_SANITIZE_UNDEFINED +// This function can be used safely even if not all bytes have been +// initialized. +// See issue https://github.com/simdjson/simdjson/issues/1965 +SIMDJSON_NO_SANITIZE_MEMORY +simdjson_inline int trailing_zeroes(uint64_t input_num) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + return (int)_tzcnt_u64(input_num); +#else // SIMDJSON_REGULAR_VISUAL_STUDIO + //////// + // You might expect the next line to be equivalent to + // return (int)_tzcnt_u64(input_num); + // but the generated code differs and might be less efficient? + //////// + return __builtin_ctzll(input_num); +#endif // SIMDJSON_REGULAR_VISUAL_STUDIO +} + +/* result might be undefined when input_num is zero */ +simdjson_inline uint64_t clear_lowest_bit(uint64_t input_num) { + return _blsr_u64(input_num); +} + +/* result might be undefined when input_num is zero */ +simdjson_inline int leading_zeroes(uint64_t input_num) { + return int(_lzcnt_u64(input_num)); +} + +#if SIMDJSON_REGULAR_VISUAL_STUDIO +simdjson_inline unsigned __int64 count_ones(uint64_t input_num) { + // note: we do not support legacy 32-bit Windows + return __popcnt64(input_num);// Visual Studio wants two underscores +} +#else +simdjson_inline long long int count_ones(uint64_t input_num) { + return _popcnt64(input_num); +} +#endif + +simdjson_inline bool add_overflow(uint64_t value1, uint64_t value2, + uint64_t *result) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + return _addcarry_u64(0, value1, value2, + reinterpret_cast<unsigned __int64 *>(result)); +#else + return __builtin_uaddll_overflow(value1, value2, + reinterpret_cast<unsigned long long *>(result)); +#endif +} + +} // unnamed namespace +} // namespace icelake +} // namespace simdjson + +#endif // SIMDJSON_ICELAKE_BITMANIPULATION_H diff --git a/contrib/libs/simdjson/include/simdjson/icelake/bitmask.h b/contrib/libs/simdjson/include/simdjson/icelake/bitmask.h new file mode 100644 index 00000000000..ed55962fa49 --- /dev/null +++ b/contrib/libs/simdjson/include/simdjson/icelake/bitmask.h @@ -0,0 +1,30 @@ +#ifndef SIMDJSON_ICELAKE_BITMASK_H +#define SIMDJSON_ICELAKE_BITMASK_H + +#ifndef SIMDJSON_CONDITIONAL_INCLUDE +#include "simdjson/icelake/base.h" +#include "simdjson/icelake/intrinsics.h" +#endif // SIMDJSON_CONDITIONAL_INCLUDE + +namespace simdjson { +namespace icelake { +namespace { + +// +// Perform a "cumulative bitwise xor," flipping bits each time a 1 is encountered. +// +// For example, prefix_xor(00100100) == 00011100 +// +simdjson_inline uint64_t prefix_xor(const uint64_t bitmask) { + // There should be no such thing with a processor supporting avx2 + // but not clmul. + __m128i all_ones = _mm_set1_epi8('\xFF'); + __m128i result = _mm_clmulepi64_si128(_mm_set_epi64x(0ULL, bitmask), all_ones, 0); + return _mm_cvtsi128_si64(result); +} + +} // unnamed namespace +} // namespace icelake +} // namespace simdjson + +#endif // SIMDJSON_ICELAKE_BITMASK_H diff --git a/contrib/libs/simdjson/include/simdjson/icelake/end.h b/contrib/libs/simdjson/include/simdjson/icelake/end.h new file mode 100644 index 00000000000..2accd5fb0d5 --- /dev/null +++ b/contrib/libs/simdjson/include/simdjson/icelake/end.h @@ -0,0 +1,9 @@ +#ifndef SIMDJSON_CONDITIONAL_INCLUDE +#include "simdjson/icelake/base.h" +#endif // SIMDJSON_CONDITIONAL_INCLUDE + +#if !SIMDJSON_CAN_ALWAYS_RUN_ICELAKE +SIMDJSON_UNTARGET_REGION +#endif + +#undef SIMDJSON_IMPLEMENTATION diff --git a/contrib/libs/simdjson/include/simdjson/icelake/implementation.h b/contrib/libs/simdjson/include/simdjson/icelake/implementation.h new file mode 100644 index 00000000000..940c5f9923c --- /dev/null +++ b/contrib/libs/simdjson/include/simdjson/icelake/implementation.h @@ -0,0 +1,36 @@ +#ifndef SIMDJSON_ICELAKE_IMPLEMENTATION_H +#define SIMDJSON_ICELAKE_IMPLEMENTATION_H + +#ifndef SIMDJSON_CONDITIONAL_INCLUDE +#include "simdjson/icelake/base.h" +#include "simdjson/implementation.h" +#include "simdjson/internal/instruction_set.h" +#endif // SIMDJSON_CONDITIONAL_INCLUDE + +// The constructor may be executed on any host, so we take care not to use SIMDJSON_TARGET_ICELAKE +namespace simdjson { +namespace icelake { + +/** + * @private + */ +class implementation final : public simdjson::implementation { +public: + simdjson_inline implementation() : simdjson::implementation( + "icelake", + "Intel/AMD AVX512", + internal::instruction_set::AVX2 | internal::instruction_set::PCLMULQDQ | internal::instruction_set::BMI1 | internal::instruction_set::BMI2 | internal::instruction_set::AVX512F | internal::instruction_set::AVX512DQ | internal::instruction_set::AVX512CD | internal::instruction_set::AVX512BW | internal::instruction_set::AVX512VL | internal::instruction_set::AVX512VBMI2 + ) {} + simdjson_warn_unused error_code create_dom_parser_implementation( + size_t capacity, + size_t max_length, + std::unique_ptr<internal::dom_parser_implementation>& dst + ) const noexcept final; + simdjson_warn_unused error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept final; + simdjson_warn_unused bool validate_utf8(const char *buf, size_t len) const noexcept final; +}; + +} // namespace icelake +} // namespace simdjson + +#endif // SIMDJSON_ICELAKE_IMPLEMENTATION_H diff --git a/contrib/libs/simdjson/include/simdjson/icelake/intrinsics.h b/contrib/libs/simdjson/include/simdjson/icelake/intrinsics.h new file mode 100644 index 00000000000..4c68c290c1e --- /dev/null +++ b/contrib/libs/simdjson/include/simdjson/icelake/intrinsics.h @@ -0,0 +1,60 @@ +#ifndef SIMDJSON_ICELAKE_INTRINSICS_H +#define SIMDJSON_ICELAKE_INTRINSICS_H + +#ifndef SIMDJSON_CONDITIONAL_INCLUDE +#include "simdjson/icelake/base.h" +#endif // SIMDJSON_CONDITIONAL_INCLUDE + +#if SIMDJSON_VISUAL_STUDIO +// under clang within visual studio, this will include <x86intrin.h> +#include <intrin.h> // visual studio or clang +#else +#include <x86intrin.h> // elsewhere +#endif // SIMDJSON_VISUAL_STUDIO + +#if SIMDJSON_CLANG_VISUAL_STUDIO +/** + * You are not supposed, normally, to include these + * headers directly. Instead you should either include intrin.h + * or x86intrin.h. However, when compiling with clang + * under Windows (i.e., when _MSC_VER is set), these headers + * only get included *if* the corresponding features are detected + * from macros: + * e.g., if __AVX2__ is set... in turn, we normally set these + * macros by compiling against the corresponding architecture + * (e.g., arch:AVX2, -mavx2, etc.) which compiles the whole + * software with these advanced instructions. In simdjson, we + * want to compile the whole program for a generic target, + * and only target our specific kernels. As a workaround, + * we directly include the needed headers. These headers would + * normally guard against such usage, but we carefully included + * <x86intrin.h> (or <intrin.h>) before, so the headers + * are fooled. + */ +#include <bmiintrin.h> // for _blsr_u64 +#include <lzcntintrin.h> // for __lzcnt64 +#include <immintrin.h> // for most things (AVX2, AVX512, _popcnt64) +#include <smmintrin.h> +#include <tmmintrin.h> +#include <avxintrin.h> +#include <avx2intrin.h> +#include <wmmintrin.h> // for _mm_clmulepi64_si128 +// Important: we need the AVX-512 headers: +#include <avx512fintrin.h> +#include <avx512dqintrin.h> +#include <avx512cdintrin.h> +#include <avx512bwintrin.h> +#include <avx512vlintrin.h> +#include <avx512vbmiintrin.h> +#include <avx512vbmi2intrin.h> +// unfortunately, we may not get _blsr_u64, but, thankfully, clang +// has it as a macro. +#ifndef _blsr_u64 +// we roll our own +#define _blsr_u64(n) ((n - 1) & n) +#endif // _blsr_u64 +#endif // SIMDJSON_CLANG_VISUAL_STUDIO + +static_assert(sizeof(__m512i) <= simdjson::SIMDJSON_PADDING, "insufficient padding for icelake"); + +#endif // SIMDJSON_ICELAKE_INTRINSICS_H diff --git a/contrib/libs/simdjson/include/simdjson/icelake/numberparsing_defs.h b/contrib/libs/simdjson/include/simdjson/icelake/numberparsing_defs.h new file mode 100644 index 00000000000..b095cf6c66c --- /dev/null +++ b/contrib/libs/simdjson/include/simdjson/icelake/numberparsing_defs.h @@ -0,0 +1,57 @@ +#ifndef SIMDJSON_ICELAKE_NUMBERPARSING_DEFS_H +#define SIMDJSON_ICELAKE_NUMBERPARSING_DEFS_H + +#ifndef SIMDJSON_CONDITIONAL_INCLUDE +#include "simdjson/icelake/base.h" +#include "simdjson/icelake/intrinsics.h" +#include "simdjson/internal/numberparsing_tables.h" +#endif // SIMDJSON_CONDITIONAL_INCLUDE + +namespace simdjson { +namespace icelake { +namespace numberparsing { + +static simdjson_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) { + // this actually computes *16* values so we are being wasteful. + const __m128i ascii0 = _mm_set1_epi8('0'); + const __m128i mul_1_10 = + _mm_setr_epi8(10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1); + const __m128i mul_1_100 = _mm_setr_epi16(100, 1, 100, 1, 100, 1, 100, 1); + const __m128i mul_1_10000 = + _mm_setr_epi16(10000, 1, 10000, 1, 10000, 1, 10000, 1); + const __m128i input = _mm_sub_epi8( + _mm_loadu_si128(reinterpret_cast<const __m128i *>(chars)), ascii0); + const __m128i t1 = _mm_maddubs_epi16(input, mul_1_10); + const __m128i t2 = _mm_madd_epi16(t1, mul_1_100); + const __m128i t3 = _mm_packus_epi32(t2, t2); + const __m128i t4 = _mm_madd_epi16(t3, mul_1_10000); + return _mm_cvtsi128_si32( + t4); // only captures the sum of the first 8 digits, drop the rest +} + +/** @private */ +simdjson_inline internal::value128 full_multiplication(uint64_t value1, uint64_t value2) { + internal::value128 answer; +#if SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS +#if SIMDJSON_IS_ARM64 + // ARM64 has native support for 64-bit multiplications, no need to emultate + answer.high = __umulh(value1, value2); + answer.low = value1 * value2; +#else + answer.low = _umul128(value1, value2, &answer.high); // _umul128 not available on ARM64 +#endif // SIMDJSON_IS_ARM64 +#else // SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS + __uint128_t r = (static_cast<__uint128_t>(value1)) * value2; + answer.low = uint64_t(r); + answer.high = uint64_t(r >> 64); +#endif + return answer; +} + +} // namespace numberparsing +} // namespace icelake +} // namespace simdjson + +#define SIMDJSON_SWAR_NUMBER_PARSING 1 + +#endif // SIMDJSON_ICELAKE_NUMBERPARSING_DEFS_H diff --git a/contrib/libs/simdjson/include/simdjson/icelake/simd.h b/contrib/libs/simdjson/include/simdjson/icelake/simd.h new file mode 100644 index 00000000000..04203f4b9a5 --- /dev/null +++ b/contrib/libs/simdjson/include/simdjson/icelake/simd.h @@ -0,0 +1,372 @@ +#ifndef SIMDJSON_ICELAKE_SIMD_H +#define SIMDJSON_ICELAKE_SIMD_H + +#ifndef SIMDJSON_CONDITIONAL_INCLUDE +#include "simdjson/icelake/base.h" +#include "simdjson/icelake/intrinsics.h" +#include "simdjson/icelake/bitmanipulation.h" +#include "simdjson/internal/simdprune_tables.h" +#endif // SIMDJSON_CONDITIONAL_INCLUDE + +#if defined(__GNUC__) && !defined(__clang__) +#if __GNUC__ == 8 +#define SIMDJSON_GCC8 1 +#endif // __GNUC__ == 8 +#endif // defined(__GNUC__) && !defined(__clang__) + +#if SIMDJSON_GCC8 +/** + * GCC 8 fails to provide _mm512_set_epi8. We roll our own. + */ +inline __m512i _mm512_set_epi8(uint8_t a0, uint8_t a1, uint8_t a2, uint8_t a3, uint8_t a4, uint8_t a5, uint8_t a6, uint8_t a7, uint8_t a8, uint8_t a9, uint8_t a10, uint8_t a11, uint8_t a12, uint8_t a13, uint8_t a14, uint8_t a15, uint8_t a16, uint8_t a17, uint8_t a18, uint8_t a19, uint8_t a20, uint8_t a21, uint8_t a22, uint8_t a23, uint8_t a24, uint8_t a25, uint8_t a26, uint8_t a27, uint8_t a28, uint8_t a29, uint8_t a30, uint8_t a31, uint8_t a32, uint8_t a33, uint8_t a34, uint8_t a35, uint8_t a36, uint8_t a37, uint8_t a38, uint8_t a39, uint8_t a40, uint8_t a41, uint8_t a42, uint8_t a43, uint8_t a44, uint8_t a45, uint8_t a46, uint8_t a47, uint8_t a48, uint8_t a49, uint8_t a50, uint8_t a51, uint8_t a52, uint8_t a53, uint8_t a54, uint8_t a55, uint8_t a56, uint8_t a57, uint8_t a58, uint8_t a59, uint8_t a60, uint8_t a61, uint8_t a62, uint8_t a63) { + return _mm512_set_epi64(uint64_t(a7) + (uint64_t(a6) << 8) + (uint64_t(a5) << 16) + (uint64_t(a4) << 24) + (uint64_t(a3) << 32) + (uint64_t(a2) << 40) + (uint64_t(a1) << 48) + (uint64_t(a0) << 56), + uint64_t(a15) + (uint64_t(a14) << 8) + (uint64_t(a13) << 16) + (uint64_t(a12) << 24) + (uint64_t(a11) << 32) + (uint64_t(a10) << 40) + (uint64_t(a9) << 48) + (uint64_t(a8) << 56), + uint64_t(a23) + (uint64_t(a22) << 8) + (uint64_t(a21) << 16) + (uint64_t(a20) << 24) + (uint64_t(a19) << 32) + (uint64_t(a18) << 40) + (uint64_t(a17) << 48) + (uint64_t(a16) << 56), + uint64_t(a31) + (uint64_t(a30) << 8) + (uint64_t(a29) << 16) + (uint64_t(a28) << 24) + (uint64_t(a27) << 32) + (uint64_t(a26) << 40) + (uint64_t(a25) << 48) + (uint64_t(a24) << 56), + uint64_t(a39) + (uint64_t(a38) << 8) + (uint64_t(a37) << 16) + (uint64_t(a36) << 24) + (uint64_t(a35) << 32) + (uint64_t(a34) << 40) + (uint64_t(a33) << 48) + (uint64_t(a32) << 56), + uint64_t(a47) + (uint64_t(a46) << 8) + (uint64_t(a45) << 16) + (uint64_t(a44) << 24) + (uint64_t(a43) << 32) + (uint64_t(a42) << 40) + (uint64_t(a41) << 48) + (uint64_t(a40) << 56), + uint64_t(a55) + (uint64_t(a54) << 8) + (uint64_t(a53) << 16) + (uint64_t(a52) << 24) + (uint64_t(a51) << 32) + (uint64_t(a50) << 40) + (uint64_t(a49) << 48) + (uint64_t(a48) << 56), + uint64_t(a63) + (uint64_t(a62) << 8) + (uint64_t(a61) << 16) + (uint64_t(a60) << 24) + (uint64_t(a59) << 32) + (uint64_t(a58) << 40) + (uint64_t(a57) << 48) + (uint64_t(a56) << 56)); +} +#endif // SIMDJSON_GCC8 + + + +namespace simdjson { +namespace icelake { +namespace { +namespace simd { + + // Forward-declared so they can be used by splat and friends. + template<typename Child> + struct base { + __m512i value; + + // Zero constructor + simdjson_inline base() : value{__m512i()} {} + + // Conversion from SIMD register + simdjson_inline base(const __m512i _value) : value(_value) {} + + // Conversion to SIMD register + simdjson_inline operator const __m512i&() const { return this->value; } + simdjson_inline operator __m512i&() { return this->value; } + + // Bit operations + simdjson_inline Child operator|(const Child other) const { return _mm512_or_si512(*this, other); } + simdjson_inline Child operator&(const Child other) const { return _mm512_and_si512(*this, other); } + simdjson_inline Child operator^(const Child other) const { return _mm512_xor_si512(*this, other); } + simdjson_inline Child bit_andnot(const Child other) const { return _mm512_andnot_si512(other, *this); } + simdjson_inline Child& operator|=(const Child other) { auto this_cast = static_cast<Child*>(this); *this_cast = *this_cast | other; return *this_cast; } + simdjson_inline Child& operator&=(const Child other) { auto this_cast = static_cast<Child*>(this); *this_cast = *this_cast & other; return *this_cast; } + simdjson_inline Child& operator^=(const Child other) { auto this_cast = static_cast<Child*>(this); *this_cast = *this_cast ^ other; return *this_cast; } + }; + + // Forward-declared so they can be used by splat and friends. + template<typename T> + struct simd8; + + template<typename T, typename Mask=simd8<bool>> + struct base8: base<simd8<T>> { + typedef uint32_t bitmask_t; + typedef uint64_t bitmask2_t; + + simdjson_inline base8() : base<simd8<T>>() {} + simdjson_inline base8(const __m512i _value) : base<simd8<T>>(_value) {} + + friend simdjson_really_inline uint64_t operator==(const simd8<T> lhs, const simd8<T> rhs) { + return _mm512_cmpeq_epi8_mask(lhs, rhs); + } + + static const int SIZE = sizeof(base<T>::value); + + template<int N=1> + simdjson_inline simd8<T> prev(const simd8<T> prev_chunk) const { + // workaround for compilers unable to figure out that 16 - N is a constant (GCC 8) + constexpr int shift = 16 - N; + return _mm512_alignr_epi8(*this, _mm512_permutex2var_epi64(prev_chunk, _mm512_set_epi64(13, 12, 11, 10, 9, 8, 7, 6), *this), shift); + } + }; + + // SIMD byte mask type (returned by things like eq and gt) + template<> + struct simd8<bool>: base8<bool> { + static simdjson_inline simd8<bool> splat(bool _value) { return _mm512_set1_epi8(uint8_t(-(!!_value))); } + + simdjson_inline simd8() : base8() {} + simdjson_inline simd8(const __m512i _value) : base8<bool>(_value) {} + // Splat constructor + simdjson_inline simd8(bool _value) : base8<bool>(splat(_value)) {} + simdjson_inline bool any() const { return !!_mm512_test_epi8_mask (*this, *this); } + simdjson_inline simd8<bool> operator~() const { return *this ^ true; } + }; + + template<typename T> + struct base8_numeric: base8<T> { + static simdjson_inline simd8<T> splat(T _value) { return _mm512_set1_epi8(_value); } + static simdjson_inline simd8<T> zero() { return _mm512_setzero_si512(); } + static simdjson_inline simd8<T> load(const T values[64]) { + return _mm512_loadu_si512(reinterpret_cast<const __m512i *>(values)); + } + // Repeat 16 values as many times as necessary (usually for lookup tables) + static simdjson_inline simd8<T> repeat_16( + T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, + T v8, T v9, T v10, T v11, T v12, T v13, T v14, T v15 + ) { + return simd8<T>( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + simdjson_inline base8_numeric() : base8<T>() {} + simdjson_inline base8_numeric(const __m512i _value) : base8<T>(_value) {} + + // Store to array + simdjson_inline void store(T dst[64]) const { return _mm512_storeu_si512(reinterpret_cast<__m512i *>(dst), *this); } + + // Addition/subtraction are the same for signed and unsigned + simdjson_inline simd8<T> operator+(const simd8<T> other) const { return _mm512_add_epi8(*this, other); } + simdjson_inline simd8<T> operator-(const simd8<T> other) const { return _mm512_sub_epi8(*this, other); } + simdjson_inline simd8<T>& operator+=(const simd8<T> other) { *this = *this + other; return *static_cast<simd8<T>*>(this); } + simdjson_inline simd8<T>& operator-=(const simd8<T> other) { *this = *this - other; return *static_cast<simd8<T>*>(this); } + + // Override to distinguish from bool version + simdjson_inline simd8<T> operator~() const { return *this ^ 0xFFu; } + + // Perform a lookup assuming the value is between 0 and 16 (undefined behavior for out of range values) + template<typename L> + simdjson_inline simd8<L> lookup_16(simd8<L> lookup_table) const { + return _mm512_shuffle_epi8(lookup_table, *this); + } + + // Copies to 'output" all bytes corresponding to a 0 in the mask (interpreted as a bitset). + // Passing a 0 value for mask would be equivalent to writing out every byte to output. + // Only the first 32 - count_ones(mask) bytes of the result are significant but 32 bytes + // get written. + // Design consideration: it seems like a function with the + // signature simd8<L> compress(uint32_t mask) would be + // sensible, but the AVX ISA makes this kind of approach difficult. + template<typename L> + simdjson_inline void compress(uint64_t mask, L * output) const { + _mm512_mask_compressstoreu_epi8 (output,~mask,*this); + } + + template<typename L> + simdjson_inline simd8<L> lookup_16( + L replace0, L replace1, L replace2, L replace3, + L replace4, L replace5, L replace6, L replace7, + L replace8, L replace9, L replace10, L replace11, + L replace12, L replace13, L replace14, L replace15) const { + return lookup_16(simd8<L>::repeat_16( + replace0, replace1, replace2, replace3, + replace4, replace5, replace6, replace7, + replace8, replace9, replace10, replace11, + replace12, replace13, replace14, replace15 + )); + } + }; + + // Signed bytes + template<> + struct simd8<int8_t> : base8_numeric<int8_t> { + simdjson_inline simd8() : base8_numeric<int8_t>() {} + simdjson_inline simd8(const __m512i _value) : base8_numeric<int8_t>(_value) {} + // Splat constructor + simdjson_inline simd8(int8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const int8_t values[64]) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15, + int8_t v16, int8_t v17, int8_t v18, int8_t v19, int8_t v20, int8_t v21, int8_t v22, int8_t v23, + int8_t v24, int8_t v25, int8_t v26, int8_t v27, int8_t v28, int8_t v29, int8_t v30, int8_t v31, + int8_t v32, int8_t v33, int8_t v34, int8_t v35, int8_t v36, int8_t v37, int8_t v38, int8_t v39, + int8_t v40, int8_t v41, int8_t v42, int8_t v43, int8_t v44, int8_t v45, int8_t v46, int8_t v47, + int8_t v48, int8_t v49, int8_t v50, int8_t v51, int8_t v52, int8_t v53, int8_t v54, int8_t v55, + int8_t v56, int8_t v57, int8_t v58, int8_t v59, int8_t v60, int8_t v61, int8_t v62, int8_t v63 + ) : simd8(_mm512_set_epi8( + v63, v62, v61, v60, v59, v58, v57, v56, + v55, v54, v53, v52, v51, v50, v49, v48, + v47, v46, v45, v44, v43, v42, v41, v40, + v39, v38, v37, v36, v35, v34, v33, v32, + v31, v30, v29, v28, v27, v26, v25, v24, + v23, v22, v21, v20, v19, v18, v17, v16, + v15, v14, v13, v12, v11, v10, v9, v8, + v7, v6, v5, v4, v3, v2, v1, v0 + )) {} + + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8<int8_t> repeat_16( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 + ) { + return simd8<int8_t>( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + // Order-sensitive comparisons + simdjson_inline simd8<int8_t> max_val(const simd8<int8_t> other) const { return _mm512_max_epi8(*this, other); } + simdjson_inline simd8<int8_t> min_val(const simd8<int8_t> other) const { return _mm512_min_epi8(*this, other); } + + simdjson_inline simd8<bool> operator>(const simd8<int8_t> other) const { return _mm512_maskz_abs_epi8(_mm512_cmpgt_epi8_mask(*this, other),_mm512_set1_epi8(uint8_t(0x80))); } + simdjson_inline simd8<bool> operator<(const simd8<int8_t> other) const { return _mm512_maskz_abs_epi8(_mm512_cmpgt_epi8_mask(other, *this),_mm512_set1_epi8(uint8_t(0x80))); } + }; + + // Unsigned bytes + template<> + struct simd8<uint8_t>: base8_numeric<uint8_t> { + simdjson_inline simd8() : base8_numeric<uint8_t>() {} + simdjson_inline simd8(const __m512i _value) : base8_numeric<uint8_t>(_value) {} + // Splat constructor + simdjson_inline simd8(uint8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const uint8_t values[64]) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15, + uint8_t v16, uint8_t v17, uint8_t v18, uint8_t v19, uint8_t v20, uint8_t v21, uint8_t v22, uint8_t v23, + uint8_t v24, uint8_t v25, uint8_t v26, uint8_t v27, uint8_t v28, uint8_t v29, uint8_t v30, uint8_t v31, + uint8_t v32, uint8_t v33, uint8_t v34, uint8_t v35, uint8_t v36, uint8_t v37, uint8_t v38, uint8_t v39, + uint8_t v40, uint8_t v41, uint8_t v42, uint8_t v43, uint8_t v44, uint8_t v45, uint8_t v46, uint8_t v47, + uint8_t v48, uint8_t v49, uint8_t v50, uint8_t v51, uint8_t v52, uint8_t v53, uint8_t v54, uint8_t v55, + uint8_t v56, uint8_t v57, uint8_t v58, uint8_t v59, uint8_t v60, uint8_t v61, uint8_t v62, uint8_t v63 + ) : simd8(_mm512_set_epi8( + v63, v62, v61, v60, v59, v58, v57, v56, + v55, v54, v53, v52, v51, v50, v49, v48, + v47, v46, v45, v44, v43, v42, v41, v40, + v39, v38, v37, v36, v35, v34, v33, v32, + v31, v30, v29, v28, v27, v26, v25, v24, + v23, v22, v21, v20, v19, v18, v17, v16, + v15, v14, v13, v12, v11, v10, v9, v8, + v7, v6, v5, v4, v3, v2, v1, v0 + )) {} + + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8<uint8_t> repeat_16( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 + ) { + return simd8<uint8_t>( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + // Saturated math + simdjson_inline simd8<uint8_t> saturating_add(const simd8<uint8_t> other) const { return _mm512_adds_epu8(*this, other); } + simdjson_inline simd8<uint8_t> saturating_sub(const simd8<uint8_t> other) const { return _mm512_subs_epu8(*this, other); } + + // Order-specific operations + simdjson_inline simd8<uint8_t> max_val(const simd8<uint8_t> other) const { return _mm512_max_epu8(*this, other); } + simdjson_inline simd8<uint8_t> min_val(const simd8<uint8_t> other) const { return _mm512_min_epu8(other, *this); } + // Same as >, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8<uint8_t> gt_bits(const simd8<uint8_t> other) const { return this->saturating_sub(other); } + // Same as <, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8<uint8_t> lt_bits(const simd8<uint8_t> other) const { return other.saturating_sub(*this); } + simdjson_inline uint64_t operator<=(const simd8<uint8_t> other) const { return other.max_val(*this) == other; } + simdjson_inline uint64_t operator>=(const simd8<uint8_t> other) const { return other.min_val(*this) == other; } + simdjson_inline simd8<bool> operator>(const simd8<uint8_t> other) const { return this->gt_bits(other).any_bits_set(); } + simdjson_inline simd8<bool> operator<(const simd8<uint8_t> other) const { return this->lt_bits(other).any_bits_set(); } + + // Bit-specific operations + simdjson_inline simd8<bool> bits_not_set() const { return _mm512_mask_blend_epi8(*this == uint8_t(0), _mm512_set1_epi8(0), _mm512_set1_epi8(-1)); } + simdjson_inline simd8<bool> bits_not_set(simd8<uint8_t> bits) const { return (*this & bits).bits_not_set(); } + simdjson_inline simd8<bool> any_bits_set() const { return ~this->bits_not_set(); } + simdjson_inline simd8<bool> any_bits_set(simd8<uint8_t> bits) const { return ~this->bits_not_set(bits); } + + simdjson_inline bool is_ascii() const { return _mm512_movepi8_mask(*this) == 0; } + simdjson_inline bool bits_not_set_anywhere() const { + return !_mm512_test_epi8_mask(*this, *this); + } + simdjson_inline bool any_bits_set_anywhere() const { return !bits_not_set_anywhere(); } + simdjson_inline bool bits_not_set_anywhere(simd8<uint8_t> bits) const { return !_mm512_test_epi8_mask(*this, bits); } + simdjson_inline bool any_bits_set_anywhere(simd8<uint8_t> bits) const { return !bits_not_set_anywhere(bits); } + template<int N> + simdjson_inline simd8<uint8_t> shr() const { return simd8<uint8_t>(_mm512_srli_epi16(*this, N)) & uint8_t(0xFFu >> N); } + template<int N> + simdjson_inline simd8<uint8_t> shl() const { return simd8<uint8_t>(_mm512_slli_epi16(*this, N)) & uint8_t(0xFFu << N); } + // Get one of the bits and make a bitmask out of it. + // e.g. value.get_bit<7>() gets the high bit + template<int N> + simdjson_inline uint64_t get_bit() const { return _mm512_movepi8_mask(_mm512_slli_epi16(*this, 7-N)); } + }; + + template<typename T> + struct simd8x64 { + static constexpr int NUM_CHUNKS = 64 / sizeof(simd8<T>); + static_assert(NUM_CHUNKS == 1, "Icelake kernel should use one register per 64-byte block."); + const simd8<T> chunks[NUM_CHUNKS]; + + simd8x64(const simd8x64<T>& o) = delete; // no copy allowed + simd8x64<T>& operator=(const simd8<T>& other) = delete; // no assignment allowed + simd8x64() = delete; // no default constructor allowed + + simdjson_inline simd8x64(const simd8<T> chunk0, const simd8<T> chunk1) : chunks{chunk0, chunk1} {} + simdjson_inline simd8x64(const simd8<T> chunk0) : chunks{chunk0} {} + simdjson_inline simd8x64(const T ptr[64]) : chunks{simd8<T>::load(ptr)} {} + + simdjson_inline uint64_t compress(uint64_t mask, T * output) const { + this->chunks[0].compress(mask, output); + return 64 - count_ones(mask); + } + + simdjson_inline void store(T ptr[64]) const { + this->chunks[0].store(ptr+sizeof(simd8<T>)*0); + } + + simdjson_inline simd8<T> reduce_or() const { + return this->chunks[0]; + } + + simdjson_inline simd8x64<T> bit_or(const T m) const { + const simd8<T> mask = simd8<T>::splat(m); + return simd8x64<T>( + this->chunks[0] | mask + ); + } + + simdjson_inline uint64_t eq(const T m) const { + const simd8<T> mask = simd8<T>::splat(m); + return this->chunks[0] == mask; + } + + simdjson_inline uint64_t eq(const simd8x64<uint8_t> &other) const { + return this->chunks[0] == other.chunks[0]; + } + + simdjson_inline uint64_t lteq(const T m) const { + const simd8<T> mask = simd8<T>::splat(m); + return this->chunks[0] <= mask; + } + }; // struct simd8x64<T> + +} // namespace simd + +} // unnamed namespace +} // namespace icelake +} // namespace simdjson + +#endif // SIMDJSON_ICELAKE_SIMD_H diff --git a/contrib/libs/simdjson/include/simdjson/icelake/stringparsing_defs.h b/contrib/libs/simdjson/include/simdjson/icelake/stringparsing_defs.h new file mode 100644 index 00000000000..4cc582737f2 --- /dev/null +++ b/contrib/libs/simdjson/include/simdjson/icelake/stringparsing_defs.h @@ -0,0 +1,48 @@ +#ifndef SIMDJSON_ICELAKE_STRINGPARSING_DEFS_H +#define SIMDJSON_ICELAKE_STRINGPARSING_DEFS_H + +#ifndef SIMDJSON_CONDITIONAL_INCLUDE +#include "simdjson/icelake/base.h" +#include "simdjson/icelake/simd.h" +#include "simdjson/icelake/bitmanipulation.h" +#endif // SIMDJSON_CONDITIONAL_INCLUDE + +namespace simdjson { +namespace icelake { +namespace { + +using namespace simd; + +// Holds backslashes and quotes locations. +struct backslash_and_quote { +public: + static constexpr uint32_t BYTES_PROCESSED = 64; + simdjson_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); + + simdjson_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; } + simdjson_inline bool has_backslash() { return ((quote_bits - 1) & bs_bits) != 0; } + simdjson_inline int quote_index() { return trailing_zeroes(quote_bits); } + simdjson_inline int backslash_index() { return trailing_zeroes(bs_bits); } + + uint64_t bs_bits; + uint64_t quote_bits; +}; // struct backslash_and_quote + +simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { + // this can read up to 15 bytes beyond the buffer size, but we require + // SIMDJSON_PADDING of padding + static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "backslash and quote finder must process fewer than SIMDJSON_PADDING bytes"); + simd8<uint8_t> v(src); + // store to dest unconditionally - we can overwrite the bits we don't like later + v.store(dst); + return { + static_cast<uint64_t>(v == '\\'), // bs_bits + static_cast<uint64_t>(v == '"'), // quote_bits + }; +} + +} // unnamed namespace +} // namespace icelake +} // namespace simdjson + +#endif // SIMDJSON_ICELAKE_STRINGPARSING_DEFS_H diff --git a/contrib/libs/simdjson/include/simdjson/implementation.h b/contrib/libs/simdjson/include/simdjson/implementation.h new file mode 100644 index 00000000000..19cb3716297 --- /dev/null +++ b/contrib/libs/simdjson/include/simdjson/implementation.h @@ -0,0 +1,230 @@ +#ifndef SIMDJSON_IMPLEMENTATION_H +#define SIMDJSON_IMPLEMENTATION_H + +#include "simdjson/internal/atomic_ptr.h" +#include "simdjson/internal/dom_parser_implementation.h" + +#include <memory> + +namespace simdjson { + +/** + * Validate the UTF-8 string. + * + * @param buf the string to validate. + * @param len the length of the string in bytes. + * @return true if the string is valid UTF-8. + */ +simdjson_warn_unused bool validate_utf8(const char * buf, size_t len) noexcept; +/** + * Validate the UTF-8 string. + * + * @param sv the string_view to validate. + * @return true if the string is valid UTF-8. + */ +simdjson_inline simdjson_warn_unused bool validate_utf8(const std::string_view sv) noexcept { + return validate_utf8(sv.data(), sv.size()); +} + +/** + * Validate the UTF-8 string. + * + * @param p the string to validate. + * @return true if the string is valid UTF-8. + */ +simdjson_inline simdjson_warn_unused bool validate_utf8(const std::string& s) noexcept { + return validate_utf8(s.data(), s.size()); +} + +/** + * An implementation of simdjson for a particular CPU architecture. + * + * Also used to maintain the currently active implementation. The active implementation is + * automatically initialized on first use to the most advanced implementation supported by the host. + */ +class implementation { +public: + + /** + * The name of this implementation. + * + * const implementation *impl = simdjson::get_active_implementation(); + * cout << "simdjson is optimized for " << impl->name() << "(" << impl->description() << ")" << endl; + * + * @return the name of the implementation, e.g. "haswell", "westmere", "arm64". + */ + virtual std::string name() const { return std::string(_name); } + + /** + * The description of this implementation. + * + * const implementation *impl = simdjson::get_active_implementation(); + * cout << "simdjson is optimized for " << impl->name() << "(" << impl->description() << ")" << endl; + * + * @return the description of the implementation, e.g. "Intel/AMD AVX2", "Intel/AMD SSE4.2", "ARM NEON". + */ + virtual std::string description() const { return std::string(_description); } + + /** + * The instruction sets this implementation is compiled against + * and the current CPU match. This function may poll the current CPU/system + * and should therefore not be called too often if performance is a concern. + * + * @return true if the implementation can be safely used on the current system (determined at runtime). + */ + bool supported_by_runtime_system() const; + + /** + * @private For internal implementation use + * + * The instruction sets this implementation is compiled against. + * + * @return a mask of all required `internal::instruction_set::` values. + */ + virtual uint32_t required_instruction_sets() const { return _required_instruction_sets; } + + /** + * @private For internal implementation use + * + * const implementation *impl = simdjson::get_active_implementation(); + * cout << "simdjson is optimized for " << impl->name() << "(" << impl->description() << ")" << endl; + * + * @param capacity The largest document that will be passed to the parser. + * @param max_depth The maximum JSON object/array nesting this parser is expected to handle. + * @param dst The place to put the resulting parser implementation. + * @return the error code, or SUCCESS if there was no error. + */ + virtual error_code create_dom_parser_implementation( + size_t capacity, + size_t max_depth, + std::unique_ptr<internal::dom_parser_implementation> &dst + ) const noexcept = 0; + + /** + * @private For internal implementation use + * + * Minify the input string assuming that it represents a JSON string, does not parse or validate. + * + * Overridden by each implementation. + * + * @param buf the json document to minify. + * @param len the length of the json document. + * @param dst the buffer to write the minified document to. *MUST* be allocated up to len + SIMDJSON_PADDING bytes. + * @param dst_len the number of bytes written. Output only. + * @return the error code, or SUCCESS if there was no error. + */ + simdjson_warn_unused virtual error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept = 0; + + + /** + * Validate the UTF-8 string. + * + * Overridden by each implementation. + * + * @param buf the string to validate. + * @param len the length of the string in bytes. + * @return true if and only if the string is valid UTF-8. + */ + simdjson_warn_unused virtual bool validate_utf8(const char *buf, size_t len) const noexcept = 0; + +protected: + /** @private Construct an implementation with the given name and description. For subclasses. */ + simdjson_inline implementation( + std::string_view name, + std::string_view description, + uint32_t required_instruction_sets + ) : + _name(name), + _description(description), + _required_instruction_sets(required_instruction_sets) + { + } +protected: + ~implementation() = default; + +private: + /** + * The name of this implementation. + */ + std::string_view _name; + + /** + * The description of this implementation. + */ + std::string_view _description; + + /** + * Instruction sets required for this implementation. + */ + const uint32_t _required_instruction_sets; +}; + +/** @private */ +namespace internal { + +/** + * The list of available implementations compiled into simdjson. + */ +class available_implementation_list { +public: + /** Get the list of available implementations compiled into simdjson */ + simdjson_inline available_implementation_list() {} + /** Number of implementations */ + size_t size() const noexcept; + /** STL const begin() iterator */ + const implementation * const *begin() const noexcept; + /** STL const end() iterator */ + const implementation * const *end() const noexcept; + + /** + * Get the implementation with the given name. + * + * Case sensitive. + * + * const implementation *impl = simdjson::get_available_implementations()["westmere"]; + * if (!impl) { exit(1); } + * if (!imp->supported_by_runtime_system()) { exit(1); } + * simdjson::get_active_implementation() = impl; + * + * @param name the implementation to find, e.g. "westmere", "haswell", "arm64" + * @return the implementation, or nullptr if the parse failed. + */ + const implementation * operator[](const std::string_view &name) const noexcept { + for (const implementation * impl : *this) { + if (impl->name() == name) { return impl; } + } + return nullptr; + } + + /** + * Detect the most advanced implementation supported by the current host. + * + * This is used to initialize the implementation on startup. + * + * const implementation *impl = simdjson::available_implementation::detect_best_supported(); + * simdjson::get_active_implementation() = impl; + * + * @return the most advanced supported implementation for the current host, or an + * implementation that returns UNSUPPORTED_ARCHITECTURE if there is no supported + * implementation. Will never return nullptr. + */ + const implementation *detect_best_supported() const noexcept; +}; + +} // namespace internal + +/** + * The list of available implementations compiled into simdjson. + */ +extern SIMDJSON_DLLIMPORTEXPORT const internal::available_implementation_list& get_available_implementations(); + +/** + * The active implementation. + * + * Automatically initialized on first use to the most advanced implementation supported by this hardware. + */ +extern SIMDJSON_DLLIMPORTEXPORT internal::atomic_ptr<const implementation>& get_active_implementation(); + +} // namespace simdjson + +#endif // SIMDJSON_IMPLEMENTATION_H diff --git a/contrib/libs/simdjson/include/simdjson/implementation_detection.h b/contrib/libs/simdjson/include/simdjson/implementation_detection.h new file mode 100644 index 00000000000..0ff315b7adb --- /dev/null +++ b/contrib/libs/simdjson/include/simdjson/implementation_detection.h @@ -0,0 +1,168 @@ +#ifndef SIMDJSON_IMPLEMENTATION_DETECTION_H +#define SIMDJSON_IMPLEMENTATION_DETECTION_H + +#include "simdjson/base.h" + +// 0 is reserved, because undefined SIMDJSON_IMPLEMENTATION equals 0 in preprocessor macros. +#define SIMDJSON_IMPLEMENTATION_ID_arm64 1 +#define SIMDJSON_IMPLEMENTATION_ID_fallback 2 +#define SIMDJSON_IMPLEMENTATION_ID_haswell 3 +#define SIMDJSON_IMPLEMENTATION_ID_icelake 4 +#define SIMDJSON_IMPLEMENTATION_ID_ppc64 5 +#define SIMDJSON_IMPLEMENTATION_ID_westmere 6 +#define SIMDJSON_IMPLEMENTATION_ID_lsx 7 +#define SIMDJSON_IMPLEMENTATION_ID_lasx 8 + +#define SIMDJSON_IMPLEMENTATION_ID_FOR(IMPL) SIMDJSON_CAT(SIMDJSON_IMPLEMENTATION_ID_, IMPL) +#define SIMDJSON_IMPLEMENTATION_ID SIMDJSON_IMPLEMENTATION_ID_FOR(SIMDJSON_IMPLEMENTATION) + +#define SIMDJSON_IMPLEMENTATION_IS(IMPL) SIMDJSON_IMPLEMENTATION_ID == SIMDJSON_IMPLEMENTATION_ID_FOR(IMPL) + +// +// First, figure out which implementations can be run. Doing it here makes it so we don't have to worry about the order +// in which we include them. +// + +#ifndef SIMDJSON_IMPLEMENTATION_ARM64 +#define SIMDJSON_IMPLEMENTATION_ARM64 (SIMDJSON_IS_ARM64) +#endif +#if SIMDJSON_IMPLEMENTATION_ARM64 && SIMDJSON_IS_ARM64 +#define SIMDJSON_CAN_ALWAYS_RUN_ARM64 1 +#else +#define SIMDJSON_CAN_ALWAYS_RUN_ARM64 0 +#endif + +// Default Icelake to on if this is x86-64. Even if we're not compiled for it, it could be selected +// at runtime. +#ifndef SIMDJSON_IMPLEMENTATION_ICELAKE +#define SIMDJSON_IMPLEMENTATION_ICELAKE ((SIMDJSON_IS_X86_64) && (SIMDJSON_AVX512_ALLOWED) && (SIMDJSON_COMPILER_SUPPORTS_VBMI2)) +#endif + +#ifdef _MSC_VER +// To see why (__BMI__) && (__PCLMUL__) && (__LZCNT__) are not part of this next line, see +// https://github.com/simdjson/simdjson/issues/1247 +#if ((SIMDJSON_IMPLEMENTATION_ICELAKE) && (__AVX2__) && (__AVX512F__) && (__AVX512DQ__) && (__AVX512CD__) && (__AVX512BW__) && (__AVX512VL__) && (__AVX512VBMI2__)) +#define SIMDJSON_CAN_ALWAYS_RUN_ICELAKE 1 +#else +#define SIMDJSON_CAN_ALWAYS_RUN_ICELAKE 0 +#endif + +#else + +#if ((SIMDJSON_IMPLEMENTATION_ICELAKE) && (__AVX2__) && (__BMI__) && (__PCLMUL__) && (__LZCNT__) && (__AVX512F__) && (__AVX512DQ__) && (__AVX512CD__) && (__AVX512BW__) && (__AVX512VL__) && (__AVX512VBMI2__)) +#define SIMDJSON_CAN_ALWAYS_RUN_ICELAKE 1 +#else +#define SIMDJSON_CAN_ALWAYS_RUN_ICELAKE 0 +#endif + +#endif + +// Default Haswell to on if this is x86-64. Even if we're not compiled for it, it could be selected +// at runtime. +#ifndef SIMDJSON_IMPLEMENTATION_HASWELL +#if SIMDJSON_CAN_ALWAYS_RUN_ICELAKE +// if icelake is always available, never enable haswell. +#define SIMDJSON_IMPLEMENTATION_HASWELL 0 +#else +#define SIMDJSON_IMPLEMENTATION_HASWELL SIMDJSON_IS_X86_64 +#endif +#endif +#ifdef _MSC_VER +// To see why (__BMI__) && (__PCLMUL__) && (__LZCNT__) are not part of this next line, see +// https://github.com/simdjson/simdjson/issues/1247 +#if ((SIMDJSON_IMPLEMENTATION_HASWELL) && (SIMDJSON_IS_X86_64) && (__AVX2__)) +#define SIMDJSON_CAN_ALWAYS_RUN_HASWELL 1 +#else +#define SIMDJSON_CAN_ALWAYS_RUN_HASWELL 0 +#endif + +#else + +#if ((SIMDJSON_IMPLEMENTATION_HASWELL) && (SIMDJSON_IS_X86_64) && (__AVX2__) && (__BMI__) && (__PCLMUL__) && (__LZCNT__)) +#define SIMDJSON_CAN_ALWAYS_RUN_HASWELL 1 +#else +#define SIMDJSON_CAN_ALWAYS_RUN_HASWELL 0 +#endif + +#endif + +// Default Westmere to on if this is x86-64. +#ifndef SIMDJSON_IMPLEMENTATION_WESTMERE +#if SIMDJSON_CAN_ALWAYS_RUN_ICELAKE || SIMDJSON_CAN_ALWAYS_RUN_HASWELL +// if icelake or haswell are always available, never enable westmere. +#define SIMDJSON_IMPLEMENTATION_WESTMERE 0 +#else +#define SIMDJSON_IMPLEMENTATION_WESTMERE SIMDJSON_IS_X86_64 +#endif +#endif + +#if (SIMDJSON_IMPLEMENTATION_WESTMERE && SIMDJSON_IS_X86_64 && __SSE4_2__ && __PCLMUL__) +#define SIMDJSON_CAN_ALWAYS_RUN_WESTMERE 1 +#else +#define SIMDJSON_CAN_ALWAYS_RUN_WESTMERE 0 +#endif + + +#ifndef SIMDJSON_IMPLEMENTATION_PPC64 +#define SIMDJSON_IMPLEMENTATION_PPC64 (SIMDJSON_IS_PPC64 && SIMDJSON_IS_PPC64_VMX) +#endif +#if SIMDJSON_IMPLEMENTATION_PPC64 && SIMDJSON_IS_PPC64 && SIMDJSON_IS_PPC64_VMX +#define SIMDJSON_CAN_ALWAYS_RUN_PPC64 1 +#else +#define SIMDJSON_CAN_ALWAYS_RUN_PPC64 0 +#endif + +#ifndef SIMDJSON_IMPLEMENTATION_LASX +#define SIMDJSON_IMPLEMENTATION_LASX (SIMDJSON_IS_LOONGARCH64 && __loongarch_asx) +#endif +#define SIMDJSON_CAN_ALWAYS_RUN_LASX (SIMDJSON_IMPLEMENTATION_LASX) + +#ifndef SIMDJSON_IMPLEMENTATION_LSX +#if SIMDJSON_CAN_ALWAYS_RUN_LASX +#define SIMDJSON_IMPLEMENTATION_LSX 0 +#else +#define SIMDJSON_IMPLEMENTATION_LSX (SIMDJSON_IS_LOONGARCH64 && __loongarch_sx) +#endif +#endif +#define SIMDJSON_CAN_ALWAYS_RUN_LSX (SIMDJSON_IMPLEMENTATION_LSX) + +// Default Fallback to on unless a builtin implementation has already been selected. +#ifndef SIMDJSON_IMPLEMENTATION_FALLBACK +#if SIMDJSON_CAN_ALWAYS_RUN_ARM64 || SIMDJSON_CAN_ALWAYS_RUN_ICELAKE || SIMDJSON_CAN_ALWAYS_RUN_HASWELL || SIMDJSON_CAN_ALWAYS_RUN_WESTMERE || SIMDJSON_CAN_ALWAYS_RUN_PPC64 || SIMDJSON_CAN_ALWAYS_RUN_LSX || SIMDJSON_CAN_ALWAYS_RUN_LASX +// if anything at all except fallback can always run, then disable fallback. +#define SIMDJSON_IMPLEMENTATION_FALLBACK 0 +#else +#define SIMDJSON_IMPLEMENTATION_FALLBACK 1 +#endif +#endif +#define SIMDJSON_CAN_ALWAYS_RUN_FALLBACK SIMDJSON_IMPLEMENTATION_FALLBACK + +// Determine the best builtin implementation +#ifndef SIMDJSON_BUILTIN_IMPLEMENTATION + +#if SIMDJSON_CAN_ALWAYS_RUN_ICELAKE +#define SIMDJSON_BUILTIN_IMPLEMENTATION icelake +#elif SIMDJSON_CAN_ALWAYS_RUN_HASWELL +#define SIMDJSON_BUILTIN_IMPLEMENTATION haswell +#elif SIMDJSON_CAN_ALWAYS_RUN_WESTMERE +#define SIMDJSON_BUILTIN_IMPLEMENTATION westmere +#elif SIMDJSON_CAN_ALWAYS_RUN_ARM64 +#define SIMDJSON_BUILTIN_IMPLEMENTATION arm64 +#elif SIMDJSON_CAN_ALWAYS_RUN_PPC64 +#define SIMDJSON_BUILTIN_IMPLEMENTATION ppc64 +#elif SIMDJSON_CAN_ALWAYS_RUN_LSX +#define SIMDJSON_BUILTIN_IMPLEMENTATION lsx +#elif SIMDJSON_CAN_ALWAYS_RUN_LASX +#define SIMDJSON_BUILTIN_IMPLEMENTATION lasx +#elif SIMDJSON_CAN_ALWAYS_RUN_FALLBACK +#define SIMDJSON_BUILTIN_IMPLEMENTATION fallback +#else +#error "All possible implementations (including fallback) have been disabled! simdjson will not run." +#endif + +#endif // SIMDJSON_BUILTIN_IMPLEMENTATION + +#define SIMDJSON_BUILTIN_IMPLEMENTATION_ID SIMDJSON_IMPLEMENTATION_ID_FOR(SIMDJSON_BUILTIN_IMPLEMENTATION) +#define SIMDJSON_BUILTIN_IMPLEMENTATION_IS(IMPL) SIMDJSON_BUILTIN_IMPLEMENTATION_ID == SIMDJSON_IMPLEMENTATION_ID_FOR(IMPL) + +#endif // SIMDJSON_IMPLEMENTATION_DETECTION_H
\ No newline at end of file diff --git a/contrib/libs/simdjson/include/simdjson/internal/atomic_ptr.h b/contrib/libs/simdjson/include/simdjson/internal/atomic_ptr.h new file mode 100644 index 00000000000..c4fe41b05a6 --- /dev/null +++ b/contrib/libs/simdjson/include/simdjson/internal/atomic_ptr.h @@ -0,0 +1,31 @@ +#ifndef SIMDJSON_INTERNAL_ATOMIC_PTR_H +#define SIMDJSON_INTERNAL_ATOMIC_PTR_H + +#include "simdjson/base.h" +#include <atomic> + +namespace simdjson { +namespace internal { + +template<typename T> +class atomic_ptr { +public: + atomic_ptr(T *_ptr) : ptr{_ptr} {} + + operator const T*() const { return ptr.load(); } + const T& operator*() const { return *ptr; } + const T* operator->() const { return ptr.load(); } + + operator T*() { return ptr.load(); } + T& operator*() { return *ptr; } + T* operator->() { return ptr.load(); } + atomic_ptr& operator=(T *_ptr) { ptr = _ptr; return *this; } + +private: + std::atomic<T*> ptr; +}; + +} // namespace internal +} // namespace simdjson + +#endif // SIMDJSON_INTERNAL_ATOMIC_PTR_H diff --git a/contrib/libs/simdjson/include/simdjson/internal/dom_parser_implementation.h b/contrib/libs/simdjson/include/simdjson/internal/dom_parser_implementation.h new file mode 100644 index 00000000000..a93fe38ff9e --- /dev/null +++ b/contrib/libs/simdjson/include/simdjson/internal/dom_parser_implementation.h @@ -0,0 +1,252 @@ +#ifndef SIMDJSON_INTERNAL_DOM_PARSER_IMPLEMENTATION_H +#define SIMDJSON_INTERNAL_DOM_PARSER_IMPLEMENTATION_H + +#include "simdjson/base.h" +#include "simdjson/error.h" +#include <memory> + +namespace simdjson { + +namespace dom { +class document; +} // namespace dom + +/** +* This enum is used with the dom_parser_implementation::stage1 function. +* 1) The regular mode expects a fully formed JSON document. +* 2) The streaming_partial mode expects a possibly truncated +* input within a stream on JSON documents. +* 3) The stream_final mode allows us to truncate final +* unterminated strings. It is useful in conjunction with streaming_partial. +*/ +enum class stage1_mode { regular, streaming_partial, streaming_final}; + +/** + * Returns true if mode == streaming_partial or mode == streaming_final + */ +inline bool is_streaming(stage1_mode mode) { + // performance note: it is probably faster to check that mode is different + // from regular than checking that it is either streaming_partial or streaming_final. + return (mode != stage1_mode::regular); + // return (mode == stage1_mode::streaming_partial || mode == stage1_mode::streaming_final); +} + + +namespace internal { + + +/** + * An implementation of simdjson's DOM parser for a particular CPU architecture. + * + * This class is expected to be accessed only by pointer, and never move in memory (though the + * pointer can move). + */ +class dom_parser_implementation { +public: + + /** + * @private For internal implementation use + * + * Run a full JSON parse on a single document (stage1 + stage2). + * + * Guaranteed only to be called when capacity > document length. + * + * Overridden by each implementation. + * + * @param buf The json document to parse. *MUST* be allocated up to len + SIMDJSON_PADDING bytes. + * @param len The length of the json document. + * @return The error code, or SUCCESS if there was no error. + */ + simdjson_warn_unused virtual error_code parse(const uint8_t *buf, size_t len, dom::document &doc) noexcept = 0; + + /** + * @private For internal implementation use + * + * Stage 1 of the document parser. + * + * Guaranteed only to be called when capacity > document length. + * + * Overridden by each implementation. + * + * @param buf The json document to parse. + * @param len The length of the json document. + * @param streaming Whether this is being called by parser::parse_many. + * @return The error code, or SUCCESS if there was no error. + */ + simdjson_warn_unused virtual error_code stage1(const uint8_t *buf, size_t len, stage1_mode streaming) noexcept = 0; + + /** + * @private For internal implementation use + * + * Stage 2 of the document parser. + * + * Called after stage1(). + * + * Overridden by each implementation. + * + * @param doc The document to output to. + * @return The error code, or SUCCESS if there was no error. + */ + simdjson_warn_unused virtual error_code stage2(dom::document &doc) noexcept = 0; + + /** + * @private For internal implementation use + * + * Stage 2 of the document parser for parser::parse_many. + * + * Guaranteed only to be called after stage1(). + * Overridden by each implementation. + * + * @param doc The document to output to. + * @return The error code, SUCCESS if there was no error, or EMPTY if all documents have been parsed. + */ + simdjson_warn_unused virtual error_code stage2_next(dom::document &doc) noexcept = 0; + + /** + * Unescape a valid UTF-8 string from src to dst, stopping at a final unescaped quote. There + * must be an unescaped quote terminating the string. It returns the final output + * position as pointer. In case of error (e.g., the string has bad escaped codes), + * then null_ptr is returned. It is assumed that the output buffer is large + * enough. E.g., if src points at 'joe"', then dst needs to have four free bytes + + * SIMDJSON_PADDING bytes. + * + * Overridden by each implementation. + * + * @param str pointer to the beginning of a valid UTF-8 JSON string, must end with an unescaped quote. + * @param dst pointer to a destination buffer, it must point a region in memory of sufficient size. + * @param allow_replacement whether we allow a replacement character when the UTF-8 contains unmatched surrogate pairs. + * @return end of the of the written region (exclusive) or nullptr in case of error. + */ + simdjson_warn_unused virtual uint8_t *parse_string(const uint8_t *src, uint8_t *dst, bool allow_replacement) const noexcept = 0; + + /** + * Unescape a NON-valid UTF-8 string from src to dst, stopping at a final unescaped quote. There + * must be an unescaped quote terminating the string. It returns the final output + * position as pointer. In case of error (e.g., the string has bad escaped codes), + * then null_ptr is returned. It is assumed that the output buffer is large + * enough. E.g., if src points at 'joe"', then dst needs to have four free bytes + + * SIMDJSON_PADDING bytes. + * + * Overridden by each implementation. + * + * @param str pointer to the beginning of a possibly invalid UTF-8 JSON string, must end with an unescaped quote. + * @param dst pointer to a destination buffer, it must point a region in memory of sufficient size. + * @return end of the of the written region (exclusive) or nullptr in case of error. + */ + simdjson_warn_unused virtual uint8_t *parse_wobbly_string(const uint8_t *src, uint8_t *dst) const noexcept = 0; + + /** + * Change the capacity of this parser. + * + * The capacity can never exceed SIMDJSON_MAXSIZE_BYTES (e.g., 4 GB) + * and an CAPACITY error is returned if it is attempted. + * + * Generally used for reallocation. + * + * @param capacity The new capacity. + * @param max_depth The new max_depth. + * @return The error code, or SUCCESS if there was no error. + */ + virtual error_code set_capacity(size_t capacity) noexcept = 0; + + /** + * Change the max depth of this parser. + * + * Generally used for reallocation. + * + * @param capacity The new capacity. + * @param max_depth The new max_depth. + * @return The error code, or SUCCESS if there was no error. + */ + virtual error_code set_max_depth(size_t max_depth) noexcept = 0; + + /** + * Deallocate this parser. + */ + virtual ~dom_parser_implementation() = default; + + /** Number of structural indices passed from stage 1 to stage 2 */ + uint32_t n_structural_indexes{0}; + /** Structural indices passed from stage 1 to stage 2 */ + std::unique_ptr<uint32_t[]> structural_indexes{}; + /** Next structural index to parse */ + uint32_t next_structural_index{0}; + + /** + * The largest document this parser can support without reallocating. + * + * @return Current capacity, in bytes. + */ + simdjson_pure simdjson_inline size_t capacity() const noexcept; + + /** + * The maximum level of nested object and arrays supported by this parser. + * + * @return Maximum depth, in bytes. + */ + simdjson_pure simdjson_inline size_t max_depth() const noexcept; + + /** + * Ensure this parser has enough memory to process JSON documents up to `capacity` bytes in length + * and `max_depth` depth. + * + * @param capacity The new capacity. + * @param max_depth The new max_depth. Defaults to DEFAULT_MAX_DEPTH. + * @return The error, if there is one. + */ + simdjson_warn_unused inline error_code allocate(size_t capacity, size_t max_depth) noexcept; + + +protected: + /** + * The maximum document length this parser supports. + * + * Buffers are large enough to handle any document up to this length. + */ + size_t _capacity{0}; + + /** + * The maximum depth (number of nested objects and arrays) supported by this parser. + * + * Defaults to DEFAULT_MAX_DEPTH. + */ + size_t _max_depth{0}; + + // Declaring these so that subclasses can use them to implement their constructors. + simdjson_inline dom_parser_implementation() noexcept; + simdjson_inline dom_parser_implementation(dom_parser_implementation &&other) noexcept; + simdjson_inline dom_parser_implementation &operator=(dom_parser_implementation &&other) noexcept; + + simdjson_inline dom_parser_implementation(const dom_parser_implementation &) noexcept = delete; + simdjson_inline dom_parser_implementation &operator=(const dom_parser_implementation &other) noexcept = delete; +}; // class dom_parser_implementation + +simdjson_inline dom_parser_implementation::dom_parser_implementation() noexcept = default; +simdjson_inline dom_parser_implementation::dom_parser_implementation(dom_parser_implementation &&other) noexcept = default; +simdjson_inline dom_parser_implementation &dom_parser_implementation::operator=(dom_parser_implementation &&other) noexcept = default; + +simdjson_pure simdjson_inline size_t dom_parser_implementation::capacity() const noexcept { + return _capacity; +} + +simdjson_pure simdjson_inline size_t dom_parser_implementation::max_depth() const noexcept { + return _max_depth; +} + +simdjson_warn_unused +inline error_code dom_parser_implementation::allocate(size_t capacity, size_t max_depth) noexcept { + if (this->max_depth() != max_depth) { + error_code err = set_max_depth(max_depth); + if (err) { return err; } + } + if (_capacity != capacity) { + error_code err = set_capacity(capacity); + if (err) { return err; } + } + return SUCCESS; +} + +} // namespace internal +} // namespace simdjson + +#endif // SIMDJSON_INTERNAL_DOM_PARSER_IMPLEMENTATION_H diff --git a/contrib/libs/simdjson/include/simdjson/internal/instruction_set.h b/contrib/libs/simdjson/include/simdjson/internal/instruction_set.h new file mode 100644 index 00000000000..1dc0a81fb3c --- /dev/null +++ b/contrib/libs/simdjson/include/simdjson/internal/instruction_set.h @@ -0,0 +1,77 @@ +/* From +https://github.com/endorno/pytorch/blob/master/torch/lib/TH/generic/simd/simd.h +Highly modified. + +Copyright (c) 2016- Facebook, Inc (Adam Paszke) +Copyright (c) 2014- Facebook, Inc (Soumith Chintala) +Copyright (c) 2011-2014 Idiap Research Institute (Ronan Collobert) +Copyright (c) 2012-2014 Deepmind Technologies (Koray Kavukcuoglu) +Copyright (c) 2011-2012 NEC Laboratories America (Koray Kavukcuoglu) +Copyright (c) 2011-2013 NYU (Clement Farabet) +Copyright (c) 2006-2010 NEC Laboratories America (Ronan Collobert, Leon Bottou, +Iain Melvin, Jason Weston) Copyright (c) 2006 Idiap Research Institute +(Samy Bengio) Copyright (c) 2001-2004 Idiap Research Institute (Ronan Collobert, +Samy Bengio, Johnny Mariethoz) + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + +3. Neither the names of Facebook, Deepmind Technologies, NYU, NEC Laboratories +America and IDIAP Research Institute nor the names of its contributors may be + used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef SIMDJSON_INTERNAL_INSTRUCTION_SET_H +#define SIMDJSON_INTERNAL_INSTRUCTION_SET_H + +namespace simdjson { +namespace internal { + +enum instruction_set { + DEFAULT = 0x0, + NEON = 0x1, + AVX2 = 0x4, + SSE42 = 0x8, + PCLMULQDQ = 0x10, + BMI1 = 0x20, + BMI2 = 0x40, + ALTIVEC = 0x80, + AVX512F = 0x100, + AVX512DQ = 0x200, + AVX512IFMA = 0x400, + AVX512PF = 0x800, + AVX512ER = 0x1000, + AVX512CD = 0x2000, + AVX512BW = 0x4000, + AVX512VL = 0x8000, + AVX512VBMI2 = 0x10000, + LSX = 0x20000, + LASX = 0x40000, +}; + +} // namespace internal +} // namespace simdjson + +#endif // SIMDJSON_INTERNAL_INSTRUCTION_SET_H diff --git a/contrib/libs/simdjson/include/simdjson/internal/jsoncharutils_tables.h b/contrib/libs/simdjson/include/simdjson/internal/jsoncharutils_tables.h new file mode 100644 index 00000000000..d72bce12262 --- /dev/null +++ b/contrib/libs/simdjson/include/simdjson/internal/jsoncharutils_tables.h @@ -0,0 +1,26 @@ +#ifndef SIMDJSON_INTERNAL_JSONCHARUTILS_TABLES_H +#define SIMDJSON_INTERNAL_JSONCHARUTILS_TABLES_H + +#include "simdjson/base.h" + +#ifdef JSON_TEST_STRINGS +void found_string(const uint8_t *buf, const uint8_t *parsed_begin, + const uint8_t *parsed_end); +void found_bad_string(const uint8_t *buf); +#endif + +namespace simdjson { +namespace internal { +// structural chars here are +// they are { 0x7b } 0x7d : 0x3a [ 0x5b ] 0x5d , 0x2c (and NULL) +// we are also interested in the four whitespace characters +// space 0x20, linefeed 0x0a, horizontal tab 0x09 and carriage return 0x0d + +extern SIMDJSON_DLLIMPORTEXPORT const bool structural_or_whitespace_negated[256]; +extern SIMDJSON_DLLIMPORTEXPORT const bool structural_or_whitespace[256]; +extern SIMDJSON_DLLIMPORTEXPORT const uint32_t digit_to_val32[886]; + +} // namespace internal +} // namespace simdjson + +#endif // SIMDJSON_INTERNAL_JSONCHARUTILS_TABLES_H diff --git a/contrib/libs/simdjson/include/simdjson/internal/numberparsing_tables.h b/contrib/libs/simdjson/include/simdjson/internal/numberparsing_tables.h new file mode 100644 index 00000000000..1762056f750 --- /dev/null +++ b/contrib/libs/simdjson/include/simdjson/internal/numberparsing_tables.h @@ -0,0 +1,59 @@ +#ifndef SIMDJSON_INTERNAL_NUMBERPARSING_TABLES_H +#define SIMDJSON_INTERNAL_NUMBERPARSING_TABLES_H + +#include "simdjson/base.h" + +namespace simdjson { +namespace internal { +/** + * The smallest non-zero float (binary64) is 2^-1074. + * We take as input numbers of the form w x 10^q where w < 2^64. + * We have that w * 10^-343 < 2^(64-344) 5^-343 < 2^-1076. + * However, we have that + * (2^64-1) * 10^-342 = (2^64-1) * 2^-342 * 5^-342 > 2^-1074. + * Thus it is possible for a number of the form w * 10^-342 where + * w is a 64-bit value to be a non-zero floating-point number. + ********* + * Any number of form w * 10^309 where w>= 1 is going to be + * infinite in binary64 so we never need to worry about powers + * of 5 greater than 308. + */ +constexpr int smallest_power = -342; +constexpr int largest_power = 308; + +/** + * Represents a 128-bit value. + * low: least significant 64 bits. + * high: most significant 64 bits. + */ +struct value128 { + uint64_t low; + uint64_t high; +}; + + +// Precomputed powers of ten from 10^0 to 10^22. These +// can be represented exactly using the double type. +extern SIMDJSON_DLLIMPORTEXPORT const double power_of_ten[]; + + +/** + * When mapping numbers from decimal to binary, + * we go from w * 10^q to m * 2^p but we have + * 10^q = 5^q * 2^q, so effectively + * we are trying to match + * w * 2^q * 5^q to m * 2^p. Thus the powers of two + * are not a concern since they can be represented + * exactly using the binary notation, only the powers of five + * affect the binary significand. + */ + + +// The truncated powers of five from 5^-342 all the way to 5^308 +// The mantissa is truncated to 128 bits, and +// never rounded up. Uses about 10KB. +extern SIMDJSON_DLLIMPORTEXPORT const uint64_t power_of_five_128[]; +} // namespace internal +} // namespace simdjson + +#endif // SIMDJSON_INTERNAL_NUMBERPARSING_TABLES_H diff --git a/contrib/libs/simdjson/include/simdjson/internal/simdprune_tables.h b/contrib/libs/simdjson/include/simdjson/internal/simdprune_tables.h new file mode 100644 index 00000000000..7b8f4650c15 --- /dev/null +++ b/contrib/libs/simdjson/include/simdjson/internal/simdprune_tables.h @@ -0,0 +1,21 @@ +#ifndef SIMDJSON_INTERNAL_SIMDPRUNE_TABLES_H +#define SIMDJSON_INTERNAL_SIMDPRUNE_TABLES_H + +#include "simdjson/base.h" + +#include <cstdint> + +namespace simdjson { // table modified and copied from +namespace internal { // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetTable + +extern SIMDJSON_DLLIMPORTEXPORT const unsigned char BitsSetTable256mul2[256]; + +extern SIMDJSON_DLLIMPORTEXPORT const uint8_t pshufb_combine_table[272]; + +// 256 * 8 bytes = 2kB, easily fits in cache. +extern SIMDJSON_DLLIMPORTEXPORT const uint64_t thintable_epi8[256]; + +} // namespace internal +} // namespace simdjson + +#endif // SIMDJSON_INTERNAL_SIMDPRUNE_TABLES_H diff --git a/contrib/libs/simdjson/include/simdjson/internal/tape_type.h b/contrib/libs/simdjson/include/simdjson/internal/tape_type.h new file mode 100644 index 00000000000..d43c57c7a49 --- /dev/null +++ b/contrib/libs/simdjson/include/simdjson/internal/tape_type.h @@ -0,0 +1,28 @@ +#ifndef SIMDJSON_INTERNAL_TAPE_TYPE_H +#define SIMDJSON_INTERNAL_TAPE_TYPE_H + +namespace simdjson { +namespace internal { + +/** + * The possible types in the tape. + */ +enum class tape_type { + ROOT = 'r', + START_ARRAY = '[', + START_OBJECT = '{', + END_ARRAY = ']', + END_OBJECT = '}', + STRING = '"', + INT64 = 'l', + UINT64 = 'u', + DOUBLE = 'd', + TRUE_VALUE = 't', + FALSE_VALUE = 'f', + NULL_VALUE = 'n' +}; // enum class tape_type + +} // namespace internal +} // namespace simdjson + +#endif // SIMDJSON_INTERNAL_TAPE_TYPE_H diff --git a/contrib/libs/simdjson/include/simdjson/lasx/base.h b/contrib/libs/simdjson/include/simdjson/lasx/base.h new file mode 100644 index 00000000000..9d9a866c375 --- /dev/null +++ b/contrib/libs/simdjson/include/simdjson/lasx/base.h @@ -0,0 +1,26 @@ +#ifndef SIMDJSON_LASX_BASE_H +#define SIMDJSON_LASX_BASE_H + +#ifndef SIMDJSON_CONDITIONAL_INCLUDE +#include "simdjson/base.h" +#endif // SIMDJSON_CONDITIONAL_INCLUDE + +namespace simdjson { +/** + * Implementation for LASX. + */ +namespace lasx { + +class implementation; + +namespace { +namespace simd { +template <typename T> struct simd8; +template <typename T> struct simd8x64; +} // namespace simd +} // unnamed namespace + +} // namespace lasx +} // namespace simdjson + +#endif // SIMDJSON_LASX_BASE_H diff --git a/contrib/libs/simdjson/include/simdjson/lasx/begin.h b/contrib/libs/simdjson/include/simdjson/lasx/begin.h new file mode 100644 index 00000000000..560eba737ce --- /dev/null +++ b/contrib/libs/simdjson/include/simdjson/lasx/begin.h @@ -0,0 +1,10 @@ +#define SIMDJSON_IMPLEMENTATION lasx +#include "simdjson/lasx/base.h" +#include "simdjson/lasx/intrinsics.h" +#include "simdjson/lasx/bitmanipulation.h" +#include "simdjson/lasx/bitmask.h" +#include "simdjson/lasx/numberparsing_defs.h" +#include "simdjson/lasx/simd.h" +#include "simdjson/lasx/stringparsing_defs.h" + +#define SIMDJSON_SKIP_BACKSLASH_SHORT_CIRCUIT 1 diff --git a/contrib/libs/simdjson/include/simdjson/lasx/bitmanipulation.h b/contrib/libs/simdjson/include/simdjson/lasx/bitmanipulation.h new file mode 100644 index 00000000000..962ddbf56bc --- /dev/null +++ b/contrib/libs/simdjson/include/simdjson/lasx/bitmanipulation.h @@ -0,0 +1,50 @@ +#ifndef SIMDJSON_LASX_BITMANIPULATION_H +#define SIMDJSON_LASX_BITMANIPULATION_H + +#ifndef SIMDJSON_CONDITIONAL_INCLUDE +#include "simdjson/lasx/base.h" +#include "simdjson/lasx/intrinsics.h" +#include "simdjson/lasx/bitmask.h" +#endif // SIMDJSON_CONDITIONAL_INCLUDE + +namespace simdjson { +namespace lasx { +namespace { + +// We sometimes call trailing_zero on inputs that are zero, +// but the algorithms do not end up using the returned value. +// Sadly, sanitizers are not smart enough to figure it out. +SIMDJSON_NO_SANITIZE_UNDEFINED +// This function can be used safely even if not all bytes have been +// initialized. +// See issue https://github.com/simdjson/simdjson/issues/1965 +SIMDJSON_NO_SANITIZE_MEMORY +simdjson_inline int trailing_zeroes(uint64_t input_num) { + return __builtin_ctzll(input_num); +} + +/* result might be undefined when input_num is zero */ +simdjson_inline uint64_t clear_lowest_bit(uint64_t input_num) { + return input_num & (input_num-1); +} + +/* result might be undefined when input_num is zero */ +simdjson_inline int leading_zeroes(uint64_t input_num) { + return __builtin_clzll(input_num); +} + +/* result might be undefined when input_num is zero */ +simdjson_inline int count_ones(uint64_t input_num) { + return __lasx_xvpickve2gr_w(__lasx_xvpcnt_d(__m256i(v4u64{input_num, 0, 0, 0})), 0); +} + +simdjson_inline bool add_overflow(uint64_t value1, uint64_t value2, uint64_t *result) { + return __builtin_uaddll_overflow(value1, value2, + reinterpret_cast<unsigned long long *>(result)); +} + +} // unnamed namespace +} // namespace lasx +} // namespace simdjson + +#endif // SIMDJSON_LASX_BITMANIPULATION_H diff --git a/contrib/libs/simdjson/include/simdjson/lasx/bitmask.h b/contrib/libs/simdjson/include/simdjson/lasx/bitmask.h new file mode 100644 index 00000000000..e847c1d7100 --- /dev/null +++ b/contrib/libs/simdjson/include/simdjson/lasx/bitmask.h @@ -0,0 +1,31 @@ +#ifndef SIMDJSON_LASX_BITMASK_H +#define SIMDJSON_LASX_BITMASK_H + +#ifndef SIMDJSON_CONDITIONAL_INCLUDE +#include "simdjson/lasx/base.h" +#endif // SIMDJSON_CONDITIONAL_INCLUDE + +namespace simdjson { +namespace lasx { +namespace { + +// +// Perform a "cumulative bitwise xor," flipping bits each time a 1 is encountered. +// +// For example, prefix_xor(00100100) == 00011100 +// +simdjson_inline uint64_t prefix_xor(uint64_t bitmask) { + bitmask ^= bitmask << 1; + bitmask ^= bitmask << 2; + bitmask ^= bitmask << 4; + bitmask ^= bitmask << 8; + bitmask ^= bitmask << 16; + bitmask ^= bitmask << 32; + return bitmask; +} + +} // unnamed namespace +} // namespace lasx +} // namespace simdjson + +#endif diff --git a/contrib/libs/simdjson/include/simdjson/lasx/implementation.h b/contrib/libs/simdjson/include/simdjson/lasx/implementation.h new file mode 100644 index 00000000000..8aafbb8b88b --- /dev/null +++ b/contrib/libs/simdjson/include/simdjson/lasx/implementation.h @@ -0,0 +1,31 @@ +#ifndef SIMDJSON_LASX_IMPLEMENTATION_H +#define SIMDJSON_LASX_IMPLEMENTATION_H + +#ifndef SIMDJSON_CONDITIONAL_INCLUDE +#include "simdjson/base.h" +#include "simdjson/implementation.h" +#include "simdjson/internal/instruction_set.h" +#endif // SIMDJSON_CONDITIONAL_INCLUDE + +namespace simdjson { +namespace lasx { + +/** + * @private + */ +class implementation final : public simdjson::implementation { +public: + simdjson_inline implementation() : simdjson::implementation("lasx", "LoongArch ASX", internal::instruction_set::LASX) {} + simdjson_warn_unused error_code create_dom_parser_implementation( + size_t capacity, + size_t max_length, + std::unique_ptr<internal::dom_parser_implementation>& dst + ) const noexcept final; + simdjson_warn_unused error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept final; + simdjson_warn_unused bool validate_utf8(const char *buf, size_t len) const noexcept final; +}; + +} // namespace lasx +} // namespace simdjson + +#endif // SIMDJSON_LASX_IMPLEMENTATION_H diff --git a/contrib/libs/simdjson/include/simdjson/lasx/intrinsics.h b/contrib/libs/simdjson/include/simdjson/lasx/intrinsics.h new file mode 100644 index 00000000000..47b152d9a4d --- /dev/null +++ b/contrib/libs/simdjson/include/simdjson/lasx/intrinsics.h @@ -0,0 +1,14 @@ +#ifndef SIMDJSON_LASX_INTRINSICS_H +#define SIMDJSON_LASX_INTRINSICS_H + +#ifndef SIMDJSON_CONDITIONAL_INCLUDE +#include "simdjson/lasx/base.h" +#endif // SIMDJSON_CONDITIONAL_INCLUDE + +// This should be the correct header whether +// you use visual studio or other compilers. +#error #include <lasxintrin.h> + +static_assert(sizeof(__m256i) <= simdjson::SIMDJSON_PADDING, "insufficient padding for LoongArch ASX"); + +#endif // SIMDJSON_LASX_INTRINSICS_H diff --git a/contrib/libs/simdjson/include/simdjson/lasx/numberparsing_defs.h b/contrib/libs/simdjson/include/simdjson/lasx/numberparsing_defs.h new file mode 100644 index 00000000000..cb9278261c6 --- /dev/null +++ b/contrib/libs/simdjson/include/simdjson/lasx/numberparsing_defs.h @@ -0,0 +1,41 @@ +#ifndef SIMDJSON_LASX_NUMBERPARSING_DEFS_H +#define SIMDJSON_LASX_NUMBERPARSING_DEFS_H + +#ifndef SIMDJSON_CONDITIONAL_INCLUDE +#include "simdjson/lasx/base.h" +#include "simdjson/lasx/intrinsics.h" +#include "simdjson/internal/numberparsing_tables.h" +#endif // SIMDJSON_CONDITIONAL_INCLUDE + +#include <cstring> + +namespace simdjson { +namespace lasx { +namespace numberparsing { + +// we don't have appropriate instructions, so let us use a scalar function +// credit: https://johnnylee-sde.github.io/Fast-numeric-string-to-int/ +/** @private */ +static simdjson_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) { + uint64_t val; + std::memcpy(&val, chars, sizeof(uint64_t)); + val = (val & 0x0F0F0F0F0F0F0F0F) * 2561 >> 8; + val = (val & 0x00FF00FF00FF00FF) * 6553601 >> 16; + return uint32_t((val & 0x0000FFFF0000FFFF) * 42949672960001 >> 32); +} + +simdjson_inline internal::value128 full_multiplication(uint64_t value1, uint64_t value2) { + internal::value128 answer; + __uint128_t r = (static_cast<__uint128_t>(value1)) * value2; + answer.low = uint64_t(r); + answer.high = uint64_t(r >> 64); + return answer; +} + +} // namespace numberparsing +} // namespace lasx +} // namespace simdjson + +#define SIMDJSON_SWAR_NUMBER_PARSING 1 + +#endif // SIMDJSON_LASX_NUMBERPARSING_DEFS_H diff --git a/contrib/libs/simdjson/include/simdjson/lasx/simd.h b/contrib/libs/simdjson/include/simdjson/lasx/simd.h new file mode 100644 index 00000000000..907a8acb9b3 --- /dev/null +++ b/contrib/libs/simdjson/include/simdjson/lasx/simd.h @@ -0,0 +1,376 @@ +#ifndef SIMDJSON_LASX_SIMD_H +#define SIMDJSON_LASX_SIMD_H + +#ifndef SIMDJSON_CONDITIONAL_INCLUDE +#include "simdjson/lasx/base.h" +#include "simdjson/lasx/bitmanipulation.h" +#include "simdjson/internal/simdprune_tables.h" +#endif // SIMDJSON_CONDITIONAL_INCLUDE + +namespace simdjson { +namespace lasx { +namespace { +namespace simd { + + // Forward-declared so they can be used by splat and friends. + template<typename Child> + struct base { + __m256i value; + + // Zero constructor + simdjson_inline base() : value{__m256i()} {} + + // Conversion from SIMD register + simdjson_inline base(const __m256i _value) : value(_value) {} + + // Conversion to SIMD register + simdjson_inline operator const __m256i&() const { return this->value; } + simdjson_inline operator __m256i&() { return this->value; } + simdjson_inline operator const v32i8&() const { return (v32i8&)this->value; } + simdjson_inline operator v32i8&() { return (v32i8&)this->value; } + + // Bit operations + simdjson_inline Child operator|(const Child other) const { return __lasx_xvor_v(*this, other); } + simdjson_inline Child operator&(const Child other) const { return __lasx_xvand_v(*this, other); } + simdjson_inline Child operator^(const Child other) const { return __lasx_xvxor_v(*this, other); } + simdjson_inline Child bit_andnot(const Child other) const { return __lasx_xvandn_v(other, *this); } + simdjson_inline Child& operator|=(const Child other) { auto this_cast = static_cast<Child*>(this); *this_cast = *this_cast | other; return *this_cast; } + simdjson_inline Child& operator&=(const Child other) { auto this_cast = static_cast<Child*>(this); *this_cast = *this_cast & other; return *this_cast; } + simdjson_inline Child& operator^=(const Child other) { auto this_cast = static_cast<Child*>(this); *this_cast = *this_cast ^ other; return *this_cast; } + }; + + // Forward-declared so they can be used by splat and friends. + template<typename T> + struct simd8; + + template<typename T, typename Mask=simd8<bool>> + struct base8: base<simd8<T>> { + simdjson_inline base8() : base<simd8<T>>() {} + simdjson_inline base8(const __m256i _value) : base<simd8<T>>(_value) {} + + friend simdjson_really_inline Mask operator==(const simd8<T> lhs, const simd8<T> rhs) { return __lasx_xvseq_b(lhs, rhs); } + + static const int SIZE = sizeof(base<simd8<T>>::value); + + template<int N=1> + simdjson_inline simd8<T> prev(const simd8<T> prev_chunk) const { + __m256i hi = __lasx_xvbsll_v(*this, N); + __m256i lo = __lasx_xvbsrl_v(*this, 16 - N); + __m256i tmp = __lasx_xvbsrl_v(prev_chunk, 16 - N); + lo = __lasx_xvpermi_q(lo, tmp, 0x21); + return __lasx_xvor_v(hi, lo); + } + }; + + // SIMD byte mask type (returned by things like eq and gt) + template<> + struct simd8<bool>: base8<bool> { + static simdjson_inline simd8<bool> splat(bool _value) { return __lasx_xvreplgr2vr_b(uint8_t(-(!!_value))); } + + simdjson_inline simd8() : base8() {} + simdjson_inline simd8(const __m256i _value) : base8<bool>(_value) {} + // Splat constructor + simdjson_inline simd8(bool _value) : base8<bool>(splat(_value)) {} + + simdjson_inline int to_bitmask() const { + __m256i mask = __lasx_xvmskltz_b(*this); + return (__lasx_xvpickve2gr_w(mask, 4) << 16) | (__lasx_xvpickve2gr_w(mask, 0)); + } + simdjson_inline bool any() const { + __m256i v = __lasx_xvmsknz_b(*this); + return (0 == __lasx_xvpickve2gr_w(v, 0)) && (0 == __lasx_xvpickve2gr_w(v, 4)); + } + simdjson_inline simd8<bool> operator~() const { return *this ^ true; } + }; + + template<typename T> + struct base8_numeric: base8<T> { + static simdjson_inline simd8<T> splat(T _value) { + return __lasx_xvreplgr2vr_b(_value); + } + static simdjson_inline simd8<T> zero() { return __lasx_xvldi(0); } + static simdjson_inline simd8<T> load(const T values[32]) { + return __lasx_xvld(reinterpret_cast<const __m256i *>(values), 0); + } + // Repeat 16 values as many times as necessary (usually for lookup tables) + static simdjson_inline simd8<T> repeat_16( + T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, + T v8, T v9, T v10, T v11, T v12, T v13, T v14, T v15 + ) { + return simd8<T>( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + simdjson_inline base8_numeric() : base8<T>() {} + simdjson_inline base8_numeric(const __m256i _value) : base8<T>(_value) {} + + // Store to array + simdjson_inline void store(T dst[32]) const { + return __lasx_xvst(*this, reinterpret_cast<__m256i *>(dst), 0); + } + + // Addition/subtraction are the same for signed and unsigned + simdjson_inline simd8<T> operator+(const simd8<T> other) const { return __lasx_xvadd_b(*this, other); } + simdjson_inline simd8<T> operator-(const simd8<T> other) const { return __lasx_xvsub_b(*this, other); } + simdjson_inline simd8<T>& operator+=(const simd8<T> other) { *this = *this + other; return *static_cast<simd8<T>*>(this); } + simdjson_inline simd8<T>& operator-=(const simd8<T> other) { *this = *this - other; return *static_cast<simd8<T>*>(this); } + + // Override to distinguish from bool version + simdjson_inline simd8<T> operator~() const { return *this ^ 0xFFu; } + + // Perform a lookup assuming the value is between 0 and 16 (undefined behavior for out of range values) + template<typename L> + simdjson_inline simd8<L> lookup_16(simd8<L> lookup_table) const { + return __lasx_xvshuf_b(lookup_table, lookup_table, *this); + } + + // Copies to 'output" all bytes corresponding to a 0 in the mask (interpreted as a bitset). + // Passing a 0 value for mask would be equivalent to writing out every byte to output. + // Only the first 16 - count_ones(mask) bytes of the result are significant but 16 bytes + // get written. + template<typename L> + simdjson_inline void compress(uint32_t mask, L * output) const { + using internal::thintable_epi8; + using internal::BitsSetTable256mul2; + using internal::pshufb_combine_table; + // this particular implementation was inspired by haswell + // lasx do it in 4 steps, first 8 bytes and then second 8 bytes... + uint8_t mask1 = uint8_t(mask); // least significant 8 bits + uint8_t mask2 = uint8_t(mask >> 8); // second significant 8 bits + uint8_t mask3 = uint8_t(mask >> 16); // ... + uint8_t mask4 = uint8_t(mask >> 24); // ... + // next line just loads the 64-bit values thintable_epi8[mask{1,2,3,4}] + // into a 256-bit register. + __m256i shufmask = {int64_t(thintable_epi8[mask1]), int64_t(thintable_epi8[mask2]) + 0x0808080808080808, int64_t(thintable_epi8[mask3]), int64_t(thintable_epi8[mask4]) + 0x0808080808080808}; + // this is the version "nearly pruned" + __m256i pruned = __lasx_xvshuf_b(*this, *this, shufmask); + // we still need to put the pieces back together. + // we compute the popcount of the first words: + int pop1 = BitsSetTable256mul2[mask1]; + int pop2 = BitsSetTable256mul2[mask2]; + int pop3 = BitsSetTable256mul2[mask3]; + + // then load the corresponding mask + __m256i masklo = __lasx_xvldx(reinterpret_cast<void*>(reinterpret_cast<unsigned long>(pshufb_combine_table)), pop1 * 8); + __m256i maskhi = __lasx_xvldx(reinterpret_cast<void*>(reinterpret_cast<unsigned long>(pshufb_combine_table)), pop3 * 8); + __m256i compactmask = __lasx_xvpermi_q(maskhi, masklo, 0x20); + __m256i answer = __lasx_xvshuf_b(pruned, pruned, compactmask); + __lasx_xvst(answer, reinterpret_cast<uint8_t*>(output), 0); + uint64_t value3 = __lasx_xvpickve2gr_du(answer, 2); + uint64_t value4 = __lasx_xvpickve2gr_du(answer, 3); + uint64_t *pos = reinterpret_cast<uint64_t*>(reinterpret_cast<uint8_t*>(output) + 16 - (pop1 + pop2) / 2); + pos[0] = value3; + pos[1] = value4; + } + + template<typename L> + simdjson_inline simd8<L> lookup_16( + L replace0, L replace1, L replace2, L replace3, + L replace4, L replace5, L replace6, L replace7, + L replace8, L replace9, L replace10, L replace11, + L replace12, L replace13, L replace14, L replace15) const { + return lookup_16(simd8<L>::repeat_16( + replace0, replace1, replace2, replace3, + replace4, replace5, replace6, replace7, + replace8, replace9, replace10, replace11, + replace12, replace13, replace14, replace15 + )); + } + }; + + // Signed bytes + template<> + struct simd8<int8_t> : base8_numeric<int8_t> { + simdjson_inline simd8() : base8_numeric<int8_t>() {} + simdjson_inline simd8(const __m256i _value) : base8_numeric<int8_t>(_value) {} + // Splat constructor + simdjson_inline simd8(int8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const int8_t values[32]) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15, + int8_t v16, int8_t v17, int8_t v18, int8_t v19, int8_t v20, int8_t v21, int8_t v22, int8_t v23, + int8_t v24, int8_t v25, int8_t v26, int8_t v27, int8_t v28, int8_t v29, int8_t v30, int8_t v31 + ) : simd8({ + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v16,v17,v18,v19,v20,v21,v22,v23, + v24,v25,v26,v27,v28,v29,v30,v31 + }) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8<int8_t> repeat_16( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 + ) { + return simd8<int8_t>( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + // Order-sensitive comparisons + simdjson_inline simd8<int8_t> max_val(const simd8<int8_t> other) const { return __lasx_xvmax_b(*this, other); } + simdjson_inline simd8<int8_t> min_val(const simd8<int8_t> other) const { return __lasx_xvmin_b(*this, other); } + simdjson_inline simd8<bool> operator>(const simd8<int8_t> other) const { return __lasx_xvslt_b(other, *this); } + simdjson_inline simd8<bool> operator<(const simd8<int8_t> other) const { return __lasx_xvslt_b(*this, other); } + }; + + // Unsigned bytes + template<> + struct simd8<uint8_t>: base8_numeric<uint8_t> { + simdjson_inline simd8() : base8_numeric<uint8_t>() {} + simdjson_inline simd8(const __m256i _value) : base8_numeric<uint8_t>(_value) {} + // Splat constructor + simdjson_inline simd8(uint8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const uint8_t values[32]) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15, + uint8_t v16, uint8_t v17, uint8_t v18, uint8_t v19, uint8_t v20, uint8_t v21, uint8_t v22, uint8_t v23, + uint8_t v24, uint8_t v25, uint8_t v26, uint8_t v27, uint8_t v28, uint8_t v29, uint8_t v30, uint8_t v31 + ) : simd8(__m256i(v32u8{ + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v16,v17,v18,v19,v20,v21,v22,v23, + v24,v25,v26,v27,v28,v29,v30,v31 + })) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8<uint8_t> repeat_16( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 + ) { + return simd8<uint8_t>( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + // Saturated math + simdjson_inline simd8<uint8_t> saturating_add(const simd8<uint8_t> other) const { return __lasx_xvsadd_bu(*this, other); } + simdjson_inline simd8<uint8_t> saturating_sub(const simd8<uint8_t> other) const { return __lasx_xvssub_bu(*this, other); } + + // Order-specific operations + simdjson_inline simd8<uint8_t> max_val(const simd8<uint8_t> other) const { return __lasx_xvmax_bu(*this, other); } + simdjson_inline simd8<uint8_t> min_val(const simd8<uint8_t> other) const { return __lasx_xvmin_bu(other, *this); } + // Same as >, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8<uint8_t> gt_bits(const simd8<uint8_t> other) const { return this->saturating_sub(other); } + // Same as <, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8<uint8_t> lt_bits(const simd8<uint8_t> other) const { return other.saturating_sub(*this); } + simdjson_inline simd8<bool> operator<=(const simd8<uint8_t> other) const { return other.max_val(*this) == other; } + simdjson_inline simd8<bool> operator>=(const simd8<uint8_t> other) const { return other.min_val(*this) == other; } + simdjson_inline simd8<bool> operator>(const simd8<uint8_t> other) const { return this->gt_bits(other).any_bits_set(); } + simdjson_inline simd8<bool> operator<(const simd8<uint8_t> other) const { return this->lt_bits(other).any_bits_set(); } + + // Bit-specific operations + simdjson_inline simd8<bool> bits_not_set() const { return *this == uint8_t(0); } + simdjson_inline simd8<bool> bits_not_set(simd8<uint8_t> bits) const { return (*this & bits).bits_not_set(); } + simdjson_inline simd8<bool> any_bits_set() const { return ~this->bits_not_set(); } + simdjson_inline simd8<bool> any_bits_set(simd8<uint8_t> bits) const { return ~this->bits_not_set(bits); } + simdjson_inline bool is_ascii() const { + __m256i mask = __lasx_xvmskltz_b(*this); + return (0 == __lasx_xvpickve2gr_w(mask, 0)) && (0 == __lasx_xvpickve2gr_w(mask, 4)); + } + simdjson_inline bool bits_not_set_anywhere() const { + __m256i v = __lasx_xvmsknz_b(*this); + return (0 == __lasx_xvpickve2gr_w(v, 0)) && (0 == __lasx_xvpickve2gr_w(v, 4)); + } + simdjson_inline bool any_bits_set_anywhere() const { return !bits_not_set_anywhere(); } + simdjson_inline bool bits_not_set_anywhere(simd8<uint8_t> bits) const { + __m256i v = __lasx_xvmsknz_b(__lasx_xvand_v(*this, bits)); + return (0 == __lasx_xvpickve2gr_w(v, 0)) && (0 == __lasx_xvpickve2gr_w(v, 4)); + } + simdjson_inline bool any_bits_set_anywhere(simd8<uint8_t> bits) const { return !bits_not_set_anywhere(bits); } + template<int N> + simdjson_inline simd8<uint8_t> shr() const { return simd8<uint8_t>(__lasx_xvsrli_b(*this, N)); } + template<int N> + simdjson_inline simd8<uint8_t> shl() const { return simd8<uint8_t>(__lasx_xvslli_b(*this, N)); } + }; + + template<typename T> + struct simd8x64 { + static constexpr int NUM_CHUNKS = 64 / sizeof(simd8<T>); + static_assert(NUM_CHUNKS == 2, "LASX kernel should use two registers per 64-byte block."); + const simd8<T> chunks[NUM_CHUNKS]; + + simd8x64(const simd8x64<T>& o) = delete; // no copy allowed + simd8x64<T>& operator=(const simd8<T>& other) = delete; // no assignment allowed + simd8x64() = delete; // no default constructor allowed + + simdjson_inline simd8x64(const simd8<T> chunk0, const simd8<T> chunk1) : chunks{chunk0, chunk1} {} + simdjson_inline simd8x64(const T ptr[64]) : chunks{simd8<T>::load(ptr), simd8<T>::load(ptr+32)} {} + + simdjson_inline uint64_t compress(uint64_t mask, T * output) const { + uint32_t mask1 = uint32_t(mask); + uint32_t mask2 = uint32_t(mask >> 32); + __m256i zcnt = __lasx_xvpcnt_w(__m256i(v4u64{~mask, 0, 0, 0})); + uint64_t zcnt1 = __lasx_xvpickve2gr_wu(zcnt, 0); + uint64_t zcnt2 = __lasx_xvpickve2gr_wu(zcnt, 1); + // There should be a critical value which processes in scaler is faster. + if (zcnt1) + this->chunks[0].compress(mask1, output); + if (zcnt2) + this->chunks[1].compress(mask2, output + zcnt1); + return zcnt1 + zcnt2; + } + + simdjson_inline void store(T ptr[64]) const { + this->chunks[0].store(ptr+sizeof(simd8<T>)*0); + this->chunks[1].store(ptr+sizeof(simd8<T>)*1); + } + + simdjson_inline uint64_t to_bitmask() const { + __m256i mask0 = __lasx_xvmskltz_b(this->chunks[0]); + __m256i mask1 = __lasx_xvmskltz_b(this->chunks[1]); + __m256i mask_tmp = __lasx_xvpickve_w(mask0, 4); + __m256i tmp = __lasx_xvpickve_w(mask1, 4); + mask0 = __lasx_xvinsve0_w(mask0, mask1, 1); + mask_tmp = __lasx_xvinsve0_w(mask_tmp, tmp, 1); + return __lasx_xvpickve2gr_du(__lasx_xvpackev_h(mask_tmp, mask0), 0); + } + + simdjson_inline simd8<T> reduce_or() const { + return this->chunks[0] | this->chunks[1]; + } + + simdjson_inline uint64_t eq(const T m) const { + const simd8<T> mask = simd8<T>::splat(m); + return simd8x64<bool>( + this->chunks[0] == mask, + this->chunks[1] == mask + ).to_bitmask(); + } + + simdjson_inline uint64_t eq(const simd8x64<uint8_t> &other) const { + return simd8x64<bool>( + this->chunks[0] == other.chunks[0], + this->chunks[1] == other.chunks[1] + ).to_bitmask(); + } + + simdjson_inline uint64_t lteq(const T m) const { + const simd8<T> mask = simd8<T>::splat(m); + return simd8x64<bool>( + this->chunks[0] <= mask, + this->chunks[1] <= mask + ).to_bitmask(); + } + }; // struct simd8x64<T> + +} // namespace simd +} // unnamed namespace +} // namespace lasx +} // namespace simdjson + +#endif // SIMDJSON_LASX_SIMD_H diff --git a/contrib/libs/simdjson/include/simdjson/lasx/stringparsing_defs.h b/contrib/libs/simdjson/include/simdjson/lasx/stringparsing_defs.h new file mode 100644 index 00000000000..fe7a7430e00 --- /dev/null +++ b/contrib/libs/simdjson/include/simdjson/lasx/stringparsing_defs.h @@ -0,0 +1,47 @@ +#ifndef SIMDJSON_LASX_STRINGPARSING_DEFS_H +#define SIMDJSON_LASX_STRINGPARSING_DEFS_H + +#ifndef SIMDJSON_CONDITIONAL_INCLUDE +#include "simdjson/lasx/base.h" +#include "simdjson/lasx/simd.h" +#include "simdjson/lasx/bitmanipulation.h" +#endif // SIMDJSON_CONDITIONAL_INCLUDE + +namespace simdjson { +namespace lasx { +namespace { + +using namespace simd; + +// Holds backslashes and quotes locations. +struct backslash_and_quote { +public: + static constexpr uint32_t BYTES_PROCESSED = 32; + simdjson_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); + + simdjson_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; } + simdjson_inline bool has_backslash() { return bs_bits != 0; } + simdjson_inline int quote_index() { return trailing_zeroes(quote_bits); } + simdjson_inline int backslash_index() { return trailing_zeroes(bs_bits); } + + uint32_t bs_bits; + uint32_t quote_bits; +}; // struct backslash_and_quote + +simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { + // this can read up to 31 bytes beyond the buffer size, but we require + // SIMDJSON_PADDING of padding + static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "backslash and quote finder must process fewer than SIMDJSON_PADDING bytes"); + simd8<uint8_t> v(src); + v.store(dst); + return { + static_cast<uint32_t>((v == '\\').to_bitmask()), // bs_bits + static_cast<uint32_t>((v == '"').to_bitmask()), // quote_bits + }; +} + +} // unnamed namespace +} // namespace lasx +} // namespace simdjson + +#endif // SIMDJSON_LASX_STRINGPARSING_DEFS_H diff --git a/contrib/libs/simdjson/include/simdjson/lsx/base.h b/contrib/libs/simdjson/include/simdjson/lsx/base.h new file mode 100644 index 00000000000..ff02450184a --- /dev/null +++ b/contrib/libs/simdjson/include/simdjson/lsx/base.h @@ -0,0 +1,26 @@ +#ifndef SIMDJSON_LSX_BASE_H +#define SIMDJSON_LSX_BASE_H + +#ifndef SIMDJSON_CONDITIONAL_INCLUDE +#include "simdjson/base.h" +#endif // SIMDJSON_CONDITIONAL_INCLUDE + +namespace simdjson { +/** + * Implementation for LSX. + */ +namespace lsx { + +class implementation; + +namespace { +namespace simd { +template <typename T> struct simd8; +template <typename T> struct simd8x64; +} // namespace simd +} // unnamed namespace + +} // namespace lsx +} // namespace simdjson + +#endif // SIMDJSON_LSX_BASE_H diff --git a/contrib/libs/simdjson/include/simdjson/lsx/begin.h b/contrib/libs/simdjson/include/simdjson/lsx/begin.h new file mode 100644 index 00000000000..78a92819a34 --- /dev/null +++ b/contrib/libs/simdjson/include/simdjson/lsx/begin.h @@ -0,0 +1,10 @@ +#define SIMDJSON_IMPLEMENTATION lsx +#include "simdjson/lsx/base.h" +#include "simdjson/lsx/intrinsics.h" +#include "simdjson/lsx/bitmanipulation.h" +#include "simdjson/lsx/bitmask.h" +#include "simdjson/lsx/numberparsing_defs.h" +#include "simdjson/lsx/simd.h" +#include "simdjson/lsx/stringparsing_defs.h" + +#define SIMDJSON_SKIP_BACKSLASH_SHORT_CIRCUIT 1 diff --git a/contrib/libs/simdjson/include/simdjson/lsx/bitmanipulation.h b/contrib/libs/simdjson/include/simdjson/lsx/bitmanipulation.h new file mode 100644 index 00000000000..96e1794bae5 --- /dev/null +++ b/contrib/libs/simdjson/include/simdjson/lsx/bitmanipulation.h @@ -0,0 +1,50 @@ +#ifndef SIMDJSON_LSX_BITMANIPULATION_H +#define SIMDJSON_LSX_BITMANIPULATION_H + +#ifndef SIMDJSON_CONDITIONAL_INCLUDE +#include "simdjson/lsx/base.h" +#include "simdjson/lsx/intrinsics.h" +#include "simdjson/lsx/bitmask.h" +#endif // SIMDJSON_CONDITIONAL_INCLUDE + +namespace simdjson { +namespace lsx { +namespace { + +// We sometimes call trailing_zero on inputs that are zero, +// but the algorithms do not end up using the returned value. +// Sadly, sanitizers are not smart enough to figure it out. +SIMDJSON_NO_SANITIZE_UNDEFINED +// This function can be used safely even if not all bytes have been +// initialized. +// See issue https://github.com/simdjson/simdjson/issues/1965 +SIMDJSON_NO_SANITIZE_MEMORY +simdjson_inline int trailing_zeroes(uint64_t input_num) { + return __builtin_ctzll(input_num); +} + +/* result might be undefined when input_num is zero */ +simdjson_inline uint64_t clear_lowest_bit(uint64_t input_num) { + return input_num & (input_num-1); +} + +/* result might be undefined when input_num is zero */ +simdjson_inline int leading_zeroes(uint64_t input_num) { + return __builtin_clzll(input_num); +} + +/* result might be undefined when input_num is zero */ +simdjson_inline int count_ones(uint64_t input_num) { + return __lsx_vpickve2gr_w(__lsx_vpcnt_d(__m128i(v2u64{input_num, 0})), 0); +} + +simdjson_inline bool add_overflow(uint64_t value1, uint64_t value2, uint64_t *result) { + return __builtin_uaddll_overflow(value1, value2, + reinterpret_cast<unsigned long long *>(result)); +} + +} // unnamed namespace +} // namespace lsx +} // namespace simdjson + +#endif // SIMDJSON_LSX_BITMANIPULATION_H diff --git a/contrib/libs/simdjson/include/simdjson/lsx/bitmask.h b/contrib/libs/simdjson/include/simdjson/lsx/bitmask.h new file mode 100644 index 00000000000..3a9f0d768c5 --- /dev/null +++ b/contrib/libs/simdjson/include/simdjson/lsx/bitmask.h @@ -0,0 +1,31 @@ +#ifndef SIMDJSON_LSX_BITMASK_H +#define SIMDJSON_LSX_BITMASK_H + +#ifndef SIMDJSON_CONDITIONAL_INCLUDE +#include "simdjson/lsx/base.h" +#endif // SIMDJSON_CONDITIONAL_INCLUDE + +namespace simdjson { +namespace lsx { +namespace { + +// +// Perform a "cumulative bitwise xor," flipping bits each time a 1 is encountered. +// +// For example, prefix_xor(00100100) == 00011100 +// +simdjson_inline uint64_t prefix_xor(uint64_t bitmask) { + bitmask ^= bitmask << 1; + bitmask ^= bitmask << 2; + bitmask ^= bitmask << 4; + bitmask ^= bitmask << 8; + bitmask ^= bitmask << 16; + bitmask ^= bitmask << 32; + return bitmask; +} + +} // unnamed namespace +} // namespace lsx +} // namespace simdjson + +#endif diff --git a/contrib/libs/simdjson/include/simdjson/lsx/implementation.h b/contrib/libs/simdjson/include/simdjson/lsx/implementation.h new file mode 100644 index 00000000000..14468777de7 --- /dev/null +++ b/contrib/libs/simdjson/include/simdjson/lsx/implementation.h @@ -0,0 +1,31 @@ +#ifndef SIMDJSON_LSX_IMPLEMENTATION_H +#define SIMDJSON_LSX_IMPLEMENTATION_H + +#ifndef SIMDJSON_CONDITIONAL_INCLUDE +#include "simdjson/base.h" +#include "simdjson/implementation.h" +#include "simdjson/internal/instruction_set.h" +#endif // SIMDJSON_CONDITIONAL_INCLUDE + +namespace simdjson { +namespace lsx { + +/** + * @private + */ +class implementation final : public simdjson::implementation { +public: + simdjson_inline implementation() : simdjson::implementation("lsx", "LoongArch SX", internal::instruction_set::LSX) {} + simdjson_warn_unused error_code create_dom_parser_implementation( + size_t capacity, + size_t max_length, + std::unique_ptr<internal::dom_parser_implementation>& dst + ) const noexcept final; + simdjson_warn_unused error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept final; + simdjson_warn_unused bool validate_utf8(const char *buf, size_t len) const noexcept final; +}; + +} // namespace lsx +} // namespace simdjson + +#endif // SIMDJSON_LSX_IMPLEMENTATION_H diff --git a/contrib/libs/simdjson/include/simdjson/lsx/intrinsics.h b/contrib/libs/simdjson/include/simdjson/lsx/intrinsics.h new file mode 100644 index 00000000000..c4cf1dd6094 --- /dev/null +++ b/contrib/libs/simdjson/include/simdjson/lsx/intrinsics.h @@ -0,0 +1,14 @@ +#ifndef SIMDJSON_LSX_INTRINSICS_H +#define SIMDJSON_LSX_INTRINSICS_H + +#ifndef SIMDJSON_CONDITIONAL_INCLUDE +#include "simdjson/lsx/base.h" +#endif // SIMDJSON_CONDITIONAL_INCLUDE + +// This should be the correct header whether +// you use visual studio or other compilers. +#error #include <lsxintrin.h> + +static_assert(sizeof(__m128i) <= simdjson::SIMDJSON_PADDING, "insufficient padding for LoongArch SX"); + +#endif // SIMDJSON_LSX_INTRINSICS_H diff --git a/contrib/libs/simdjson/include/simdjson/lsx/numberparsing_defs.h b/contrib/libs/simdjson/include/simdjson/lsx/numberparsing_defs.h new file mode 100644 index 00000000000..1dfe4264e92 --- /dev/null +++ b/contrib/libs/simdjson/include/simdjson/lsx/numberparsing_defs.h @@ -0,0 +1,41 @@ +#ifndef SIMDJSON_LSX_NUMBERPARSING_DEFS_H +#define SIMDJSON_LSX_NUMBERPARSING_DEFS_H + +#ifndef SIMDJSON_CONDITIONAL_INCLUDE +#include "simdjson/lsx/base.h" +#include "simdjson/lsx/intrinsics.h" +#include "simdjson/internal/numberparsing_tables.h" +#endif // SIMDJSON_CONDITIONAL_INCLUDE + +#include <cstring> + +namespace simdjson { +namespace lsx { +namespace numberparsing { + +// we don't have appropriate instructions, so let us use a scalar function +// credit: https://johnnylee-sde.github.io/Fast-numeric-string-to-int/ +/** @private */ +static simdjson_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) { + uint64_t val; + std::memcpy(&val, chars, sizeof(uint64_t)); + val = (val & 0x0F0F0F0F0F0F0F0F) * 2561 >> 8; + val = (val & 0x00FF00FF00FF00FF) * 6553601 >> 16; + return uint32_t((val & 0x0000FFFF0000FFFF) * 42949672960001 >> 32); +} + +simdjson_inline internal::value128 full_multiplication(uint64_t value1, uint64_t value2) { + internal::value128 answer; + __uint128_t r = (static_cast<__uint128_t>(value1)) * value2; + answer.low = uint64_t(r); + answer.high = uint64_t(r >> 64); + return answer; +} + +} // namespace numberparsing +} // namespace lsx +} // namespace simdjson + +#define SIMDJSON_SWAR_NUMBER_PARSING 1 + +#endif // SIMDJSON_LSX_NUMBERPARSING_DEFS_H diff --git a/contrib/libs/simdjson/include/simdjson/lsx/simd.h b/contrib/libs/simdjson/include/simdjson/lsx/simd.h new file mode 100644 index 00000000000..3f0d66560d3 --- /dev/null +++ b/contrib/libs/simdjson/include/simdjson/lsx/simd.h @@ -0,0 +1,354 @@ +#ifndef SIMDJSON_LSX_SIMD_H +#define SIMDJSON_LSX_SIMD_H + +#ifndef SIMDJSON_CONDITIONAL_INCLUDE +#include "simdjson/lsx/base.h" +#include "simdjson/lsx/bitmanipulation.h" +#include "simdjson/internal/simdprune_tables.h" +#endif // SIMDJSON_CONDITIONAL_INCLUDE + +namespace simdjson { +namespace lsx { +namespace { +namespace simd { + + // Forward-declared so they can be used by splat and friends. + template<typename Child> + struct base { + __m128i value; + + // Zero constructor + simdjson_inline base() : value{__m128i()} {} + + // Conversion from SIMD register + simdjson_inline base(const __m128i _value) : value(_value) {} + + // Conversion to SIMD register + simdjson_inline operator const __m128i&() const { return this->value; } + simdjson_inline operator __m128i&() { return this->value; } + simdjson_inline operator const v16i8&() const { return (v16i8&)this->value; } + simdjson_inline operator v16i8&() { return (v16i8&)this->value; } + + // Bit operations + simdjson_inline Child operator|(const Child other) const { return __lsx_vor_v(*this, other); } + simdjson_inline Child operator&(const Child other) const { return __lsx_vand_v(*this, other); } + simdjson_inline Child operator^(const Child other) const { return __lsx_vxor_v(*this, other); } + simdjson_inline Child bit_andnot(const Child other) const { return __lsx_vandn_v(other, *this); } + simdjson_inline Child& operator|=(const Child other) { auto this_cast = static_cast<Child*>(this); *this_cast = *this_cast | other; return *this_cast; } + simdjson_inline Child& operator&=(const Child other) { auto this_cast = static_cast<Child*>(this); *this_cast = *this_cast & other; return *this_cast; } + simdjson_inline Child& operator^=(const Child other) { auto this_cast = static_cast<Child*>(this); *this_cast = *this_cast ^ other; return *this_cast; } + }; + + // Forward-declared so they can be used by splat and friends. + template<typename T> + struct simd8; + + template<typename T, typename Mask=simd8<bool>> + struct base8: base<simd8<T>> { + simdjson_inline base8() : base<simd8<T>>() {} + simdjson_inline base8(const __m128i _value) : base<simd8<T>>(_value) {} + + friend simdjson_really_inline Mask operator==(const simd8<T> lhs, const simd8<T> rhs) { return __lsx_vseq_b(lhs, rhs); } + + static const int SIZE = sizeof(base<simd8<T>>::value); + + template<int N=1> + simdjson_inline simd8<T> prev(const simd8<T> prev_chunk) const { + return __lsx_vor_v(__lsx_vbsll_v(*this, N), __lsx_vbsrl_v(prev_chunk, 16 - N)); + } + }; + + // SIMD byte mask type (returned by things like eq and gt) + template<> + struct simd8<bool>: base8<bool> { + static simdjson_inline simd8<bool> splat(bool _value) { + return __lsx_vreplgr2vr_b(uint8_t(-(!!_value))); + } + + simdjson_inline simd8() : base8() {} + simdjson_inline simd8(const __m128i _value) : base8<bool>(_value) {} + // Splat constructor + simdjson_inline simd8(bool _value) : base8<bool>(splat(_value)) {} + + simdjson_inline int to_bitmask() const { return __lsx_vpickve2gr_w(__lsx_vmskltz_b(*this), 0); } + simdjson_inline bool any() const { return 0 == __lsx_vpickve2gr_hu(__lsx_vmsknz_b(*this), 0); } + simdjson_inline simd8<bool> operator~() const { return *this ^ true; } + }; + + template<typename T> + struct base8_numeric: base8<T> { + static simdjson_inline simd8<T> splat(T _value) { return __lsx_vreplgr2vr_b(_value); } + static simdjson_inline simd8<T> zero() { return __lsx_vldi(0); } + static simdjson_inline simd8<T> load(const T values[16]) { + return __lsx_vld(reinterpret_cast<const __m128i *>(values), 0); + } + // Repeat 16 values as many times as necessary (usually for lookup tables) + static simdjson_inline simd8<T> repeat_16( + T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, + T v8, T v9, T v10, T v11, T v12, T v13, T v14, T v15 + ) { + return simd8<T>( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + simdjson_inline base8_numeric() : base8<T>() {} + simdjson_inline base8_numeric(const __m128i _value) : base8<T>(_value) {} + + // Store to array + simdjson_inline void store(T dst[16]) const { + return __lsx_vst(*this, reinterpret_cast<__m128i *>(dst), 0); + } + + // Addition/subtraction are the same for signed and unsigned + simdjson_inline simd8<T> operator+(const simd8<T> other) const { return __lsx_vadd_b(*this, other); } + simdjson_inline simd8<T> operator-(const simd8<T> other) const { return __lsx_vsub_b(*this, other); } + simdjson_inline simd8<T>& operator+=(const simd8<T> other) { *this = *this + other; return *static_cast<simd8<T>*>(this); } + simdjson_inline simd8<T>& operator-=(const simd8<T> other) { *this = *this - other; return *static_cast<simd8<T>*>(this); } + + // Override to distinguish from bool version + simdjson_inline simd8<T> operator~() const { return *this ^ 0xFFu; } + + // Perform a lookup assuming the value is between 0 and 16 (undefined behavior for out of range values) + template<typename L> + simdjson_inline simd8<L> lookup_16(simd8<L> lookup_table) const { + return __lsx_vshuf_b(lookup_table, lookup_table, *this); + } + + // Copies to 'output" all bytes corresponding to a 0 in the mask (interpreted as a bitset). + // Passing a 0 value for mask would be equivalent to writing out every byte to output. + // Only the first 16 - count_ones(mask) bytes of the result are significant but 16 bytes + // get written. + template<typename L> + simdjson_inline void compress(uint16_t mask, L * output) const { + using internal::thintable_epi8; + using internal::BitsSetTable256mul2; + using internal::pshufb_combine_table; + // this particular implementation was inspired by haswell + // lsx do it in 2 steps, first 8 bytes and then second 8 bytes... + uint8_t mask1 = uint8_t(mask); // least significant 8 bits + uint8_t mask2 = uint8_t(mask >> 8); // second least significant 8 bits + // next line just loads the 64-bit values thintable_epi8[mask1] and + // thintable_epi8[mask2] into a 128-bit register. + __m128i shufmask = {int64_t(thintable_epi8[mask1]), int64_t(thintable_epi8[mask2]) + 0x0808080808080808}; + // this is the version "nearly pruned" + __m128i pruned = __lsx_vshuf_b(*this, *this, shufmask); + // we still need to put the pieces back together. + // we compute the popcount of the first words: + int pop1 = BitsSetTable256mul2[mask1]; + // then load the corresponding mask + __m128i compactmask = __lsx_vldx(reinterpret_cast<void*>(reinterpret_cast<unsigned long>(pshufb_combine_table)), pop1 * 8); + __m128i answer = __lsx_vshuf_b(pruned, pruned, compactmask); + __lsx_vst(answer, reinterpret_cast<uint8_t*>(output), 0); + } + + template<typename L> + simdjson_inline simd8<L> lookup_16( + L replace0, L replace1, L replace2, L replace3, + L replace4, L replace5, L replace6, L replace7, + L replace8, L replace9, L replace10, L replace11, + L replace12, L replace13, L replace14, L replace15) const { + return lookup_16(simd8<L>::repeat_16( + replace0, replace1, replace2, replace3, + replace4, replace5, replace6, replace7, + replace8, replace9, replace10, replace11, + replace12, replace13, replace14, replace15 + )); + } + }; + + // Signed bytes + template<> + struct simd8<int8_t> : base8_numeric<int8_t> { + simdjson_inline simd8() : base8_numeric<int8_t>() {} + simdjson_inline simd8(const __m128i _value) : base8_numeric<int8_t>(_value) {} + // Splat constructor + simdjson_inline simd8(int8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const int8_t values[16]) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 + ) : simd8({ + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + }) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8<int8_t> repeat_16( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 + ) { + return simd8<int8_t>( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + // Order-sensitive comparisons + simdjson_inline simd8<int8_t> max_val(const simd8<int8_t> other) const { return __lsx_vmax_b(*this, other); } + simdjson_inline simd8<int8_t> min_val(const simd8<int8_t> other) const { return __lsx_vmin_b(*this, other); } + simdjson_inline simd8<bool> operator>(const simd8<int8_t> other) const { return __lsx_vslt_b(other, *this); } + simdjson_inline simd8<bool> operator<(const simd8<int8_t> other) const { return __lsx_vslt_b(*this, other); } + }; + + // Unsigned bytes + template<> + struct simd8<uint8_t>: base8_numeric<uint8_t> { + simdjson_inline simd8() : base8_numeric<uint8_t>() {} + simdjson_inline simd8(const __m128i _value) : base8_numeric<uint8_t>(_value) {} + // Splat constructor + simdjson_inline simd8(uint8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const uint8_t values[16]) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 + ) : simd8(__m128i(v16u8{ + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + })) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8<uint8_t> repeat_16( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 + ) { + return simd8<uint8_t>( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + // Saturated math + simdjson_inline simd8<uint8_t> saturating_add(const simd8<uint8_t> other) const { return __lsx_vsadd_bu(*this, other); } + simdjson_inline simd8<uint8_t> saturating_sub(const simd8<uint8_t> other) const { return __lsx_vssub_bu(*this, other); } + + // Order-specific operations + simdjson_inline simd8<uint8_t> max_val(const simd8<uint8_t> other) const { return __lsx_vmax_bu(*this, other); } + simdjson_inline simd8<uint8_t> min_val(const simd8<uint8_t> other) const { return __lsx_vmin_bu(other, *this); } + // Same as >, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8<uint8_t> gt_bits(const simd8<uint8_t> other) const { return this->saturating_sub(other); } + // Same as <, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8<uint8_t> lt_bits(const simd8<uint8_t> other) const { return other.saturating_sub(*this); } + simdjson_inline simd8<bool> operator<=(const simd8<uint8_t> other) const { return other.max_val(*this) == other; } + simdjson_inline simd8<bool> operator>=(const simd8<uint8_t> other) const { return other.min_val(*this) == other; } + simdjson_inline simd8<bool> operator>(const simd8<uint8_t> other) const { return this->gt_bits(other).any_bits_set(); } + simdjson_inline simd8<bool> operator<(const simd8<uint8_t> other) const { return this->lt_bits(other).any_bits_set(); } + + // Bit-specific operations + simdjson_inline simd8<bool> bits_not_set() const { return *this == uint8_t(0); } + simdjson_inline simd8<bool> bits_not_set(simd8<uint8_t> bits) const { return (*this & bits).bits_not_set(); } + simdjson_inline simd8<bool> any_bits_set() const { return ~this->bits_not_set(); } + simdjson_inline simd8<bool> any_bits_set(simd8<uint8_t> bits) const { return ~this->bits_not_set(bits); } + simdjson_inline bool is_ascii() const { return 0 == __lsx_vpickve2gr_w(__lsx_vmskltz_b(*this), 0); } + simdjson_inline bool bits_not_set_anywhere() const { return 0 == __lsx_vpickve2gr_hu(__lsx_vmsknz_b(*this), 0); } + simdjson_inline bool any_bits_set_anywhere() const { return !bits_not_set_anywhere(); } + simdjson_inline bool bits_not_set_anywhere(simd8<uint8_t> bits) const { + return 0 == __lsx_vpickve2gr_hu(__lsx_vmsknz_b(__lsx_vand_v(*this, bits)), 0); + } + simdjson_inline bool any_bits_set_anywhere(simd8<uint8_t> bits) const { return !bits_not_set_anywhere(bits); } + template<int N> + simdjson_inline simd8<uint8_t> shr() const { return simd8<uint8_t>(__lsx_vsrli_b(*this, N)); } + template<int N> + simdjson_inline simd8<uint8_t> shl() const { return simd8<uint8_t>(__lsx_vslli_b(*this, N)); } + }; + + template<typename T> + struct simd8x64 { + static constexpr int NUM_CHUNKS = 64 / sizeof(simd8<T>); + static_assert(NUM_CHUNKS == 4, "LSX kernel should use four registers per 64-byte block."); + const simd8<T> chunks[NUM_CHUNKS]; + + simd8x64(const simd8x64<T>& o) = delete; // no copy allowed + simd8x64<T>& operator=(const simd8<T>& other) = delete; // no assignment allowed + simd8x64() = delete; // no default constructor allowed + + simdjson_inline simd8x64(const simd8<T> chunk0, const simd8<T> chunk1, const simd8<T> chunk2, const simd8<T> chunk3) : chunks{chunk0, chunk1, chunk2, chunk3} {} + simdjson_inline simd8x64(const T ptr[64]) : chunks{simd8<T>::load(ptr), simd8<T>::load(ptr+16), simd8<T>::load(ptr+32), simd8<T>::load(ptr+48)} {} + + simdjson_inline uint64_t compress(uint64_t mask, T * output) const { + uint16_t mask1 = uint16_t(mask); + uint16_t mask2 = uint16_t(mask >> 16); + uint16_t mask3 = uint16_t(mask >> 32); + uint16_t mask4 = uint16_t(mask >> 48); + __m128i zcnt = __lsx_vpcnt_h(__m128i(v2u64{~mask, 0})); + uint64_t zcnt1 = __lsx_vpickve2gr_hu(zcnt, 0); + uint64_t zcnt2 = __lsx_vpickve2gr_hu(zcnt, 1); + uint64_t zcnt3 = __lsx_vpickve2gr_hu(zcnt, 2); + uint64_t zcnt4 = __lsx_vpickve2gr_hu(zcnt, 3); + uint8_t *voutput = reinterpret_cast<uint8_t*>(output); + // There should be a critical value which processes in scaler is faster. + if (zcnt1) + this->chunks[0].compress(mask1, reinterpret_cast<T*>(voutput)); + voutput += zcnt1; + if (zcnt2) + this->chunks[1].compress(mask2, reinterpret_cast<T*>(voutput)); + voutput += zcnt2; + if (zcnt3) + this->chunks[2].compress(mask3, reinterpret_cast<T*>(voutput)); + voutput += zcnt3; + if (zcnt4) + this->chunks[3].compress(mask4, reinterpret_cast<T*>(voutput)); + voutput += zcnt4; + return reinterpret_cast<uint64_t>(voutput) - reinterpret_cast<uint64_t>(output); + } + + simdjson_inline void store(T ptr[64]) const { + this->chunks[0].store(ptr+sizeof(simd8<T>)*0); + this->chunks[1].store(ptr+sizeof(simd8<T>)*1); + this->chunks[2].store(ptr+sizeof(simd8<T>)*2); + this->chunks[3].store(ptr+sizeof(simd8<T>)*3); + } + + simdjson_inline uint64_t to_bitmask() const { + __m128i mask1 = __lsx_vmskltz_b(this->chunks[0]); + __m128i mask2 = __lsx_vmskltz_b(this->chunks[1]); + __m128i mask3 = __lsx_vmskltz_b(this->chunks[2]); + __m128i mask4 = __lsx_vmskltz_b(this->chunks[3]); + mask1 = __lsx_vilvl_h(mask2, mask1); + mask2 = __lsx_vilvl_h(mask4, mask3); + return __lsx_vpickve2gr_du(__lsx_vilvl_w(mask2, mask1), 0); + } + + simdjson_inline simd8<T> reduce_or() const { + return (this->chunks[0] | this->chunks[1]) | (this->chunks[2] | this->chunks[3]); + } + + simdjson_inline uint64_t eq(const T m) const { + const simd8<T> mask = simd8<T>::splat(m); + return simd8x64<bool>( + this->chunks[0] == mask, + this->chunks[1] == mask, + this->chunks[2] == mask, + this->chunks[3] == mask + ).to_bitmask(); + } + + simdjson_inline uint64_t eq(const simd8x64<uint8_t> &other) const { + return simd8x64<bool>( + this->chunks[0] == other.chunks[0], + this->chunks[1] == other.chunks[1], + this->chunks[2] == other.chunks[2], + this->chunks[3] == other.chunks[3] + ).to_bitmask(); + } + + simdjson_inline uint64_t lteq(const T m) const { + const simd8<T> mask = simd8<T>::splat(m); + return simd8x64<bool>( + this->chunks[0] <= mask, + this->chunks[1] <= mask, + this->chunks[2] <= mask, + this->chunks[3] <= mask + ).to_bitmask(); + } + }; // struct simd8x64<T> + +} // namespace simd +} // unnamed namespace +} // namespace lsx +} // namespace simdjson + +#endif // SIMDJSON_LSX_SIMD_H diff --git a/contrib/libs/simdjson/include/simdjson/lsx/stringparsing_defs.h b/contrib/libs/simdjson/include/simdjson/lsx/stringparsing_defs.h new file mode 100644 index 00000000000..af493dc55f6 --- /dev/null +++ b/contrib/libs/simdjson/include/simdjson/lsx/stringparsing_defs.h @@ -0,0 +1,53 @@ +#ifndef SIMDJSON_LSX_STRINGPARSING_DEFS_H +#define SIMDJSON_LSX_STRINGPARSING_DEFS_H + +#ifndef SIMDJSON_CONDITIONAL_INCLUDE +#include "simdjson/lsx/base.h" +#include "simdjson/lsx/simd.h" +#include "simdjson/lsx/bitmanipulation.h" +#endif // SIMDJSON_CONDITIONAL_INCLUDE + +namespace simdjson { +namespace lsx { +namespace { + +using namespace simd; + +// Holds backslashes and quotes locations. +struct backslash_and_quote { +public: + static constexpr uint32_t BYTES_PROCESSED = 32; + simdjson_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); + + simdjson_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; } + simdjson_inline bool has_backslash() { return bs_bits != 0; } + simdjson_inline int quote_index() { return trailing_zeroes(quote_bits); } + simdjson_inline int backslash_index() { return trailing_zeroes(bs_bits); } + + uint32_t bs_bits; + uint32_t quote_bits; +}; // struct backslash_and_quote + +simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { + // this can read up to 31 bytes beyond the buffer size, but we require + // SIMDJSON_PADDING of padding + static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "backslash and quote finder must process fewer than SIMDJSON_PADDING bytes"); + simd8<uint8_t> v0(src); + simd8<uint8_t> v1(src + sizeof(v0)); + v0.store(dst); + v1.store(dst + sizeof(v0)); + + // Getting a 64-bit bitmask is much cheaper than multiple 16-bit bitmasks on LSX; therefore, we + // smash them together into a 64-byte mask and get the bitmask from there. + uint64_t bs_and_quote = simd8x64<bool>(v0 == '\\', v1 == '\\', v0 == '"', v1 == '"').to_bitmask(); + return { + uint32_t(bs_and_quote), // bs_bits + uint32_t(bs_and_quote >> 32) // quote_bits + }; +} + +} // unnamed namespace +} // namespace lsx +} // namespace simdjson + +#endif // SIMDJSON_LSX_STRINGPARSING_DEFS_H diff --git a/contrib/libs/simdjson/include/simdjson/portability.h b/contrib/libs/simdjson/include/simdjson/portability.h new file mode 100644 index 00000000000..ff65aa9deb3 --- /dev/null +++ b/contrib/libs/simdjson/include/simdjson/portability.h @@ -0,0 +1,204 @@ +#ifndef SIMDJSON_PORTABILITY_H +#define SIMDJSON_PORTABILITY_H + +#include <cstddef> +#include <cstdint> +#include <cstdlib> +#include <cfloat> +#include <cassert> +#ifndef _WIN32 +// strcasecmp, strncasecmp +#include <strings.h> +#endif + +// We are using size_t without namespace std:: throughout the project +using std::size_t; + +#ifdef _MSC_VER +#define SIMDJSON_VISUAL_STUDIO 1 +/** + * We want to differentiate carefully between + * clang under visual studio and regular visual + * studio. + * + * Under clang for Windows, we enable: + * * target pragmas so that part and only part of the + * code gets compiled for advanced instructions. + * + */ +#ifdef __clang__ +// clang under visual studio +#define SIMDJSON_CLANG_VISUAL_STUDIO 1 +#else +// just regular visual studio (best guess) +#define SIMDJSON_REGULAR_VISUAL_STUDIO 1 +#endif // __clang__ +#endif // _MSC_VER + +#if (defined(__x86_64__) || defined(_M_AMD64)) && !defined(_M_ARM64EC) +#define SIMDJSON_IS_X86_64 1 +#elif defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC) +#define SIMDJSON_IS_ARM64 1 +#elif defined(__riscv) && __riscv_xlen == 64 +#define SIMDJSON_IS_RISCV64 1 +#elif defined(__loongarch_lp64) +#define SIMDJSON_IS_LOONGARCH64 1 +#elif defined(__PPC64__) || defined(_M_PPC64) +#if defined(__ALTIVEC__) +#define SIMDJSON_IS_PPC64_VMX 1 +#endif // defined(__ALTIVEC__) +#else +#define SIMDJSON_IS_32BITS 1 + +#if defined(_M_IX86) || defined(__i386__) +#define SIMDJSON_IS_X86_32BITS 1 +#elif defined(__arm__) || defined(_M_ARM) +#define SIMDJSON_IS_ARM_32BITS 1 +#elif defined(__PPC__) || defined(_M_PPC) +#define SIMDJSON_IS_PPC_32BITS 1 +#endif + +#endif // defined(__x86_64__) || defined(_M_AMD64) +#ifndef SIMDJSON_IS_32BITS +#define SIMDJSON_IS_32BITS 0 +#endif + +#if SIMDJSON_IS_32BITS +#ifndef SIMDJSON_NO_PORTABILITY_WARNING +// In the future, we should allow programmers +// to get warning. +#endif // SIMDJSON_NO_PORTABILITY_WARNING +#endif // SIMDJSON_IS_32BITS + +#define SIMDJSON_CAT_IMPLEMENTATION_(a,...) a ## __VA_ARGS__ +#define SIMDJSON_CAT(a,...) SIMDJSON_CAT_IMPLEMENTATION_(a, __VA_ARGS__) + +#define SIMDJSON_STRINGIFY_IMPLEMENTATION_(a,...) #a SIMDJSON_STRINGIFY(__VA_ARGS__) +#define SIMDJSON_STRINGIFY(a,...) SIMDJSON_CAT_IMPLEMENTATION_(a, __VA_ARGS__) + +// this is almost standard? +#undef SIMDJSON_STRINGIFY_IMPLEMENTATION_ +#undef SIMDJSON_STRINGIFY +#define SIMDJSON_STRINGIFY_IMPLEMENTATION_(a) #a +#define SIMDJSON_STRINGIFY(a) SIMDJSON_STRINGIFY_IMPLEMENTATION_(a) + +// Our fast kernels require 64-bit systems. +// +// On 32-bit x86, we lack 64-bit popcnt, lzcnt, blsr instructions. +// Furthermore, the number of SIMD registers is reduced. +// +// On 32-bit ARM, we would have smaller registers. +// +// The simdjson users should still have the fallback kernel. It is +// slower, but it should run everywhere. + +// +// Enable valid runtime implementations, and select SIMDJSON_BUILTIN_IMPLEMENTATION +// + +// We are going to use runtime dispatch. +#if SIMDJSON_IS_X86_64 +#ifdef __clang__ +// clang does not have GCC push pop +// warning: clang attribute push can't be used within a namespace in clang up +// til 8.0 so SIMDJSON_TARGET_REGION and SIMDJSON_UNTARGET_REGION must be *outside* of a +// namespace. +#define SIMDJSON_TARGET_REGION(T) \ + _Pragma(SIMDJSON_STRINGIFY( \ + clang attribute push(__attribute__((target(T))), apply_to = function))) +#define SIMDJSON_UNTARGET_REGION _Pragma("clang attribute pop") +#elif defined(__GNUC__) +// GCC is easier +#define SIMDJSON_TARGET_REGION(T) \ + _Pragma("GCC push_options") _Pragma(SIMDJSON_STRINGIFY(GCC target(T))) +#define SIMDJSON_UNTARGET_REGION _Pragma("GCC pop_options") +#endif // clang then gcc + +#endif // x86 + +// Default target region macros don't do anything. +#ifndef SIMDJSON_TARGET_REGION +#define SIMDJSON_TARGET_REGION(T) +#define SIMDJSON_UNTARGET_REGION +#endif + +// Is threading enabled? +#if defined(_REENTRANT) || defined(_MT) +#ifndef SIMDJSON_THREADS_ENABLED +#define SIMDJSON_THREADS_ENABLED +#endif +#endif + +// workaround for large stack sizes under -O0. +// https://github.com/simdjson/simdjson/issues/691 +#ifdef __APPLE__ +#ifndef __OPTIMIZE__ +// Apple systems have small stack sizes in secondary threads. +// Lack of compiler optimization may generate high stack usage. +// Users may want to disable threads for safety, but only when +// in debug mode which we detect by the fact that the __OPTIMIZE__ +// macro is not defined. +#undef SIMDJSON_THREADS_ENABLED +#endif +#endif + + +#if defined(__clang__) +#define SIMDJSON_NO_SANITIZE_UNDEFINED __attribute__((no_sanitize("undefined"))) +#elif defined(__GNUC__) +#define SIMDJSON_NO_SANITIZE_UNDEFINED __attribute__((no_sanitize_undefined)) +#else +#define SIMDJSON_NO_SANITIZE_UNDEFINED +#endif + +#if defined(__clang__) || defined(__GNUC__) +#define simdjson_pure [[gnu::pure]] +#else +#define simdjson_pure +#endif + +#if defined(__clang__) || defined(__GNUC__) +#if defined(__has_feature) +# if __has_feature(memory_sanitizer) +#define SIMDJSON_NO_SANITIZE_MEMORY __attribute__((no_sanitize("memory"))) +# endif // if __has_feature(memory_sanitizer) +#endif // defined(__has_feature) +#endif +// make sure it is defined as 'nothing' if it is unapplicable. +#ifndef SIMDJSON_NO_SANITIZE_MEMORY +#define SIMDJSON_NO_SANITIZE_MEMORY +#endif + +#if SIMDJSON_VISUAL_STUDIO +// This is one case where we do not distinguish between +// regular visual studio and clang under visual studio. +// clang under Windows has _stricmp (like visual studio) but not strcasecmp (as clang normally has) +#define simdjson_strcasecmp _stricmp +#define simdjson_strncasecmp _strnicmp +#else +// The strcasecmp, strncasecmp, and strcasestr functions do not work with multibyte strings (e.g. UTF-8). +// So they are only useful for ASCII in our context. +// https://www.gnu.org/software/libunistring/manual/libunistring.html#char-_002a-strings +#define simdjson_strcasecmp strcasecmp +#define simdjson_strncasecmp strncasecmp +#endif + +#if defined(NDEBUG) || defined(__OPTIMIZE__) || (defined(_MSC_VER) && !defined(_DEBUG)) +// If NDEBUG is set, or __OPTIMIZE__ is set, or we are under MSVC in release mode, +// then do away with asserts and use __assume. +#if SIMDJSON_VISUAL_STUDIO +#define SIMDJSON_UNREACHABLE() __assume(0) +#define SIMDJSON_ASSUME(COND) __assume(COND) +#else +#define SIMDJSON_UNREACHABLE() __builtin_unreachable(); +#define SIMDJSON_ASSUME(COND) do { if (!(COND)) __builtin_unreachable(); } while (0) +#endif + +#else // defined(NDEBUG) || defined(__OPTIMIZE__) || (defined(_MSC_VER) && !defined(_DEBUG)) +// This should only ever be enabled in debug mode. +#define SIMDJSON_UNREACHABLE() assert(0); +#define SIMDJSON_ASSUME(COND) assert(COND) + +#endif + +#endif // SIMDJSON_PORTABILITY_H diff --git a/contrib/libs/simdjson/include/simdjson/westmere.h b/contrib/libs/simdjson/include/simdjson/westmere.h new file mode 100644 index 00000000000..f05ba1145c0 --- /dev/null +++ b/contrib/libs/simdjson/include/simdjson/westmere.h @@ -0,0 +1,8 @@ +#ifndef SIMDJSON_WESTMERE_H +#define SIMDJSON_WESTMERE_H + +#include "simdjson/westmere/begin.h" +#include "simdjson/generic/amalgamated.h" +#include "simdjson/westmere/end.h" + +#endif // SIMDJSON_WESTMERE_H
\ No newline at end of file diff --git a/contrib/libs/simdjson/include/simdjson/westmere/base.h b/contrib/libs/simdjson/include/simdjson/westmere/base.h new file mode 100644 index 00000000000..82ad333a8dc --- /dev/null +++ b/contrib/libs/simdjson/include/simdjson/westmere/base.h @@ -0,0 +1,29 @@ +#ifndef SIMDJSON_WESTMERE_BASE_H +#define SIMDJSON_WESTMERE_BASE_H + +#ifndef SIMDJSON_CONDITIONAL_INCLUDE +#include "simdjson/base.h" +#endif // SIMDJSON_CONDITIONAL_INCLUDE + +// The constructor may be executed on any host, so we take care not to use SIMDJSON_TARGET_WESTMERE +namespace simdjson { +/** + * Implementation for Westmere (Intel SSE4.2). + */ +namespace westmere { + +class implementation; + +namespace { +namespace simd { + +template <typename T> struct simd8; +template <typename T> struct simd8x64; + +} // namespace simd +} // unnamed namespace + +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_WESTMERE_BASE_H diff --git a/contrib/libs/simdjson/include/simdjson/westmere/begin.h b/contrib/libs/simdjson/include/simdjson/westmere/begin.h new file mode 100644 index 00000000000..d807e6d9995 --- /dev/null +++ b/contrib/libs/simdjson/include/simdjson/westmere/begin.h @@ -0,0 +1,13 @@ +#define SIMDJSON_IMPLEMENTATION westmere +#include "simdjson/westmere/base.h" +#include "simdjson/westmere/intrinsics.h" + +#if !SIMDJSON_CAN_ALWAYS_RUN_WESTMERE +SIMDJSON_TARGET_REGION("sse4.2,pclmul,popcnt") +#endif + +#include "simdjson/westmere/bitmanipulation.h" +#include "simdjson/westmere/bitmask.h" +#include "simdjson/westmere/numberparsing_defs.h" +#include "simdjson/westmere/simd.h" +#include "simdjson/westmere/stringparsing_defs.h" diff --git a/contrib/libs/simdjson/include/simdjson/westmere/bitmanipulation.h b/contrib/libs/simdjson/include/simdjson/westmere/bitmanipulation.h new file mode 100644 index 00000000000..7cd29a406a7 --- /dev/null +++ b/contrib/libs/simdjson/include/simdjson/westmere/bitmanipulation.h @@ -0,0 +1,79 @@ +#ifndef SIMDJSON_WESTMERE_BITMANIPULATION_H +#define SIMDJSON_WESTMERE_BITMANIPULATION_H + +#ifndef SIMDJSON_CONDITIONAL_INCLUDE +#include "simdjson/westmere/base.h" +#include "simdjson/westmere/intrinsics.h" +#endif // SIMDJSON_CONDITIONAL_INCLUDE + +namespace simdjson { +namespace westmere { +namespace { + +// We sometimes call trailing_zero on inputs that are zero, +// but the algorithms do not end up using the returned value. +// Sadly, sanitizers are not smart enough to figure it out. +SIMDJSON_NO_SANITIZE_UNDEFINED +// This function can be used safely even if not all bytes have been +// initialized. +// See issue https://github.com/simdjson/simdjson/issues/1965 +SIMDJSON_NO_SANITIZE_MEMORY +simdjson_inline int trailing_zeroes(uint64_t input_num) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + unsigned long ret; + // Search the mask data from least significant bit (LSB) + // to the most significant bit (MSB) for a set bit (1). + _BitScanForward64(&ret, input_num); + return (int)ret; +#else // SIMDJSON_REGULAR_VISUAL_STUDIO + return __builtin_ctzll(input_num); +#endif // SIMDJSON_REGULAR_VISUAL_STUDIO +} + +/* result might be undefined when input_num is zero */ +simdjson_inline uint64_t clear_lowest_bit(uint64_t input_num) { + return input_num & (input_num-1); +} + +/* result might be undefined when input_num is zero */ +simdjson_inline int leading_zeroes(uint64_t input_num) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + unsigned long leading_zero = 0; + // Search the mask data from most significant bit (MSB) + // to least significant bit (LSB) for a set bit (1). + if (_BitScanReverse64(&leading_zero, input_num)) + return (int)(63 - leading_zero); + else + return 64; +#else + return __builtin_clzll(input_num); +#endif// SIMDJSON_REGULAR_VISUAL_STUDIO +} + +#if SIMDJSON_REGULAR_VISUAL_STUDIO +simdjson_inline unsigned __int64 count_ones(uint64_t input_num) { + // note: we do not support legacy 32-bit Windows in this kernel + return __popcnt64(input_num);// Visual Studio wants two underscores +} +#else +simdjson_inline long long int count_ones(uint64_t input_num) { + return _popcnt64(input_num); +} +#endif + +simdjson_inline bool add_overflow(uint64_t value1, uint64_t value2, + uint64_t *result) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + return _addcarry_u64(0, value1, value2, + reinterpret_cast<unsigned __int64 *>(result)); +#else + return __builtin_uaddll_overflow(value1, value2, + reinterpret_cast<unsigned long long *>(result)); +#endif +} + +} // unnamed namespace +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_WESTMERE_BITMANIPULATION_H diff --git a/contrib/libs/simdjson/include/simdjson/westmere/bitmask.h b/contrib/libs/simdjson/include/simdjson/westmere/bitmask.h new file mode 100644 index 00000000000..cd79b724119 --- /dev/null +++ b/contrib/libs/simdjson/include/simdjson/westmere/bitmask.h @@ -0,0 +1,30 @@ +#ifndef SIMDJSON_WESTMERE_BITMASK_H +#define SIMDJSON_WESTMERE_BITMASK_H + +#ifndef SIMDJSON_CONDITIONAL_INCLUDE +#include "simdjson/westmere/base.h" +#include "simdjson/westmere/intrinsics.h" +#endif // SIMDJSON_CONDITIONAL_INCLUDE + +namespace simdjson { +namespace westmere { +namespace { + +// +// Perform a "cumulative bitwise xor," flipping bits each time a 1 is encountered. +// +// For example, prefix_xor(00100100) == 00011100 +// +simdjson_inline uint64_t prefix_xor(const uint64_t bitmask) { + // There should be no such thing with a processing supporting avx2 + // but not clmul. + __m128i all_ones = _mm_set1_epi8('\xFF'); + __m128i result = _mm_clmulepi64_si128(_mm_set_epi64x(0ULL, bitmask), all_ones, 0); + return _mm_cvtsi128_si64(result); +} + +} // unnamed namespace +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_WESTMERE_BITMASK_H diff --git a/contrib/libs/simdjson/include/simdjson/westmere/end.h b/contrib/libs/simdjson/include/simdjson/westmere/end.h new file mode 100644 index 00000000000..bd4a9404903 --- /dev/null +++ b/contrib/libs/simdjson/include/simdjson/westmere/end.h @@ -0,0 +1,9 @@ +#ifndef SIMDJSON_CONDITIONAL_INCLUDE +#include "simdjson/westmere/base.h" +#endif // SIMDJSON_CONDITIONAL_INCLUDE + +#if !SIMDJSON_CAN_ALWAYS_RUN_WESTMERE +SIMDJSON_UNTARGET_REGION +#endif + +#undef SIMDJSON_IMPLEMENTATION diff --git a/contrib/libs/simdjson/include/simdjson/westmere/implementation.h b/contrib/libs/simdjson/include/simdjson/westmere/implementation.h new file mode 100644 index 00000000000..37392be2ae9 --- /dev/null +++ b/contrib/libs/simdjson/include/simdjson/westmere/implementation.h @@ -0,0 +1,32 @@ +#ifndef SIMDJSON_WESTMERE_IMPLEMENTATION_H +#define SIMDJSON_WESTMERE_IMPLEMENTATION_H + +#ifndef SIMDJSON_CONDITIONAL_INCLUDE +#include "simdjson/westmere/base.h" +#include "simdjson/implementation.h" +#include "simdjson/internal/instruction_set.h" +#endif // SIMDJSON_CONDITIONAL_INCLUDE + +// The constructor may be executed on any host, so we take care not to use SIMDJSON_TARGET_WESTMERE +namespace simdjson { +namespace westmere { + +/** + * @private + */ +class implementation final : public simdjson::implementation { +public: + simdjson_inline implementation() : simdjson::implementation("westmere", "Intel/AMD SSE4.2", internal::instruction_set::SSE42 | internal::instruction_set::PCLMULQDQ) {} + simdjson_warn_unused error_code create_dom_parser_implementation( + size_t capacity, + size_t max_length, + std::unique_ptr<internal::dom_parser_implementation>& dst + ) const noexcept final; + simdjson_warn_unused error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept final; + simdjson_warn_unused bool validate_utf8(const char *buf, size_t len) const noexcept final; +}; + +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_WESTMERE_IMPLEMENTATION_H diff --git a/contrib/libs/simdjson/include/simdjson/westmere/intrinsics.h b/contrib/libs/simdjson/include/simdjson/westmere/intrinsics.h new file mode 100644 index 00000000000..63a351c8090 --- /dev/null +++ b/contrib/libs/simdjson/include/simdjson/westmere/intrinsics.h @@ -0,0 +1,31 @@ +#ifndef SIMDJSON_WESTMERE_INTRINSICS_H +#define SIMDJSON_WESTMERE_INTRINSICS_H + +#ifndef SIMDJSON_CONDITIONAL_INCLUDE +#include "simdjson/westmere/base.h" +#endif // SIMDJSON_CONDITIONAL_INCLUDE + +#if SIMDJSON_VISUAL_STUDIO +// under clang within visual studio, this will include <x86intrin.h> +#include <intrin.h> // visual studio or clang +#else +#include <x86intrin.h> // elsewhere +#endif // SIMDJSON_VISUAL_STUDIO + + +#if SIMDJSON_CLANG_VISUAL_STUDIO +/** + * You are not supposed, normally, to include these + * headers directly. Instead you should either include intrin.h + * or x86intrin.h. However, when compiling with clang + * under Windows (i.e., when _MSC_VER is set), these headers + * only get included *if* the corresponding features are detected + * from macros: + */ +#include <smmintrin.h> // for _mm_alignr_epi8 +#include <wmmintrin.h> // for _mm_clmulepi64_si128 +#endif + +static_assert(sizeof(__m128i) <= simdjson::SIMDJSON_PADDING, "insufficient padding for westmere"); + +#endif // SIMDJSON_WESTMERE_INTRINSICS_H diff --git a/contrib/libs/simdjson/include/simdjson/westmere/numberparsing_defs.h b/contrib/libs/simdjson/include/simdjson/westmere/numberparsing_defs.h new file mode 100644 index 00000000000..05cfccfd10d --- /dev/null +++ b/contrib/libs/simdjson/include/simdjson/westmere/numberparsing_defs.h @@ -0,0 +1,59 @@ +#ifndef SIMDJSON_WESTMERE_NUMBERPARSING_DEFS_H +#define SIMDJSON_WESTMERE_NUMBERPARSING_DEFS_H + +#include "simdjson/westmere/base.h" +#include "simdjson/westmere/intrinsics.h" + +#ifndef SIMDJSON_CONDITIONAL_INCLUDE +#include "simdjson/internal/numberparsing_tables.h" +#endif // SIMDJSON_CONDITIONAL_INCLUDE + +namespace simdjson { +namespace westmere { +namespace numberparsing { + +/** @private */ +static simdjson_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) { + // this actually computes *16* values so we are being wasteful. + const __m128i ascii0 = _mm_set1_epi8('0'); + const __m128i mul_1_10 = + _mm_setr_epi8(10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1); + const __m128i mul_1_100 = _mm_setr_epi16(100, 1, 100, 1, 100, 1, 100, 1); + const __m128i mul_1_10000 = + _mm_setr_epi16(10000, 1, 10000, 1, 10000, 1, 10000, 1); + const __m128i input = _mm_sub_epi8( + _mm_loadu_si128(reinterpret_cast<const __m128i *>(chars)), ascii0); + const __m128i t1 = _mm_maddubs_epi16(input, mul_1_10); + const __m128i t2 = _mm_madd_epi16(t1, mul_1_100); + const __m128i t3 = _mm_packus_epi32(t2, t2); + const __m128i t4 = _mm_madd_epi16(t3, mul_1_10000); + return _mm_cvtsi128_si32( + t4); // only captures the sum of the first 8 digits, drop the rest +} + +/** @private */ +simdjson_inline internal::value128 full_multiplication(uint64_t value1, uint64_t value2) { + internal::value128 answer; +#if SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS +#if SIMDJSON_IS_ARM64 + // ARM64 has native support for 64-bit multiplications, no need to emultate + answer.high = __umulh(value1, value2); + answer.low = value1 * value2; +#else + answer.low = _umul128(value1, value2, &answer.high); // _umul128 not available on ARM64 +#endif // SIMDJSON_IS_ARM64 +#else // SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS + __uint128_t r = (static_cast<__uint128_t>(value1)) * value2; + answer.low = uint64_t(r); + answer.high = uint64_t(r >> 64); +#endif + return answer; +} + +} // namespace numberparsing +} // namespace westmere +} // namespace simdjson + +#define SIMDJSON_SWAR_NUMBER_PARSING 1 + +#endif // SIMDJSON_WESTMERE_NUMBERPARSING_DEFS_H diff --git a/contrib/libs/simdjson/include/simdjson/westmere/simd.h b/contrib/libs/simdjson/include/simdjson/westmere/simd.h new file mode 100644 index 00000000000..28329d9642e --- /dev/null +++ b/contrib/libs/simdjson/include/simdjson/westmere/simd.h @@ -0,0 +1,338 @@ +#ifndef SIMDJSON_WESTMERE_SIMD_H +#define SIMDJSON_WESTMERE_SIMD_H + +#ifndef SIMDJSON_CONDITIONAL_INCLUDE +#include "simdjson/westmere/base.h" +#include "simdjson/westmere/bitmanipulation.h" +#include "simdjson/internal/simdprune_tables.h" +#endif // SIMDJSON_CONDITIONAL_INCLUDE + +namespace simdjson { +namespace westmere { +namespace { +namespace simd { + + template<typename Child> + struct base { + __m128i value; + + // Zero constructor + simdjson_inline base() : value{__m128i()} {} + + // Conversion from SIMD register + simdjson_inline base(const __m128i _value) : value(_value) {} + + // Conversion to SIMD register + simdjson_inline operator const __m128i&() const { return this->value; } + simdjson_inline operator __m128i&() { return this->value; } + + // Bit operations + simdjson_inline Child operator|(const Child other) const { return _mm_or_si128(*this, other); } + simdjson_inline Child operator&(const Child other) const { return _mm_and_si128(*this, other); } + simdjson_inline Child operator^(const Child other) const { return _mm_xor_si128(*this, other); } + simdjson_inline Child bit_andnot(const Child other) const { return _mm_andnot_si128(other, *this); } + simdjson_inline Child& operator|=(const Child other) { auto this_cast = static_cast<Child*>(this); *this_cast = *this_cast | other; return *this_cast; } + simdjson_inline Child& operator&=(const Child other) { auto this_cast = static_cast<Child*>(this); *this_cast = *this_cast & other; return *this_cast; } + simdjson_inline Child& operator^=(const Child other) { auto this_cast = static_cast<Child*>(this); *this_cast = *this_cast ^ other; return *this_cast; } + }; + + template<typename T, typename Mask=simd8<bool>> + struct base8: base<simd8<T>> { + typedef uint16_t bitmask_t; + typedef uint32_t bitmask2_t; + + simdjson_inline base8() : base<simd8<T>>() {} + simdjson_inline base8(const __m128i _value) : base<simd8<T>>(_value) {} + + friend simdjson_inline Mask operator==(const simd8<T> lhs, const simd8<T> rhs) { return _mm_cmpeq_epi8(lhs, rhs); } + + static const int SIZE = sizeof(base<simd8<T>>::value); + + template<int N=1> + simdjson_inline simd8<T> prev(const simd8<T> prev_chunk) const { + return _mm_alignr_epi8(*this, prev_chunk, 16 - N); + } + }; + + // SIMD byte mask type (returned by things like eq and gt) + template<> + struct simd8<bool>: base8<bool> { + static simdjson_inline simd8<bool> splat(bool _value) { return _mm_set1_epi8(uint8_t(-(!!_value))); } + + simdjson_inline simd8() : base8() {} + simdjson_inline simd8(const __m128i _value) : base8<bool>(_value) {} + // Splat constructor + simdjson_inline simd8(bool _value) : base8<bool>(splat(_value)) {} + + simdjson_inline int to_bitmask() const { return _mm_movemask_epi8(*this); } + simdjson_inline bool any() const { return !_mm_testz_si128(*this, *this); } + simdjson_inline simd8<bool> operator~() const { return *this ^ true; } + }; + + template<typename T> + struct base8_numeric: base8<T> { + static simdjson_inline simd8<T> splat(T _value) { return _mm_set1_epi8(_value); } + static simdjson_inline simd8<T> zero() { return _mm_setzero_si128(); } + static simdjson_inline simd8<T> load(const T values[16]) { + return _mm_loadu_si128(reinterpret_cast<const __m128i *>(values)); + } + // Repeat 16 values as many times as necessary (usually for lookup tables) + static simdjson_inline simd8<T> repeat_16( + T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, + T v8, T v9, T v10, T v11, T v12, T v13, T v14, T v15 + ) { + return simd8<T>( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + simdjson_inline base8_numeric() : base8<T>() {} + simdjson_inline base8_numeric(const __m128i _value) : base8<T>(_value) {} + + // Store to array + simdjson_inline void store(T dst[16]) const { return _mm_storeu_si128(reinterpret_cast<__m128i *>(dst), *this); } + + // Override to distinguish from bool version + simdjson_inline simd8<T> operator~() const { return *this ^ 0xFFu; } + + // Addition/subtraction are the same for signed and unsigned + simdjson_inline simd8<T> operator+(const simd8<T> other) const { return _mm_add_epi8(*this, other); } + simdjson_inline simd8<T> operator-(const simd8<T> other) const { return _mm_sub_epi8(*this, other); } + simdjson_inline simd8<T>& operator+=(const simd8<T> other) { *this = *this + other; return *static_cast<simd8<T>*>(this); } + simdjson_inline simd8<T>& operator-=(const simd8<T> other) { *this = *this - other; return *static_cast<simd8<T>*>(this); } + + // Perform a lookup assuming the value is between 0 and 16 (undefined behavior for out of range values) + template<typename L> + simdjson_inline simd8<L> lookup_16(simd8<L> lookup_table) const { + return _mm_shuffle_epi8(lookup_table, *this); + } + + // Copies to 'output" all bytes corresponding to a 0 in the mask (interpreted as a bitset). + // Passing a 0 value for mask would be equivalent to writing out every byte to output. + // Only the first 16 - count_ones(mask) bytes of the result are significant but 16 bytes + // get written. + // Design consideration: it seems like a function with the + // signature simd8<L> compress(uint32_t mask) would be + // sensible, but the AVX ISA makes this kind of approach difficult. + template<typename L> + simdjson_inline void compress(uint16_t mask, L * output) const { + using internal::thintable_epi8; + using internal::BitsSetTable256mul2; + using internal::pshufb_combine_table; + // this particular implementation was inspired by work done by @animetosho + // we do it in two steps, first 8 bytes and then second 8 bytes + uint8_t mask1 = uint8_t(mask); // least significant 8 bits + uint8_t mask2 = uint8_t(mask >> 8); // most significant 8 bits + // next line just loads the 64-bit values thintable_epi8[mask1] and + // thintable_epi8[mask2] into a 128-bit register, using only + // two instructions on most compilers. + __m128i shufmask = _mm_set_epi64x(thintable_epi8[mask2], thintable_epi8[mask1]); + // we increment by 0x08 the second half of the mask + shufmask = + _mm_add_epi8(shufmask, _mm_set_epi32(0x08080808, 0x08080808, 0, 0)); + // this is the version "nearly pruned" + __m128i pruned = _mm_shuffle_epi8(*this, shufmask); + // we still need to put the two halves together. + // we compute the popcount of the first half: + int pop1 = BitsSetTable256mul2[mask1]; + // then load the corresponding mask, what it does is to write + // only the first pop1 bytes from the first 8 bytes, and then + // it fills in with the bytes from the second 8 bytes + some filling + // at the end. + __m128i compactmask = + _mm_loadu_si128(reinterpret_cast<const __m128i *>(pshufb_combine_table + pop1 * 8)); + __m128i answer = _mm_shuffle_epi8(pruned, compactmask); + _mm_storeu_si128(reinterpret_cast<__m128i *>(output), answer); + } + + template<typename L> + simdjson_inline simd8<L> lookup_16( + L replace0, L replace1, L replace2, L replace3, + L replace4, L replace5, L replace6, L replace7, + L replace8, L replace9, L replace10, L replace11, + L replace12, L replace13, L replace14, L replace15) const { + return lookup_16(simd8<L>::repeat_16( + replace0, replace1, replace2, replace3, + replace4, replace5, replace6, replace7, + replace8, replace9, replace10, replace11, + replace12, replace13, replace14, replace15 + )); + } + }; + + // Signed bytes + template<> + struct simd8<int8_t> : base8_numeric<int8_t> { + simdjson_inline simd8() : base8_numeric<int8_t>() {} + simdjson_inline simd8(const __m128i _value) : base8_numeric<int8_t>(_value) {} + // Splat constructor + simdjson_inline simd8(int8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const int8_t* values) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 + ) : simd8(_mm_setr_epi8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + )) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8<int8_t> repeat_16( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 + ) { + return simd8<int8_t>( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + // Order-sensitive comparisons + simdjson_inline simd8<int8_t> max_val(const simd8<int8_t> other) const { return _mm_max_epi8(*this, other); } + simdjson_inline simd8<int8_t> min_val(const simd8<int8_t> other) const { return _mm_min_epi8(*this, other); } + simdjson_inline simd8<bool> operator>(const simd8<int8_t> other) const { return _mm_cmpgt_epi8(*this, other); } + simdjson_inline simd8<bool> operator<(const simd8<int8_t> other) const { return _mm_cmpgt_epi8(other, *this); } + }; + + // Unsigned bytes + template<> + struct simd8<uint8_t>: base8_numeric<uint8_t> { + simdjson_inline simd8() : base8_numeric<uint8_t>() {} + simdjson_inline simd8(const __m128i _value) : base8_numeric<uint8_t>(_value) {} + // Splat constructor + simdjson_inline simd8(uint8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const uint8_t* values) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 + ) : simd8(_mm_setr_epi8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + )) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8<uint8_t> repeat_16( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 + ) { + return simd8<uint8_t>( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + // Saturated math + simdjson_inline simd8<uint8_t> saturating_add(const simd8<uint8_t> other) const { return _mm_adds_epu8(*this, other); } + simdjson_inline simd8<uint8_t> saturating_sub(const simd8<uint8_t> other) const { return _mm_subs_epu8(*this, other); } + + // Order-specific operations + simdjson_inline simd8<uint8_t> max_val(const simd8<uint8_t> other) const { return _mm_max_epu8(*this, other); } + simdjson_inline simd8<uint8_t> min_val(const simd8<uint8_t> other) const { return _mm_min_epu8(*this, other); } + // Same as >, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8<uint8_t> gt_bits(const simd8<uint8_t> other) const { return this->saturating_sub(other); } + // Same as <, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8<uint8_t> lt_bits(const simd8<uint8_t> other) const { return other.saturating_sub(*this); } + simdjson_inline simd8<bool> operator<=(const simd8<uint8_t> other) const { return other.max_val(*this) == other; } + simdjson_inline simd8<bool> operator>=(const simd8<uint8_t> other) const { return other.min_val(*this) == other; } + simdjson_inline simd8<bool> operator>(const simd8<uint8_t> other) const { return this->gt_bits(other).any_bits_set(); } + simdjson_inline simd8<bool> operator<(const simd8<uint8_t> other) const { return this->gt_bits(other).any_bits_set(); } + + // Bit-specific operations + simdjson_inline simd8<bool> bits_not_set() const { return *this == uint8_t(0); } + simdjson_inline simd8<bool> bits_not_set(simd8<uint8_t> bits) const { return (*this & bits).bits_not_set(); } + simdjson_inline simd8<bool> any_bits_set() const { return ~this->bits_not_set(); } + simdjson_inline simd8<bool> any_bits_set(simd8<uint8_t> bits) const { return ~this->bits_not_set(bits); } + simdjson_inline bool is_ascii() const { return _mm_movemask_epi8(*this) == 0; } + simdjson_inline bool bits_not_set_anywhere() const { return _mm_testz_si128(*this, *this); } + simdjson_inline bool any_bits_set_anywhere() const { return !bits_not_set_anywhere(); } + simdjson_inline bool bits_not_set_anywhere(simd8<uint8_t> bits) const { return _mm_testz_si128(*this, bits); } + simdjson_inline bool any_bits_set_anywhere(simd8<uint8_t> bits) const { return !bits_not_set_anywhere(bits); } + template<int N> + simdjson_inline simd8<uint8_t> shr() const { return simd8<uint8_t>(_mm_srli_epi16(*this, N)) & uint8_t(0xFFu >> N); } + template<int N> + simdjson_inline simd8<uint8_t> shl() const { return simd8<uint8_t>(_mm_slli_epi16(*this, N)) & uint8_t(0xFFu << N); } + // Get one of the bits and make a bitmask out of it. + // e.g. value.get_bit<7>() gets the high bit + template<int N> + simdjson_inline int get_bit() const { return _mm_movemask_epi8(_mm_slli_epi16(*this, 7-N)); } + }; + + template<typename T> + struct simd8x64 { + static constexpr int NUM_CHUNKS = 64 / sizeof(simd8<T>); + static_assert(NUM_CHUNKS == 4, "Westmere kernel should use four registers per 64-byte block."); + const simd8<T> chunks[NUM_CHUNKS]; + + simd8x64(const simd8x64<T>& o) = delete; // no copy allowed + simd8x64<T>& operator=(const simd8<T>& other) = delete; // no assignment allowed + simd8x64() = delete; // no default constructor allowed + + simdjson_inline simd8x64(const simd8<T> chunk0, const simd8<T> chunk1, const simd8<T> chunk2, const simd8<T> chunk3) : chunks{chunk0, chunk1, chunk2, chunk3} {} + simdjson_inline simd8x64(const T ptr[64]) : chunks{simd8<T>::load(ptr), simd8<T>::load(ptr+16), simd8<T>::load(ptr+32), simd8<T>::load(ptr+48)} {} + + simdjson_inline void store(T ptr[64]) const { + this->chunks[0].store(ptr+sizeof(simd8<T>)*0); + this->chunks[1].store(ptr+sizeof(simd8<T>)*1); + this->chunks[2].store(ptr+sizeof(simd8<T>)*2); + this->chunks[3].store(ptr+sizeof(simd8<T>)*3); + } + + simdjson_inline simd8<T> reduce_or() const { + return (this->chunks[0] | this->chunks[1]) | (this->chunks[2] | this->chunks[3]); + } + + simdjson_inline uint64_t compress(uint64_t mask, T * output) const { + this->chunks[0].compress(uint16_t(mask), output); + this->chunks[1].compress(uint16_t(mask >> 16), output + 16 - count_ones(mask & 0xFFFF)); + this->chunks[2].compress(uint16_t(mask >> 32), output + 32 - count_ones(mask & 0xFFFFFFFF)); + this->chunks[3].compress(uint16_t(mask >> 48), output + 48 - count_ones(mask & 0xFFFFFFFFFFFF)); + return 64 - count_ones(mask); + } + + simdjson_inline uint64_t to_bitmask() const { + uint64_t r0 = uint32_t(this->chunks[0].to_bitmask() ); + uint64_t r1 = this->chunks[1].to_bitmask() ; + uint64_t r2 = this->chunks[2].to_bitmask() ; + uint64_t r3 = this->chunks[3].to_bitmask() ; + return r0 | (r1 << 16) | (r2 << 32) | (r3 << 48); + } + + simdjson_inline uint64_t eq(const T m) const { + const simd8<T> mask = simd8<T>::splat(m); + return simd8x64<bool>( + this->chunks[0] == mask, + this->chunks[1] == mask, + this->chunks[2] == mask, + this->chunks[3] == mask + ).to_bitmask(); + } + + simdjson_inline uint64_t eq(const simd8x64<uint8_t> &other) const { + return simd8x64<bool>( + this->chunks[0] == other.chunks[0], + this->chunks[1] == other.chunks[1], + this->chunks[2] == other.chunks[2], + this->chunks[3] == other.chunks[3] + ).to_bitmask(); + } + + simdjson_inline uint64_t lteq(const T m) const { + const simd8<T> mask = simd8<T>::splat(m); + return simd8x64<bool>( + this->chunks[0] <= mask, + this->chunks[1] <= mask, + this->chunks[2] <= mask, + this->chunks[3] <= mask + ).to_bitmask(); + } + }; // struct simd8x64<T> + +} // namespace simd +} // unnamed namespace +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_WESTMERE_SIMD_INPUT_H diff --git a/contrib/libs/simdjson/include/simdjson/westmere/stringparsing_defs.h b/contrib/libs/simdjson/include/simdjson/westmere/stringparsing_defs.h new file mode 100644 index 00000000000..439f19cbc73 --- /dev/null +++ b/contrib/libs/simdjson/include/simdjson/westmere/stringparsing_defs.h @@ -0,0 +1,47 @@ +#ifndef SIMDJSON_WESTMERE_STRINGPARSING_DEFS_H +#define SIMDJSON_WESTMERE_STRINGPARSING_DEFS_H + +#include "simdjson/westmere/bitmanipulation.h" +#include "simdjson/westmere/simd.h" + +namespace simdjson { +namespace westmere { +namespace { + +using namespace simd; + +// Holds backslashes and quotes locations. +struct backslash_and_quote { +public: + static constexpr uint32_t BYTES_PROCESSED = 32; + simdjson_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); + + simdjson_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; } + simdjson_inline bool has_backslash() { return bs_bits != 0; } + simdjson_inline int quote_index() { return trailing_zeroes(quote_bits); } + simdjson_inline int backslash_index() { return trailing_zeroes(bs_bits); } + + uint32_t bs_bits; + uint32_t quote_bits; +}; // struct backslash_and_quote + +simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { + // this can read up to 31 bytes beyond the buffer size, but we require + // SIMDJSON_PADDING of padding + static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "backslash and quote finder must process fewer than SIMDJSON_PADDING bytes"); + simd8<uint8_t> v0(src); + simd8<uint8_t> v1(src + 16); + v0.store(dst); + v1.store(dst + 16); + uint64_t bs_and_quote = simd8x64<bool>(v0 == '\\', v1 == '\\', v0 == '"', v1 == '"').to_bitmask(); + return { + uint32_t(bs_and_quote), // bs_bits + uint32_t(bs_and_quote >> 32) // quote_bits + }; +} + +} // unnamed namespace +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_WESTMERE_STRINGPARSING_DEFS_H |
