diff options
author | robot-contrib <robot-contrib@yandex-team.com> | 2023-06-24 08:00:24 +0300 |
---|---|---|
committer | robot-contrib <robot-contrib@yandex-team.com> | 2023-06-24 08:00:24 +0300 |
commit | 967e2cdf8e721697dad301820fd5959bba2755e5 (patch) | |
tree | 107abca6db6dc6d942ec697bdf68b87ceff8d560 | |
parent | c203a40b3ad69fa879519c38e45c641cb2eaee84 (diff) | |
download | ydb-967e2cdf8e721697dad301820fd5959bba2755e5.tar.gz |
Update contrib/restricted/fast_float to 5.1.0
6 files changed, 176 insertions, 63 deletions
diff --git a/contrib/restricted/fast_float/README.md b/contrib/restricted/fast_float/README.md index 4f3eb22d76..8dffa06a96 100644 --- a/contrib/restricted/fast_float/README.md +++ b/contrib/restricted/fast_float/README.md @@ -186,7 +186,7 @@ The fast_float library provides a performance similar to that of the [fast_doubl ## References - Daniel Lemire, [Number Parsing at a Gigabyte per Second](https://arxiv.org/abs/2101.11408), Software: Practice and Experience 51 (8), 2021. -- Noble Mushtak, Daniel Lemire, [Fast Number Parsing Without Fallback](https://arxiv.org/abs/2212.06644), Software: Practice and Experience (to appear) +- Noble Mushtak, Daniel Lemire, [Fast Number Parsing Without Fallback](https://arxiv.org/abs/2212.06644), Software: Practice and Experience 53 (7), 2023. ## Other programming languages diff --git a/contrib/restricted/fast_float/include/fast_float/ascii_number.h b/contrib/restricted/fast_float/include/fast_float/ascii_number.h index d506326ec9..481b91df76 100644 --- a/contrib/restricted/fast_float/include/fast_float/ascii_number.h +++ b/contrib/restricted/fast_float/include/fast_float/ascii_number.h @@ -5,11 +5,26 @@ #include <cstdint> #include <cstring> #include <iterator> +#include <type_traits> #include "float_common.h" +#ifdef FASTFLOAT_SSE2 +#include <emmintrin.h> +#endif + + namespace fast_float { +template <typename UC> +fastfloat_really_inline constexpr bool has_simd_opt() { +#ifdef FASTFLOAT_HAS_SIMD + return std::is_same<UC, char16_t>::value; +#else + return false; +#endif +} + // Next function can be micro-optimized, but compilers are entirely // able to optimize it well. template <typename UC> @@ -28,12 +43,14 @@ fastfloat_really_inline constexpr uint64_t byteswap(uint64_t val) { | (val & 0x00000000000000FF) << 56; } +// Read 8 UC into a u64. Truncates UC if not char. +template <typename UC> fastfloat_really_inline FASTFLOAT_CONSTEXPR20 -uint64_t read_u64(const char *chars) { - if (cpp20_and_in_constexpr()) { +uint64_t read8_to_u64(const UC *chars) { + if (cpp20_and_in_constexpr() || !std::is_same<UC, char>::value) { uint64_t val = 0; for(int i = 0; i < 8; ++i) { - val |= uint64_t(*chars) << (i*8); + val |= uint64_t(uint8_t(*chars)) << (i*8); ++chars; } return val; @@ -47,6 +64,39 @@ uint64_t read_u64(const char *chars) { return val; } +#ifdef FASTFLOAT_SSE2 + +fastfloat_really_inline +uint64_t simd_read8_to_u64(const __m128i data) { +FASTFLOAT_SIMD_DISABLE_WARNINGS + const __m128i packed = _mm_packus_epi16(data, data); +#ifdef FASTFLOAT_64BIT + return uint64_t(_mm_cvtsi128_si64(packed)); +#else + uint64_t value; + // Visual Studio + older versions of GCC don't support _mm_storeu_si64 + _mm_storel_epi64(reinterpret_cast<__m128i*>(&value), packed); + return value; +#endif +FASTFLOAT_SIMD_RESTORE_WARNINGS +} + +fastfloat_really_inline +uint64_t simd_read8_to_u64(const char16_t* chars) { +FASTFLOAT_SIMD_DISABLE_WARNINGS + return simd_read8_to_u64(_mm_loadu_si128(reinterpret_cast<const __m128i*>(chars))); +FASTFLOAT_SIMD_RESTORE_WARNINGS +} + +#endif + +// dummy for compile +template <typename UC, FASTFLOAT_ENABLE_IF(!has_simd_opt<UC>())> +uint64_t simd_read8_to_u64(UC const*) { + return 0; +} + + fastfloat_really_inline FASTFLOAT_CONSTEXPR20 void write_u64(uint8_t *chars, uint64_t val) { if (cpp20_and_in_constexpr()) { @@ -76,40 +126,80 @@ uint32_t parse_eight_digits_unrolled(uint64_t val) { return uint32_t(val); } -fastfloat_really_inline constexpr -uint32_t parse_eight_digits_unrolled(const char16_t *) noexcept { - return 0; -} - -fastfloat_really_inline constexpr -uint32_t parse_eight_digits_unrolled(const char32_t *) noexcept { - return 0; -} +// Call this if chars are definitely 8 digits. +template <typename UC> fastfloat_really_inline FASTFLOAT_CONSTEXPR20 -uint32_t parse_eight_digits_unrolled(const char *chars) noexcept { - return parse_eight_digits_unrolled(read_u64(chars)); +uint32_t parse_eight_digits_unrolled(UC const * chars) noexcept { + if (cpp20_and_in_constexpr() || !has_simd_opt<UC>()) { + return parse_eight_digits_unrolled(read8_to_u64(chars)); // truncation okay + } + return parse_eight_digits_unrolled(simd_read8_to_u64(chars)); } + // credit @aqrit -fastfloat_really_inline constexpr bool is_made_of_eight_digits_fast(uint64_t val) noexcept { +fastfloat_really_inline constexpr bool is_made_of_eight_digits_fast(uint64_t val) noexcept { return !((((val + 0x4646464646464646) | (val - 0x3030303030303030)) & 0x8080808080808080)); } -fastfloat_really_inline constexpr -bool is_made_of_eight_digits_fast(const char16_t *) noexcept { - return false; + +#ifdef FASTFLOAT_HAS_SIMD + +// Call this if chars might not be 8 digits. +// Using this style (instead of is_made_of_eight_digits_fast() then parse_eight_digits_unrolled()) +// ensures we don't load SIMD registers twice. +fastfloat_really_inline FASTFLOAT_CONSTEXPR20 +bool simd_parse_if_eight_digits_unrolled(const char16_t* chars, uint64_t& i) noexcept { + if (cpp20_and_in_constexpr()) { + return false; + } +#ifdef FASTFLOAT_SSE2 +FASTFLOAT_SIMD_DISABLE_WARNINGS + const __m128i data = _mm_loadu_si128(reinterpret_cast<const __m128i*>(chars)); + + // (x - '0') <= 9 + // http://0x80.pl/articles/simd-parsing-int-sequences.html + const __m128i t0 = _mm_add_epi16(data, _mm_set1_epi16(32720)); + const __m128i t1 = _mm_cmpgt_epi16(t0, _mm_set1_epi16(-32759)); + + if (_mm_movemask_epi8(t1) == 0) { + i = i * 100000000 + parse_eight_digits_unrolled(simd_read8_to_u64(data)); + return true; + } + else return false; +FASTFLOAT_SIMD_RESTORE_WARNINGS +#endif } -fastfloat_really_inline constexpr -bool is_made_of_eight_digits_fast(const char32_t *) noexcept { - return false; +#endif + +// dummy for compile +template <typename UC, FASTFLOAT_ENABLE_IF(!has_simd_opt<UC>())> +uint64_t simd_parse_if_eight_digits_unrolled(UC const*, uint64_t&) { + return 0; +} + + +template <typename UC, FASTFLOAT_ENABLE_IF(!std::is_same<UC, char>::value)> +fastfloat_really_inline FASTFLOAT_CONSTEXPR20 +void loop_parse_if_eight_digits(const UC*& p, const UC* const pend, uint64_t& i) { + if (!has_simd_opt<UC>()) { + return; + } + while ((std::distance(p, pend) >= 8) && simd_parse_if_eight_digits_unrolled(p, i)) { // in rare cases, this will overflow, but that's ok + p += 8; + } } fastfloat_really_inline FASTFLOAT_CONSTEXPR20 -bool is_made_of_eight_digits_fast(const char *chars) noexcept { - return is_made_of_eight_digits_fast(read_u64(chars)); +void loop_parse_if_eight_digits(const char*& p, const char* const pend, uint64_t& i) { + // optimizes better than parse_if_eight_digits_unrolled() for UC = char. + while ((std::distance(p, pend) >= 8) && is_made_of_eight_digits_fast(read8_to_u64(p))) { + i = i * 100000000 + parse_eight_digits_unrolled(read8_to_u64(p)); // in rare cases, this will overflow, but that's ok + p += 8; + } } template <typename UC> @@ -124,8 +214,10 @@ struct parsed_number_string_t { span<const UC> integer{}; // non-nullable span<const UC> fraction{}; // nullable }; -using byte_span = span<char>; + +using byte_span = span<const char>; using parsed_number_string = parsed_number_string_t<char>; + // Assuming that you use no more than 19 digits, this will // parse an ASCII string. template <typename UC> @@ -171,12 +263,8 @@ parsed_number_string_t<UC> parse_number_string(UC const *p, UC const * pend, par UC const * before = p; // can occur at most twice without overflowing, but let it occur more, since // for integers with many digits, digit parsing is the primary bottleneck. - if (std::is_same<UC,char>::value) { - while ((std::distance(p, pend) >= 8) && is_made_of_eight_digits_fast(p)) { - i = i * 100000000 + parse_eight_digits_unrolled(p); // in rare cases, this will overflow, but that's ok - p += 8; - } - } + loop_parse_if_eight_digits(p, pend, i); + while ((p != pend) && is_integer(*p)) { uint8_t digit = uint8_t(*p - UC('0')); ++p; @@ -241,6 +329,7 @@ parsed_number_string_t<UC> parse_number_string(UC const *p, UC const * pend, par if(*start == UC('0')) { digit_count --; } start++; } + if (digit_count > 19) { answer.too_many_digits = true; // Let us start again, this time, avoiding overflows. @@ -248,22 +337,23 @@ parsed_number_string_t<UC> parse_number_string(UC const *p, UC const * pend, par // pre-tokenized spans from above. i = 0; p = answer.integer.ptr; - UC const * int_end = p + answer.integer.len(); - const uint64_t minimal_nineteen_digit_integer{1000000000000000000}; - while((i < minimal_nineteen_digit_integer) && (p != int_end)) { + UC const* int_end = p + answer.integer.len(); + const uint64_t minimal_nineteen_digit_integer{ 1000000000000000000 }; + while ((i < minimal_nineteen_digit_integer) && (p != int_end)) { i = i * 10 + uint64_t(*p - UC('0')); ++p; } if (i >= minimal_nineteen_digit_integer) { // We have a big integers exponent = end_of_integer_part - p + exp_number; - } else { // We have a value with a fractional component. - p = answer.fraction.ptr; - UC const * frac_end = p + answer.fraction.len(); - while((i < minimal_nineteen_digit_integer) && (p != frac_end)) { - i = i * 10 + uint64_t(*p - UC('0')); - ++p; - } - exponent = answer.fraction.ptr - p + exp_number; + } + else { // We have a value with a fractional component. + p = answer.fraction.ptr; + UC const* frac_end = p + answer.fraction.len(); + while ((i < minimal_nineteen_digit_integer) && (p != frac_end)) { + i = i * 10 + uint64_t(*p - UC('0')); + ++p; + } + exponent = answer.fraction.ptr - p + exp_number; } // We have now corrected both exponent and i, to a truncated value } diff --git a/contrib/restricted/fast_float/include/fast_float/digit_comparison.h b/contrib/restricted/fast_float/include/fast_float/digit_comparison.h index f469f6b553..512a27f5a5 100644 --- a/contrib/restricted/fast_float/include/fast_float/digit_comparison.h +++ b/contrib/restricted/fast_float/include/fast_float/digit_comparison.h @@ -201,18 +201,10 @@ bool is_truncated(span<const UC> s) noexcept { return is_truncated(s.ptr, s.ptr + s.len()); } -fastfloat_really_inline FASTFLOAT_CONSTEXPR20 -void parse_eight_digits(const char16_t*& , limb& , size_t& , size_t& ) noexcept { - // currently unused -} - -fastfloat_really_inline FASTFLOAT_CONSTEXPR20 -void parse_eight_digits(const char32_t*& , limb& , size_t& , size_t& ) noexcept { - // currently unused -} +template <typename UC> fastfloat_really_inline FASTFLOAT_CONSTEXPR20 -void parse_eight_digits(const char*& p, limb& value, size_t& counter, size_t& count) noexcept { +void parse_eight_digits(const UC*& p, limb& value, size_t& counter, size_t& count) noexcept { value = value * 100000000 + parse_eight_digits_unrolled(p); p += 8; counter += 8; @@ -264,10 +256,8 @@ void parse_mantissa(bigint& result, parsed_number_string_t<UC>& num, size_t max_ skip_zeros(p, pend); // process all digits, in increments of step per loop while (p != pend) { - if (std::is_same<UC,char>::value) { - while ((std::distance(p, pend) >= 8) && (step - counter >= 8) && (max_digits - digits >= 8)) { - parse_eight_digits(p, value, counter, digits); - } + while ((std::distance(p, pend) >= 8) && (step - counter >= 8) && (max_digits - digits >= 8)) { + parse_eight_digits(p, value, counter, digits); } while (counter < step && p != pend && digits < max_digits) { parse_one_digit(p, value, counter, digits); @@ -299,10 +289,8 @@ void parse_mantissa(bigint& result, parsed_number_string_t<UC>& num, size_t max_ } // process all digits, in increments of step per loop while (p != pend) { - if (std::is_same<UC,char>::value) { - while ((std::distance(p, pend) >= 8) && (step - counter >= 8) && (max_digits - digits >= 8)) { - parse_eight_digits(p, value, counter, digits); - } + while ((std::distance(p, pend) >= 8) && (step - counter >= 8) && (max_digits - digits >= 8)) { + parse_eight_digits(p, value, counter, digits); } while (counter < step && p != pend && digits < max_digits) { parse_one_digit(p, value, counter, digits); diff --git a/contrib/restricted/fast_float/include/fast_float/float_common.h b/contrib/restricted/fast_float/include/fast_float/float_common.h index 2465ea66a0..b1622b0f21 100644 --- a/contrib/restricted/fast_float/include/fast_float/float_common.h +++ b/contrib/restricted/fast_float/include/fast_float/float_common.h @@ -49,7 +49,8 @@ using parse_options = parse_options_t<char>; || defined(__amd64) || defined(__aarch64__) || defined(_M_ARM64) \ || defined(__MINGW64__) \ || defined(__s390x__) \ - || (defined(__ppc64__) || defined(__PPC64__) || defined(__ppc64le__) || defined(__PPC64LE__)) ) + || (defined(__ppc64__) || defined(__PPC64__) || defined(__ppc64le__) || defined(__PPC64LE__)) \ + || defined(__loongarch64) ) #define FASTFLOAT_64BIT 1 #elif (defined(__i386) || defined(__i386__) || defined(_M_IX86) \ || defined(__arm__) || defined(_M_ARM) || defined(__ppc__) \ @@ -87,6 +88,8 @@ using parse_options = parse_options_t<char>; #include <machine/endian.h> #elif defined(sun) || defined(__sun) #include <sys/byteorder.h> +#elif defined(__MVS__) +#include <sys/endian.h> #else #ifdef __has_include #if __has_include(<endian.h>) @@ -112,6 +115,34 @@ using parse_options = parse_options_t<char>; #endif #endif +#if defined(__SSE2__) || \ + (defined(FASTFLOAT_VISUAL_STUDIO) && \ + (defined(_M_AMD64) || defined(_M_X64) || (defined(_M_IX86_FP) && _M_IX86_FP == 2))) +#define FASTFLOAT_SSE2 1 +#endif + +#ifdef FASTFLOAT_SSE2 +#define FASTFLOAT_HAS_SIMD 1 +#endif + +#if defined(__GNUC__) +// disable -Wcast-align=strict (GCC only) +#define FASTFLOAT_SIMD_DISABLE_WARNINGS \ + _Pragma("GCC diagnostic push") \ + _Pragma("GCC diagnostic ignored \"-Wcast-align\"") +#else +#define FASTFLOAT_SIMD_DISABLE_WARNINGS +#endif + +#if defined(__GNUC__) +#define FASTFLOAT_SIMD_RESTORE_WARNINGS \ + _Pragma("GCC diagnostic pop") +#else +#define FASTFLOAT_SIMD_RESTORE_WARNINGS +#endif + + + #ifdef FASTFLOAT_VISUAL_STUDIO #define fastfloat_really_inline __forceinline #else @@ -129,6 +160,9 @@ using parse_options = parse_options_t<char>; // rust style `try!()` macro, or `?` operator #define FASTFLOAT_TRY(x) { if (!(x)) return false; } +#define FASTFLOAT_ENABLE_IF(...) typename std::enable_if<(__VA_ARGS__), int>::type = 0 + + namespace fast_float { fastfloat_really_inline constexpr bool cpp20_and_in_constexpr() { diff --git a/contrib/restricted/fast_float/include/fast_float/parse_number.h b/contrib/restricted/fast_float/include/fast_float/parse_number.h index 4541d70262..e077b9d03d 100644 --- a/contrib/restricted/fast_float/include/fast_float/parse_number.h +++ b/contrib/restricted/fast_float/include/fast_float/parse_number.h @@ -166,6 +166,7 @@ from_chars_result_t<UC> from_chars_advanced(UC const * first, UC const * last, if (!pns.valid) { return detail::parse_infnan(first, last, value); } + answer.ec = std::errc(); // be optimistic answer.ptr = pns.lastmatch; // The implementation of the Clinger's fast path is convoluted because diff --git a/contrib/restricted/fast_float/ya.make b/contrib/restricted/fast_float/ya.make index cde7739eba..f48a8de4f0 100644 --- a/contrib/restricted/fast_float/ya.make +++ b/contrib/restricted/fast_float/ya.make @@ -10,9 +10,9 @@ LICENSE( LICENSE_TEXTS(.yandex_meta/licenses.list.txt) -VERSION(5.0.0) +VERSION(5.1.0) -ORIGINAL_SOURCE(https://github.com/fastfloat/fast_float/archive/v5.0.0.tar.gz) +ORIGINAL_SOURCE(https://github.com/fastfloat/fast_float/archive/v5.1.0.tar.gz) NO_COMPILER_WARNINGS() |