about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: robot-contrib <robot-contrib@yandex-team.com> 2023-06-24 08:00:24 +0300
committer: robot-contrib <robot-contrib@yandex-team.com> 2023-06-24 08:00:24 +0300
commit: 967e2cdf8e721697dad301820fd5959bba2755e5 (patch)
tree: 107abca6db6dc6d942ec697bdf68b87ceff8d560
parent: c203a40b3ad69fa879519c38e45c641cb2eaee84 (diff)
download: ydb-967e2cdf8e721697dad301820fd5959bba2755e5.tar.gz
Update contrib/restricted/fast_float to 5.1.0
-rw-r--r-- contrib/restricted/fast_float/README.md | 2
-rw-r--r-- contrib/restricted/fast_float/include/fast_float/ascii_number.h | 172
-rw-r--r-- contrib/restricted/fast_float/include/fast_float/digit_comparison.h | 24
-rw-r--r-- contrib/restricted/fast_float/include/fast_float/float_common.h | 36
-rw-r--r-- contrib/restricted/fast_float/include/fast_float/parse_number.h | 1
-rw-r--r-- contrib/restricted/fast_float/ya.make | 4
6 files changed, 176 insertions, 63 deletions
diff --git a/contrib/restricted/fast_float/README.md b/contrib/restricted/fast_float/README.md
index 4f3eb22d76..8dffa06a96 100644
--- a/contrib/restricted/fast_float/README.md
+++ b/contrib/restricted/fast_float/README.md
@@ -186,7 +186,7 @@ The fast_float library provides a performance similar to that of the [fast_doubl
## References
- Daniel Lemire, [Number Parsing at a Gigabyte per Second](https://arxiv.org/abs/2101.11408), Software: Practice and Experience 51 (8), 2021.
-- Noble Mushtak, Daniel Lemire, [Fast Number Parsing Without Fallback](https://arxiv.org/abs/2212.06644), Software: Practice and Experience (to appear)
+- Noble Mushtak, Daniel Lemire, [Fast Number Parsing Without Fallback](https://arxiv.org/abs/2212.06644), Software: Practice and Experience 53 (7), 2023.
## Other programming languages
diff --git a/contrib/restricted/fast_float/include/fast_float/ascii_number.h b/contrib/restricted/fast_float/include/fast_float/ascii_number.h
index d506326ec9..481b91df76 100644
--- a/contrib/restricted/fast_float/include/fast_float/ascii_number.h
+++ b/contrib/restricted/fast_float/include/fast_float/ascii_number.h
@@ -5,11 +5,26 @@
#include <cstdint>
#include <cstring>
#include <iterator>
+#include <type_traits>
#include "float_common.h"
+#ifdef FASTFLOAT_SSE2
+#include <emmintrin.h>
+#endif
+
+
namespace fast_float {
+template <typename UC>
+fastfloat_really_inline constexpr bool has_simd_opt() {
+#ifdef FASTFLOAT_HAS_SIMD
+ return std::is_same<UC, char16_t>::value;
+#else
+ return false;
+#endif
+}
+
// Next function can be micro-optimized, but compilers are entirely
// able to optimize it well.
template <typename UC>
@@ -28,12 +43,14 @@ fastfloat_really_inline constexpr uint64_t byteswap(uint64_t val) {
| (val & 0x00000000000000FF) << 56;
}
+// Read 8 UC into a u64. Truncates UC if not char.
+template <typename UC>
fastfloat_really_inline FASTFLOAT_CONSTEXPR20
-uint64_t read_u64(const char *chars) {
- if (cpp20_and_in_constexpr()) {
+uint64_t read8_to_u64(const UC *chars) {
+ if (cpp20_and_in_constexpr() || !std::is_same<UC, char>::value) {
uint64_t val = 0;
for(int i = 0; i < 8; ++i) {
- val |= uint64_t(*chars) << (i*8);
+ val |= uint64_t(uint8_t(*chars)) << (i*8);
++chars;
}
return val;
@@ -47,6 +64,39 @@ uint64_t read_u64(const char *chars) {
return val;
}
+#ifdef FASTFLOAT_SSE2
+
+fastfloat_really_inline
+uint64_t simd_read8_to_u64(const __m128i data) {
+FASTFLOAT_SIMD_DISABLE_WARNINGS
+ const __m128i packed = _mm_packus_epi16(data, data);
+#ifdef FASTFLOAT_64BIT
+ return uint64_t(_mm_cvtsi128_si64(packed));
+#else
+ uint64_t value;
+ // Visual Studio + older versions of GCC don't support _mm_storeu_si64
+ _mm_storel_epi64(reinterpret_cast<__m128i*>(&value), packed);
+ return value;
+#endif
+FASTFLOAT_SIMD_RESTORE_WARNINGS
+}
+
+fastfloat_really_inline
+uint64_t simd_read8_to_u64(const char16_t* chars) {
+FASTFLOAT_SIMD_DISABLE_WARNINGS
+ return simd_read8_to_u64(_mm_loadu_si128(reinterpret_cast<const __m128i*>(chars)));
+FASTFLOAT_SIMD_RESTORE_WARNINGS
+}
+
+#endif
+
+// dummy for compile
+template <typename UC, FASTFLOAT_ENABLE_IF(!has_simd_opt<UC>())>
+uint64_t simd_read8_to_u64(UC const*) {
+ return 0;
+}
+
+
fastfloat_really_inline FASTFLOAT_CONSTEXPR20
void write_u64(uint8_t *chars, uint64_t val) {
if (cpp20_and_in_constexpr()) {
@@ -76,40 +126,80 @@ uint32_t parse_eight_digits_unrolled(uint64_t val) {
return uint32_t(val);
}
-fastfloat_really_inline constexpr
-uint32_t parse_eight_digits_unrolled(const char16_t *) noexcept {
- return 0;
-}
-
-fastfloat_really_inline constexpr
-uint32_t parse_eight_digits_unrolled(const char32_t *) noexcept {
- return 0;
-}
+// Call this if chars are definitely 8 digits.
+template <typename UC>
fastfloat_really_inline FASTFLOAT_CONSTEXPR20
-uint32_t parse_eight_digits_unrolled(const char *chars) noexcept {
- return parse_eight_digits_unrolled(read_u64(chars));
+uint32_t parse_eight_digits_unrolled(UC const * chars) noexcept {
+ if (cpp20_and_in_constexpr() || !has_simd_opt<UC>()) {
+ return parse_eight_digits_unrolled(read8_to_u64(chars)); // truncation okay
+ }
+ return parse_eight_digits_unrolled(simd_read8_to_u64(chars));
}
+
// credit @aqrit
-fastfloat_really_inline constexpr bool is_made_of_eight_digits_fast(uint64_t val) noexcept {
+fastfloat_really_inline constexpr bool is_made_of_eight_digits_fast(uint64_t val) noexcept {
return !((((val + 0x4646464646464646) | (val - 0x3030303030303030)) &
0x8080808080808080));
}
-fastfloat_really_inline constexpr
-bool is_made_of_eight_digits_fast(const char16_t *) noexcept {
- return false;
+
+#ifdef FASTFLOAT_HAS_SIMD
+
+// Call this if chars might not be 8 digits.
+// Using this style (instead of is_made_of_eight_digits_fast() then parse_eight_digits_unrolled())
+// ensures we don't load SIMD registers twice.
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20
+bool simd_parse_if_eight_digits_unrolled(const char16_t* chars, uint64_t& i) noexcept {
+ if (cpp20_and_in_constexpr()) {
+ return false;
+ }
+#ifdef FASTFLOAT_SSE2
+FASTFLOAT_SIMD_DISABLE_WARNINGS
+ const __m128i data = _mm_loadu_si128(reinterpret_cast<const __m128i*>(chars));
+
+ // (x - '0') <= 9
+ // http://0x80.pl/articles/simd-parsing-int-sequences.html
+ const __m128i t0 = _mm_add_epi16(data, _mm_set1_epi16(32720));
+ const __m128i t1 = _mm_cmpgt_epi16(t0, _mm_set1_epi16(-32759));
+
+ if (_mm_movemask_epi8(t1) == 0) {
+ i = i * 100000000 + parse_eight_digits_unrolled(simd_read8_to_u64(data));
+ return true;
+ }
+ else return false;
+FASTFLOAT_SIMD_RESTORE_WARNINGS
+#endif
}
-fastfloat_really_inline constexpr
-bool is_made_of_eight_digits_fast(const char32_t *) noexcept {
- return false;
+#endif
+
+// dummy for compile
+template <typename UC, FASTFLOAT_ENABLE_IF(!has_simd_opt<UC>())>
+uint64_t simd_parse_if_eight_digits_unrolled(UC const*, uint64_t&) {
+ return 0;
+}
+
+
+template <typename UC, FASTFLOAT_ENABLE_IF(!std::is_same<UC, char>::value)>
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20
+void loop_parse_if_eight_digits(const UC*& p, const UC* const pend, uint64_t& i) {
+ if (!has_simd_opt<UC>()) {
+ return;
+ }
+ while ((std::distance(p, pend) >= 8) && simd_parse_if_eight_digits_unrolled(p, i)) { // in rare cases, this will overflow, but that's ok
+ p += 8;
+ }
}
fastfloat_really_inline FASTFLOAT_CONSTEXPR20
-bool is_made_of_eight_digits_fast(const char *chars) noexcept {
- return is_made_of_eight_digits_fast(read_u64(chars));
+void loop_parse_if_eight_digits(const char*& p, const char* const pend, uint64_t& i) {
+ // optimizes better than parse_if_eight_digits_unrolled() for UC = char.
+ while ((std::distance(p, pend) >= 8) && is_made_of_eight_digits_fast(read8_to_u64(p))) {
+ i = i * 100000000 + parse_eight_digits_unrolled(read8_to_u64(p)); // in rare cases, this will overflow, but that's ok
+ p += 8;
+ }
}
template <typename UC>
@@ -124,8 +214,10 @@ struct parsed_number_string_t {
span<const UC> integer{}; // non-nullable
span<const UC> fraction{}; // nullable
};
-using byte_span = span<char>;
+
+using byte_span = span<const char>;
using parsed_number_string = parsed_number_string_t<char>;
+
// Assuming that you use no more than 19 digits, this will
// parse an ASCII string.
template <typename UC>
@@ -171,12 +263,8 @@ parsed_number_string_t<UC> parse_number_string(UC const *p, UC const * pend, par
UC const * before = p;
// can occur at most twice without overflowing, but let it occur more, since
// for integers with many digits, digit parsing is the primary bottleneck.
- if (std::is_same<UC,char>::value) {
- while ((std::distance(p, pend) >= 8) && is_made_of_eight_digits_fast(p)) {
- i = i * 100000000 + parse_eight_digits_unrolled(p); // in rare cases, this will overflow, but that's ok
- p += 8;
- }
- }
+ loop_parse_if_eight_digits(p, pend, i);
+
while ((p != pend) && is_integer(*p)) {
uint8_t digit = uint8_t(*p - UC('0'));
++p;
@@ -241,6 +329,7 @@ parsed_number_string_t<UC> parse_number_string(UC const *p, UC const * pend, par
if(*start == UC('0')) { digit_count --; }
start++;
}
+
if (digit_count > 19) {
answer.too_many_digits = true;
// Let us start again, this time, avoiding overflows.
@@ -248,22 +337,23 @@ parsed_number_string_t<UC> parse_number_string(UC const *p, UC const * pend, par
// pre-tokenized spans from above.
i = 0;
p = answer.integer.ptr;
- UC const * int_end = p + answer.integer.len();
- const uint64_t minimal_nineteen_digit_integer{1000000000000000000};
- while((i < minimal_nineteen_digit_integer) && (p != int_end)) {
+ UC const* int_end = p + answer.integer.len();
+ const uint64_t minimal_nineteen_digit_integer{ 1000000000000000000 };
+ while ((i < minimal_nineteen_digit_integer) && (p != int_end)) {
i = i * 10 + uint64_t(*p - UC('0'));
++p;
}
if (i >= minimal_nineteen_digit_integer) { // We have a big integers
exponent = end_of_integer_part - p + exp_number;
- } else { // We have a value with a fractional component.
- p = answer.fraction.ptr;
- UC const * frac_end = p + answer.fraction.len();
- while((i < minimal_nineteen_digit_integer) && (p != frac_end)) {
- i = i * 10 + uint64_t(*p - UC('0'));
- ++p;
- }
- exponent = answer.fraction.ptr - p + exp_number;
+ }
+ else { // We have a value with a fractional component.
+ p = answer.fraction.ptr;
+ UC const* frac_end = p + answer.fraction.len();
+ while ((i < minimal_nineteen_digit_integer) && (p != frac_end)) {
+ i = i * 10 + uint64_t(*p - UC('0'));
+ ++p;
+ }
+ exponent = answer.fraction.ptr - p + exp_number;
}
// We have now corrected both exponent and i, to a truncated value
}
diff --git a/contrib/restricted/fast_float/include/fast_float/digit_comparison.h b/contrib/restricted/fast_float/include/fast_float/digit_comparison.h
index f469f6b553..512a27f5a5 100644
--- a/contrib/restricted/fast_float/include/fast_float/digit_comparison.h
+++ b/contrib/restricted/fast_float/include/fast_float/digit_comparison.h
@@ -201,18 +201,10 @@ bool is_truncated(span<const UC> s) noexcept {
return is_truncated(s.ptr, s.ptr + s.len());
}
-fastfloat_really_inline FASTFLOAT_CONSTEXPR20
-void parse_eight_digits(const char16_t*& , limb& , size_t& , size_t& ) noexcept {
- // currently unused
-}
-
-fastfloat_really_inline FASTFLOAT_CONSTEXPR20
-void parse_eight_digits(const char32_t*& , limb& , size_t& , size_t& ) noexcept {
- // currently unused
-}
+template <typename UC>
fastfloat_really_inline FASTFLOAT_CONSTEXPR20
-void parse_eight_digits(const char*& p, limb& value, size_t& counter, size_t& count) noexcept {
+void parse_eight_digits(const UC*& p, limb& value, size_t& counter, size_t& count) noexcept {
value = value * 100000000 + parse_eight_digits_unrolled(p);
p += 8;
counter += 8;
@@ -264,10 +256,8 @@ void parse_mantissa(bigint& result, parsed_number_string_t<UC>& num, size_t max_
skip_zeros(p, pend);
// process all digits, in increments of step per loop
while (p != pend) {
- if (std::is_same<UC,char>::value) {
- while ((std::distance(p, pend) >= 8) && (step - counter >= 8) && (max_digits - digits >= 8)) {
- parse_eight_digits(p, value, counter, digits);
- }
+ while ((std::distance(p, pend) >= 8) && (step - counter >= 8) && (max_digits - digits >= 8)) {
+ parse_eight_digits(p, value, counter, digits);
}
while (counter < step && p != pend && digits < max_digits) {
parse_one_digit(p, value, counter, digits);
@@ -299,10 +289,8 @@ void parse_mantissa(bigint& result, parsed_number_string_t<UC>& num, size_t max_
}
// process all digits, in increments of step per loop
while (p != pend) {
- if (std::is_same<UC,char>::value) {
- while ((std::distance(p, pend) >= 8) && (step - counter >= 8) && (max_digits - digits >= 8)) {
- parse_eight_digits(p, value, counter, digits);
- }
+ while ((std::distance(p, pend) >= 8) && (step - counter >= 8) && (max_digits - digits >= 8)) {
+ parse_eight_digits(p, value, counter, digits);
}
while (counter < step && p != pend && digits < max_digits) {
parse_one_digit(p, value, counter, digits);
diff --git a/contrib/restricted/fast_float/include/fast_float/float_common.h b/contrib/restricted/fast_float/include/fast_float/float_common.h
index 2465ea66a0..b1622b0f21 100644
--- a/contrib/restricted/fast_float/include/fast_float/float_common.h
+++ b/contrib/restricted/fast_float/include/fast_float/float_common.h
@@ -49,7 +49,8 @@ using parse_options = parse_options_t<char>;
|| defined(__amd64) || defined(__aarch64__) || defined(_M_ARM64) \
|| defined(__MINGW64__) \
|| defined(__s390x__) \
- || (defined(__ppc64__) || defined(__PPC64__) || defined(__ppc64le__) || defined(__PPC64LE__)) )
+ || (defined(__ppc64__) || defined(__PPC64__) || defined(__ppc64le__) || defined(__PPC64LE__)) \
+ || defined(__loongarch64) )
#define FASTFLOAT_64BIT 1
#elif (defined(__i386) || defined(__i386__) || defined(_M_IX86) \
|| defined(__arm__) || defined(_M_ARM) || defined(__ppc__) \
@@ -87,6 +88,8 @@ using parse_options = parse_options_t<char>;
#include <machine/endian.h>
#elif defined(sun) || defined(__sun)
#include <sys/byteorder.h>
+#elif defined(__MVS__)
+#include <sys/endian.h>
#else
#ifdef __has_include
#if __has_include(<endian.h>)
@@ -112,6 +115,34 @@ using parse_options = parse_options_t<char>;
#endif
#endif
+#if defined(__SSE2__) || \
+ (defined(FASTFLOAT_VISUAL_STUDIO) && \
+ (defined(_M_AMD64) || defined(_M_X64) || (defined(_M_IX86_FP) && _M_IX86_FP == 2)))
+#define FASTFLOAT_SSE2 1
+#endif
+
+#ifdef FASTFLOAT_SSE2
+#define FASTFLOAT_HAS_SIMD 1
+#endif
+
+#if defined(__GNUC__)
+// disable -Wcast-align=strict (GCC only)
+#define FASTFLOAT_SIMD_DISABLE_WARNINGS \
+ _Pragma("GCC diagnostic push") \
+ _Pragma("GCC diagnostic ignored \"-Wcast-align\"")
+#else
+#define FASTFLOAT_SIMD_DISABLE_WARNINGS
+#endif
+
+#if defined(__GNUC__)
+#define FASTFLOAT_SIMD_RESTORE_WARNINGS \
+ _Pragma("GCC diagnostic pop")
+#else
+#define FASTFLOAT_SIMD_RESTORE_WARNINGS
+#endif
+
+
+
#ifdef FASTFLOAT_VISUAL_STUDIO
#define fastfloat_really_inline __forceinline
#else
@@ -129,6 +160,9 @@ using parse_options = parse_options_t<char>;
// rust style `try!()` macro, or `?` operator
#define FASTFLOAT_TRY(x) { if (!(x)) return false; }
+#define FASTFLOAT_ENABLE_IF(...) typename std::enable_if<(__VA_ARGS__), int>::type = 0
+
+
namespace fast_float {
fastfloat_really_inline constexpr bool cpp20_and_in_constexpr() {
diff --git a/contrib/restricted/fast_float/include/fast_float/parse_number.h b/contrib/restricted/fast_float/include/fast_float/parse_number.h
index 4541d70262..e077b9d03d 100644
--- a/contrib/restricted/fast_float/include/fast_float/parse_number.h
+++ b/contrib/restricted/fast_float/include/fast_float/parse_number.h
@@ -166,6 +166,7 @@ from_chars_result_t<UC> from_chars_advanced(UC const * first, UC const * last,
if (!pns.valid) {
return detail::parse_infnan(first, last, value);
}
+
answer.ec = std::errc(); // be optimistic
answer.ptr = pns.lastmatch;
// The implementation of the Clinger's fast path is convoluted because
diff --git a/contrib/restricted/fast_float/ya.make b/contrib/restricted/fast_float/ya.make
index cde7739eba..f48a8de4f0 100644
--- a/contrib/restricted/fast_float/ya.make
+++ b/contrib/restricted/fast_float/ya.make
@@ -10,9 +10,9 @@ LICENSE(
LICENSE_TEXTS(.yandex_meta/licenses.list.txt)
-VERSION(5.0.0)
+VERSION(5.1.0)
-ORIGINAL_SOURCE(https://github.com/fastfloat/fast_float/archive/v5.0.0.tar.gz)
+ORIGINAL_SOURCE(https://github.com/fastfloat/fast_float/archive/v5.1.0.tar.gz)
NO_COMPILER_WARNINGS()