diff options
author | robot-contrib <robot-contrib@yandex-team.com> | 2022-12-10 21:46:46 +0300 |
---|---|---|
committer | robot-contrib <robot-contrib@yandex-team.com> | 2022-12-10 21:46:46 +0300 |
commit | 7820b7b16b16b28cbdfcf31c646531094359fd19 (patch) | |
tree | 2e73fe3618b28e927100abab45ac1203eae16edc /contrib/restricted/fast_float | |
parent | 8e003675e20f29143ae6088e4beebfa7256c825c (diff) | |
download | ydb-7820b7b16b16b28cbdfcf31c646531094359fd19.tar.gz |
Update contrib/restricted/fast_float to 3.8.1
Diffstat (limited to 'contrib/restricted/fast_float')
6 files changed, 167 insertions, 39 deletions
diff --git a/contrib/restricted/fast_float/README.md b/contrib/restricted/fast_float/README.md index 92eb3a4d2b..d6ae279527 100644 --- a/contrib/restricted/fast_float/README.md +++ b/contrib/restricted/fast_float/README.md @@ -70,7 +70,7 @@ Furthermore, we have the following restrictions: We support Visual Studio, macOS, Linux, freeBSD. We support big and little endian. We support 32-bit and 64-bit systems. - +We assume that the rounding mode is set to nearest (`std::fegetround() == FE_TONEAREST`). ## Using commas as decimal separator @@ -126,13 +126,13 @@ You can parse delimited numbers: ## Reference -- Daniel Lemire, [Number Parsing at a Gigabyte per Second](https://arxiv.org/abs/2101.11408), Software: Pratice and Experience 51 (8), 2021. +- Daniel Lemire, [Number Parsing at a Gigabyte per Second](https://arxiv.org/abs/2101.11408), Software: Practice and Experience 51 (8), 2021. ## Other programming languages - [There is an R binding](https://github.com/eddelbuettel/rcppfastfloat) called `rcppfastfloat`. - [There is a Rust port of the fast_float library](https://github.com/aldanor/fast-float-rust/) called `fast-float-rust`. -- [There is a Java port of the fast_float library](https://github.com/wrandelshofer/FastDoubleParser) called `FastDoubleParser`. +- [There is a Java port of the fast_float library](https://github.com/wrandelshofer/FastDoubleParser) called `FastDoubleParser`. It used for important systems such as [Jackson](https://github.com/FasterXML/jackson-core). - [There is a C# port of the fast_float library](https://github.com/CarlVerret/csFastFloat) called `csFastFloat`. diff --git a/contrib/restricted/fast_float/include/fast_float/ascii_number.h b/contrib/restricted/fast_float/include/fast_float/ascii_number.h index 3e6bb3e9ef..1783bd4ab1 100644 --- a/contrib/restricted/fast_float/include/fast_float/ascii_number.h +++ b/contrib/restricted/fast_float/include/fast_float/ascii_number.h @@ -106,10 +106,6 @@ parsed_number_string parse_number_string(const char *p, const char *pend, parse_ uint64_t i = 0; // an unsigned int avoids signed overflows (which are bad) - while ((std::distance(p, pend) >= 8) && is_made_of_eight_digits_fast(p)) { - i = i * 100000000 + parse_eight_digits_unrolled(p); // in rare cases, this will overflow, but that's ok - p += 8; - } while ((p != pend) && is_integer(*p)) { // a multiplication by 10 is cheaper than an arbitrary integer // multiplication diff --git a/contrib/restricted/fast_float/include/fast_float/bigint.h b/contrib/restricted/fast_float/include/fast_float/bigint.h index b56cb9b03b..220e0572a3 100644 --- a/contrib/restricted/fast_float/include/fast_float/bigint.h +++ b/contrib/restricted/fast_float/include/fast_float/bigint.h @@ -17,7 +17,7 @@ namespace fast_float { // we might have platforms where `CHAR_BIT` is not 8, so let's avoid // doing `8 * sizeof(limb)`. #if defined(FASTFLOAT_64BIT) && !defined(__sparc) -#define FASTFLOAT_64BIT_LIMB +#define FASTFLOAT_64BIT_LIMB 1 typedef uint64_t limb; constexpr size_t limb_bits = 64; #else diff --git a/contrib/restricted/fast_float/include/fast_float/fast_float.h b/contrib/restricted/fast_float/include/fast_float/fast_float.h index 3c483803af..ad3093ef88 100644 --- a/contrib/restricted/fast_float/include/fast_float/fast_float.h +++ b/contrib/restricted/fast_float/include/fast_float/fast_float.h @@ -44,7 +44,7 @@ struct parse_options { * Like the C++17 standard, the `fast_float::from_chars` functions take an optional last argument of * the type `fast_float::chars_format`. It is a bitset value: we check whether * `fmt & fast_float::chars_format::fixed` and `fmt & fast_float::chars_format::scientific` are set - * to determine whether we allowe the fixed point and scientific notation respectively. + * to determine whether we allow the fixed point and scientific notation respectively. * The default is `fast_float::chars_format::general` which allows both `fixed` and `scientific`. */ template<typename T> diff --git a/contrib/restricted/fast_float/include/fast_float/float_common.h b/contrib/restricted/fast_float/include/fast_float/float_common.h index 55a2e4c32b..c2084e0ed9 100644 --- a/contrib/restricted/fast_float/include/fast_float/float_common.h +++ b/contrib/restricted/fast_float/include/fast_float/float_common.h @@ -12,11 +12,11 @@ || defined(__MINGW64__) \ || defined(__s390x__) \ || (defined(__ppc64__) || defined(__PPC64__) || defined(__ppc64le__) || defined(__PPC64LE__)) ) -#define FASTFLOAT_64BIT +#define FASTFLOAT_64BIT 1 #elif (defined(__i386) || defined(__i386__) || defined(_M_IX86) \ || defined(__arm__) || defined(_M_ARM) \ || defined(__MINGW32__) || defined(__EMSCRIPTEN__)) -#define FASTFLOAT_32BIT +#define FASTFLOAT_32BIT 1 #else // Need to check incrementally, since SIZE_MAX is a size_t, avoid overflow. // We can never tell the register width, but the SIZE_MAX is a good approximation. @@ -24,9 +24,9 @@ #if SIZE_MAX == 0xffff #error Unknown platform (16-bit, unsupported) #elif SIZE_MAX == 0xffffffff - #define FASTFLOAT_32BIT + #define FASTFLOAT_32BIT 1 #elif SIZE_MAX == 0xffffffffffffffff - #define FASTFLOAT_64BIT + #define FASTFLOAT_64BIT 1 #else #error Unknown platform (not 32-bit, not 64-bit?) #endif @@ -183,8 +183,9 @@ fastfloat_really_inline uint64_t _umul128(uint64_t ab, uint64_t cd, fastfloat_really_inline value128 full_multiplication(uint64_t a, uint64_t b) { value128 answer; -#ifdef _M_ARM64 +#if defined(_M_ARM64) && !defined(__MINGW32__) // ARM64 has native support for 64-bit multiplications, no need to emulate + // But MinGW on ARM64 doesn't have native support for 64-bit multiplications answer.high = __umulh(a, b); answer.low = a * b; #elif defined(FASTFLOAT_32BIT) || (defined(_WIN64) && !defined(__clang__)) @@ -219,6 +220,50 @@ constexpr static double powers_of_ten_double[] = { 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19, 1e20, 1e21, 1e22}; constexpr static float powers_of_ten_float[] = {1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 1e10}; +// used for max_mantissa_double and max_mantissa_float +constexpr uint64_t constant_55555 = 5 * 5 * 5 * 5 * 5; +// Largest integer value v so that (5**index * v) <= 1<<53. +// 0x10000000000000 == 1 << 53 +constexpr static uint64_t max_mantissa_double[] = { + 0x10000000000000, + 0x10000000000000 / 5, + 0x10000000000000 / (5 * 5), + 0x10000000000000 / (5 * 5 * 5), + 0x10000000000000 / (5 * 5 * 5 * 5), + 0x10000000000000 / (constant_55555), + 0x10000000000000 / (constant_55555 * 5), + 0x10000000000000 / (constant_55555 * 5 * 5), + 0x10000000000000 / (constant_55555 * 5 * 5 * 5), + 0x10000000000000 / (constant_55555 * 5 * 5 * 5 * 5), + 0x10000000000000 / (constant_55555 * constant_55555), + 0x10000000000000 / (constant_55555 * constant_55555 * 5), + 0x10000000000000 / (constant_55555 * constant_55555 * 5 * 5), + 0x10000000000000 / (constant_55555 * constant_55555 * 5 * 5 * 5), + 0x10000000000000 / (constant_55555 * constant_55555 * constant_55555), + 0x10000000000000 / (constant_55555 * constant_55555 * constant_55555 * 5), + 0x10000000000000 / (constant_55555 * constant_55555 * constant_55555 * 5 * 5), + 0x10000000000000 / (constant_55555 * constant_55555 * constant_55555 * 5 * 5 * 5), + 0x10000000000000 / (constant_55555 * constant_55555 * constant_55555 * 5 * 5 * 5 * 5), + 0x10000000000000 / (constant_55555 * constant_55555 * constant_55555 * constant_55555), + 0x10000000000000 / (constant_55555 * constant_55555 * constant_55555 * constant_55555 * 5), + 0x10000000000000 / (constant_55555 * constant_55555 * constant_55555 * constant_55555 * 5 * 5), + 0x10000000000000 / (constant_55555 * constant_55555 * constant_55555 * constant_55555 * 5 * 5 * 5), + 0x10000000000000 / (constant_55555 * constant_55555 * constant_55555 * constant_55555 * 5 * 5 * 5 * 5)}; + // Largest integer value v so that (5**index * v) <= 1<<24. + // 0x1000000 == 1<<24 + constexpr static uint64_t max_mantissa_float[] = { + 0x1000000, + 0x1000000 / 5, + 0x1000000 / (5 * 5), + 0x1000000 / (5 * 5 * 5), + 0x1000000 / (5 * 5 * 5 * 5), + 0x1000000 / (constant_55555), + 0x1000000 / (constant_55555 * 5), + 0x1000000 / (constant_55555 * 5 * 5), + 0x1000000 / (constant_55555 * 5 * 5 * 5), + 0x1000000 / (constant_55555 * 5 * 5 * 5 * 5), + 0x1000000 / (constant_55555 * constant_55555), + 0x1000000 / (constant_55555 * constant_55555 * 5)}; template <typename T> struct binary_format { using equiv_uint = typename std::conditional<sizeof(T) == 4, uint32_t, uint64_t>::type; @@ -227,11 +272,12 @@ template <typename T> struct binary_format { static inline constexpr int minimum_exponent(); static inline constexpr int infinite_power(); static inline constexpr int sign_index(); - static inline constexpr int min_exponent_fast_path(); + static inline constexpr int min_exponent_fast_path(); // used when fegetround() == FE_TONEAREST static inline constexpr int max_exponent_fast_path(); static inline constexpr int max_exponent_round_to_even(); static inline constexpr int min_exponent_round_to_even(); - static inline constexpr uint64_t max_mantissa_fast_path(); + static inline constexpr uint64_t max_mantissa_fast_path(int64_t power); + static inline constexpr uint64_t max_mantissa_fast_path(); // used when fegetround() == FE_TONEAREST static inline constexpr int largest_power_of_ten(); static inline constexpr int smallest_power_of_ten(); static inline constexpr T exact_power_of_ten(int64_t power); @@ -241,6 +287,22 @@ template <typename T> struct binary_format { static inline constexpr equiv_uint hidden_bit_mask(); }; +template <> inline constexpr int binary_format<double>::min_exponent_fast_path() { +#if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0) + return 0; +#else + return -22; +#endif +} + +template <> inline constexpr int binary_format<float>::min_exponent_fast_path() { +#if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0) + return 0; +#else + return -10; +#endif +} + template <> inline constexpr int binary_format<double>::mantissa_explicit_bits() { return 52; } @@ -281,34 +343,30 @@ template <> inline constexpr int binary_format<float>::infinite_power() { template <> inline constexpr int binary_format<double>::sign_index() { return 63; } template <> inline constexpr int binary_format<float>::sign_index() { return 31; } -template <> inline constexpr int binary_format<double>::min_exponent_fast_path() { -#if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0) - return 0; -#else - return -22; -#endif -} -template <> inline constexpr int binary_format<float>::min_exponent_fast_path() { -#if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0) - return 0; -#else - return -10; -#endif -} - template <> inline constexpr int binary_format<double>::max_exponent_fast_path() { return 22; } template <> inline constexpr int binary_format<float>::max_exponent_fast_path() { return 10; } - template <> inline constexpr uint64_t binary_format<double>::max_mantissa_fast_path() { return uint64_t(2) << mantissa_explicit_bits(); } +template <> inline constexpr uint64_t binary_format<double>::max_mantissa_fast_path(int64_t power) { + // caller is responsible to ensure that + // power >= 0 && power <= 22 + // + return max_mantissa_double[power]; +} template <> inline constexpr uint64_t binary_format<float>::max_mantissa_fast_path() { return uint64_t(2) << mantissa_explicit_bits(); } +template <> inline constexpr uint64_t binary_format<float>::max_mantissa_fast_path(int64_t power) { + // caller is responsible to ensure that + // power >= 0 && power <= 10 + // + return max_mantissa_float[power]; +} template <> inline constexpr double binary_format<double>::exact_power_of_ten(int64_t power) { diff --git a/contrib/restricted/fast_float/include/fast_float/parse_number.h b/contrib/restricted/fast_float/include/fast_float/parse_number.h index 62ae3b039e..477288199e 100644 --- a/contrib/restricted/fast_float/include/fast_float/parse_number.h +++ b/contrib/restricted/fast_float/include/fast_float/parse_number.h @@ -60,6 +60,48 @@ from_chars_result parse_infnan(const char *first, const char *last, T &value) n return answer; } +/** + * Returns true if the floating-pointing rounding mode is to 'nearest'. + * It is the default on most system. This function is meant to be inexpensive. + * Credit : @mwalcott3 + */ +fastfloat_really_inline bool rounds_to_nearest() noexcept { + // See + // A fast function to check your floating-point rounding mode + // https://lemire.me/blog/2022/11/16/a-fast-function-to-check-your-floating-point-rounding-mode/ + // + // This function is meant to be equivalent to : + // prior: #include <cfenv> + // return fegetround() == FE_TONEAREST; + // However, it is expected to be much faster than the fegetround() + // function call. + // + // The volatile keywoard prevents the compiler from computing the function + // at compile-time. + // There might be other ways to prevent compile-time optimizations (e.g., asm). + // The value does not need to be std::numeric_limits<float>::min(), any small + // value so that 1 + x should round to 1 would do (after accounting for excess + // precision, as in 387 instructions). + static volatile float fmin = std::numeric_limits<float>::min(); + float fmini = fmin; // we copy it so that it gets loaded at most once. + // + // Explanation: + // Only when fegetround() == FE_TONEAREST do we have that + // fmin + 1.0f == 1.0f - fmin. + // + // FE_UPWARD: + // fmin + 1.0f > 1 + // 1.0f - fmin == 1 + // + // FE_DOWNWARD or FE_TOWARDZERO: + // fmin + 1.0f == 1 + // 1.0f - fmin < 1 + // + // Note: This may fail to be accurate if fast-math has been + // enabled, as rounding conventions may not apply. + return (fmini + 1.0f == 1.0f - fmini); +} + } // namespace detail template<typename T> @@ -87,13 +129,45 @@ from_chars_result from_chars_advanced(const char *first, const char *last, } answer.ec = std::errc(); // be optimistic answer.ptr = pns.lastmatch; - // Next is Clinger's fast path. - if (binary_format<T>::min_exponent_fast_path() <= pns.exponent && pns.exponent <= binary_format<T>::max_exponent_fast_path() && pns.mantissa <=binary_format<T>::max_mantissa_fast_path() && !pns.too_many_digits) { - value = T(pns.mantissa); - if (pns.exponent < 0) { value = value / binary_format<T>::exact_power_of_ten(-pns.exponent); } - else { value = value * binary_format<T>::exact_power_of_ten(pns.exponent); } - if (pns.negative) { value = -value; } - return answer; + // The implementation of the Clinger's fast path is convoluted because + // we want round-to-nearest in all cases, irrespective of the rounding mode + // selected on the thread. + // We proceed optimistically, assuming that detail::rounds_to_nearest() returns + // true. + if (binary_format<T>::min_exponent_fast_path() <= pns.exponent && pns.exponent <= binary_format<T>::max_exponent_fast_path() && !pns.too_many_digits) { + // Unfortunately, the conventional Clinger's fast path is only possible + // when the system rounds to the nearest float. + // + // We expect the next branch to almost always be selected. + // We could check it first (before the previous branch), but + // there might be performance advantages at having the check + // be last. + if(detail::rounds_to_nearest()) { + // We have that fegetround() == FE_TONEAREST. + // Next is Clinger's fast path. + if (pns.mantissa <=binary_format<T>::max_mantissa_fast_path()) { + value = T(pns.mantissa); + if (pns.exponent < 0) { value = value / binary_format<T>::exact_power_of_ten(-pns.exponent); } + else { value = value * binary_format<T>::exact_power_of_ten(pns.exponent); } + if (pns.negative) { value = -value; } + return answer; + } + } else { + // We do not have that fegetround() == FE_TONEAREST. + // Next is a modified Clinger's fast path, inspired by Jakub JelĂnek's proposal + if (pns.exponent >= 0 && pns.mantissa <=binary_format<T>::max_mantissa_fast_path(pns.exponent)) { +#if defined(__clang__) + // Clang may map 0 to -0.0 when fegetround() == FE_DOWNWARD + if(pns.mantissa == 0) { + value = 0; + return answer; + } +#endif + value = T(pns.mantissa) * binary_format<T>::exact_power_of_ten(pns.exponent); + if (pns.negative) { value = -value; } + return answer; + } + } } adjusted_mantissa am = compute_float<binary_format<T>>(pns.exponent, pns.mantissa); if(pns.too_many_digits && am.power2 >= 0) { |