diff options
author | Devtools Arcadia <arcadia-devtools@yandex-team.ru> | 2022-02-07 18:08:42 +0300 |
---|---|---|
committer | Devtools Arcadia <arcadia-devtools@mous.vla.yp-c.yandex.net> | 2022-02-07 18:08:42 +0300 |
commit | 1110808a9d39d4b808aef724c861a2e1a38d2a69 (patch) | |
tree | e26c9fed0de5d9873cce7e00bc214573dc2195b7 /contrib/restricted/abseil-cpp-tstring/y_absl/strings | |
download | ydb-1110808a9d39d4b808aef724c861a2e1a38d2a69.tar.gz |
intermediate changes
ref:cde9a383711a11544ce7e107a78147fb96cc4029
Diffstat (limited to 'contrib/restricted/abseil-cpp-tstring/y_absl/strings')
98 files changed, 28838 insertions, 0 deletions
diff --git a/contrib/restricted/abseil-cpp-tstring/y_absl/strings/.yandex_meta/licenses.list.txt b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/.yandex_meta/licenses.list.txt new file mode 100644 index 0000000000..9d8552c68c --- /dev/null +++ b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/.yandex_meta/licenses.list.txt @@ -0,0 +1,46 @@ +====================Apache-2.0==================== +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + + +====================Apache-2.0==================== +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + + +====================COPYRIGHT==================== +// Copyright 2017 The Abseil Authors. + + +====================COPYRIGHT==================== +// Copyright 2018 The Abseil Authors. + + +====================COPYRIGHT==================== +// Copyright 2019 The Abseil Authors. + + +====================COPYRIGHT==================== +// Copyright 2020 The Abseil Authors. + + +====================COPYRIGHT==================== +// Copyright 2021 The Abseil Authors. diff --git a/contrib/restricted/abseil-cpp-tstring/y_absl/strings/ascii.cc b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/ascii.cc new file mode 100644 index 0000000000..959d6c27ff --- /dev/null +++ b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/ascii.cc @@ -0,0 +1,200 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "y_absl/strings/ascii.h" + +namespace y_absl { +ABSL_NAMESPACE_BEGIN +namespace ascii_internal { + +// # Table generated by this Python code (bit 0x02 is currently unused): +// TODO(mbar) Move Python code for generation of table to BUILD and link here. + +// NOTE: The kAsciiPropertyBits table used within this code was generated by +// Python code of the following form. (Bit 0x02 is currently unused and +// available.) +// +// def Hex2(n): +// return '0x' + hex(n/16)[2:] + hex(n%16)[2:] +// def IsPunct(ch): +// return (ord(ch) >= 32 and ord(ch) < 127 and +// not ch.isspace() and not ch.isalnum()) +// def IsBlank(ch): +// return ch in ' \t' +// def IsCntrl(ch): +// return ord(ch) < 32 or ord(ch) == 127 +// def IsXDigit(ch): +// return ch.isdigit() or ch.lower() in 'abcdef' +// for i in range(128): +// ch = chr(i) +// mask = ((ch.isalpha() and 0x01 or 0) | +// (ch.isalnum() and 0x04 or 0) | +// (ch.isspace() and 0x08 or 0) | +// (IsPunct(ch) and 0x10 or 0) | +// (IsBlank(ch) and 0x20 or 0) | +// (IsCntrl(ch) and 0x40 or 0) | +// (IsXDigit(ch) and 0x80 or 0)) +// print Hex2(mask) + ',', +// if i % 16 == 7: +// print ' //', Hex2(i & 0x78) +// elif i % 16 == 15: +// print + +// clang-format off +// Array of bitfields holding character information. Each bit value corresponds +// to a particular character feature. For readability, and because the value +// of these bits is tightly coupled to this implementation, the individual bits +// are not named. Note that bitfields for all characters above ASCII 127 are +// zero-initialized. +ABSL_DLL const unsigned char kPropertyBits[256] = { + 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, // 0x00 + 0x40, 0x68, 0x48, 0x48, 0x48, 0x48, 0x40, 0x40, + 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, // 0x10 + 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, + 0x28, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, // 0x20 + 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, + 0x84, 0x84, 0x84, 0x84, 0x84, 0x84, 0x84, 0x84, // 0x30 + 0x84, 0x84, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, + 0x10, 0x85, 0x85, 0x85, 0x85, 0x85, 0x85, 0x05, // 0x40 + 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, + 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, // 0x50 + 0x05, 0x05, 0x05, 0x10, 0x10, 0x10, 0x10, 0x10, + 0x10, 0x85, 0x85, 0x85, 0x85, 0x85, 0x85, 0x05, // 0x60 + 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, + 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, // 0x70 + 0x05, 0x05, 0x05, 0x10, 0x10, 0x10, 0x10, 0x40, +}; + +// Array of characters for the ascii_tolower() function. For values 'A' +// through 'Z', return the lower-case character; otherwise, return the +// identity of the passed character. +ABSL_DLL const char kToLower[256] = { + '\x00', '\x01', '\x02', '\x03', '\x04', '\x05', '\x06', '\x07', + '\x08', '\x09', '\x0a', '\x0b', '\x0c', '\x0d', '\x0e', '\x0f', + '\x10', '\x11', '\x12', '\x13', '\x14', '\x15', '\x16', '\x17', + '\x18', '\x19', '\x1a', '\x1b', '\x1c', '\x1d', '\x1e', '\x1f', + '\x20', '\x21', '\x22', '\x23', '\x24', '\x25', '\x26', '\x27', + '\x28', '\x29', '\x2a', '\x2b', '\x2c', '\x2d', '\x2e', '\x2f', + '\x30', '\x31', '\x32', '\x33', '\x34', '\x35', '\x36', '\x37', + '\x38', '\x39', '\x3a', '\x3b', '\x3c', '\x3d', '\x3e', '\x3f', + '\x40', 'a', 'b', 'c', 'd', 'e', 'f', 'g', + 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', + 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', + 'x', 'y', 'z', '\x5b', '\x5c', '\x5d', '\x5e', '\x5f', + '\x60', '\x61', '\x62', '\x63', '\x64', '\x65', '\x66', '\x67', + '\x68', '\x69', '\x6a', '\x6b', '\x6c', '\x6d', '\x6e', '\x6f', + '\x70', '\x71', '\x72', '\x73', '\x74', '\x75', '\x76', '\x77', + '\x78', '\x79', '\x7a', '\x7b', '\x7c', '\x7d', '\x7e', '\x7f', + '\x80', '\x81', '\x82', '\x83', '\x84', '\x85', '\x86', '\x87', + '\x88', '\x89', '\x8a', '\x8b', '\x8c', '\x8d', '\x8e', '\x8f', + '\x90', '\x91', '\x92', '\x93', '\x94', '\x95', '\x96', '\x97', + '\x98', '\x99', '\x9a', '\x9b', '\x9c', '\x9d', '\x9e', '\x9f', + '\xa0', '\xa1', '\xa2', '\xa3', '\xa4', '\xa5', '\xa6', '\xa7', + '\xa8', '\xa9', '\xaa', '\xab', '\xac', '\xad', '\xae', '\xaf', + '\xb0', '\xb1', '\xb2', '\xb3', '\xb4', '\xb5', '\xb6', '\xb7', + '\xb8', '\xb9', '\xba', '\xbb', '\xbc', '\xbd', '\xbe', '\xbf', + '\xc0', '\xc1', '\xc2', '\xc3', '\xc4', '\xc5', '\xc6', '\xc7', + '\xc8', '\xc9', '\xca', '\xcb', '\xcc', '\xcd', '\xce', '\xcf', + '\xd0', '\xd1', '\xd2', '\xd3', '\xd4', '\xd5', '\xd6', '\xd7', + '\xd8', '\xd9', '\xda', '\xdb', '\xdc', '\xdd', '\xde', '\xdf', + '\xe0', '\xe1', '\xe2', '\xe3', '\xe4', '\xe5', '\xe6', '\xe7', + '\xe8', '\xe9', '\xea', '\xeb', '\xec', '\xed', '\xee', '\xef', + '\xf0', '\xf1', '\xf2', '\xf3', '\xf4', '\xf5', '\xf6', '\xf7', + '\xf8', '\xf9', '\xfa', '\xfb', '\xfc', '\xfd', '\xfe', '\xff', +}; + +// Array of characters for the ascii_toupper() function. For values 'a' +// through 'z', return the upper-case character; otherwise, return the +// identity of the passed character. +ABSL_DLL const char kToUpper[256] = { + '\x00', '\x01', '\x02', '\x03', '\x04', '\x05', '\x06', '\x07', + '\x08', '\x09', '\x0a', '\x0b', '\x0c', '\x0d', '\x0e', '\x0f', + '\x10', '\x11', '\x12', '\x13', '\x14', '\x15', '\x16', '\x17', + '\x18', '\x19', '\x1a', '\x1b', '\x1c', '\x1d', '\x1e', '\x1f', + '\x20', '\x21', '\x22', '\x23', '\x24', '\x25', '\x26', '\x27', + '\x28', '\x29', '\x2a', '\x2b', '\x2c', '\x2d', '\x2e', '\x2f', + '\x30', '\x31', '\x32', '\x33', '\x34', '\x35', '\x36', '\x37', + '\x38', '\x39', '\x3a', '\x3b', '\x3c', '\x3d', '\x3e', '\x3f', + '\x40', '\x41', '\x42', '\x43', '\x44', '\x45', '\x46', '\x47', + '\x48', '\x49', '\x4a', '\x4b', '\x4c', '\x4d', '\x4e', '\x4f', + '\x50', '\x51', '\x52', '\x53', '\x54', '\x55', '\x56', '\x57', + '\x58', '\x59', '\x5a', '\x5b', '\x5c', '\x5d', '\x5e', '\x5f', + '\x60', 'A', 'B', 'C', 'D', 'E', 'F', 'G', + 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', + 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', + 'X', 'Y', 'Z', '\x7b', '\x7c', '\x7d', '\x7e', '\x7f', + '\x80', '\x81', '\x82', '\x83', '\x84', '\x85', '\x86', '\x87', + '\x88', '\x89', '\x8a', '\x8b', '\x8c', '\x8d', '\x8e', '\x8f', + '\x90', '\x91', '\x92', '\x93', '\x94', '\x95', '\x96', '\x97', + '\x98', '\x99', '\x9a', '\x9b', '\x9c', '\x9d', '\x9e', '\x9f', + '\xa0', '\xa1', '\xa2', '\xa3', '\xa4', '\xa5', '\xa6', '\xa7', + '\xa8', '\xa9', '\xaa', '\xab', '\xac', '\xad', '\xae', '\xaf', + '\xb0', '\xb1', '\xb2', '\xb3', '\xb4', '\xb5', '\xb6', '\xb7', + '\xb8', '\xb9', '\xba', '\xbb', '\xbc', '\xbd', '\xbe', '\xbf', + '\xc0', '\xc1', '\xc2', '\xc3', '\xc4', '\xc5', '\xc6', '\xc7', + '\xc8', '\xc9', '\xca', '\xcb', '\xcc', '\xcd', '\xce', '\xcf', + '\xd0', '\xd1', '\xd2', '\xd3', '\xd4', '\xd5', '\xd6', '\xd7', + '\xd8', '\xd9', '\xda', '\xdb', '\xdc', '\xdd', '\xde', '\xdf', + '\xe0', '\xe1', '\xe2', '\xe3', '\xe4', '\xe5', '\xe6', '\xe7', + '\xe8', '\xe9', '\xea', '\xeb', '\xec', '\xed', '\xee', '\xef', + '\xf0', '\xf1', '\xf2', '\xf3', '\xf4', '\xf5', '\xf6', '\xf7', + '\xf8', '\xf9', '\xfa', '\xfb', '\xfc', '\xfd', '\xfe', '\xff', +}; +// clang-format on + +} // namespace ascii_internal + +void AsciiStrToLower(TString* s) { + for (auto& ch : *s) { + ch = y_absl::ascii_tolower(ch); + } +} + +void AsciiStrToUpper(TString* s) { + for (auto& ch : *s) { + ch = y_absl::ascii_toupper(ch); + } +} + +void RemoveExtraAsciiWhitespace(TString* str) { + auto stripped = StripAsciiWhitespace(*str); + + if (stripped.empty()) { + str->clear(); + return; + } + + auto input_it = stripped.begin(); + auto input_end = stripped.end(); + auto output_it = &(*str)[0]; + bool is_ws = false; + + for (; input_it < input_end; ++input_it) { + if (is_ws) { + // Consecutive whitespace? Keep only the last. + is_ws = y_absl::ascii_isspace(*input_it); + if (is_ws) --output_it; + } else { + is_ws = y_absl::ascii_isspace(*input_it); + } + + *output_it = *input_it; + ++output_it; + } + + str->erase(output_it - &(*str)[0]); +} + +ABSL_NAMESPACE_END +} // namespace y_absl diff --git a/contrib/restricted/abseil-cpp-tstring/y_absl/strings/ascii.h b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/ascii.h new file mode 100644 index 0000000000..bc04710d8c --- /dev/null +++ b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/ascii.h @@ -0,0 +1,242 @@ +// +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// ----------------------------------------------------------------------------- +// File: ascii.h +// ----------------------------------------------------------------------------- +// +// This package contains functions operating on characters and strings +// restricted to standard ASCII. These include character classification +// functions analogous to those found in the ANSI C Standard Library <ctype.h> +// header file. +// +// C++ implementations provide <ctype.h> functionality based on their +// C environment locale. In general, reliance on such a locale is not ideal, as +// the locale standard is problematic (and may not return invariant information +// for the same character set, for example). These `ascii_*()` functions are +// hard-wired for standard ASCII, much faster, and guaranteed to behave +// consistently. They will never be overloaded, nor will their function +// signature change. +// +// `ascii_isalnum()`, `ascii_isalpha()`, `ascii_isascii()`, `ascii_isblank()`, +// `ascii_iscntrl()`, `ascii_isdigit()`, `ascii_isgraph()`, `ascii_islower()`, +// `ascii_isprint()`, `ascii_ispunct()`, `ascii_isspace()`, `ascii_isupper()`, +// `ascii_isxdigit()` +// Analogous to the <ctype.h> functions with similar names, these +// functions take an unsigned char and return a bool, based on whether the +// character matches the condition specified. +// +// If the input character has a numerical value greater than 127, these +// functions return `false`. +// +// `ascii_tolower()`, `ascii_toupper()` +// Analogous to the <ctype.h> functions with similar names, these functions +// take an unsigned char and return a char. +// +// If the input character is not an ASCII {lower,upper}-case letter (including +// numerical values greater than 127) then the functions return the same value +// as the input character. + +#ifndef ABSL_STRINGS_ASCII_H_ +#define ABSL_STRINGS_ASCII_H_ + +#include <algorithm> +#include <util/generic/string.h> + +#include "y_absl/base/attributes.h" +#include "y_absl/base/config.h" +#include "y_absl/strings/string_view.h" + +namespace y_absl { +ABSL_NAMESPACE_BEGIN +namespace ascii_internal { + +// Declaration for an array of bitfields holding character information. +ABSL_DLL extern const unsigned char kPropertyBits[256]; + +// Declaration for the array of characters to upper-case characters. +ABSL_DLL extern const char kToUpper[256]; + +// Declaration for the array of characters to lower-case characters. +ABSL_DLL extern const char kToLower[256]; + +} // namespace ascii_internal + +// ascii_isalpha() +// +// Determines whether the given character is an alphabetic character. +inline bool ascii_isalpha(unsigned char c) { + return (ascii_internal::kPropertyBits[c] & 0x01) != 0; +} + +// ascii_isalnum() +// +// Determines whether the given character is an alphanumeric character. +inline bool ascii_isalnum(unsigned char c) { + return (ascii_internal::kPropertyBits[c] & 0x04) != 0; +} + +// ascii_isspace() +// +// Determines whether the given character is a whitespace character (space, +// tab, vertical tab, formfeed, linefeed, or carriage return). +inline bool ascii_isspace(unsigned char c) { + return (ascii_internal::kPropertyBits[c] & 0x08) != 0; +} + +// ascii_ispunct() +// +// Determines whether the given character is a punctuation character. +inline bool ascii_ispunct(unsigned char c) { + return (ascii_internal::kPropertyBits[c] & 0x10) != 0; +} + +// ascii_isblank() +// +// Determines whether the given character is a blank character (tab or space). +inline bool ascii_isblank(unsigned char c) { + return (ascii_internal::kPropertyBits[c] & 0x20) != 0; +} + +// ascii_iscntrl() +// +// Determines whether the given character is a control character. +inline bool ascii_iscntrl(unsigned char c) { + return (ascii_internal::kPropertyBits[c] & 0x40) != 0; +} + +// ascii_isxdigit() +// +// Determines whether the given character can be represented as a hexadecimal +// digit character (i.e. {0-9} or {A-F}). +inline bool ascii_isxdigit(unsigned char c) { + return (ascii_internal::kPropertyBits[c] & 0x80) != 0; +} + +// ascii_isdigit() +// +// Determines whether the given character can be represented as a decimal +// digit character (i.e. {0-9}). +inline bool ascii_isdigit(unsigned char c) { return c >= '0' && c <= '9'; } + +// ascii_isprint() +// +// Determines whether the given character is printable, including whitespace. +inline bool ascii_isprint(unsigned char c) { return c >= 32 && c < 127; } + +// ascii_isgraph() +// +// Determines whether the given character has a graphical representation. +inline bool ascii_isgraph(unsigned char c) { return c > 32 && c < 127; } + +// ascii_isupper() +// +// Determines whether the given character is uppercase. +inline bool ascii_isupper(unsigned char c) { return c >= 'A' && c <= 'Z'; } + +// ascii_islower() +// +// Determines whether the given character is lowercase. +inline bool ascii_islower(unsigned char c) { return c >= 'a' && c <= 'z'; } + +// ascii_isascii() +// +// Determines whether the given character is ASCII. +inline bool ascii_isascii(unsigned char c) { return c < 128; } + +// ascii_tolower() +// +// Returns an ASCII character, converting to lowercase if uppercase is +// passed. Note that character values > 127 are simply returned. +inline char ascii_tolower(unsigned char c) { + return ascii_internal::kToLower[c]; +} + +// Converts the characters in `s` to lowercase, changing the contents of `s`. +void AsciiStrToLower(TString* s); + +// Creates a lowercase string from a given y_absl::string_view. +ABSL_MUST_USE_RESULT inline TString AsciiStrToLower(y_absl::string_view s) { + TString result(s); + y_absl::AsciiStrToLower(&result); + return result; +} + +// ascii_toupper() +// +// Returns the ASCII character, converting to upper-case if lower-case is +// passed. Note that characters values > 127 are simply returned. +inline char ascii_toupper(unsigned char c) { + return ascii_internal::kToUpper[c]; +} + +// Converts the characters in `s` to uppercase, changing the contents of `s`. +void AsciiStrToUpper(TString* s); + +// Creates an uppercase string from a given y_absl::string_view. +ABSL_MUST_USE_RESULT inline TString AsciiStrToUpper(y_absl::string_view s) { + TString result(s); + y_absl::AsciiStrToUpper(&result); + return result; +} + +// Returns y_absl::string_view with whitespace stripped from the beginning of the +// given string_view. +ABSL_MUST_USE_RESULT inline y_absl::string_view StripLeadingAsciiWhitespace( + y_absl::string_view str) { + auto it = std::find_if_not(str.begin(), str.end(), y_absl::ascii_isspace); + return str.substr(it - str.begin()); +} + +// Strips in place whitespace from the beginning of the given string. +inline void StripLeadingAsciiWhitespace(TString* str) { + auto it = std::find_if_not(str->cbegin(), str->cend(), y_absl::ascii_isspace); + str->erase(str->begin(), it); +} + +// Returns y_absl::string_view with whitespace stripped from the end of the given +// string_view. +ABSL_MUST_USE_RESULT inline y_absl::string_view StripTrailingAsciiWhitespace( + y_absl::string_view str) { + auto it = std::find_if_not(str.rbegin(), str.rend(), y_absl::ascii_isspace); + return str.substr(0, str.rend() - it); +} + +// Strips in place whitespace from the end of the given string +inline void StripTrailingAsciiWhitespace(TString* str) { + auto it = std::find_if_not(str->rbegin(), str->rend(), y_absl::ascii_isspace); + str->erase(str->rend() - it); +} + +// Returns y_absl::string_view with whitespace stripped from both ends of the +// given string_view. +ABSL_MUST_USE_RESULT inline y_absl::string_view StripAsciiWhitespace( + y_absl::string_view str) { + return StripTrailingAsciiWhitespace(StripLeadingAsciiWhitespace(str)); +} + +// Strips in place whitespace from both ends of the given string +inline void StripAsciiWhitespace(TString* str) { + StripTrailingAsciiWhitespace(str); + StripLeadingAsciiWhitespace(str); +} + +// Removes leading, trailing, and consecutive internal whitespace. +void RemoveExtraAsciiWhitespace(TString*); + +ABSL_NAMESPACE_END +} // namespace y_absl + +#endif // ABSL_STRINGS_ASCII_H_ diff --git a/contrib/restricted/abseil-cpp-tstring/y_absl/strings/charconv.cc b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/charconv.cc new file mode 100644 index 0000000000..9515ca24dd --- /dev/null +++ b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/charconv.cc @@ -0,0 +1,984 @@ +// Copyright 2018 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "y_absl/strings/charconv.h" + +#include <algorithm> +#include <cassert> +#include <cmath> +#include <cstring> + +#include "y_absl/base/casts.h" +#include "y_absl/numeric/bits.h" +#include "y_absl/numeric/int128.h" +#include "y_absl/strings/internal/charconv_bigint.h" +#include "y_absl/strings/internal/charconv_parse.h" + +// The macro ABSL_BIT_PACK_FLOATS is defined on x86-64, where IEEE floating +// point numbers have the same endianness in memory as a bitfield struct +// containing the corresponding parts. +// +// When set, we replace calls to ldexp() with manual bit packing, which is +// faster and is unaffected by floating point environment. +#ifdef ABSL_BIT_PACK_FLOATS +#error ABSL_BIT_PACK_FLOATS cannot be directly set +#elif defined(__x86_64__) || defined(_M_X64) +#define ABSL_BIT_PACK_FLOATS 1 +#endif + +// A note about subnormals: +// +// The code below talks about "normals" and "subnormals". A normal IEEE float +// has a fixed-width mantissa and power of two exponent. For example, a normal +// `double` has a 53-bit mantissa. Because the high bit is always 1, it is not +// stored in the representation. The implicit bit buys an extra bit of +// resolution in the datatype. +// +// The downside of this scheme is that there is a large gap between DBL_MIN and +// zero. (Large, at least, relative to the different between DBL_MIN and the +// next representable number). This gap is softened by the "subnormal" numbers, +// which have the same power-of-two exponent as DBL_MIN, but no implicit 53rd +// bit. An all-bits-zero exponent in the encoding represents subnormals. (Zero +// is represented as a subnormal with an all-bits-zero mantissa.) +// +// The code below, in calculations, represents the mantissa as a uint64_t. The +// end result normally has the 53rd bit set. It represents subnormals by using +// narrower mantissas. + +namespace y_absl { +ABSL_NAMESPACE_BEGIN +namespace { + +template <typename FloatType> +struct FloatTraits; + +template <> +struct FloatTraits<double> { + // The number of mantissa bits in the given float type. This includes the + // implied high bit. + static constexpr int kTargetMantissaBits = 53; + + // The largest supported IEEE exponent, in our integral mantissa + // representation. + // + // If `m` is the largest possible int kTargetMantissaBits bits wide, then + // m * 2**kMaxExponent is exactly equal to DBL_MAX. + static constexpr int kMaxExponent = 971; + + // The smallest supported IEEE normal exponent, in our integral mantissa + // representation. + // + // If `m` is the smallest possible int kTargetMantissaBits bits wide, then + // m * 2**kMinNormalExponent is exactly equal to DBL_MIN. + static constexpr int kMinNormalExponent = -1074; + + static double MakeNan(const char* tagp) { + // Support nan no matter which namespace it's in. Some platforms + // incorrectly don't put it in namespace std. + using namespace std; // NOLINT + return nan(tagp); + } + + // Builds a nonzero floating point number out of the provided parts. + // + // This is intended to do the same operation as ldexp(mantissa, exponent), + // but using purely integer math, to avoid -ffastmath and floating + // point environment issues. Using type punning is also faster. We fall back + // to ldexp on a per-platform basis for portability. + // + // `exponent` must be between kMinNormalExponent and kMaxExponent. + // + // `mantissa` must either be exactly kTargetMantissaBits wide, in which case + // a normal value is made, or it must be less narrow than that, in which case + // `exponent` must be exactly kMinNormalExponent, and a subnormal value is + // made. + static double Make(uint64_t mantissa, int exponent, bool sign) { +#ifndef ABSL_BIT_PACK_FLOATS + // Support ldexp no matter which namespace it's in. Some platforms + // incorrectly don't put it in namespace std. + using namespace std; // NOLINT + return sign ? -ldexp(mantissa, exponent) : ldexp(mantissa, exponent); +#else + constexpr uint64_t kMantissaMask = + (uint64_t{1} << (kTargetMantissaBits - 1)) - 1; + uint64_t dbl = static_cast<uint64_t>(sign) << 63; + if (mantissa > kMantissaMask) { + // Normal value. + // Adjust by 1023 for the exponent representation bias, and an additional + // 52 due to the implied decimal point in the IEEE mantissa represenation. + dbl += uint64_t{exponent + 1023u + kTargetMantissaBits - 1} << 52; + mantissa &= kMantissaMask; + } else { + // subnormal value + assert(exponent == kMinNormalExponent); + } + dbl += mantissa; + return y_absl::bit_cast<double>(dbl); +#endif // ABSL_BIT_PACK_FLOATS + } +}; + +// Specialization of floating point traits for the `float` type. See the +// FloatTraits<double> specialization above for meaning of each of the following +// members and methods. +template <> +struct FloatTraits<float> { + static constexpr int kTargetMantissaBits = 24; + static constexpr int kMaxExponent = 104; + static constexpr int kMinNormalExponent = -149; + static float MakeNan(const char* tagp) { + // Support nanf no matter which namespace it's in. Some platforms + // incorrectly don't put it in namespace std. + using namespace std; // NOLINT + return nanf(tagp); + } + static float Make(uint32_t mantissa, int exponent, bool sign) { +#ifndef ABSL_BIT_PACK_FLOATS + // Support ldexpf no matter which namespace it's in. Some platforms + // incorrectly don't put it in namespace std. + using namespace std; // NOLINT + return sign ? -ldexpf(mantissa, exponent) : ldexpf(mantissa, exponent); +#else + constexpr uint32_t kMantissaMask = + (uint32_t{1} << (kTargetMantissaBits - 1)) - 1; + uint32_t flt = static_cast<uint32_t>(sign) << 31; + if (mantissa > kMantissaMask) { + // Normal value. + // Adjust by 127 for the exponent representation bias, and an additional + // 23 due to the implied decimal point in the IEEE mantissa represenation. + flt += uint32_t{exponent + 127u + kTargetMantissaBits - 1} << 23; + mantissa &= kMantissaMask; + } else { + // subnormal value + assert(exponent == kMinNormalExponent); + } + flt += mantissa; + return y_absl::bit_cast<float>(flt); +#endif // ABSL_BIT_PACK_FLOATS + } +}; + +// Decimal-to-binary conversions require coercing powers of 10 into a mantissa +// and a power of 2. The two helper functions Power10Mantissa(n) and +// Power10Exponent(n) perform this task. Together, these represent a hand- +// rolled floating point value which is equal to or just less than 10**n. +// +// The return values satisfy two range guarantees: +// +// Power10Mantissa(n) * 2**Power10Exponent(n) <= 10**n +// < (Power10Mantissa(n) + 1) * 2**Power10Exponent(n) +// +// 2**63 <= Power10Mantissa(n) < 2**64. +// +// Lookups into the power-of-10 table must first check the Power10Overflow() and +// Power10Underflow() functions, to avoid out-of-bounds table access. +// +// Indexes into these tables are biased by -kPower10TableMin, and the table has +// values in the range [kPower10TableMin, kPower10TableMax]. +extern const uint64_t kPower10MantissaTable[]; +extern const int16_t kPower10ExponentTable[]; + +// The smallest allowed value for use with the Power10Mantissa() and +// Power10Exponent() functions below. (If a smaller exponent is needed in +// calculations, the end result is guaranteed to underflow.) +constexpr int kPower10TableMin = -342; + +// The largest allowed value for use with the Power10Mantissa() and +// Power10Exponent() functions below. (If a smaller exponent is needed in +// calculations, the end result is guaranteed to overflow.) +constexpr int kPower10TableMax = 308; + +uint64_t Power10Mantissa(int n) { + return kPower10MantissaTable[n - kPower10TableMin]; +} + +int Power10Exponent(int n) { + return kPower10ExponentTable[n - kPower10TableMin]; +} + +// Returns true if n is large enough that 10**n always results in an IEEE +// overflow. +bool Power10Overflow(int n) { return n > kPower10TableMax; } + +// Returns true if n is small enough that 10**n times a ParsedFloat mantissa +// always results in an IEEE underflow. +bool Power10Underflow(int n) { return n < kPower10TableMin; } + +// Returns true if Power10Mantissa(n) * 2**Power10Exponent(n) is exactly equal +// to 10**n numerically. Put another way, this returns true if there is no +// truncation error in Power10Mantissa(n). +bool Power10Exact(int n) { return n >= 0 && n <= 27; } + +// Sentinel exponent values for representing numbers too large or too close to +// zero to represent in a double. +constexpr int kOverflow = 99999; +constexpr int kUnderflow = -99999; + +// Struct representing the calculated conversion result of a positive (nonzero) +// floating point number. +// +// The calculated number is mantissa * 2**exponent (mantissa is treated as an +// integer.) `mantissa` is chosen to be the correct width for the IEEE float +// representation being calculated. (`mantissa` will always have the same bit +// width for normal values, and narrower bit widths for subnormals.) +// +// If the result of conversion was an underflow or overflow, exponent is set +// to kUnderflow or kOverflow. +struct CalculatedFloat { + uint64_t mantissa = 0; + int exponent = 0; +}; + +// Returns the bit width of the given uint128. (Equivalently, returns 128 +// minus the number of leading zero bits.) +unsigned BitWidth(uint128 value) { + if (Uint128High64(value) == 0) { + return static_cast<unsigned>(bit_width(Uint128Low64(value))); + } + return 128 - countl_zero(Uint128High64(value)); +} + +// Calculates how far to the right a mantissa needs to be shifted to create a +// properly adjusted mantissa for an IEEE floating point number. +// +// `mantissa_width` is the bit width of the mantissa to be shifted, and +// `binary_exponent` is the exponent of the number before the shift. +// +// This accounts for subnormal values, and will return a larger-than-normal +// shift if binary_exponent would otherwise be too low. +template <typename FloatType> +int NormalizedShiftSize(int mantissa_width, int binary_exponent) { + const int normal_shift = + mantissa_width - FloatTraits<FloatType>::kTargetMantissaBits; + const int minimum_shift = + FloatTraits<FloatType>::kMinNormalExponent - binary_exponent; + return std::max(normal_shift, minimum_shift); +} + +// Right shifts a uint128 so that it has the requested bit width. (The +// resulting value will have 128 - bit_width leading zeroes.) The initial +// `value` must be wider than the requested bit width. +// +// Returns the number of bits shifted. +int TruncateToBitWidth(int bit_width, uint128* value) { + const int current_bit_width = BitWidth(*value); + const int shift = current_bit_width - bit_width; + *value >>= shift; + return shift; +} + +// Checks if the given ParsedFloat represents one of the edge cases that are +// not dependent on number base: zero, infinity, or NaN. If so, sets *value +// the appropriate double, and returns true. +template <typename FloatType> +bool HandleEdgeCase(const strings_internal::ParsedFloat& input, bool negative, + FloatType* value) { + if (input.type == strings_internal::FloatType::kNan) { + // A bug in both clang and gcc would cause the compiler to optimize away the + // buffer we are building below. Declaring the buffer volatile avoids the + // issue, and has no measurable performance impact in microbenchmarks. + // + // https://bugs.llvm.org/show_bug.cgi?id=37778 + // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=86113 + constexpr ptrdiff_t kNanBufferSize = 128; + volatile char n_char_sequence[kNanBufferSize]; + if (input.subrange_begin == nullptr) { + n_char_sequence[0] = '\0'; + } else { + ptrdiff_t nan_size = input.subrange_end - input.subrange_begin; + nan_size = std::min(nan_size, kNanBufferSize - 1); + std::copy_n(input.subrange_begin, nan_size, n_char_sequence); + n_char_sequence[nan_size] = '\0'; + } + char* nan_argument = const_cast<char*>(n_char_sequence); + *value = negative ? -FloatTraits<FloatType>::MakeNan(nan_argument) + : FloatTraits<FloatType>::MakeNan(nan_argument); + return true; + } + if (input.type == strings_internal::FloatType::kInfinity) { + *value = negative ? -std::numeric_limits<FloatType>::infinity() + : std::numeric_limits<FloatType>::infinity(); + return true; + } + if (input.mantissa == 0) { + *value = negative ? -0.0 : 0.0; + return true; + } + return false; +} + +// Given a CalculatedFloat result of a from_chars conversion, generate the +// correct output values. +// +// CalculatedFloat can represent an underflow or overflow, in which case the +// error code in *result is set. Otherwise, the calculated floating point +// number is stored in *value. +template <typename FloatType> +void EncodeResult(const CalculatedFloat& calculated, bool negative, + y_absl::from_chars_result* result, FloatType* value) { + if (calculated.exponent == kOverflow) { + result->ec = std::errc::result_out_of_range; + *value = negative ? -std::numeric_limits<FloatType>::max() + : std::numeric_limits<FloatType>::max(); + return; + } else if (calculated.mantissa == 0 || calculated.exponent == kUnderflow) { + result->ec = std::errc::result_out_of_range; + *value = negative ? -0.0 : 0.0; + return; + } + *value = FloatTraits<FloatType>::Make(calculated.mantissa, + calculated.exponent, negative); +} + +// Returns the given uint128 shifted to the right by `shift` bits, and rounds +// the remaining bits using round_to_nearest logic. The value is returned as a +// uint64_t, since this is the type used by this library for storing calculated +// floating point mantissas. +// +// It is expected that the width of the input value shifted by `shift` will +// be the correct bit-width for the target mantissa, which is strictly narrower +// than a uint64_t. +// +// If `input_exact` is false, then a nonzero error epsilon is assumed. For +// rounding purposes, the true value being rounded is strictly greater than the +// input value. The error may represent a single lost carry bit. +// +// When input_exact, shifted bits of the form 1000000... represent a tie, which +// is broken by rounding to even -- the rounding direction is chosen so the low +// bit of the returned value is 0. +// +// When !input_exact, shifted bits of the form 10000000... represent a value +// strictly greater than one half (due to the error epsilon), and so ties are +// always broken by rounding up. +// +// When !input_exact, shifted bits of the form 01111111... are uncertain; +// the true value may or may not be greater than 10000000..., due to the +// possible lost carry bit. The correct rounding direction is unknown. In this +// case, the result is rounded down, and `output_exact` is set to false. +// +// Zero and negative values of `shift` are accepted, in which case the word is +// shifted left, as necessary. +uint64_t ShiftRightAndRound(uint128 value, int shift, bool input_exact, + bool* output_exact) { + if (shift <= 0) { + *output_exact = input_exact; + return static_cast<uint64_t>(value << -shift); + } + if (shift >= 128) { + // Exponent is so small that we are shifting away all significant bits. + // Answer will not be representable, even as a subnormal, so return a zero + // mantissa (which represents underflow). + *output_exact = true; + return 0; + } + + *output_exact = true; + const uint128 shift_mask = (uint128(1) << shift) - 1; + const uint128 halfway_point = uint128(1) << (shift - 1); + + const uint128 shifted_bits = value & shift_mask; + value >>= shift; + if (shifted_bits > halfway_point) { + // Shifted bits greater than 10000... require rounding up. + return static_cast<uint64_t>(value + 1); + } + if (shifted_bits == halfway_point) { + // In exact mode, shifted bits of 10000... mean we're exactly halfway + // between two numbers, and we must round to even. So only round up if + // the low bit of `value` is set. + // + // In inexact mode, the nonzero error means the actual value is greater + // than the halfway point and we must alway round up. + if ((value & 1) == 1 || !input_exact) { + ++value; + } + return static_cast<uint64_t>(value); + } + if (!input_exact && shifted_bits == halfway_point - 1) { + // Rounding direction is unclear, due to error. + *output_exact = false; + } + // Otherwise, round down. + return static_cast<uint64_t>(value); +} + +// Checks if a floating point guess needs to be rounded up, using high precision +// math. +// +// `guess_mantissa` and `guess_exponent` represent a candidate guess for the +// number represented by `parsed_decimal`. +// +// The exact number represented by `parsed_decimal` must lie between the two +// numbers: +// A = `guess_mantissa * 2**guess_exponent` +// B = `(guess_mantissa + 1) * 2**guess_exponent` +// +// This function returns false if `A` is the better guess, and true if `B` is +// the better guess, with rounding ties broken by rounding to even. +bool MustRoundUp(uint64_t guess_mantissa, int guess_exponent, + const strings_internal::ParsedFloat& parsed_decimal) { + // 768 is the number of digits needed in the worst case. We could determine a + // better limit dynamically based on the value of parsed_decimal.exponent. + // This would optimize pathological input cases only. (Sane inputs won't have + // hundreds of digits of mantissa.) + y_absl::strings_internal::BigUnsigned<84> exact_mantissa; + int exact_exponent = exact_mantissa.ReadFloatMantissa(parsed_decimal, 768); + + // Adjust the `guess` arguments to be halfway between A and B. + guess_mantissa = guess_mantissa * 2 + 1; + guess_exponent -= 1; + + // In our comparison: + // lhs = exact = exact_mantissa * 10**exact_exponent + // = exact_mantissa * 5**exact_exponent * 2**exact_exponent + // rhs = guess = guess_mantissa * 2**guess_exponent + // + // Because we are doing integer math, we can't directly deal with negative + // exponents. We instead move these to the other side of the inequality. + y_absl::strings_internal::BigUnsigned<84>& lhs = exact_mantissa; + int comparison; + if (exact_exponent >= 0) { + lhs.MultiplyByFiveToTheNth(exact_exponent); + y_absl::strings_internal::BigUnsigned<84> rhs(guess_mantissa); + // There are powers of 2 on both sides of the inequality; reduce this to + // a single bit-shift. + if (exact_exponent > guess_exponent) { + lhs.ShiftLeft(exact_exponent - guess_exponent); + } else { + rhs.ShiftLeft(guess_exponent - exact_exponent); + } + comparison = Compare(lhs, rhs); + } else { + // Move the power of 5 to the other side of the equation, giving us: + // lhs = exact_mantissa * 2**exact_exponent + // rhs = guess_mantissa * 5**(-exact_exponent) * 2**guess_exponent + y_absl::strings_internal::BigUnsigned<84> rhs = + y_absl::strings_internal::BigUnsigned<84>::FiveToTheNth(-exact_exponent); + rhs.MultiplyBy(guess_mantissa); + if (exact_exponent > guess_exponent) { + lhs.ShiftLeft(exact_exponent - guess_exponent); + } else { + rhs.ShiftLeft(guess_exponent - exact_exponent); + } + comparison = Compare(lhs, rhs); + } + if (comparison < 0) { + return false; + } else if (comparison > 0) { + return true; + } else { + // When lhs == rhs, the decimal input is exactly between A and B. + // Round towards even -- round up only if the low bit of the initial + // `guess_mantissa` was a 1. We shifted guess_mantissa left 1 bit at + // the beginning of this function, so test the 2nd bit here. + return (guess_mantissa & 2) == 2; + } +} + +// Constructs a CalculatedFloat from a given mantissa and exponent, but +// with the following normalizations applied: +// +// If rounding has caused mantissa to increase just past the allowed bit +// width, shift and adjust exponent. +// +// If exponent is too high, sets kOverflow. +// +// If mantissa is zero (representing a non-zero value not representable, even +// as a subnormal), sets kUnderflow. +template <typename FloatType> +CalculatedFloat CalculatedFloatFromRawValues(uint64_t mantissa, int exponent) { + CalculatedFloat result; + if (mantissa == uint64_t{1} << FloatTraits<FloatType>::kTargetMantissaBits) { + mantissa >>= 1; + exponent += 1; + } + if (exponent > FloatTraits<FloatType>::kMaxExponent) { + result.exponent = kOverflow; + } else if (mantissa == 0) { + result.exponent = kUnderflow; + } else { + result.exponent = exponent; + result.mantissa = mantissa; + } + return result; +} + +template <typename FloatType> +CalculatedFloat CalculateFromParsedHexadecimal( + const strings_internal::ParsedFloat& parsed_hex) { + uint64_t mantissa = parsed_hex.mantissa; + int exponent = parsed_hex.exponent; + auto mantissa_width = static_cast<unsigned>(bit_width(mantissa)); + const int shift = NormalizedShiftSize<FloatType>(mantissa_width, exponent); + bool result_exact; + exponent += shift; + mantissa = ShiftRightAndRound(mantissa, shift, + /* input exact= */ true, &result_exact); + // ParseFloat handles rounding in the hexadecimal case, so we don't have to + // check `result_exact` here. + return CalculatedFloatFromRawValues<FloatType>(mantissa, exponent); +} + +template <typename FloatType> +CalculatedFloat CalculateFromParsedDecimal( + const strings_internal::ParsedFloat& parsed_decimal) { + CalculatedFloat result; + + // Large or small enough decimal exponents will always result in overflow + // or underflow. + if (Power10Underflow(parsed_decimal.exponent)) { + result.exponent = kUnderflow; + return result; + } else if (Power10Overflow(parsed_decimal.exponent)) { + result.exponent = kOverflow; + return result; + } + + // Otherwise convert our power of 10 into a power of 2 times an integer + // mantissa, and multiply this by our parsed decimal mantissa. + uint128 wide_binary_mantissa = parsed_decimal.mantissa; + wide_binary_mantissa *= Power10Mantissa(parsed_decimal.exponent); + int binary_exponent = Power10Exponent(parsed_decimal.exponent); + + // Discard bits that are inaccurate due to truncation error. The magic + // `mantissa_width` constants below are justified in + // https://abseil.io/about/design/charconv. They represent the number of bits + // in `wide_binary_mantissa` that are guaranteed to be unaffected by error + // propagation. + bool mantissa_exact; + int mantissa_width; + if (parsed_decimal.subrange_begin) { + // Truncated mantissa + mantissa_width = 58; + mantissa_exact = false; + binary_exponent += + TruncateToBitWidth(mantissa_width, &wide_binary_mantissa); + } else if (!Power10Exact(parsed_decimal.exponent)) { + // Exact mantissa, truncated power of ten + mantissa_width = 63; + mantissa_exact = false; + binary_exponent += + TruncateToBitWidth(mantissa_width, &wide_binary_mantissa); + } else { + // Product is exact + mantissa_width = BitWidth(wide_binary_mantissa); + mantissa_exact = true; + } + + // Shift into an FloatType-sized mantissa, and round to nearest. + const int shift = + NormalizedShiftSize<FloatType>(mantissa_width, binary_exponent); + bool result_exact; + binary_exponent += shift; + uint64_t binary_mantissa = ShiftRightAndRound(wide_binary_mantissa, shift, + mantissa_exact, &result_exact); + if (!result_exact) { + // We could not determine the rounding direction using int128 math. Use + // full resolution math instead. + if (MustRoundUp(binary_mantissa, binary_exponent, parsed_decimal)) { + binary_mantissa += 1; + } + } + + return CalculatedFloatFromRawValues<FloatType>(binary_mantissa, + binary_exponent); +} + +template <typename FloatType> +from_chars_result FromCharsImpl(const char* first, const char* last, + FloatType& value, chars_format fmt_flags) { + from_chars_result result; + result.ptr = first; // overwritten on successful parse + result.ec = std::errc(); + + bool negative = false; + if (first != last && *first == '-') { + ++first; + negative = true; + } + // If the `hex` flag is *not* set, then we will accept a 0x prefix and try + // to parse a hexadecimal float. + if ((fmt_flags & chars_format::hex) == chars_format{} && last - first >= 2 && + *first == '0' && (first[1] == 'x' || first[1] == 'X')) { + const char* hex_first = first + 2; + strings_internal::ParsedFloat hex_parse = + strings_internal::ParseFloat<16>(hex_first, last, fmt_flags); + if (hex_parse.end == nullptr || + hex_parse.type != strings_internal::FloatType::kNumber) { + // Either we failed to parse a hex float after the "0x", or we read + // "0xinf" or "0xnan" which we don't want to match. + // + // However, a string that begins with "0x" also begins with "0", which + // is normally a valid match for the number zero. So we want these + // strings to match zero unless fmt_flags is `scientific`. (This flag + // means an exponent is required, which the string "0" does not have.) + if (fmt_flags == chars_format::scientific) { + result.ec = std::errc::invalid_argument; + } else { + result.ptr = first + 1; + value = negative ? -0.0 : 0.0; + } + return result; + } + // We matched a value. + result.ptr = hex_parse.end; + if (HandleEdgeCase(hex_parse, negative, &value)) { + return result; + } + CalculatedFloat calculated = + CalculateFromParsedHexadecimal<FloatType>(hex_parse); + EncodeResult(calculated, negative, &result, &value); + return result; + } + // Otherwise, we choose the number base based on the flags. + if ((fmt_flags & chars_format::hex) == chars_format::hex) { + strings_internal::ParsedFloat hex_parse = + strings_internal::ParseFloat<16>(first, last, fmt_flags); + if (hex_parse.end == nullptr) { + result.ec = std::errc::invalid_argument; + return result; + } + result.ptr = hex_parse.end; + if (HandleEdgeCase(hex_parse, negative, &value)) { + return result; + } + CalculatedFloat calculated = + CalculateFromParsedHexadecimal<FloatType>(hex_parse); + EncodeResult(calculated, negative, &result, &value); + return result; + } else { + strings_internal::ParsedFloat decimal_parse = + strings_internal::ParseFloat<10>(first, last, fmt_flags); + if (decimal_parse.end == nullptr) { + result.ec = std::errc::invalid_argument; + return result; + } + result.ptr = decimal_parse.end; + if (HandleEdgeCase(decimal_parse, negative, &value)) { + return result; + } + CalculatedFloat calculated = + CalculateFromParsedDecimal<FloatType>(decimal_parse); + EncodeResult(calculated, negative, &result, &value); + return result; + } +} +} // namespace + +from_chars_result from_chars(const char* first, const char* last, double& value, + chars_format fmt) { + return FromCharsImpl(first, last, value, fmt); +} + +from_chars_result from_chars(const char* first, const char* last, float& value, + chars_format fmt) { + return FromCharsImpl(first, last, value, fmt); +} + +namespace { + +// Table of powers of 10, from kPower10TableMin to kPower10TableMax. +// +// kPower10MantissaTable[i - kPower10TableMin] stores the 64-bit mantissa (high +// bit always on), and kPower10ExponentTable[i - kPower10TableMin] stores the +// power-of-two exponent. For a given number i, this gives the unique mantissa +// and exponent such that mantissa * 2**exponent <= 10**i < (mantissa + 1) * +// 2**exponent. + +const uint64_t kPower10MantissaTable[] = { + 0xeef453d6923bd65aU, 0x9558b4661b6565f8U, 0xbaaee17fa23ebf76U, + 0xe95a99df8ace6f53U, 0x91d8a02bb6c10594U, 0xb64ec836a47146f9U, + 0xe3e27a444d8d98b7U, 0x8e6d8c6ab0787f72U, 0xb208ef855c969f4fU, + 0xde8b2b66b3bc4723U, 0x8b16fb203055ac76U, 0xaddcb9e83c6b1793U, + 0xd953e8624b85dd78U, 0x87d4713d6f33aa6bU, 0xa9c98d8ccb009506U, + 0xd43bf0effdc0ba48U, 0x84a57695fe98746dU, 0xa5ced43b7e3e9188U, + 0xcf42894a5dce35eaU, 0x818995ce7aa0e1b2U, 0xa1ebfb4219491a1fU, + 0xca66fa129f9b60a6U, 0xfd00b897478238d0U, 0x9e20735e8cb16382U, + 0xc5a890362fddbc62U, 0xf712b443bbd52b7bU, 0x9a6bb0aa55653b2dU, + 0xc1069cd4eabe89f8U, 0xf148440a256e2c76U, 0x96cd2a865764dbcaU, + 0xbc807527ed3e12bcU, 0xeba09271e88d976bU, 0x93445b8731587ea3U, + 0xb8157268fdae9e4cU, 0xe61acf033d1a45dfU, 0x8fd0c16206306babU, + 0xb3c4f1ba87bc8696U, 0xe0b62e2929aba83cU, 0x8c71dcd9ba0b4925U, + 0xaf8e5410288e1b6fU, 0xdb71e91432b1a24aU, 0x892731ac9faf056eU, + 0xab70fe17c79ac6caU, 0xd64d3d9db981787dU, 0x85f0468293f0eb4eU, + 0xa76c582338ed2621U, 0xd1476e2c07286faaU, 0x82cca4db847945caU, + 0xa37fce126597973cU, 0xcc5fc196fefd7d0cU, 0xff77b1fcbebcdc4fU, + 0x9faacf3df73609b1U, 0xc795830d75038c1dU, 0xf97ae3d0d2446f25U, + 0x9becce62836ac577U, 0xc2e801fb244576d5U, 0xf3a20279ed56d48aU, + 0x9845418c345644d6U, 0xbe5691ef416bd60cU, 0xedec366b11c6cb8fU, + 0x94b3a202eb1c3f39U, 0xb9e08a83a5e34f07U, 0xe858ad248f5c22c9U, + 0x91376c36d99995beU, 0xb58547448ffffb2dU, 0xe2e69915b3fff9f9U, + 0x8dd01fad907ffc3bU, 0xb1442798f49ffb4aU, 0xdd95317f31c7fa1dU, + 0x8a7d3eef7f1cfc52U, 0xad1c8eab5ee43b66U, 0xd863b256369d4a40U, + 0x873e4f75e2224e68U, 0xa90de3535aaae202U, 0xd3515c2831559a83U, + 0x8412d9991ed58091U, 0xa5178fff668ae0b6U, 0xce5d73ff402d98e3U, + 0x80fa687f881c7f8eU, 0xa139029f6a239f72U, 0xc987434744ac874eU, + 0xfbe9141915d7a922U, 0x9d71ac8fada6c9b5U, 0xc4ce17b399107c22U, + 0xf6019da07f549b2bU, 0x99c102844f94e0fbU, 0xc0314325637a1939U, + 0xf03d93eebc589f88U, 0x96267c7535b763b5U, 0xbbb01b9283253ca2U, + 0xea9c227723ee8bcbU, 0x92a1958a7675175fU, 0xb749faed14125d36U, + 0xe51c79a85916f484U, 0x8f31cc0937ae58d2U, 0xb2fe3f0b8599ef07U, + 0xdfbdcece67006ac9U, 0x8bd6a141006042bdU, 0xaecc49914078536dU, + 0xda7f5bf590966848U, 0x888f99797a5e012dU, 0xaab37fd7d8f58178U, + 0xd5605fcdcf32e1d6U, 0x855c3be0a17fcd26U, 0xa6b34ad8c9dfc06fU, + 0xd0601d8efc57b08bU, 0x823c12795db6ce57U, 0xa2cb1717b52481edU, + 0xcb7ddcdda26da268U, 0xfe5d54150b090b02U, 0x9efa548d26e5a6e1U, + 0xc6b8e9b0709f109aU, 0xf867241c8cc6d4c0U, 0x9b407691d7fc44f8U, + 0xc21094364dfb5636U, 0xf294b943e17a2bc4U, 0x979cf3ca6cec5b5aU, + 0xbd8430bd08277231U, 0xece53cec4a314ebdU, 0x940f4613ae5ed136U, + 0xb913179899f68584U, 0xe757dd7ec07426e5U, 0x9096ea6f3848984fU, + 0xb4bca50b065abe63U, 0xe1ebce4dc7f16dfbU, 0x8d3360f09cf6e4bdU, + 0xb080392cc4349decU, 0xdca04777f541c567U, 0x89e42caaf9491b60U, + 0xac5d37d5b79b6239U, 0xd77485cb25823ac7U, 0x86a8d39ef77164bcU, + 0xa8530886b54dbdebU, 0xd267caa862a12d66U, 0x8380dea93da4bc60U, + 0xa46116538d0deb78U, 0xcd795be870516656U, 0x806bd9714632dff6U, + 0xa086cfcd97bf97f3U, 0xc8a883c0fdaf7df0U, 0xfad2a4b13d1b5d6cU, + 0x9cc3a6eec6311a63U, 0xc3f490aa77bd60fcU, 0xf4f1b4d515acb93bU, + 0x991711052d8bf3c5U, 0xbf5cd54678eef0b6U, 0xef340a98172aace4U, + 0x9580869f0e7aac0eU, 0xbae0a846d2195712U, 0xe998d258869facd7U, + 0x91ff83775423cc06U, 0xb67f6455292cbf08U, 0xe41f3d6a7377eecaU, + 0x8e938662882af53eU, 0xb23867fb2a35b28dU, 0xdec681f9f4c31f31U, + 0x8b3c113c38f9f37eU, 0xae0b158b4738705eU, 0xd98ddaee19068c76U, + 0x87f8a8d4cfa417c9U, 0xa9f6d30a038d1dbcU, 0xd47487cc8470652bU, + 0x84c8d4dfd2c63f3bU, 0xa5fb0a17c777cf09U, 0xcf79cc9db955c2ccU, + 0x81ac1fe293d599bfU, 0xa21727db38cb002fU, 0xca9cf1d206fdc03bU, + 0xfd442e4688bd304aU, 0x9e4a9cec15763e2eU, 0xc5dd44271ad3cdbaU, + 0xf7549530e188c128U, 0x9a94dd3e8cf578b9U, 0xc13a148e3032d6e7U, + 0xf18899b1bc3f8ca1U, 0x96f5600f15a7b7e5U, 0xbcb2b812db11a5deU, + 0xebdf661791d60f56U, 0x936b9fcebb25c995U, 0xb84687c269ef3bfbU, + 0xe65829b3046b0afaU, 0x8ff71a0fe2c2e6dcU, 0xb3f4e093db73a093U, + 0xe0f218b8d25088b8U, 0x8c974f7383725573U, 0xafbd2350644eeacfU, + 0xdbac6c247d62a583U, 0x894bc396ce5da772U, 0xab9eb47c81f5114fU, + 0xd686619ba27255a2U, 0x8613fd0145877585U, 0xa798fc4196e952e7U, + 0xd17f3b51fca3a7a0U, 0x82ef85133de648c4U, 0xa3ab66580d5fdaf5U, + 0xcc963fee10b7d1b3U, 0xffbbcfe994e5c61fU, 0x9fd561f1fd0f9bd3U, + 0xc7caba6e7c5382c8U, 0xf9bd690a1b68637bU, 0x9c1661a651213e2dU, + 0xc31bfa0fe5698db8U, 0xf3e2f893dec3f126U, 0x986ddb5c6b3a76b7U, + 0xbe89523386091465U, 0xee2ba6c0678b597fU, 0x94db483840b717efU, + 0xba121a4650e4ddebU, 0xe896a0d7e51e1566U, 0x915e2486ef32cd60U, + 0xb5b5ada8aaff80b8U, 0xe3231912d5bf60e6U, 0x8df5efabc5979c8fU, + 0xb1736b96b6fd83b3U, 0xddd0467c64bce4a0U, 0x8aa22c0dbef60ee4U, + 0xad4ab7112eb3929dU, 0xd89d64d57a607744U, 0x87625f056c7c4a8bU, + 0xa93af6c6c79b5d2dU, 0xd389b47879823479U, 0x843610cb4bf160cbU, + 0xa54394fe1eedb8feU, 0xce947a3da6a9273eU, 0x811ccc668829b887U, + 0xa163ff802a3426a8U, 0xc9bcff6034c13052U, 0xfc2c3f3841f17c67U, + 0x9d9ba7832936edc0U, 0xc5029163f384a931U, 0xf64335bcf065d37dU, + 0x99ea0196163fa42eU, 0xc06481fb9bcf8d39U, 0xf07da27a82c37088U, + 0x964e858c91ba2655U, 0xbbe226efb628afeaU, 0xeadab0aba3b2dbe5U, + 0x92c8ae6b464fc96fU, 0xb77ada0617e3bbcbU, 0xe55990879ddcaabdU, + 0x8f57fa54c2a9eab6U, 0xb32df8e9f3546564U, 0xdff9772470297ebdU, + 0x8bfbea76c619ef36U, 0xaefae51477a06b03U, 0xdab99e59958885c4U, + 0x88b402f7fd75539bU, 0xaae103b5fcd2a881U, 0xd59944a37c0752a2U, + 0x857fcae62d8493a5U, 0xa6dfbd9fb8e5b88eU, 0xd097ad07a71f26b2U, + 0x825ecc24c873782fU, 0xa2f67f2dfa90563bU, 0xcbb41ef979346bcaU, + 0xfea126b7d78186bcU, 0x9f24b832e6b0f436U, 0xc6ede63fa05d3143U, + 0xf8a95fcf88747d94U, 0x9b69dbe1b548ce7cU, 0xc24452da229b021bU, + 0xf2d56790ab41c2a2U, 0x97c560ba6b0919a5U, 0xbdb6b8e905cb600fU, + 0xed246723473e3813U, 0x9436c0760c86e30bU, 0xb94470938fa89bceU, + 0xe7958cb87392c2c2U, 0x90bd77f3483bb9b9U, 0xb4ecd5f01a4aa828U, + 0xe2280b6c20dd5232U, 0x8d590723948a535fU, 0xb0af48ec79ace837U, + 0xdcdb1b2798182244U, 0x8a08f0f8bf0f156bU, 0xac8b2d36eed2dac5U, + 0xd7adf884aa879177U, 0x86ccbb52ea94baeaU, 0xa87fea27a539e9a5U, + 0xd29fe4b18e88640eU, 0x83a3eeeef9153e89U, 0xa48ceaaab75a8e2bU, + 0xcdb02555653131b6U, 0x808e17555f3ebf11U, 0xa0b19d2ab70e6ed6U, + 0xc8de047564d20a8bU, 0xfb158592be068d2eU, 0x9ced737bb6c4183dU, + 0xc428d05aa4751e4cU, 0xf53304714d9265dfU, 0x993fe2c6d07b7fabU, + 0xbf8fdb78849a5f96U, 0xef73d256a5c0f77cU, 0x95a8637627989aadU, + 0xbb127c53b17ec159U, 0xe9d71b689dde71afU, 0x9226712162ab070dU, + 0xb6b00d69bb55c8d1U, 0xe45c10c42a2b3b05U, 0x8eb98a7a9a5b04e3U, + 0xb267ed1940f1c61cU, 0xdf01e85f912e37a3U, 0x8b61313bbabce2c6U, + 0xae397d8aa96c1b77U, 0xd9c7dced53c72255U, 0x881cea14545c7575U, + 0xaa242499697392d2U, 0xd4ad2dbfc3d07787U, 0x84ec3c97da624ab4U, + 0xa6274bbdd0fadd61U, 0xcfb11ead453994baU, 0x81ceb32c4b43fcf4U, + 0xa2425ff75e14fc31U, 0xcad2f7f5359a3b3eU, 0xfd87b5f28300ca0dU, + 0x9e74d1b791e07e48U, 0xc612062576589ddaU, 0xf79687aed3eec551U, + 0x9abe14cd44753b52U, 0xc16d9a0095928a27U, 0xf1c90080baf72cb1U, + 0x971da05074da7beeU, 0xbce5086492111aeaU, 0xec1e4a7db69561a5U, + 0x9392ee8e921d5d07U, 0xb877aa3236a4b449U, 0xe69594bec44de15bU, + 0x901d7cf73ab0acd9U, 0xb424dc35095cd80fU, 0xe12e13424bb40e13U, + 0x8cbccc096f5088cbU, 0xafebff0bcb24aafeU, 0xdbe6fecebdedd5beU, + 0x89705f4136b4a597U, 0xabcc77118461cefcU, 0xd6bf94d5e57a42bcU, + 0x8637bd05af6c69b5U, 0xa7c5ac471b478423U, 0xd1b71758e219652bU, + 0x83126e978d4fdf3bU, 0xa3d70a3d70a3d70aU, 0xccccccccccccccccU, + 0x8000000000000000U, 0xa000000000000000U, 0xc800000000000000U, + 0xfa00000000000000U, 0x9c40000000000000U, 0xc350000000000000U, + 0xf424000000000000U, 0x9896800000000000U, 0xbebc200000000000U, + 0xee6b280000000000U, 0x9502f90000000000U, 0xba43b74000000000U, + 0xe8d4a51000000000U, 0x9184e72a00000000U, 0xb5e620f480000000U, + 0xe35fa931a0000000U, 0x8e1bc9bf04000000U, 0xb1a2bc2ec5000000U, + 0xde0b6b3a76400000U, 0x8ac7230489e80000U, 0xad78ebc5ac620000U, + 0xd8d726b7177a8000U, 0x878678326eac9000U, 0xa968163f0a57b400U, + 0xd3c21bcecceda100U, 0x84595161401484a0U, 0xa56fa5b99019a5c8U, + 0xcecb8f27f4200f3aU, 0x813f3978f8940984U, 0xa18f07d736b90be5U, + 0xc9f2c9cd04674edeU, 0xfc6f7c4045812296U, 0x9dc5ada82b70b59dU, + 0xc5371912364ce305U, 0xf684df56c3e01bc6U, 0x9a130b963a6c115cU, + 0xc097ce7bc90715b3U, 0xf0bdc21abb48db20U, 0x96769950b50d88f4U, + 0xbc143fa4e250eb31U, 0xeb194f8e1ae525fdU, 0x92efd1b8d0cf37beU, + 0xb7abc627050305adU, 0xe596b7b0c643c719U, 0x8f7e32ce7bea5c6fU, + 0xb35dbf821ae4f38bU, 0xe0352f62a19e306eU, 0x8c213d9da502de45U, + 0xaf298d050e4395d6U, 0xdaf3f04651d47b4cU, 0x88d8762bf324cd0fU, + 0xab0e93b6efee0053U, 0xd5d238a4abe98068U, 0x85a36366eb71f041U, + 0xa70c3c40a64e6c51U, 0xd0cf4b50cfe20765U, 0x82818f1281ed449fU, + 0xa321f2d7226895c7U, 0xcbea6f8ceb02bb39U, 0xfee50b7025c36a08U, + 0x9f4f2726179a2245U, 0xc722f0ef9d80aad6U, 0xf8ebad2b84e0d58bU, + 0x9b934c3b330c8577U, 0xc2781f49ffcfa6d5U, 0xf316271c7fc3908aU, + 0x97edd871cfda3a56U, 0xbde94e8e43d0c8ecU, 0xed63a231d4c4fb27U, + 0x945e455f24fb1cf8U, 0xb975d6b6ee39e436U, 0xe7d34c64a9c85d44U, + 0x90e40fbeea1d3a4aU, 0xb51d13aea4a488ddU, 0xe264589a4dcdab14U, + 0x8d7eb76070a08aecU, 0xb0de65388cc8ada8U, 0xdd15fe86affad912U, + 0x8a2dbf142dfcc7abU, 0xacb92ed9397bf996U, 0xd7e77a8f87daf7fbU, + 0x86f0ac99b4e8dafdU, 0xa8acd7c0222311bcU, 0xd2d80db02aabd62bU, + 0x83c7088e1aab65dbU, 0xa4b8cab1a1563f52U, 0xcde6fd5e09abcf26U, + 0x80b05e5ac60b6178U, 0xa0dc75f1778e39d6U, 0xc913936dd571c84cU, + 0xfb5878494ace3a5fU, 0x9d174b2dcec0e47bU, 0xc45d1df942711d9aU, + 0xf5746577930d6500U, 0x9968bf6abbe85f20U, 0xbfc2ef456ae276e8U, + 0xefb3ab16c59b14a2U, 0x95d04aee3b80ece5U, 0xbb445da9ca61281fU, + 0xea1575143cf97226U, 0x924d692ca61be758U, 0xb6e0c377cfa2e12eU, + 0xe498f455c38b997aU, 0x8edf98b59a373fecU, 0xb2977ee300c50fe7U, + 0xdf3d5e9bc0f653e1U, 0x8b865b215899f46cU, 0xae67f1e9aec07187U, + 0xda01ee641a708de9U, 0x884134fe908658b2U, 0xaa51823e34a7eedeU, + 0xd4e5e2cdc1d1ea96U, 0x850fadc09923329eU, 0xa6539930bf6bff45U, + 0xcfe87f7cef46ff16U, 0x81f14fae158c5f6eU, 0xa26da3999aef7749U, + 0xcb090c8001ab551cU, 0xfdcb4fa002162a63U, 0x9e9f11c4014dda7eU, + 0xc646d63501a1511dU, 0xf7d88bc24209a565U, 0x9ae757596946075fU, + 0xc1a12d2fc3978937U, 0xf209787bb47d6b84U, 0x9745eb4d50ce6332U, + 0xbd176620a501fbffU, 0xec5d3fa8ce427affU, 0x93ba47c980e98cdfU, + 0xb8a8d9bbe123f017U, 0xe6d3102ad96cec1dU, 0x9043ea1ac7e41392U, + 0xb454e4a179dd1877U, 0xe16a1dc9d8545e94U, 0x8ce2529e2734bb1dU, + 0xb01ae745b101e9e4U, 0xdc21a1171d42645dU, 0x899504ae72497ebaU, + 0xabfa45da0edbde69U, 0xd6f8d7509292d603U, 0x865b86925b9bc5c2U, + 0xa7f26836f282b732U, 0xd1ef0244af2364ffU, 0x8335616aed761f1fU, + 0xa402b9c5a8d3a6e7U, 0xcd036837130890a1U, 0x802221226be55a64U, + 0xa02aa96b06deb0fdU, 0xc83553c5c8965d3dU, 0xfa42a8b73abbf48cU, + 0x9c69a97284b578d7U, 0xc38413cf25e2d70dU, 0xf46518c2ef5b8cd1U, + 0x98bf2f79d5993802U, 0xbeeefb584aff8603U, 0xeeaaba2e5dbf6784U, + 0x952ab45cfa97a0b2U, 0xba756174393d88dfU, 0xe912b9d1478ceb17U, + 0x91abb422ccb812eeU, 0xb616a12b7fe617aaU, 0xe39c49765fdf9d94U, + 0x8e41ade9fbebc27dU, 0xb1d219647ae6b31cU, 0xde469fbd99a05fe3U, + 0x8aec23d680043beeU, 0xada72ccc20054ae9U, 0xd910f7ff28069da4U, + 0x87aa9aff79042286U, 0xa99541bf57452b28U, 0xd3fa922f2d1675f2U, + 0x847c9b5d7c2e09b7U, 0xa59bc234db398c25U, 0xcf02b2c21207ef2eU, + 0x8161afb94b44f57dU, 0xa1ba1ba79e1632dcU, 0xca28a291859bbf93U, + 0xfcb2cb35e702af78U, 0x9defbf01b061adabU, 0xc56baec21c7a1916U, + 0xf6c69a72a3989f5bU, 0x9a3c2087a63f6399U, 0xc0cb28a98fcf3c7fU, + 0xf0fdf2d3f3c30b9fU, 0x969eb7c47859e743U, 0xbc4665b596706114U, + 0xeb57ff22fc0c7959U, 0x9316ff75dd87cbd8U, 0xb7dcbf5354e9beceU, + 0xe5d3ef282a242e81U, 0x8fa475791a569d10U, 0xb38d92d760ec4455U, + 0xe070f78d3927556aU, 0x8c469ab843b89562U, 0xaf58416654a6babbU, + 0xdb2e51bfe9d0696aU, 0x88fcf317f22241e2U, 0xab3c2fddeeaad25aU, + 0xd60b3bd56a5586f1U, 0x85c7056562757456U, 0xa738c6bebb12d16cU, + 0xd106f86e69d785c7U, 0x82a45b450226b39cU, 0xa34d721642b06084U, + 0xcc20ce9bd35c78a5U, 0xff290242c83396ceU, 0x9f79a169bd203e41U, + 0xc75809c42c684dd1U, 0xf92e0c3537826145U, 0x9bbcc7a142b17ccbU, + 0xc2abf989935ddbfeU, 0xf356f7ebf83552feU, 0x98165af37b2153deU, + 0xbe1bf1b059e9a8d6U, 0xeda2ee1c7064130cU, 0x9485d4d1c63e8be7U, + 0xb9a74a0637ce2ee1U, 0xe8111c87c5c1ba99U, 0x910ab1d4db9914a0U, + 0xb54d5e4a127f59c8U, 0xe2a0b5dc971f303aU, 0x8da471a9de737e24U, + 0xb10d8e1456105dadU, 0xdd50f1996b947518U, 0x8a5296ffe33cc92fU, + 0xace73cbfdc0bfb7bU, 0xd8210befd30efa5aU, 0x8714a775e3e95c78U, + 0xa8d9d1535ce3b396U, 0xd31045a8341ca07cU, 0x83ea2b892091e44dU, + 0xa4e4b66b68b65d60U, 0xce1de40642e3f4b9U, 0x80d2ae83e9ce78f3U, + 0xa1075a24e4421730U, 0xc94930ae1d529cfcU, 0xfb9b7cd9a4a7443cU, + 0x9d412e0806e88aa5U, 0xc491798a08a2ad4eU, 0xf5b5d7ec8acb58a2U, + 0x9991a6f3d6bf1765U, 0xbff610b0cc6edd3fU, 0xeff394dcff8a948eU, + 0x95f83d0a1fb69cd9U, 0xbb764c4ca7a4440fU, 0xea53df5fd18d5513U, + 0x92746b9be2f8552cU, 0xb7118682dbb66a77U, 0xe4d5e82392a40515U, + 0x8f05b1163ba6832dU, 0xb2c71d5bca9023f8U, 0xdf78e4b2bd342cf6U, + 0x8bab8eefb6409c1aU, 0xae9672aba3d0c320U, 0xda3c0f568cc4f3e8U, + 0x8865899617fb1871U, 0xaa7eebfb9df9de8dU, 0xd51ea6fa85785631U, + 0x8533285c936b35deU, 0xa67ff273b8460356U, 0xd01fef10a657842cU, + 0x8213f56a67f6b29bU, 0xa298f2c501f45f42U, 0xcb3f2f7642717713U, + 0xfe0efb53d30dd4d7U, 0x9ec95d1463e8a506U, 0xc67bb4597ce2ce48U, + 0xf81aa16fdc1b81daU, 0x9b10a4e5e9913128U, 0xc1d4ce1f63f57d72U, + 0xf24a01a73cf2dccfU, 0x976e41088617ca01U, 0xbd49d14aa79dbc82U, + 0xec9c459d51852ba2U, 0x93e1ab8252f33b45U, 0xb8da1662e7b00a17U, + 0xe7109bfba19c0c9dU, 0x906a617d450187e2U, 0xb484f9dc9641e9daU, + 0xe1a63853bbd26451U, 0x8d07e33455637eb2U, 0xb049dc016abc5e5fU, + 0xdc5c5301c56b75f7U, 0x89b9b3e11b6329baU, 0xac2820d9623bf429U, + 0xd732290fbacaf133U, 0x867f59a9d4bed6c0U, 0xa81f301449ee8c70U, + 0xd226fc195c6a2f8cU, 0x83585d8fd9c25db7U, 0xa42e74f3d032f525U, + 0xcd3a1230c43fb26fU, 0x80444b5e7aa7cf85U, 0xa0555e361951c366U, + 0xc86ab5c39fa63440U, 0xfa856334878fc150U, 0x9c935e00d4b9d8d2U, + 0xc3b8358109e84f07U, 0xf4a642e14c6262c8U, 0x98e7e9cccfbd7dbdU, + 0xbf21e44003acdd2cU, 0xeeea5d5004981478U, 0x95527a5202df0ccbU, + 0xbaa718e68396cffdU, 0xe950df20247c83fdU, 0x91d28b7416cdd27eU, + 0xb6472e511c81471dU, 0xe3d8f9e563a198e5U, 0x8e679c2f5e44ff8fU, +}; + +const int16_t kPower10ExponentTable[] = { + -1200, -1196, -1193, -1190, -1186, -1183, -1180, -1176, -1173, -1170, -1166, + -1163, -1160, -1156, -1153, -1150, -1146, -1143, -1140, -1136, -1133, -1130, + -1127, -1123, -1120, -1117, -1113, -1110, -1107, -1103, -1100, -1097, -1093, + -1090, -1087, -1083, -1080, -1077, -1073, -1070, -1067, -1063, -1060, -1057, + -1053, -1050, -1047, -1043, -1040, -1037, -1034, -1030, -1027, -1024, -1020, + -1017, -1014, -1010, -1007, -1004, -1000, -997, -994, -990, -987, -984, + -980, -977, -974, -970, -967, -964, -960, -957, -954, -950, -947, + -944, -940, -937, -934, -931, -927, -924, -921, -917, -914, -911, + -907, -904, -901, -897, -894, -891, -887, -884, -881, -877, -874, + -871, -867, -864, -861, -857, -854, -851, -847, -844, -841, -838, + -834, -831, -828, -824, -821, -818, -814, -811, -808, -804, -801, + -798, -794, -791, -788, -784, -781, -778, -774, -771, -768, -764, + -761, -758, -754, -751, -748, -744, -741, -738, -735, -731, -728, + -725, -721, -718, -715, -711, -708, -705, -701, -698, -695, -691, + -688, -685, -681, -678, -675, -671, -668, -665, -661, -658, -655, + -651, -648, -645, -642, -638, -635, -632, -628, -625, -622, -618, + -615, -612, -608, -605, -602, -598, -595, -592, -588, -585, -582, + -578, -575, -572, -568, -565, -562, -558, -555, -552, -549, -545, + -542, -539, -535, -532, -529, -525, -522, -519, -515, -512, -509, + -505, -502, -499, -495, -492, -489, -485, -482, -479, -475, -472, + -469, -465, -462, -459, -455, -452, -449, -446, -442, -439, -436, + -432, -429, -426, -422, -419, -416, -412, -409, -406, -402, -399, + -396, -392, -389, -386, -382, -379, -376, -372, -369, -366, -362, + -359, -356, -353, -349, -346, -343, -339, -336, -333, -329, -326, + -323, -319, -316, -313, -309, -306, -303, -299, -296, -293, -289, + -286, -283, -279, -276, -273, -269, -266, -263, -259, -256, -253, + -250, -246, -243, -240, -236, -233, -230, -226, -223, -220, -216, + -213, -210, -206, -203, -200, -196, -193, -190, -186, -183, -180, + -176, -173, -170, -166, -163, -160, -157, -153, -150, -147, -143, + -140, -137, -133, -130, -127, -123, -120, -117, -113, -110, -107, + -103, -100, -97, -93, -90, -87, -83, -80, -77, -73, -70, + -67, -63, -60, -57, -54, -50, -47, -44, -40, -37, -34, + -30, -27, -24, -20, -17, -14, -10, -7, -4, 0, 3, + 6, 10, 13, 16, 20, 23, 26, 30, 33, 36, 39, + 43, 46, 49, 53, 56, 59, 63, 66, 69, 73, 76, + 79, 83, 86, 89, 93, 96, 99, 103, 106, 109, 113, + 116, 119, 123, 126, 129, 132, 136, 139, 142, 146, 149, + 152, 156, 159, 162, 166, 169, 172, 176, 179, 182, 186, + 189, 192, 196, 199, 202, 206, 209, 212, 216, 219, 222, + 226, 229, 232, 235, 239, 242, 245, 249, 252, 255, 259, + 262, 265, 269, 272, 275, 279, 282, 285, 289, 292, 295, + 299, 302, 305, 309, 312, 315, 319, 322, 325, 328, 332, + 335, 338, 342, 345, 348, 352, 355, 358, 362, 365, 368, + 372, 375, 378, 382, 385, 388, 392, 395, 398, 402, 405, + 408, 412, 415, 418, 422, 425, 428, 431, 435, 438, 441, + 445, 448, 451, 455, 458, 461, 465, 468, 471, 475, 478, + 481, 485, 488, 491, 495, 498, 501, 505, 508, 511, 515, + 518, 521, 524, 528, 531, 534, 538, 541, 544, 548, 551, + 554, 558, 561, 564, 568, 571, 574, 578, 581, 584, 588, + 591, 594, 598, 601, 604, 608, 611, 614, 617, 621, 624, + 627, 631, 634, 637, 641, 644, 647, 651, 654, 657, 661, + 664, 667, 671, 674, 677, 681, 684, 687, 691, 694, 697, + 701, 704, 707, 711, 714, 717, 720, 724, 727, 730, 734, + 737, 740, 744, 747, 750, 754, 757, 760, 764, 767, 770, + 774, 777, 780, 784, 787, 790, 794, 797, 800, 804, 807, + 810, 813, 817, 820, 823, 827, 830, 833, 837, 840, 843, + 847, 850, 853, 857, 860, 863, 867, 870, 873, 877, 880, + 883, 887, 890, 893, 897, 900, 903, 907, 910, 913, 916, + 920, 923, 926, 930, 933, 936, 940, 943, 946, 950, 953, + 956, 960, +}; + +} // namespace +ABSL_NAMESPACE_END +} // namespace y_absl diff --git a/contrib/restricted/abseil-cpp-tstring/y_absl/strings/charconv.h b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/charconv.h new file mode 100644 index 0000000000..1a115aa251 --- /dev/null +++ b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/charconv.h @@ -0,0 +1,120 @@ +// Copyright 2018 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ABSL_STRINGS_CHARCONV_H_ +#define ABSL_STRINGS_CHARCONV_H_ + +#include <system_error> // NOLINT(build/c++11) + +#include "y_absl/base/config.h" + +namespace y_absl { +ABSL_NAMESPACE_BEGIN + +// Workalike compatibilty version of std::chars_format from C++17. +// +// This is an bitfield enumerator which can be passed to y_absl::from_chars to +// configure the string-to-float conversion. +enum class chars_format { + scientific = 1, + fixed = 2, + hex = 4, + general = fixed | scientific, +}; + +// The return result of a string-to-number conversion. +// +// `ec` will be set to `invalid_argument` if a well-formed number was not found +// at the start of the input range, `result_out_of_range` if a well-formed +// number was found, but it was out of the representable range of the requested +// type, or to std::errc() otherwise. +// +// If a well-formed number was found, `ptr` is set to one past the sequence of +// characters that were successfully parsed. If none was found, `ptr` is set +// to the `first` argument to from_chars. +struct from_chars_result { + const char* ptr; + std::errc ec; +}; + +// Workalike compatibilty version of std::from_chars from C++17. Currently +// this only supports the `double` and `float` types. +// +// This interface incorporates the proposed resolutions for library issues +// DR 3080 and DR 3081. If these are adopted with different wording, +// Abseil's behavior will change to match the standard. (The behavior most +// likely to change is for DR 3081, which says what `value` will be set to in +// the case of overflow and underflow. Code that wants to avoid possible +// breaking changes in this area should not depend on `value` when the returned +// from_chars_result indicates a range error.) +// +// Searches the range [first, last) for the longest matching pattern beginning +// at `first` that represents a floating point number. If one is found, store +// the result in `value`. +// +// The matching pattern format is almost the same as that of strtod(), except +// that (1) C locale is not respected, (2) an initial '+' character in the +// input range will never be matched, and (3) leading whitespaces are not +// ignored. +// +// If `fmt` is set, it must be one of the enumerator values of the chars_format. +// (This is despite the fact that chars_format is a bitmask type.) If set to +// `scientific`, a matching number must contain an exponent. If set to `fixed`, +// then an exponent will never match. (For example, the string "1e5" will be +// parsed as "1".) If set to `hex`, then a hexadecimal float is parsed in the +// format that strtod() accepts, except that a "0x" prefix is NOT matched. +// (In particular, in `hex` mode, the input "0xff" results in the largest +// matching pattern "0".) +y_absl::from_chars_result from_chars(const char* first, const char* last, + double& value, // NOLINT + chars_format fmt = chars_format::general); + +y_absl::from_chars_result from_chars(const char* first, const char* last, + float& value, // NOLINT + chars_format fmt = chars_format::general); + +// std::chars_format is specified as a bitmask type, which means the following +// operations must be provided: +inline constexpr chars_format operator&(chars_format lhs, chars_format rhs) { + return static_cast<chars_format>(static_cast<int>(lhs) & + static_cast<int>(rhs)); +} +inline constexpr chars_format operator|(chars_format lhs, chars_format rhs) { + return static_cast<chars_format>(static_cast<int>(lhs) | + static_cast<int>(rhs)); +} +inline constexpr chars_format operator^(chars_format lhs, chars_format rhs) { + return static_cast<chars_format>(static_cast<int>(lhs) ^ + static_cast<int>(rhs)); +} +inline constexpr chars_format operator~(chars_format arg) { + return static_cast<chars_format>(~static_cast<int>(arg)); +} +inline chars_format& operator&=(chars_format& lhs, chars_format rhs) { + lhs = lhs & rhs; + return lhs; +} +inline chars_format& operator|=(chars_format& lhs, chars_format rhs) { + lhs = lhs | rhs; + return lhs; +} +inline chars_format& operator^=(chars_format& lhs, chars_format rhs) { + lhs = lhs ^ rhs; + return lhs; +} + +ABSL_NAMESPACE_END +} // namespace y_absl + +#endif // ABSL_STRINGS_CHARCONV_H_ diff --git a/contrib/restricted/abseil-cpp-tstring/y_absl/strings/cord.cc b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/cord.cc new file mode 100644 index 0000000000..0de4ea1b3c --- /dev/null +++ b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/cord.cc @@ -0,0 +1,2047 @@ +// Copyright 2020 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "y_absl/strings/cord.h" + +#include <algorithm> +#include <atomic> +#include <cstddef> +#include <cstdio> +#include <cstdlib> +#include <iomanip> +#include <iostream> +#include <limits> +#include <ostream> +#include <sstream> +#include <type_traits> +#include <unordered_set> +#include <vector> + +#include "y_absl/base/casts.h" +#include "y_absl/base/internal/raw_logging.h" +#include "y_absl/base/macros.h" +#include "y_absl/base/port.h" +#include "y_absl/container/fixed_array.h" +#include "y_absl/container/inlined_vector.h" +#include "y_absl/strings/escaping.h" +#include "y_absl/strings/internal/cord_internal.h" +#include "y_absl/strings/internal/cord_rep_btree.h" +#include "y_absl/strings/internal/cord_rep_flat.h" +#include "y_absl/strings/internal/cordz_statistics.h" +#include "y_absl/strings/internal/cordz_update_scope.h" +#include "y_absl/strings/internal/cordz_update_tracker.h" +#include "y_absl/strings/internal/resize_uninitialized.h" +#include "y_absl/strings/str_cat.h" +#include "y_absl/strings/str_format.h" +#include "y_absl/strings/str_join.h" +#include "y_absl/strings/string_view.h" + +namespace y_absl { +ABSL_NAMESPACE_BEGIN + +using ::y_absl::cord_internal::CordRep; +using ::y_absl::cord_internal::CordRepBtree; +using ::y_absl::cord_internal::CordRepConcat; +using ::y_absl::cord_internal::CordRepExternal; +using ::y_absl::cord_internal::CordRepFlat; +using ::y_absl::cord_internal::CordRepSubstring; +using ::y_absl::cord_internal::CordzUpdateTracker; +using ::y_absl::cord_internal::InlineData; +using ::y_absl::cord_internal::kMaxFlatLength; +using ::y_absl::cord_internal::kMinFlatLength; + +using ::y_absl::cord_internal::kInlinedVectorSize; +using ::y_absl::cord_internal::kMaxBytesToCopy; + +constexpr uint64_t Fibonacci(unsigned char n, uint64_t a = 0, uint64_t b = 1) { + return n == 0 ? a : Fibonacci(n - 1, b, a + b); +} + +static_assert(Fibonacci(63) == 6557470319842, + "Fibonacci values computed incorrectly"); + +// Minimum length required for a given depth tree -- a tree is considered +// balanced if +// length(t) >= min_length[depth(t)] +// The root node depth is allowed to become twice as large to reduce rebalancing +// for larger strings (see IsRootBalanced). +static constexpr uint64_t min_length[] = { + Fibonacci(2), Fibonacci(3), Fibonacci(4), Fibonacci(5), + Fibonacci(6), Fibonacci(7), Fibonacci(8), Fibonacci(9), + Fibonacci(10), Fibonacci(11), Fibonacci(12), Fibonacci(13), + Fibonacci(14), Fibonacci(15), Fibonacci(16), Fibonacci(17), + Fibonacci(18), Fibonacci(19), Fibonacci(20), Fibonacci(21), + Fibonacci(22), Fibonacci(23), Fibonacci(24), Fibonacci(25), + Fibonacci(26), Fibonacci(27), Fibonacci(28), Fibonacci(29), + Fibonacci(30), Fibonacci(31), Fibonacci(32), Fibonacci(33), + Fibonacci(34), Fibonacci(35), Fibonacci(36), Fibonacci(37), + Fibonacci(38), Fibonacci(39), Fibonacci(40), Fibonacci(41), + Fibonacci(42), Fibonacci(43), Fibonacci(44), Fibonacci(45), + Fibonacci(46), Fibonacci(47), + 0xffffffffffffffffull, // Avoid overflow +}; + +static const int kMinLengthSize = ABSL_ARRAYSIZE(min_length); + +static inline bool btree_enabled() { + return cord_internal::cord_btree_enabled.load( + std::memory_order_relaxed); +} + +static inline bool IsRootBalanced(CordRep* node) { + if (!node->IsConcat()) { + return true; + } else if (node->concat()->depth() <= 15) { + return true; + } else if (node->concat()->depth() > kMinLengthSize) { + return false; + } else { + // Allow depth to become twice as large as implied by fibonacci rule to + // reduce rebalancing for larger strings. + return (node->length >= min_length[node->concat()->depth() / 2]); + } +} + +static CordRep* Rebalance(CordRep* node); +static void DumpNode(CordRep* rep, bool include_data, std::ostream* os, + int indent = 0); +static bool VerifyNode(CordRep* root, CordRep* start_node, + bool full_validation); + +static inline CordRep* VerifyTree(CordRep* node) { + // Verification is expensive, so only do it in debug mode. + // Even in debug mode we normally do only light validation. + // If you are debugging Cord itself, you should define the + // macro EXTRA_CORD_VALIDATION, e.g. by adding + // --copt=-DEXTRA_CORD_VALIDATION to the blaze line. +#ifdef EXTRA_CORD_VALIDATION + assert(node == nullptr || VerifyNode(node, node, /*full_validation=*/true)); +#else // EXTRA_CORD_VALIDATION + assert(node == nullptr || VerifyNode(node, node, /*full_validation=*/false)); +#endif // EXTRA_CORD_VALIDATION + static_cast<void>(&VerifyNode); + + return node; +} + +// Return the depth of a node +static int Depth(const CordRep* rep) { + if (rep->IsConcat()) { + return rep->concat()->depth(); + } else { + return 0; + } +} + +static void SetConcatChildren(CordRepConcat* concat, CordRep* left, + CordRep* right) { + concat->left = left; + concat->right = right; + + concat->length = left->length + right->length; + concat->set_depth(1 + std::max(Depth(left), Depth(right))); +} + +// Create a concatenation of the specified nodes. +// Does not change the refcounts of "left" and "right". +// The returned node has a refcount of 1. +static CordRep* RawConcat(CordRep* left, CordRep* right) { + // Avoid making degenerate concat nodes (one child is empty) + if (left == nullptr) return right; + if (right == nullptr) return left; + if (left->length == 0) { + CordRep::Unref(left); + return right; + } + if (right->length == 0) { + CordRep::Unref(right); + return left; + } + + CordRepConcat* rep = new CordRepConcat(); + rep->tag = cord_internal::CONCAT; + SetConcatChildren(rep, left, right); + + return rep; +} + +static CordRep* Concat(CordRep* left, CordRep* right) { + CordRep* rep = RawConcat(left, right); + if (rep != nullptr && !IsRootBalanced(rep)) { + rep = Rebalance(rep); + } + return VerifyTree(rep); +} + +// Make a balanced tree out of an array of leaf nodes. +static CordRep* MakeBalancedTree(CordRep** reps, size_t n) { + // Make repeated passes over the array, merging adjacent pairs + // until we are left with just a single node. + while (n > 1) { + size_t dst = 0; + for (size_t src = 0; src < n; src += 2) { + if (src + 1 < n) { + reps[dst] = Concat(reps[src], reps[src + 1]); + } else { + reps[dst] = reps[src]; + } + dst++; + } + n = dst; + } + + return reps[0]; +} + +static CordRepFlat* CreateFlat(const char* data, size_t length, + size_t alloc_hint) { + CordRepFlat* flat = CordRepFlat::New(length + alloc_hint); + flat->length = length; + memcpy(flat->Data(), data, length); + return flat; +} + +// Creates a new flat or Btree out of the specified array. +// The returned node has a refcount of 1. +static CordRep* NewBtree(const char* data, size_t length, size_t alloc_hint) { + if (length <= kMaxFlatLength) { + return CreateFlat(data, length, alloc_hint); + } + CordRepFlat* flat = CreateFlat(data, kMaxFlatLength, 0); + data += kMaxFlatLength; + length -= kMaxFlatLength; + auto* root = CordRepBtree::Create(flat); + return CordRepBtree::Append(root, {data, length}, alloc_hint); +} + +// Create a new tree out of the specified array. +// The returned node has a refcount of 1. +static CordRep* NewTree(const char* data, size_t length, size_t alloc_hint) { + if (length == 0) return nullptr; + if (btree_enabled()) { + return NewBtree(data, length, alloc_hint); + } + y_absl::FixedArray<CordRep*> reps((length - 1) / kMaxFlatLength + 1); + size_t n = 0; + do { + const size_t len = std::min(length, kMaxFlatLength); + CordRepFlat* rep = CordRepFlat::New(len + alloc_hint); + rep->length = len; + memcpy(rep->Data(), data, len); + reps[n++] = VerifyTree(rep); + data += len; + length -= len; + } while (length != 0); + return MakeBalancedTree(reps.data(), n); +} + +namespace cord_internal { + +void InitializeCordRepExternal(y_absl::string_view data, CordRepExternal* rep) { + assert(!data.empty()); + rep->length = data.size(); + rep->tag = EXTERNAL; + rep->base = data.data(); + VerifyTree(rep); +} + +} // namespace cord_internal + +static CordRep* NewSubstring(CordRep* child, size_t offset, size_t length) { + // Never create empty substring nodes + if (length == 0) { + CordRep::Unref(child); + return nullptr; + } else { + CordRepSubstring* rep = new CordRepSubstring(); + assert((offset + length) <= child->length); + rep->length = length; + rep->tag = cord_internal::SUBSTRING; + rep->start = offset; + rep->child = child; + return VerifyTree(rep); + } +} + +// Creates a CordRep from the provided string. If the string is large enough, +// and not wasteful, we move the string into an external cord rep, preserving +// the already allocated string contents. +// Requires the provided string length to be larger than `kMaxInline`. +static CordRep* CordRepFromString(TString&& src) { + assert(src.length() > cord_internal::kMaxInline); + if ( + // String is short: copy data to avoid external block overhead. + src.size() <= kMaxBytesToCopy || + // String is wasteful: copy data to avoid pinning too much unused memory. + src.size() < src.capacity() / 2 + ) { + return NewTree(src.data(), src.size(), 0); + } + + struct StringReleaser { + void operator()(y_absl::string_view /* data */) {} + TString data; + }; + const y_absl::string_view original_data = src; + auto* rep = + static_cast<::y_absl::cord_internal::CordRepExternalImpl<StringReleaser>*>( + y_absl::cord_internal::NewExternalRep(original_data, + StringReleaser{std::move(src)})); + // Moving src may have invalidated its data pointer, so adjust it. + rep->base = rep->template get<0>().data.data(); + return rep; +} + +// -------------------------------------------------------------------- +// Cord::InlineRep functions + +constexpr unsigned char Cord::InlineRep::kMaxInline; + +inline void Cord::InlineRep::set_data(const char* data, size_t n, + bool nullify_tail) { + static_assert(kMaxInline == 15, "set_data is hard-coded for a length of 15"); + + cord_internal::SmallMemmove(data_.as_chars(), data, n, nullify_tail); + set_inline_size(n); +} + +inline char* Cord::InlineRep::set_data(size_t n) { + assert(n <= kMaxInline); + ResetToEmpty(); + set_inline_size(n); + return data_.as_chars(); +} + +inline void Cord::InlineRep::reduce_size(size_t n) { + size_t tag = inline_size(); + assert(tag <= kMaxInline); + assert(tag >= n); + tag -= n; + memset(data_.as_chars() + tag, 0, n); + set_inline_size(static_cast<char>(tag)); +} + +inline void Cord::InlineRep::remove_prefix(size_t n) { + cord_internal::SmallMemmove(data_.as_chars(), data_.as_chars() + n, + inline_size() - n); + reduce_size(n); +} + +// Returns `rep` converted into a CordRepBtree. +// Directly returns `rep` if `rep` is already a CordRepBtree. +static CordRepBtree* ForceBtree(CordRep* rep) { + return rep->IsBtree() ? rep->btree() : CordRepBtree::Create(rep); +} + +void Cord::InlineRep::AppendTreeToInlined(CordRep* tree, + MethodIdentifier method) { + assert(!is_tree()); + if (!data_.is_empty()) { + CordRepFlat* flat = MakeFlatWithExtraCapacity(0); + if (btree_enabled()) { + tree = CordRepBtree::Append(CordRepBtree::Create(flat), tree); + } else { + tree = Concat(flat, tree); + } + } + EmplaceTree(tree, method); +} + +void Cord::InlineRep::AppendTreeToTree(CordRep* tree, MethodIdentifier method) { + assert(is_tree()); + const CordzUpdateScope scope(data_.cordz_info(), method); + if (btree_enabled()) { + tree = CordRepBtree::Append(ForceBtree(data_.as_tree()), tree); + } else { + tree = Concat(data_.as_tree(), tree); + } + SetTree(tree, scope); +} + +void Cord::InlineRep::AppendTree(CordRep* tree, MethodIdentifier method) { + if (tree == nullptr) return; + if (data_.is_tree()) { + AppendTreeToTree(tree, method); + } else { + AppendTreeToInlined(tree, method); + } +} + +void Cord::InlineRep::PrependTreeToInlined(CordRep* tree, + MethodIdentifier method) { + assert(!is_tree()); + if (!data_.is_empty()) { + CordRepFlat* flat = MakeFlatWithExtraCapacity(0); + if (btree_enabled()) { + tree = CordRepBtree::Prepend(CordRepBtree::Create(flat), tree); + } else { + tree = Concat(tree, flat); + } + } + EmplaceTree(tree, method); +} + +void Cord::InlineRep::PrependTreeToTree(CordRep* tree, + MethodIdentifier method) { + assert(is_tree()); + const CordzUpdateScope scope(data_.cordz_info(), method); + if (btree_enabled()) { + tree = CordRepBtree::Prepend(ForceBtree(data_.as_tree()), tree); + } else { + tree = Concat(tree, data_.as_tree()); + } + SetTree(tree, scope); +} + +void Cord::InlineRep::PrependTree(CordRep* tree, MethodIdentifier method) { + assert(tree != nullptr); + if (data_.is_tree()) { + PrependTreeToTree(tree, method); + } else { + PrependTreeToInlined(tree, method); + } +} + +// Searches for a non-full flat node at the rightmost leaf of the tree. If a +// suitable leaf is found, the function will update the length field for all +// nodes to account for the size increase. The append region address will be +// written to region and the actual size increase will be written to size. +static inline bool PrepareAppendRegion(CordRep* root, char** region, + size_t* size, size_t max_length) { + if (root->IsBtree() && root->refcount.IsMutable()) { + Span<char> span = root->btree()->GetAppendBuffer(max_length); + if (!span.empty()) { + *region = span.data(); + *size = span.size(); + return true; + } + } + + // Search down the right-hand path for a non-full FLAT node. + CordRep* dst = root; + while (dst->IsConcat() && dst->refcount.IsMutable()) { + dst = dst->concat()->right; + } + + if (!dst->IsFlat() || !dst->refcount.IsMutable()) { + *region = nullptr; + *size = 0; + return false; + } + + const size_t in_use = dst->length; + const size_t capacity = dst->flat()->Capacity(); + if (in_use == capacity) { + *region = nullptr; + *size = 0; + return false; + } + + size_t size_increase = std::min(capacity - in_use, max_length); + + // We need to update the length fields for all nodes, including the leaf node. + for (CordRep* rep = root; rep != dst; rep = rep->concat()->right) { + rep->length += size_increase; + } + dst->length += size_increase; + + *region = dst->flat()->Data() + in_use; + *size = size_increase; + return true; +} + +template <bool has_length> +void Cord::InlineRep::GetAppendRegion(char** region, size_t* size, + size_t length) { + auto constexpr method = CordzUpdateTracker::kGetAppendRegion; + + CordRep* root = tree(); + size_t sz = root ? root->length : inline_size(); + if (root == nullptr) { + size_t available = kMaxInline - sz; + if (available >= (has_length ? length : 1)) { + *region = data_.as_chars() + sz; + *size = has_length ? length : available; + set_inline_size(has_length ? sz + length : kMaxInline); + return; + } + } + + size_t extra = has_length ? length : (std::max)(sz, kMinFlatLength); + CordRep* rep = root ? root : MakeFlatWithExtraCapacity(extra); + CordzUpdateScope scope(root ? data_.cordz_info() : nullptr, method); + if (PrepareAppendRegion(rep, region, size, length)) { + CommitTree(root, rep, scope, method); + return; + } + + // Allocate new node. + CordRepFlat* new_node = CordRepFlat::New(extra); + new_node->length = std::min(new_node->Capacity(), length); + *region = new_node->Data(); + *size = new_node->length; + + if (btree_enabled()) { + rep = CordRepBtree::Append(ForceBtree(rep), new_node); + } else { + rep = Concat(rep, new_node); + } + CommitTree(root, rep, scope, method); +} + +// Computes the memory side of the provided edge which must be a valid data edge +// for a btrtee, i.e., a FLAT, EXTERNAL or SUBSTRING of a FLAT or EXTERNAL node. +static bool RepMemoryUsageDataEdge(const CordRep* rep, + size_t* total_mem_usage) { + size_t maybe_sub_size = 0; + if (ABSL_PREDICT_FALSE(rep->IsSubstring())) { + maybe_sub_size = sizeof(cord_internal::CordRepSubstring); + rep = rep->substring()->child; + } + if (rep->IsFlat()) { + *total_mem_usage += maybe_sub_size + rep->flat()->AllocatedSize(); + return true; + } + if (rep->IsExternal()) { + // We don't know anything about the embedded / bound data, but we can safely + // assume it is 'at least' a word / pointer to data. In the future we may + // choose to use the 'data' byte as a tag to identify the types of some + // well-known externals, such as a TString instance. + *total_mem_usage += maybe_sub_size + + sizeof(cord_internal::CordRepExternalImpl<intptr_t>) + + rep->length; + return true; + } + return false; +} + +// If the rep is a leaf, this will increment the value at total_mem_usage and +// will return true. +static bool RepMemoryUsageLeaf(const CordRep* rep, size_t* total_mem_usage) { + if (rep->IsFlat()) { + *total_mem_usage += rep->flat()->AllocatedSize(); + return true; + } + if (rep->IsExternal()) { + // We don't know anything about the embedded / bound data, but we can safely + // assume it is 'at least' a word / pointer to data. In the future we may + // choose to use the 'data' byte as a tag to identify the types of some + // well-known externals, such as a TString instance. + *total_mem_usage += + sizeof(cord_internal::CordRepExternalImpl<intptr_t>) + rep->length; + return true; + } + return false; +} + +void Cord::InlineRep::AssignSlow(const Cord::InlineRep& src) { + assert(&src != this); + assert(is_tree() || src.is_tree()); + auto constexpr method = CordzUpdateTracker::kAssignCord; + if (ABSL_PREDICT_TRUE(!is_tree())) { + EmplaceTree(CordRep::Ref(src.as_tree()), src.data_, method); + return; + } + + CordRep* tree = as_tree(); + if (CordRep* src_tree = src.tree()) { + // Leave any existing `cordz_info` in place, and let MaybeTrackCord() + // decide if this cord should be (or remains to be) sampled or not. + data_.set_tree(CordRep::Ref(src_tree)); + CordzInfo::MaybeTrackCord(data_, src.data_, method); + } else { + CordzInfo::MaybeUntrackCord(data_.cordz_info()); + data_ = src.data_; + } + CordRep::Unref(tree); +} + +void Cord::InlineRep::UnrefTree() { + if (is_tree()) { + CordzInfo::MaybeUntrackCord(data_.cordz_info()); + CordRep::Unref(tree()); + } +} + +// -------------------------------------------------------------------- +// Constructors and destructors + +Cord::Cord(y_absl::string_view src, MethodIdentifier method) + : contents_(InlineData::kDefaultInit) { + const size_t n = src.size(); + if (n <= InlineRep::kMaxInline) { + contents_.set_data(src.data(), n, true); + } else { + CordRep* rep = NewTree(src.data(), n, 0); + contents_.EmplaceTree(rep, method); + } +} + +template <typename T, Cord::EnableIfString<T>> +Cord::Cord(T&& src) : contents_(InlineData::kDefaultInit) { + if (src.size() <= InlineRep::kMaxInline) { + contents_.set_data(src.data(), src.size(), true); + } else { + CordRep* rep = CordRepFromString(std::forward<T>(src)); + contents_.EmplaceTree(rep, CordzUpdateTracker::kConstructorString); + } +} + +template Cord::Cord(TString&& src); + +// The destruction code is separate so that the compiler can determine +// that it does not need to call the destructor on a moved-from Cord. +void Cord::DestroyCordSlow() { + assert(contents_.is_tree()); + CordzInfo::MaybeUntrackCord(contents_.cordz_info()); + CordRep::Unref(VerifyTree(contents_.as_tree())); +} + +// -------------------------------------------------------------------- +// Mutators + +void Cord::Clear() { + if (CordRep* tree = contents_.clear()) { + CordRep::Unref(tree); + } +} + +Cord& Cord::AssignLargeString(TString&& src) { + auto constexpr method = CordzUpdateTracker::kAssignString; + assert(src.size() > kMaxBytesToCopy); + CordRep* rep = CordRepFromString(std::move(src)); + if (CordRep* tree = contents_.tree()) { + CordzUpdateScope scope(contents_.cordz_info(), method); + contents_.SetTree(rep, scope); + CordRep::Unref(tree); + } else { + contents_.EmplaceTree(rep, method); + } + return *this; +} + +Cord& Cord::operator=(y_absl::string_view src) { + auto constexpr method = CordzUpdateTracker::kAssignString; + const char* data = src.data(); + size_t length = src.size(); + CordRep* tree = contents_.tree(); + if (length <= InlineRep::kMaxInline) { + // Embed into this->contents_, which is somewhat subtle: + // - MaybeUntrackCord must be called before Unref(tree). + // - MaybeUntrackCord must be called before set_data() clobbers cordz_info. + // - set_data() must be called before Unref(tree) as it may reference tree. + if (tree != nullptr) CordzInfo::MaybeUntrackCord(contents_.cordz_info()); + contents_.set_data(data, length, true); + if (tree != nullptr) CordRep::Unref(tree); + return *this; + } + if (tree != nullptr) { + CordzUpdateScope scope(contents_.cordz_info(), method); + if (tree->IsFlat() && tree->flat()->Capacity() >= length && + tree->refcount.IsMutable()) { + // Copy in place if the existing FLAT node is reusable. + memmove(tree->flat()->Data(), data, length); + tree->length = length; + VerifyTree(tree); + return *this; + } + contents_.SetTree(NewTree(data, length, 0), scope); + CordRep::Unref(tree); + } else { + contents_.EmplaceTree(NewTree(data, length, 0), method); + } + return *this; +} + +// TODO(sanjay): Move to Cord::InlineRep section of file. For now, +// we keep it here to make diffs easier. +void Cord::InlineRep::AppendArray(y_absl::string_view src, + MethodIdentifier method) { + if (src.empty()) return; // memcpy(_, nullptr, 0) is undefined. + + size_t appended = 0; + CordRep* rep = tree(); + const CordRep* const root = rep; + CordzUpdateScope scope(root ? cordz_info() : nullptr, method); + if (root != nullptr) { + char* region; + if (PrepareAppendRegion(rep, ®ion, &appended, src.size())) { + memcpy(region, src.data(), appended); + } + } else { + // Try to fit in the inline buffer if possible. + size_t inline_length = inline_size(); + if (src.size() <= kMaxInline - inline_length) { + // Append new data to embedded array + memcpy(data_.as_chars() + inline_length, src.data(), src.size()); + set_inline_size(inline_length + src.size()); + return; + } + + // Allocate flat to be a perfect fit on first append exceeding inlined size. + // Subsequent growth will use amortized growth until we reach maximum flat + // size. + rep = CordRepFlat::New(inline_length + src.size()); + appended = std::min(src.size(), rep->flat()->Capacity() - inline_length); + memcpy(rep->flat()->Data(), data_.as_chars(), inline_length); + memcpy(rep->flat()->Data() + inline_length, src.data(), appended); + rep->length = inline_length + appended; + } + + src.remove_prefix(appended); + if (src.empty()) { + CommitTree(root, rep, scope, method); + return; + } + + if (btree_enabled()) { + // TODO(b/192061034): keep legacy 10% growth rate: consider other rates. + rep = ForceBtree(rep); + const size_t min_growth = std::max<size_t>(rep->length / 10, src.size()); + rep = CordRepBtree::Append(rep->btree(), src, min_growth - src.size()); + } else { + // Use new block(s) for any remaining bytes that were not handled above. + // Alloc extra memory only if the right child of the root of the new tree + // is going to be a FLAT node, which will permit further inplace appends. + size_t length = src.size(); + if (src.size() < kMaxFlatLength) { + // The new length is either + // - old size + 10% + // - old_size + src.size() + // This will cause a reasonable conservative step-up in size that is + // still large enough to avoid excessive amounts of small fragments + // being added. + length = std::max<size_t>(rep->length / 10, src.size()); + } + rep = Concat(rep, NewTree(src.data(), src.size(), length - src.size())); + } + CommitTree(root, rep, scope, method); +} + +inline CordRep* Cord::TakeRep() const& { + return CordRep::Ref(contents_.tree()); +} + +inline CordRep* Cord::TakeRep() && { + CordRep* rep = contents_.tree(); + contents_.clear(); + return rep; +} + +template <typename C> +inline void Cord::AppendImpl(C&& src) { + auto constexpr method = CordzUpdateTracker::kAppendCord; + if (empty()) { + // Since destination is empty, we can avoid allocating a node, + if (src.contents_.is_tree()) { + // by taking the tree directly + CordRep* rep = std::forward<C>(src).TakeRep(); + contents_.EmplaceTree(rep, method); + } else { + // or copying over inline data + contents_.data_ = src.contents_.data_; + } + return; + } + + // For short cords, it is faster to copy data if there is room in dst. + const size_t src_size = src.contents_.size(); + if (src_size <= kMaxBytesToCopy) { + CordRep* src_tree = src.contents_.tree(); + if (src_tree == nullptr) { + // src has embedded data. + contents_.AppendArray({src.contents_.data(), src_size}, method); + return; + } + if (src_tree->IsFlat()) { + // src tree just has one flat node. + contents_.AppendArray({src_tree->flat()->Data(), src_size}, method); + return; + } + if (&src == this) { + // ChunkIterator below assumes that src is not modified during traversal. + Append(Cord(src)); + return; + } + // TODO(mec): Should we only do this if "dst" has space? + for (y_absl::string_view chunk : src.Chunks()) { + Append(chunk); + } + return; + } + + // Guaranteed to be a tree (kMaxBytesToCopy > kInlinedSize) + CordRep* rep = std::forward<C>(src).TakeRep(); + contents_.AppendTree(rep, CordzUpdateTracker::kAppendCord); +} + +void Cord::Append(const Cord& src) { + AppendImpl(src); +} + +void Cord::Append(Cord&& src) { + AppendImpl(std::move(src)); +} + +template <typename T, Cord::EnableIfString<T>> +void Cord::Append(T&& src) { + if (src.size() <= kMaxBytesToCopy) { + Append(y_absl::string_view(src)); + } else { + CordRep* rep = CordRepFromString(std::forward<T>(src)); + contents_.AppendTree(rep, CordzUpdateTracker::kAppendString); + } +} + +template void Cord::Append(TString&& src); + +void Cord::Prepend(const Cord& src) { + CordRep* src_tree = src.contents_.tree(); + if (src_tree != nullptr) { + CordRep::Ref(src_tree); + contents_.PrependTree(src_tree, CordzUpdateTracker::kPrependCord); + return; + } + + // `src` cord is inlined. + y_absl::string_view src_contents(src.contents_.data(), src.contents_.size()); + return Prepend(src_contents); +} + +void Cord::PrependArray(y_absl::string_view src, MethodIdentifier method) { + if (src.empty()) return; // memcpy(_, nullptr, 0) is undefined. + if (!contents_.is_tree()) { + size_t cur_size = contents_.inline_size(); + if (cur_size + src.size() <= InlineRep::kMaxInline) { + // Use embedded storage. + char data[InlineRep::kMaxInline + 1] = {0}; + memcpy(data, src.data(), src.size()); + memcpy(data + src.size(), contents_.data(), cur_size); + memcpy(contents_.data_.as_chars(), data, InlineRep::kMaxInline + 1); + contents_.set_inline_size(cur_size + src.size()); + return; + } + } + CordRep* rep = NewTree(src.data(), src.size(), 0); + contents_.PrependTree(rep, method); +} + +template <typename T, Cord::EnableIfString<T>> +inline void Cord::Prepend(T&& src) { + if (src.size() <= kMaxBytesToCopy) { + Prepend(y_absl::string_view(src)); + } else { + CordRep* rep = CordRepFromString(std::forward<T>(src)); + contents_.PrependTree(rep, CordzUpdateTracker::kPrependString); + } +} + +template void Cord::Prepend(TString&& src); + +static CordRep* RemovePrefixFrom(CordRep* node, size_t n) { + if (n >= node->length) return nullptr; + if (n == 0) return CordRep::Ref(node); + y_absl::InlinedVector<CordRep*, kInlinedVectorSize> rhs_stack; + + while (node->IsConcat()) { + assert(n <= node->length); + if (n < node->concat()->left->length) { + // Push right to stack, descend left. + rhs_stack.push_back(node->concat()->right); + node = node->concat()->left; + } else { + // Drop left, descend right. + n -= node->concat()->left->length; + node = node->concat()->right; + } + } + assert(n <= node->length); + + if (n == 0) { + CordRep::Ref(node); + } else { + size_t start = n; + size_t len = node->length - n; + if (node->IsSubstring()) { + // Consider in-place update of node, similar to in RemoveSuffixFrom(). + start += node->substring()->start; + node = node->substring()->child; + } + node = NewSubstring(CordRep::Ref(node), start, len); + } + while (!rhs_stack.empty()) { + node = Concat(node, CordRep::Ref(rhs_stack.back())); + rhs_stack.pop_back(); + } + return node; +} + +// RemoveSuffixFrom() is very similar to RemovePrefixFrom(), with the +// exception that removing a suffix has an optimization where a node may be +// edited in place iff that node and all its ancestors have a refcount of 1. +static CordRep* RemoveSuffixFrom(CordRep* node, size_t n) { + if (n >= node->length) return nullptr; + if (n == 0) return CordRep::Ref(node); + y_absl::InlinedVector<CordRep*, kInlinedVectorSize> lhs_stack; + bool inplace_ok = node->refcount.IsMutable(); + + while (node->IsConcat()) { + assert(n <= node->length); + if (n < node->concat()->right->length) { + // Push left to stack, descend right. + lhs_stack.push_back(node->concat()->left); + node = node->concat()->right; + } else { + // Drop right, descend left. + n -= node->concat()->right->length; + node = node->concat()->left; + } + inplace_ok = inplace_ok && node->refcount.IsMutable(); + } + assert(n <= node->length); + + if (n == 0) { + CordRep::Ref(node); + } else if (inplace_ok && !node->IsExternal()) { + // Consider making a new buffer if the current node capacity is much + // larger than the new length. + CordRep::Ref(node); + node->length -= n; + } else { + size_t start = 0; + size_t len = node->length - n; + if (node->IsSubstring()) { + start = node->substring()->start; + node = node->substring()->child; + } + node = NewSubstring(CordRep::Ref(node), start, len); + } + while (!lhs_stack.empty()) { + node = Concat(CordRep::Ref(lhs_stack.back()), node); + lhs_stack.pop_back(); + } + return node; +} + +void Cord::RemovePrefix(size_t n) { + ABSL_INTERNAL_CHECK(n <= size(), + y_absl::StrCat("Requested prefix size ", n, + " exceeds Cord's size ", size())); + CordRep* tree = contents_.tree(); + if (tree == nullptr) { + contents_.remove_prefix(n); + } else { + auto constexpr method = CordzUpdateTracker::kRemovePrefix; + CordzUpdateScope scope(contents_.cordz_info(), method); + if (tree->IsBtree()) { + CordRep* old = tree; + tree = tree->btree()->SubTree(n, tree->length - n); + CordRep::Unref(old); + } else { + CordRep* newrep = RemovePrefixFrom(tree, n); + CordRep::Unref(tree); + tree = VerifyTree(newrep); + } + contents_.SetTreeOrEmpty(tree, scope); + } +} + +void Cord::RemoveSuffix(size_t n) { + ABSL_INTERNAL_CHECK(n <= size(), + y_absl::StrCat("Requested suffix size ", n, + " exceeds Cord's size ", size())); + CordRep* tree = contents_.tree(); + if (tree == nullptr) { + contents_.reduce_size(n); + } else { + auto constexpr method = CordzUpdateTracker::kRemoveSuffix; + CordzUpdateScope scope(contents_.cordz_info(), method); + if (tree->IsBtree()) { + tree = CordRepBtree::RemoveSuffix(tree->btree(), n); + } else { + CordRep* newrep = RemoveSuffixFrom(tree, n); + CordRep::Unref(tree); + tree = VerifyTree(newrep); + } + contents_.SetTreeOrEmpty(tree, scope); + } +} + +// Work item for NewSubRange(). +struct SubRange { + SubRange(CordRep* a_node, size_t a_pos, size_t a_n) + : node(a_node), pos(a_pos), n(a_n) {} + CordRep* node; // nullptr means concat last 2 results. + size_t pos; + size_t n; +}; + +static CordRep* NewSubRange(CordRep* node, size_t pos, size_t n) { + y_absl::InlinedVector<CordRep*, kInlinedVectorSize> results; + y_absl::InlinedVector<SubRange, kInlinedVectorSize> todo; + todo.push_back(SubRange(node, pos, n)); + do { + const SubRange& sr = todo.back(); + node = sr.node; + pos = sr.pos; + n = sr.n; + todo.pop_back(); + + if (node == nullptr) { + assert(results.size() >= 2); + CordRep* right = results.back(); + results.pop_back(); + CordRep* left = results.back(); + results.pop_back(); + results.push_back(Concat(left, right)); + } else if (pos == 0 && n == node->length) { + results.push_back(CordRep::Ref(node)); + } else if (!node->IsConcat()) { + if (node->IsSubstring()) { + pos += node->substring()->start; + node = node->substring()->child; + } + results.push_back(NewSubstring(CordRep::Ref(node), pos, n)); + } else if (pos + n <= node->concat()->left->length) { + todo.push_back(SubRange(node->concat()->left, pos, n)); + } else if (pos >= node->concat()->left->length) { + pos -= node->concat()->left->length; + todo.push_back(SubRange(node->concat()->right, pos, n)); + } else { + size_t left_n = node->concat()->left->length - pos; + todo.push_back(SubRange(nullptr, 0, 0)); // Concat() + todo.push_back(SubRange(node->concat()->right, 0, n - left_n)); + todo.push_back(SubRange(node->concat()->left, pos, left_n)); + } + } while (!todo.empty()); + assert(results.size() == 1); + return results[0]; +} + +Cord Cord::Subcord(size_t pos, size_t new_size) const { + Cord sub_cord; + size_t length = size(); + if (pos > length) pos = length; + if (new_size > length - pos) new_size = length - pos; + if (new_size == 0) return sub_cord; + + CordRep* tree = contents_.tree(); + if (tree == nullptr) { + // sub_cord is newly constructed, no need to re-zero-out the tail of + // contents_ memory. + sub_cord.contents_.set_data(contents_.data() + pos, new_size, false); + return sub_cord; + } + + if (new_size <= InlineRep::kMaxInline) { + char* dest = sub_cord.contents_.data_.as_chars(); + Cord::ChunkIterator it = chunk_begin(); + it.AdvanceBytes(pos); + size_t remaining_size = new_size; + while (remaining_size > it->size()) { + cord_internal::SmallMemmove(dest, it->data(), it->size()); + remaining_size -= it->size(); + dest += it->size(); + ++it; + } + cord_internal::SmallMemmove(dest, it->data(), remaining_size); + sub_cord.contents_.set_inline_size(new_size); + return sub_cord; + } + + if (tree->IsBtree()) { + tree = tree->btree()->SubTree(pos, new_size); + } else { + tree = NewSubRange(tree, pos, new_size); + } + sub_cord.contents_.EmplaceTree(tree, contents_.data_, + CordzUpdateTracker::kSubCord); + return sub_cord; +} + +// -------------------------------------------------------------------- +// Balancing + +class CordForest { + public: + explicit CordForest(size_t length) + : root_length_(length), trees_(kMinLengthSize, nullptr) {} + + void Build(CordRep* cord_root) { + std::vector<CordRep*> pending = {cord_root}; + + while (!pending.empty()) { + CordRep* node = pending.back(); + pending.pop_back(); + CheckNode(node); + if (ABSL_PREDICT_FALSE(!node->IsConcat())) { + AddNode(node); + continue; + } + + CordRepConcat* concat_node = node->concat(); + if (concat_node->depth() >= kMinLengthSize || + concat_node->length < min_length[concat_node->depth()]) { + pending.push_back(concat_node->right); + pending.push_back(concat_node->left); + + if (concat_node->refcount.IsOne()) { + concat_node->left = concat_freelist_; + concat_freelist_ = concat_node; + } else { + CordRep::Ref(concat_node->right); + CordRep::Ref(concat_node->left); + CordRep::Unref(concat_node); + } + } else { + AddNode(node); + } + } + } + + CordRep* ConcatNodes() { + CordRep* sum = nullptr; + for (auto* node : trees_) { + if (node == nullptr) continue; + + sum = PrependNode(node, sum); + root_length_ -= node->length; + if (root_length_ == 0) break; + } + ABSL_INTERNAL_CHECK(sum != nullptr, "Failed to locate sum node"); + return VerifyTree(sum); + } + + private: + CordRep* AppendNode(CordRep* node, CordRep* sum) { + return (sum == nullptr) ? node : MakeConcat(sum, node); + } + + CordRep* PrependNode(CordRep* node, CordRep* sum) { + return (sum == nullptr) ? node : MakeConcat(node, sum); + } + + void AddNode(CordRep* node) { + CordRep* sum = nullptr; + + // Collect together everything with which we will merge with node + int i = 0; + for (; node->length > min_length[i + 1]; ++i) { + auto& tree_at_i = trees_[i]; + + if (tree_at_i == nullptr) continue; + sum = PrependNode(tree_at_i, sum); + tree_at_i = nullptr; + } + + sum = AppendNode(node, sum); + + // Insert sum into appropriate place in the forest + for (; sum->length >= min_length[i]; ++i) { + auto& tree_at_i = trees_[i]; + if (tree_at_i == nullptr) continue; + + sum = MakeConcat(tree_at_i, sum); + tree_at_i = nullptr; + } + + // min_length[0] == 1, which means sum->length >= min_length[0] + assert(i > 0); + trees_[i - 1] = sum; + } + + // Make concat node trying to resue existing CordRepConcat nodes we + // already collected in the concat_freelist_. + CordRep* MakeConcat(CordRep* left, CordRep* right) { + if (concat_freelist_ == nullptr) return RawConcat(left, right); + + CordRepConcat* rep = concat_freelist_; + if (concat_freelist_->left == nullptr) { + concat_freelist_ = nullptr; + } else { + concat_freelist_ = concat_freelist_->left->concat(); + } + SetConcatChildren(rep, left, right); + + return rep; + } + + static void CheckNode(CordRep* node) { + ABSL_INTERNAL_CHECK(node->length != 0u, ""); + if (node->IsConcat()) { + ABSL_INTERNAL_CHECK(node->concat()->left != nullptr, ""); + ABSL_INTERNAL_CHECK(node->concat()->right != nullptr, ""); + ABSL_INTERNAL_CHECK(node->length == (node->concat()->left->length + + node->concat()->right->length), + ""); + } + } + + size_t root_length_; + + // use an inlined vector instead of a flat array to get bounds checking + y_absl::InlinedVector<CordRep*, kInlinedVectorSize> trees_; + + // List of concat nodes we can re-use for Cord balancing. + CordRepConcat* concat_freelist_ = nullptr; +}; + +static CordRep* Rebalance(CordRep* node) { + VerifyTree(node); + assert(node->IsConcat()); + + if (node->length == 0) { + return nullptr; + } + + CordForest forest(node->length); + forest.Build(node); + return forest.ConcatNodes(); +} + +// -------------------------------------------------------------------- +// Comparators + +namespace { + +int ClampResult(int memcmp_res) { + return static_cast<int>(memcmp_res > 0) - static_cast<int>(memcmp_res < 0); +} + +int CompareChunks(y_absl::string_view* lhs, y_absl::string_view* rhs, + size_t* size_to_compare) { + size_t compared_size = std::min(lhs->size(), rhs->size()); + assert(*size_to_compare >= compared_size); + *size_to_compare -= compared_size; + + int memcmp_res = ::memcmp(lhs->data(), rhs->data(), compared_size); + if (memcmp_res != 0) return memcmp_res; + + lhs->remove_prefix(compared_size); + rhs->remove_prefix(compared_size); + + return 0; +} + +// This overload set computes comparison results from memcmp result. This +// interface is used inside GenericCompare below. Differet implementations +// are specialized for int and bool. For int we clamp result to {-1, 0, 1} +// set. For bool we just interested in "value == 0". +template <typename ResultType> +ResultType ComputeCompareResult(int memcmp_res) { + return ClampResult(memcmp_res); +} +template <> +bool ComputeCompareResult<bool>(int memcmp_res) { + return memcmp_res == 0; +} + +} // namespace + +// Helper routine. Locates the first flat or external chunk of the Cord without +// initializing the iterator, and returns a string_view referencing the data. +inline y_absl::string_view Cord::InlineRep::FindFlatStartPiece() const { + if (!is_tree()) { + return y_absl::string_view(data_.as_chars(), data_.inline_size()); + } + + CordRep* node = tree(); + if (node->IsFlat()) { + return y_absl::string_view(node->flat()->Data(), node->length); + } + + if (node->IsExternal()) { + return y_absl::string_view(node->external()->base, node->length); + } + + if (node->IsBtree()) { + CordRepBtree* tree = node->btree(); + int height = tree->height(); + while (--height >= 0) { + tree = tree->Edge(CordRepBtree::kFront)->btree(); + } + return tree->Data(tree->begin()); + } + + // Walk down the left branches until we hit a non-CONCAT node. + while (node->IsConcat()) { + node = node->concat()->left; + } + + // Get the child node if we encounter a SUBSTRING. + size_t offset = 0; + size_t length = node->length; + assert(length != 0); + + if (node->IsSubstring()) { + offset = node->substring()->start; + node = node->substring()->child; + } + + if (node->IsFlat()) { + return y_absl::string_view(node->flat()->Data() + offset, length); + } + + assert(node->IsExternal() && "Expect FLAT or EXTERNAL node here"); + + return y_absl::string_view(node->external()->base + offset, length); +} + +inline int Cord::CompareSlowPath(y_absl::string_view rhs, size_t compared_size, + size_t size_to_compare) const { + auto advance = [](Cord::ChunkIterator* it, y_absl::string_view* chunk) { + if (!chunk->empty()) return true; + ++*it; + if (it->bytes_remaining_ == 0) return false; + *chunk = **it; + return true; + }; + + Cord::ChunkIterator lhs_it = chunk_begin(); + + // compared_size is inside first chunk. + y_absl::string_view lhs_chunk = + (lhs_it.bytes_remaining_ != 0) ? *lhs_it : y_absl::string_view(); + assert(compared_size <= lhs_chunk.size()); + assert(compared_size <= rhs.size()); + lhs_chunk.remove_prefix(compared_size); + rhs.remove_prefix(compared_size); + size_to_compare -= compared_size; // skip already compared size. + + while (advance(&lhs_it, &lhs_chunk) && !rhs.empty()) { + int comparison_result = CompareChunks(&lhs_chunk, &rhs, &size_to_compare); + if (comparison_result != 0) return comparison_result; + if (size_to_compare == 0) return 0; + } + + return static_cast<int>(rhs.empty()) - static_cast<int>(lhs_chunk.empty()); +} + +inline int Cord::CompareSlowPath(const Cord& rhs, size_t compared_size, + size_t size_to_compare) const { + auto advance = [](Cord::ChunkIterator* it, y_absl::string_view* chunk) { + if (!chunk->empty()) return true; + ++*it; + if (it->bytes_remaining_ == 0) return false; + *chunk = **it; + return true; + }; + + Cord::ChunkIterator lhs_it = chunk_begin(); + Cord::ChunkIterator rhs_it = rhs.chunk_begin(); + + // compared_size is inside both first chunks. + y_absl::string_view lhs_chunk = + (lhs_it.bytes_remaining_ != 0) ? *lhs_it : y_absl::string_view(); + y_absl::string_view rhs_chunk = + (rhs_it.bytes_remaining_ != 0) ? *rhs_it : y_absl::string_view(); + assert(compared_size <= lhs_chunk.size()); + assert(compared_size <= rhs_chunk.size()); + lhs_chunk.remove_prefix(compared_size); + rhs_chunk.remove_prefix(compared_size); + size_to_compare -= compared_size; // skip already compared size. + + while (advance(&lhs_it, &lhs_chunk) && advance(&rhs_it, &rhs_chunk)) { + int memcmp_res = CompareChunks(&lhs_chunk, &rhs_chunk, &size_to_compare); + if (memcmp_res != 0) return memcmp_res; + if (size_to_compare == 0) return 0; + } + + return static_cast<int>(rhs_chunk.empty()) - + static_cast<int>(lhs_chunk.empty()); +} + +inline y_absl::string_view Cord::GetFirstChunk(const Cord& c) { + return c.contents_.FindFlatStartPiece(); +} +inline y_absl::string_view Cord::GetFirstChunk(y_absl::string_view sv) { + return sv; +} + +// Compares up to 'size_to_compare' bytes of 'lhs' with 'rhs'. It is assumed +// that 'size_to_compare' is greater that size of smallest of first chunks. +template <typename ResultType, typename RHS> +ResultType GenericCompare(const Cord& lhs, const RHS& rhs, + size_t size_to_compare) { + y_absl::string_view lhs_chunk = Cord::GetFirstChunk(lhs); + y_absl::string_view rhs_chunk = Cord::GetFirstChunk(rhs); + + size_t compared_size = std::min(lhs_chunk.size(), rhs_chunk.size()); + assert(size_to_compare >= compared_size); + int memcmp_res = ::memcmp(lhs_chunk.data(), rhs_chunk.data(), compared_size); + if (compared_size == size_to_compare || memcmp_res != 0) { + return ComputeCompareResult<ResultType>(memcmp_res); + } + + return ComputeCompareResult<ResultType>( + lhs.CompareSlowPath(rhs, compared_size, size_to_compare)); +} + +bool Cord::EqualsImpl(y_absl::string_view rhs, size_t size_to_compare) const { + return GenericCompare<bool>(*this, rhs, size_to_compare); +} + +bool Cord::EqualsImpl(const Cord& rhs, size_t size_to_compare) const { + return GenericCompare<bool>(*this, rhs, size_to_compare); +} + +template <typename RHS> +inline int SharedCompareImpl(const Cord& lhs, const RHS& rhs) { + size_t lhs_size = lhs.size(); + size_t rhs_size = rhs.size(); + if (lhs_size == rhs_size) { + return GenericCompare<int>(lhs, rhs, lhs_size); + } + if (lhs_size < rhs_size) { + auto data_comp_res = GenericCompare<int>(lhs, rhs, lhs_size); + return data_comp_res == 0 ? -1 : data_comp_res; + } + + auto data_comp_res = GenericCompare<int>(lhs, rhs, rhs_size); + return data_comp_res == 0 ? +1 : data_comp_res; +} + +int Cord::Compare(y_absl::string_view rhs) const { + return SharedCompareImpl(*this, rhs); +} + +int Cord::CompareImpl(const Cord& rhs) const { + return SharedCompareImpl(*this, rhs); +} + +bool Cord::EndsWith(y_absl::string_view rhs) const { + size_t my_size = size(); + size_t rhs_size = rhs.size(); + + if (my_size < rhs_size) return false; + + Cord tmp(*this); + tmp.RemovePrefix(my_size - rhs_size); + return tmp.EqualsImpl(rhs, rhs_size); +} + +bool Cord::EndsWith(const Cord& rhs) const { + size_t my_size = size(); + size_t rhs_size = rhs.size(); + + if (my_size < rhs_size) return false; + + Cord tmp(*this); + tmp.RemovePrefix(my_size - rhs_size); + return tmp.EqualsImpl(rhs, rhs_size); +} + +// -------------------------------------------------------------------- +// Misc. + +Cord::operator TString() const { + TString s; + y_absl::CopyCordToString(*this, &s); + return s; +} + +void CopyCordToString(const Cord& src, TString* dst) { + if (!src.contents_.is_tree()) { + src.contents_.CopyTo(dst); + } else { + y_absl::strings_internal::STLStringResizeUninitialized(dst, src.size()); + src.CopyToArraySlowPath(&(*dst)[0]); + } +} + +void Cord::CopyToArraySlowPath(char* dst) const { + assert(contents_.is_tree()); + y_absl::string_view fragment; + if (GetFlatAux(contents_.tree(), &fragment)) { + memcpy(dst, fragment.data(), fragment.size()); + return; + } + for (y_absl::string_view chunk : Chunks()) { + memcpy(dst, chunk.data(), chunk.size()); + dst += chunk.size(); + } +} + +Cord::ChunkIterator& Cord::ChunkIterator::AdvanceStack() { + auto& stack_of_right_children = stack_of_right_children_; + if (stack_of_right_children.empty()) { + assert(!current_chunk_.empty()); // Called on invalid iterator. + // We have reached the end of the Cord. + return *this; + } + + // Process the next node on the stack. + CordRep* node = stack_of_right_children.back(); + stack_of_right_children.pop_back(); + + // Walk down the left branches until we hit a non-CONCAT node. Save the + // right children to the stack for subsequent traversal. + while (node->IsConcat()) { + stack_of_right_children.push_back(node->concat()->right); + node = node->concat()->left; + } + + // Get the child node if we encounter a SUBSTRING. + size_t offset = 0; + size_t length = node->length; + if (node->IsSubstring()) { + offset = node->substring()->start; + node = node->substring()->child; + } + + assert(node->IsExternal() || node->IsFlat()); + assert(length != 0); + const char* data = + node->IsExternal() ? node->external()->base : node->flat()->Data(); + current_chunk_ = y_absl::string_view(data + offset, length); + current_leaf_ = node; + return *this; +} + +Cord Cord::ChunkIterator::AdvanceAndReadBytes(size_t n) { + ABSL_HARDENING_ASSERT(bytes_remaining_ >= n && + "Attempted to iterate past `end()`"); + Cord subcord; + auto constexpr method = CordzUpdateTracker::kCordReader; + + if (n <= InlineRep::kMaxInline) { + // Range to read fits in inline data. Flatten it. + char* data = subcord.contents_.set_data(n); + while (n > current_chunk_.size()) { + memcpy(data, current_chunk_.data(), current_chunk_.size()); + data += current_chunk_.size(); + n -= current_chunk_.size(); + ++*this; + } + memcpy(data, current_chunk_.data(), n); + if (n < current_chunk_.size()) { + RemoveChunkPrefix(n); + } else if (n > 0) { + ++*this; + } + return subcord; + } + + if (btree_reader_) { + size_t chunk_size = current_chunk_.size(); + if (n <= chunk_size && n <= kMaxBytesToCopy) { + subcord = Cord(current_chunk_.substr(0, n), method); + if (n < chunk_size) { + current_chunk_.remove_prefix(n); + } else { + current_chunk_ = btree_reader_.Next(); + } + } else { + CordRep* rep; + current_chunk_ = btree_reader_.Read(n, chunk_size, rep); + subcord.contents_.EmplaceTree(rep, method); + } + bytes_remaining_ -= n; + return subcord; + } + + auto& stack_of_right_children = stack_of_right_children_; + if (n < current_chunk_.size()) { + // Range to read is a proper subrange of the current chunk. + assert(current_leaf_ != nullptr); + CordRep* subnode = CordRep::Ref(current_leaf_); + const char* data = subnode->IsExternal() ? subnode->external()->base + : subnode->flat()->Data(); + subnode = NewSubstring(subnode, current_chunk_.data() - data, n); + subcord.contents_.EmplaceTree(VerifyTree(subnode), method); + RemoveChunkPrefix(n); + return subcord; + } + + // Range to read begins with a proper subrange of the current chunk. + assert(!current_chunk_.empty()); + assert(current_leaf_ != nullptr); + CordRep* subnode = CordRep::Ref(current_leaf_); + if (current_chunk_.size() < subnode->length) { + const char* data = subnode->IsExternal() ? subnode->external()->base + : subnode->flat()->Data(); + subnode = NewSubstring(subnode, current_chunk_.data() - data, + current_chunk_.size()); + } + n -= current_chunk_.size(); + bytes_remaining_ -= current_chunk_.size(); + + // Process the next node(s) on the stack, reading whole subtrees depending on + // their length and how many bytes we are advancing. + CordRep* node = nullptr; + while (!stack_of_right_children.empty()) { + node = stack_of_right_children.back(); + stack_of_right_children.pop_back(); + if (node->length > n) break; + // TODO(qrczak): This might unnecessarily recreate existing concat nodes. + // Avoiding that would need pretty complicated logic (instead of + // current_leaf, keep current_subtree_ which points to the highest node + // such that the current leaf can be found on the path of left children + // starting from current_subtree_; delay creating subnode while node is + // below current_subtree_; find the proper node along the path of left + // children starting from current_subtree_ if this loop exits while staying + // below current_subtree_; etc.; alternatively, push parents instead of + // right children on the stack). + subnode = Concat(subnode, CordRep::Ref(node)); + n -= node->length; + bytes_remaining_ -= node->length; + node = nullptr; + } + + if (node == nullptr) { + // We have reached the end of the Cord. + assert(bytes_remaining_ == 0); + subcord.contents_.EmplaceTree(VerifyTree(subnode), method); + return subcord; + } + + // Walk down the appropriate branches until we hit a non-CONCAT node. Save the + // right children to the stack for subsequent traversal. + while (node->IsConcat()) { + if (node->concat()->left->length > n) { + // Push right, descend left. + stack_of_right_children.push_back(node->concat()->right); + node = node->concat()->left; + } else { + // Read left, descend right. + subnode = Concat(subnode, CordRep::Ref(node->concat()->left)); + n -= node->concat()->left->length; + bytes_remaining_ -= node->concat()->left->length; + node = node->concat()->right; + } + } + + // Get the child node if we encounter a SUBSTRING. + size_t offset = 0; + size_t length = node->length; + if (node->IsSubstring()) { + offset = node->substring()->start; + node = node->substring()->child; + } + + // Range to read ends with a proper (possibly empty) subrange of the current + // chunk. + assert(node->IsExternal() || node->IsFlat()); + assert(length > n); + if (n > 0) { + subnode = Concat(subnode, NewSubstring(CordRep::Ref(node), offset, n)); + } + const char* data = + node->IsExternal() ? node->external()->base : node->flat()->Data(); + current_chunk_ = y_absl::string_view(data + offset + n, length - n); + current_leaf_ = node; + bytes_remaining_ -= n; + subcord.contents_.EmplaceTree(VerifyTree(subnode), method); + return subcord; +} + +void Cord::ChunkIterator::AdvanceBytesSlowPath(size_t n) { + assert(bytes_remaining_ >= n && "Attempted to iterate past `end()`"); + assert(n >= current_chunk_.size()); // This should only be called when + // iterating to a new node. + + n -= current_chunk_.size(); + bytes_remaining_ -= current_chunk_.size(); + + if (stack_of_right_children_.empty()) { + // We have reached the end of the Cord. + assert(bytes_remaining_ == 0); + return; + } + + // Process the next node(s) on the stack, skipping whole subtrees depending on + // their length and how many bytes we are advancing. + CordRep* node = nullptr; + auto& stack_of_right_children = stack_of_right_children_; + while (!stack_of_right_children.empty()) { + node = stack_of_right_children.back(); + stack_of_right_children.pop_back(); + if (node->length > n) break; + n -= node->length; + bytes_remaining_ -= node->length; + node = nullptr; + } + + if (node == nullptr) { + // We have reached the end of the Cord. + assert(bytes_remaining_ == 0); + return; + } + + // Walk down the appropriate branches until we hit a non-CONCAT node. Save the + // right children to the stack for subsequent traversal. + while (node->IsConcat()) { + if (node->concat()->left->length > n) { + // Push right, descend left. + stack_of_right_children.push_back(node->concat()->right); + node = node->concat()->left; + } else { + // Skip left, descend right. + n -= node->concat()->left->length; + bytes_remaining_ -= node->concat()->left->length; + node = node->concat()->right; + } + } + + // Get the child node if we encounter a SUBSTRING. + size_t offset = 0; + size_t length = node->length; + if (node->IsSubstring()) { + offset = node->substring()->start; + node = node->substring()->child; + } + + assert(node->IsExternal() || node->IsFlat()); + assert(length > n); + const char* data = + node->IsExternal() ? node->external()->base : node->flat()->Data(); + current_chunk_ = y_absl::string_view(data + offset + n, length - n); + current_leaf_ = node; + bytes_remaining_ -= n; +} + +char Cord::operator[](size_t i) const { + ABSL_HARDENING_ASSERT(i < size()); + size_t offset = i; + const CordRep* rep = contents_.tree(); + if (rep == nullptr) { + return contents_.data()[i]; + } + while (true) { + assert(rep != nullptr); + assert(offset < rep->length); + if (rep->IsFlat()) { + // Get the "i"th character directly from the flat array. + return rep->flat()->Data()[offset]; + } else if (rep->IsBtree()) { + return rep->btree()->GetCharacter(offset); + } else if (rep->IsExternal()) { + // Get the "i"th character from the external array. + return rep->external()->base[offset]; + } else if (rep->IsConcat()) { + // Recursively branch to the side of the concatenation that the "i"th + // character is on. + size_t left_length = rep->concat()->left->length; + if (offset < left_length) { + rep = rep->concat()->left; + } else { + offset -= left_length; + rep = rep->concat()->right; + } + } else { + // This must be a substring a node, so bypass it to get to the child. + assert(rep->IsSubstring()); + offset += rep->substring()->start; + rep = rep->substring()->child; + } + } +} + +y_absl::string_view Cord::FlattenSlowPath() { + assert(contents_.is_tree()); + size_t total_size = size(); + CordRep* new_rep; + char* new_buffer; + + // Try to put the contents into a new flat rep. If they won't fit in the + // biggest possible flat node, use an external rep instead. + if (total_size <= kMaxFlatLength) { + new_rep = CordRepFlat::New(total_size); + new_rep->length = total_size; + new_buffer = new_rep->flat()->Data(); + CopyToArraySlowPath(new_buffer); + } else { + new_buffer = std::allocator<char>().allocate(total_size); + CopyToArraySlowPath(new_buffer); + new_rep = y_absl::cord_internal::NewExternalRep( + y_absl::string_view(new_buffer, total_size), [](y_absl::string_view s) { + std::allocator<char>().deallocate(const_cast<char*>(s.data()), + s.size()); + }); + } + CordzUpdateScope scope(contents_.cordz_info(), CordzUpdateTracker::kFlatten); + CordRep::Unref(contents_.as_tree()); + contents_.SetTree(new_rep, scope); + return y_absl::string_view(new_buffer, total_size); +} + +/* static */ bool Cord::GetFlatAux(CordRep* rep, y_absl::string_view* fragment) { + assert(rep != nullptr); + if (rep->IsFlat()) { + *fragment = y_absl::string_view(rep->flat()->Data(), rep->length); + return true; + } else if (rep->IsExternal()) { + *fragment = y_absl::string_view(rep->external()->base, rep->length); + return true; + } else if (rep->IsBtree()) { + return rep->btree()->IsFlat(fragment); + } else if (rep->IsSubstring()) { + CordRep* child = rep->substring()->child; + if (child->IsFlat()) { + *fragment = y_absl::string_view( + child->flat()->Data() + rep->substring()->start, rep->length); + return true; + } else if (child->IsExternal()) { + *fragment = y_absl::string_view( + child->external()->base + rep->substring()->start, rep->length); + return true; + } else if (child->IsBtree()) { + return child->btree()->IsFlat(rep->substring()->start, rep->length, + fragment); + } + } + return false; +} + +/* static */ void Cord::ForEachChunkAux( + y_absl::cord_internal::CordRep* rep, + y_absl::FunctionRef<void(y_absl::string_view)> callback) { + if (rep->IsBtree()) { + ChunkIterator it(rep), end; + while (it != end) { + callback(*it); + ++it; + } + return; + } + + assert(rep != nullptr); + int stack_pos = 0; + constexpr int stack_max = 128; + // Stack of right branches for tree traversal + y_absl::cord_internal::CordRep* stack[stack_max]; + y_absl::cord_internal::CordRep* current_node = rep; + while (true) { + if (current_node->IsConcat()) { + if (stack_pos == stack_max) { + // There's no more room on our stack array to add another right branch, + // and the idea is to avoid allocations, so call this function + // recursively to navigate this subtree further. (This is not something + // we expect to happen in practice). + ForEachChunkAux(current_node, callback); + + // Pop the next right branch and iterate. + current_node = stack[--stack_pos]; + continue; + } else { + // Save the right branch for later traversal and continue down the left + // branch. + stack[stack_pos++] = current_node->concat()->right; + current_node = current_node->concat()->left; + continue; + } + } + // This is a leaf node, so invoke our callback. + y_absl::string_view chunk; + bool success = GetFlatAux(current_node, &chunk); + assert(success); + if (success) { + callback(chunk); + } + if (stack_pos == 0) { + // end of traversal + return; + } + current_node = stack[--stack_pos]; + } +} + +static void DumpNode(CordRep* rep, bool include_data, std::ostream* os, + int indent) { + const int kIndentStep = 1; + y_absl::InlinedVector<CordRep*, kInlinedVectorSize> stack; + y_absl::InlinedVector<int, kInlinedVectorSize> indents; + for (;;) { + *os << std::setw(3) << rep->refcount.Get(); + *os << " " << std::setw(7) << rep->length; + *os << " ["; + if (include_data) *os << static_cast<void*>(rep); + *os << "]"; + *os << " " << (IsRootBalanced(rep) ? 'b' : 'u'); + *os << " " << std::setw(indent) << ""; + if (rep->IsConcat()) { + *os << "CONCAT depth=" << Depth(rep) << "\n"; + indent += kIndentStep; + indents.push_back(indent); + stack.push_back(rep->concat()->right); + rep = rep->concat()->left; + } else if (rep->IsSubstring()) { + *os << "SUBSTRING @ " << rep->substring()->start << "\n"; + indent += kIndentStep; + rep = rep->substring()->child; + } else { // Leaf or ring + if (rep->IsExternal()) { + *os << "EXTERNAL ["; + if (include_data) + *os << y_absl::CEscape(TString(rep->external()->base, rep->length)); + *os << "]\n"; + } else if (rep->IsFlat()) { + *os << "FLAT cap=" << rep->flat()->Capacity() << " ["; + if (include_data) + *os << y_absl::CEscape(TString(rep->flat()->Data(), rep->length)); + *os << "]\n"; + } else { + CordRepBtree::Dump(rep, /*label=*/ "", include_data, *os); + } + if (stack.empty()) break; + rep = stack.back(); + stack.pop_back(); + indent = indents.back(); + indents.pop_back(); + } + } + ABSL_INTERNAL_CHECK(indents.empty(), ""); +} + +static TString ReportError(CordRep* root, CordRep* node) { + std::ostringstream buf; + buf << "Error at node " << node << " in:"; + DumpNode(root, true, &buf); + return TString(buf.str()); +} + +static bool VerifyNode(CordRep* root, CordRep* start_node, + bool full_validation) { + y_absl::InlinedVector<CordRep*, 2> worklist; + worklist.push_back(start_node); + do { + CordRep* node = worklist.back(); + worklist.pop_back(); + + ABSL_INTERNAL_CHECK(node != nullptr, ReportError(root, node)); + if (node != root) { + ABSL_INTERNAL_CHECK(node->length != 0, ReportError(root, node)); + } + + if (node->IsConcat()) { + ABSL_INTERNAL_CHECK(node->concat()->left != nullptr, + ReportError(root, node)); + ABSL_INTERNAL_CHECK(node->concat()->right != nullptr, + ReportError(root, node)); + ABSL_INTERNAL_CHECK((node->length == node->concat()->left->length + + node->concat()->right->length), + ReportError(root, node)); + if (full_validation) { + worklist.push_back(node->concat()->right); + worklist.push_back(node->concat()->left); + } + } else if (node->IsFlat()) { + ABSL_INTERNAL_CHECK(node->length <= node->flat()->Capacity(), + ReportError(root, node)); + } else if (node->IsExternal()) { + ABSL_INTERNAL_CHECK(node->external()->base != nullptr, + ReportError(root, node)); + } else if (node->IsSubstring()) { + ABSL_INTERNAL_CHECK( + node->substring()->start < node->substring()->child->length, + ReportError(root, node)); + ABSL_INTERNAL_CHECK(node->substring()->start + node->length <= + node->substring()->child->length, + ReportError(root, node)); + } + } while (!worklist.empty()); + return true; +} + +// Traverses the tree and computes the total memory allocated. +/* static */ size_t Cord::MemoryUsageAux(const CordRep* rep) { + size_t total_mem_usage = 0; + + // Allow a quick exit for the common case that the root is a leaf. + if (RepMemoryUsageLeaf(rep, &total_mem_usage)) { + return total_mem_usage; + } + + // Iterate over the tree. cur_node is never a leaf node and leaf nodes will + // never be appended to tree_stack. This reduces overhead from manipulating + // tree_stack. + y_absl::InlinedVector<const CordRep*, kInlinedVectorSize> tree_stack; + const CordRep* cur_node = rep; + while (true) { + const CordRep* next_node = nullptr; + + if (cur_node->IsConcat()) { + total_mem_usage += sizeof(CordRepConcat); + const CordRep* left = cur_node->concat()->left; + if (!RepMemoryUsageLeaf(left, &total_mem_usage)) { + next_node = left; + } + + const CordRep* right = cur_node->concat()->right; + if (!RepMemoryUsageLeaf(right, &total_mem_usage)) { + if (next_node) { + tree_stack.push_back(next_node); + } + next_node = right; + } + } else if (cur_node->IsBtree()) { + total_mem_usage += sizeof(CordRepBtree); + const CordRepBtree* node = cur_node->btree(); + if (node->height() == 0) { + for (const CordRep* edge : node->Edges()) { + RepMemoryUsageDataEdge(edge, &total_mem_usage); + } + } else { + for (const CordRep* edge : node->Edges()) { + tree_stack.push_back(edge); + } + } + } else { + // Since cur_node is not a leaf or a concat node it must be a substring. + assert(cur_node->IsSubstring()); + total_mem_usage += sizeof(CordRepSubstring); + next_node = cur_node->substring()->child; + if (RepMemoryUsageLeaf(next_node, &total_mem_usage)) { + next_node = nullptr; + } + } + + if (!next_node) { + if (tree_stack.empty()) { + return total_mem_usage; + } + next_node = tree_stack.back(); + tree_stack.pop_back(); + } + cur_node = next_node; + } +} + +std::ostream& operator<<(std::ostream& out, const Cord& cord) { + for (y_absl::string_view chunk : cord.Chunks()) { + out.write(chunk.data(), chunk.size()); + } + return out; +} + +namespace strings_internal { +size_t CordTestAccess::FlatOverhead() { return cord_internal::kFlatOverhead; } +size_t CordTestAccess::MaxFlatLength() { return cord_internal::kMaxFlatLength; } +size_t CordTestAccess::FlatTagToLength(uint8_t tag) { + return cord_internal::TagToLength(tag); +} +uint8_t CordTestAccess::LengthToTag(size_t s) { + ABSL_INTERNAL_CHECK(s <= kMaxFlatLength, y_absl::StrCat("Invalid length ", s)); + return cord_internal::AllocatedSizeToTag(s + cord_internal::kFlatOverhead); +} +size_t CordTestAccess::SizeofCordRepConcat() { return sizeof(CordRepConcat); } +size_t CordTestAccess::SizeofCordRepExternal() { + return sizeof(CordRepExternal); +} +size_t CordTestAccess::SizeofCordRepSubstring() { + return sizeof(CordRepSubstring); +} +} // namespace strings_internal +ABSL_NAMESPACE_END +} // namespace y_absl diff --git a/contrib/restricted/abseil-cpp-tstring/y_absl/strings/cord.h b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/cord.h new file mode 100644 index 0000000000..62359e0cf8 --- /dev/null +++ b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/cord.h @@ -0,0 +1,1521 @@ +// Copyright 2020 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// ----------------------------------------------------------------------------- +// File: cord.h +// ----------------------------------------------------------------------------- +// +// This file defines the `y_absl::Cord` data structure and operations on that data +// structure. A Cord is a string-like sequence of characters optimized for +// specific use cases. Unlike a `TString`, which stores an array of +// contiguous characters, Cord data is stored in a structure consisting of +// separate, reference-counted "chunks." (Currently, this implementation is a +// tree structure, though that implementation may change.) +// +// Because a Cord consists of these chunks, data can be added to or removed from +// a Cord during its lifetime. Chunks may also be shared between Cords. Unlike a +// `TString`, a Cord can therefore accommodate data that changes over its +// lifetime, though it's not quite "mutable"; it can change only in the +// attachment, detachment, or rearrangement of chunks of its constituent data. +// +// A Cord provides some benefit over `TString` under the following (albeit +// narrow) circumstances: +// +// * Cord data is designed to grow and shrink over a Cord's lifetime. Cord +// provides efficient insertions and deletions at the start and end of the +// character sequences, avoiding copies in those cases. Static data should +// generally be stored as strings. +// * External memory consisting of string-like data can be directly added to +// a Cord without requiring copies or allocations. +// * Cord data may be shared and copied cheaply. Cord provides a copy-on-write +// implementation and cheap sub-Cord operations. Copying a Cord is an O(1) +// operation. +// +// As a consequence to the above, Cord data is generally large. Small data +// should generally use strings, as construction of a Cord requires some +// overhead. Small Cords (<= 15 bytes) are represented inline, but most small +// Cords are expected to grow over their lifetimes. +// +// Note that because a Cord is made up of separate chunked data, random access +// to character data within a Cord is slower than within a `TString`. +// +// Thread Safety +// +// Cord has the same thread-safety properties as many other types like +// TString, std::vector<>, int, etc -- it is thread-compatible. In +// particular, if threads do not call non-const methods, then it is safe to call +// const methods without synchronization. Copying a Cord produces a new instance +// that can be used concurrently with the original in arbitrary ways. + +#ifndef ABSL_STRINGS_CORD_H_ +#define ABSL_STRINGS_CORD_H_ + +#include <algorithm> +#include <cstddef> +#include <cstdint> +#include <cstring> +#include <iosfwd> +#include <iterator> +#include <util/generic/string.h> +#include <type_traits> + +#include "y_absl/base/config.h" +#include "y_absl/base/internal/endian.h" +#include "y_absl/base/internal/per_thread_tls.h" +#include "y_absl/base/macros.h" +#include "y_absl/base/port.h" +#include "y_absl/container/inlined_vector.h" +#include "y_absl/functional/function_ref.h" +#include "y_absl/meta/type_traits.h" +#include "y_absl/strings/internal/cord_internal.h" +#include "y_absl/strings/internal/cord_rep_btree.h" +#include "y_absl/strings/internal/cord_rep_btree_reader.h" +#include "y_absl/strings/internal/cord_rep_ring.h" +#include "y_absl/strings/internal/cordz_functions.h" +#include "y_absl/strings/internal/cordz_info.h" +#include "y_absl/strings/internal/cordz_statistics.h" +#include "y_absl/strings/internal/cordz_update_scope.h" +#include "y_absl/strings/internal/cordz_update_tracker.h" +#include "y_absl/strings/internal/resize_uninitialized.h" +#include "y_absl/strings/internal/string_constant.h" +#include "y_absl/strings/string_view.h" +#include "y_absl/types/optional.h" + +namespace y_absl { +ABSL_NAMESPACE_BEGIN +class Cord; +class CordTestPeer; +template <typename Releaser> +Cord MakeCordFromExternal(y_absl::string_view, Releaser&&); +void CopyCordToString(const Cord& src, TString* dst); + +// Cord +// +// A Cord is a sequence of characters, designed to be more efficient than a +// `TString` in certain circumstances: namely, large string data that needs +// to change over its lifetime or shared, especially when such data is shared +// across API boundaries. +// +// A Cord stores its character data in a structure that allows efficient prepend +// and append operations. This makes a Cord useful for large string data sent +// over in a wire format that may need to be prepended or appended at some point +// during the data exchange (e.g. HTTP, protocol buffers). For example, a +// Cord is useful for storing an HTTP request, and prepending an HTTP header to +// such a request. +// +// Cords should not be used for storing general string data, however. They +// require overhead to construct and are slower than strings for random access. +// +// The Cord API provides the following common API operations: +// +// * Create or assign Cords out of existing string data, memory, or other Cords +// * Append and prepend data to an existing Cord +// * Create new Sub-Cords from existing Cord data +// * Swap Cord data and compare Cord equality +// * Write out Cord data by constructing a `TString` +// +// Additionally, the API provides iterator utilities to iterate through Cord +// data via chunks or character bytes. +// +class Cord { + private: + template <typename T> + using EnableIfString = + y_absl::enable_if_t<std::is_same<T, TString>::value, int>; + + public: + // Cord::Cord() Constructors. + + // Creates an empty Cord. + constexpr Cord() noexcept; + + // Creates a Cord from an existing Cord. Cord is copyable and efficiently + // movable. The moved-from state is valid but unspecified. + Cord(const Cord& src); + Cord(Cord&& src) noexcept; + Cord& operator=(const Cord& x); + Cord& operator=(Cord&& x) noexcept; + + // Creates a Cord from a `src` string. This constructor is marked explicit to + // prevent implicit Cord constructions from arguments convertible to an + // `y_absl::string_view`. + explicit Cord(y_absl::string_view src); + Cord& operator=(y_absl::string_view src); + + // Creates a Cord from a `TString&&` rvalue. These constructors are + // templated to avoid ambiguities for types that are convertible to both + // `y_absl::string_view` and `TString`, such as `const char*`. + template <typename T, EnableIfString<T> = 0> + explicit Cord(T&& src); + template <typename T, EnableIfString<T> = 0> + Cord& operator=(T&& src); + + // Cord::~Cord() + // + // Destructs the Cord. + ~Cord() { + if (contents_.is_tree()) DestroyCordSlow(); + } + + // MakeCordFromExternal() + // + // Creates a Cord that takes ownership of external string memory. The + // contents of `data` are not copied to the Cord; instead, the external + // memory is added to the Cord and reference-counted. This data may not be + // changed for the life of the Cord, though it may be prepended or appended + // to. + // + // `MakeCordFromExternal()` takes a callable "releaser" that is invoked when + // the reference count for `data` reaches zero. As noted above, this data must + // remain live until the releaser is invoked. The callable releaser also must: + // + // * be move constructible + // * support `void operator()(y_absl::string_view) const` or `void operator()` + // + // Example: + // + // Cord MakeCord(BlockPool* pool) { + // Block* block = pool->NewBlock(); + // FillBlock(block); + // return y_absl::MakeCordFromExternal( + // block->ToStringView(), + // [pool, block](y_absl::string_view v) { + // pool->FreeBlock(block, v); + // }); + // } + // + // WARNING: Because a Cord can be reference-counted, it's likely a bug if your + // releaser doesn't do anything. For example, consider the following: + // + // void Foo(const char* buffer, int len) { + // auto c = y_absl::MakeCordFromExternal(y_absl::string_view(buffer, len), + // [](y_absl::string_view) {}); + // + // // BUG: If Bar() copies its cord for any reason, including keeping a + // // substring of it, the lifetime of buffer might be extended beyond + // // when Foo() returns. + // Bar(c); + // } + template <typename Releaser> + friend Cord MakeCordFromExternal(y_absl::string_view data, Releaser&& releaser); + + // Cord::Clear() + // + // Releases the Cord data. Any nodes that share data with other Cords, if + // applicable, will have their reference counts reduced by 1. + void Clear(); + + // Cord::Append() + // + // Appends data to the Cord, which may come from another Cord or other string + // data. + void Append(const Cord& src); + void Append(Cord&& src); + void Append(y_absl::string_view src); + template <typename T, EnableIfString<T> = 0> + void Append(T&& src); + + // Cord::Prepend() + // + // Prepends data to the Cord, which may come from another Cord or other string + // data. + void Prepend(const Cord& src); + void Prepend(y_absl::string_view src); + template <typename T, EnableIfString<T> = 0> + void Prepend(T&& src); + + // Cord::RemovePrefix() + // + // Removes the first `n` bytes of a Cord. + void RemovePrefix(size_t n); + void RemoveSuffix(size_t n); + + // Cord::Subcord() + // + // Returns a new Cord representing the subrange [pos, pos + new_size) of + // *this. If pos >= size(), the result is empty(). If + // (pos + new_size) >= size(), the result is the subrange [pos, size()). + Cord Subcord(size_t pos, size_t new_size) const; + + // Cord::swap() + // + // Swaps the contents of the Cord with `other`. + void swap(Cord& other) noexcept; + + // swap() + // + // Swaps the contents of two Cords. + friend void swap(Cord& x, Cord& y) noexcept { x.swap(y); } + + // Cord::size() + // + // Returns the size of the Cord. + size_t size() const; + + // Cord::empty() + // + // Determines whether the given Cord is empty, returning `true` is so. + bool empty() const; + + // Cord::EstimatedMemoryUsage() + // + // Returns the *approximate* number of bytes held in full or in part by this + // Cord (which may not remain the same between invocations). Note that Cords + // that share memory could each be "charged" independently for the same shared + // memory. + size_t EstimatedMemoryUsage() const; + + // Cord::Compare() + // + // Compares 'this' Cord with rhs. This function and its relatives treat Cords + // as sequences of unsigned bytes. The comparison is a straightforward + // lexicographic comparison. `Cord::Compare()` returns values as follows: + // + // -1 'this' Cord is smaller + // 0 two Cords are equal + // 1 'this' Cord is larger + int Compare(y_absl::string_view rhs) const; + int Compare(const Cord& rhs) const; + + // Cord::StartsWith() + // + // Determines whether the Cord starts with the passed string data `rhs`. + bool StartsWith(const Cord& rhs) const; + bool StartsWith(y_absl::string_view rhs) const; + + // Cord::EndsWith() + // + // Determines whether the Cord ends with the passed string data `rhs`. + bool EndsWith(y_absl::string_view rhs) const; + bool EndsWith(const Cord& rhs) const; + + // Cord::operator TString() + // + // Converts a Cord into a `TString()`. This operator is marked explicit to + // prevent unintended Cord usage in functions that take a string. + explicit operator TString() const; + + // CopyCordToString() + // + // Copies the contents of a `src` Cord into a `*dst` string. + // + // This function optimizes the case of reusing the destination string since it + // can reuse previously allocated capacity. However, this function does not + // guarantee that pointers previously returned by `dst->data()` remain valid + // even if `*dst` had enough capacity to hold `src`. If `*dst` is a new + // object, prefer to simply use the conversion operator to `TString`. + friend void CopyCordToString(const Cord& src, TString* dst); + + class CharIterator; + + //---------------------------------------------------------------------------- + // Cord::ChunkIterator + //---------------------------------------------------------------------------- + // + // A `Cord::ChunkIterator` allows iteration over the constituent chunks of its + // Cord. Such iteration allows you to perform non-const operatons on the data + // of a Cord without modifying it. + // + // Generally, you do not instantiate a `Cord::ChunkIterator` directly; + // instead, you create one implicitly through use of the `Cord::Chunks()` + // member function. + // + // The `Cord::ChunkIterator` has the following properties: + // + // * The iterator is invalidated after any non-const operation on the + // Cord object over which it iterates. + // * The `string_view` returned by dereferencing a valid, non-`end()` + // iterator is guaranteed to be non-empty. + // * Two `ChunkIterator` objects can be compared equal if and only if they + // remain valid and iterate over the same Cord. + // * The iterator in this case is a proxy iterator; the `string_view` + // returned by the iterator does not live inside the Cord, and its + // lifetime is limited to the lifetime of the iterator itself. To help + // prevent lifetime issues, `ChunkIterator::reference` is not a true + // reference type and is equivalent to `value_type`. + // * The iterator keeps state that can grow for Cords that contain many + // nodes and are imbalanced due to sharing. Prefer to pass this type by + // const reference instead of by value. + class ChunkIterator { + public: + using iterator_category = std::input_iterator_tag; + using value_type = y_absl::string_view; + using difference_type = ptrdiff_t; + using pointer = const value_type*; + using reference = value_type; + + ChunkIterator() = default; + + ChunkIterator& operator++(); + ChunkIterator operator++(int); + bool operator==(const ChunkIterator& other) const; + bool operator!=(const ChunkIterator& other) const; + reference operator*() const; + pointer operator->() const; + + friend class Cord; + friend class CharIterator; + + private: + using CordRep = y_absl::cord_internal::CordRep; + using CordRepBtree = y_absl::cord_internal::CordRepBtree; + using CordRepBtreeReader = y_absl::cord_internal::CordRepBtreeReader; + + // Stack of right children of concat nodes that we have to visit. + // Keep this at the end of the structure to avoid cache-thrashing. + // TODO(jgm): Benchmark to see if there's a more optimal value than 47 for + // the inlined vector size (47 exists for backward compatibility). + using Stack = y_absl::InlinedVector<y_absl::cord_internal::CordRep*, 47>; + + // Constructs a `begin()` iterator from `tree`. `tree` must not be null. + explicit ChunkIterator(cord_internal::CordRep* tree); + + // Constructs a `begin()` iterator from `cord`. + explicit ChunkIterator(const Cord* cord); + + // Initializes this instance from a tree. Invoked by constructors. + void InitTree(cord_internal::CordRep* tree); + + // Removes `n` bytes from `current_chunk_`. Expects `n` to be smaller than + // `current_chunk_.size()`. + void RemoveChunkPrefix(size_t n); + Cord AdvanceAndReadBytes(size_t n); + void AdvanceBytes(size_t n); + + // Stack specific operator++ + ChunkIterator& AdvanceStack(); + + // Btree specific operator++ + ChunkIterator& AdvanceBtree(); + void AdvanceBytesBtree(size_t n); + + // Iterates `n` bytes, where `n` is expected to be greater than or equal to + // `current_chunk_.size()`. + void AdvanceBytesSlowPath(size_t n); + + // A view into bytes of the current `CordRep`. It may only be a view to a + // suffix of bytes if this is being used by `CharIterator`. + y_absl::string_view current_chunk_; + // The current leaf, or `nullptr` if the iterator points to short data. + // If the current chunk is a substring node, current_leaf_ points to the + // underlying flat or external node. + y_absl::cord_internal::CordRep* current_leaf_ = nullptr; + // The number of bytes left in the `Cord` over which we are iterating. + size_t bytes_remaining_ = 0; + + // Cord reader for cord btrees. Empty if not traversing a btree. + CordRepBtreeReader btree_reader_; + + // See 'Stack' alias definition. + Stack stack_of_right_children_; + }; + + // Cord::ChunkIterator::chunk_begin() + // + // Returns an iterator to the first chunk of the `Cord`. + // + // Generally, prefer using `Cord::Chunks()` within a range-based for loop for + // iterating over the chunks of a Cord. This method may be useful for getting + // a `ChunkIterator` where range-based for-loops are not useful. + // + // Example: + // + // y_absl::Cord::ChunkIterator FindAsChunk(const y_absl::Cord& c, + // y_absl::string_view s) { + // return std::find(c.chunk_begin(), c.chunk_end(), s); + // } + ChunkIterator chunk_begin() const; + + // Cord::ChunkItertator::chunk_end() + // + // Returns an iterator one increment past the last chunk of the `Cord`. + // + // Generally, prefer using `Cord::Chunks()` within a range-based for loop for + // iterating over the chunks of a Cord. This method may be useful for getting + // a `ChunkIterator` where range-based for-loops may not be available. + ChunkIterator chunk_end() const; + + //---------------------------------------------------------------------------- + // Cord::ChunkIterator::ChunkRange + //---------------------------------------------------------------------------- + // + // `ChunkRange` is a helper class for iterating over the chunks of the `Cord`, + // producing an iterator which can be used within a range-based for loop. + // Construction of a `ChunkRange` will return an iterator pointing to the + // first chunk of the Cord. Generally, do not construct a `ChunkRange` + // directly; instead, prefer to use the `Cord::Chunks()` method. + // + // Implementation note: `ChunkRange` is simply a convenience wrapper over + // `Cord::chunk_begin()` and `Cord::chunk_end()`. + class ChunkRange { + public: + // Fulfill minimum c++ container requirements [container.requirements] + // Theses (partial) container type definitions allow ChunkRange to be used + // in various utilities expecting a subset of [container.requirements]. + // For example, the below enables using `::testing::ElementsAre(...)` + using value_type = y_absl::string_view; + using reference = value_type&; + using const_reference = const value_type&; + using iterator = ChunkIterator; + using const_iterator = ChunkIterator; + + explicit ChunkRange(const Cord* cord) : cord_(cord) {} + + ChunkIterator begin() const; + ChunkIterator end() const; + + private: + const Cord* cord_; + }; + + // Cord::Chunks() + // + // Returns a `Cord::ChunkIterator::ChunkRange` for iterating over the chunks + // of a `Cord` with a range-based for-loop. For most iteration tasks on a + // Cord, use `Cord::Chunks()` to retrieve this iterator. + // + // Example: + // + // void ProcessChunks(const Cord& cord) { + // for (y_absl::string_view chunk : cord.Chunks()) { ... } + // } + // + // Note that the ordinary caveats of temporary lifetime extension apply: + // + // void Process() { + // for (y_absl::string_view chunk : CordFactory().Chunks()) { + // // The temporary Cord returned by CordFactory has been destroyed! + // } + // } + ChunkRange Chunks() const; + + //---------------------------------------------------------------------------- + // Cord::CharIterator + //---------------------------------------------------------------------------- + // + // A `Cord::CharIterator` allows iteration over the constituent characters of + // a `Cord`. + // + // Generally, you do not instantiate a `Cord::CharIterator` directly; instead, + // you create one implicitly through use of the `Cord::Chars()` member + // function. + // + // A `Cord::CharIterator` has the following properties: + // + // * The iterator is invalidated after any non-const operation on the + // Cord object over which it iterates. + // * Two `CharIterator` objects can be compared equal if and only if they + // remain valid and iterate over the same Cord. + // * The iterator keeps state that can grow for Cords that contain many + // nodes and are imbalanced due to sharing. Prefer to pass this type by + // const reference instead of by value. + // * This type cannot act as a forward iterator because a `Cord` can reuse + // sections of memory. This fact violates the requirement for forward + // iterators to compare equal if dereferencing them returns the same + // object. + class CharIterator { + public: + using iterator_category = std::input_iterator_tag; + using value_type = char; + using difference_type = ptrdiff_t; + using pointer = const char*; + using reference = const char&; + + CharIterator() = default; + + CharIterator& operator++(); + CharIterator operator++(int); + bool operator==(const CharIterator& other) const; + bool operator!=(const CharIterator& other) const; + reference operator*() const; + pointer operator->() const; + + friend Cord; + + private: + explicit CharIterator(const Cord* cord) : chunk_iterator_(cord) {} + + ChunkIterator chunk_iterator_; + }; + + // Cord::CharIterator::AdvanceAndRead() + // + // Advances the `Cord::CharIterator` by `n_bytes` and returns the bytes + // advanced as a separate `Cord`. `n_bytes` must be less than or equal to the + // number of bytes within the Cord; otherwise, behavior is undefined. It is + // valid to pass `char_end()` and `0`. + static Cord AdvanceAndRead(CharIterator* it, size_t n_bytes); + + // Cord::CharIterator::Advance() + // + // Advances the `Cord::CharIterator` by `n_bytes`. `n_bytes` must be less than + // or equal to the number of bytes remaining within the Cord; otherwise, + // behavior is undefined. It is valid to pass `char_end()` and `0`. + static void Advance(CharIterator* it, size_t n_bytes); + + // Cord::CharIterator::ChunkRemaining() + // + // Returns the longest contiguous view starting at the iterator's position. + // + // `it` must be dereferenceable. + static y_absl::string_view ChunkRemaining(const CharIterator& it); + + // Cord::CharIterator::char_begin() + // + // Returns an iterator to the first character of the `Cord`. + // + // Generally, prefer using `Cord::Chars()` within a range-based for loop for + // iterating over the chunks of a Cord. This method may be useful for getting + // a `CharIterator` where range-based for-loops may not be available. + CharIterator char_begin() const; + + // Cord::CharIterator::char_end() + // + // Returns an iterator to one past the last character of the `Cord`. + // + // Generally, prefer using `Cord::Chars()` within a range-based for loop for + // iterating over the chunks of a Cord. This method may be useful for getting + // a `CharIterator` where range-based for-loops are not useful. + CharIterator char_end() const; + + // Cord::CharIterator::CharRange + // + // `CharRange` is a helper class for iterating over the characters of a + // producing an iterator which can be used within a range-based for loop. + // Construction of a `CharRange` will return an iterator pointing to the first + // character of the Cord. Generally, do not construct a `CharRange` directly; + // instead, prefer to use the `Cord::Chars()` method show below. + // + // Implementation note: `CharRange` is simply a convenience wrapper over + // `Cord::char_begin()` and `Cord::char_end()`. + class CharRange { + public: + // Fulfill minimum c++ container requirements [container.requirements] + // Theses (partial) container type definitions allow CharRange to be used + // in various utilities expecting a subset of [container.requirements]. + // For example, the below enables using `::testing::ElementsAre(...)` + using value_type = char; + using reference = value_type&; + using const_reference = const value_type&; + using iterator = CharIterator; + using const_iterator = CharIterator; + + explicit CharRange(const Cord* cord) : cord_(cord) {} + + CharIterator begin() const; + CharIterator end() const; + + private: + const Cord* cord_; + }; + + // Cord::CharIterator::Chars() + // + // Returns a `Cord::CharIterator` for iterating over the characters of a + // `Cord` with a range-based for-loop. For most character-based iteration + // tasks on a Cord, use `Cord::Chars()` to retrieve this iterator. + // + // Example: + // + // void ProcessCord(const Cord& cord) { + // for (char c : cord.Chars()) { ... } + // } + // + // Note that the ordinary caveats of temporary lifetime extension apply: + // + // void Process() { + // for (char c : CordFactory().Chars()) { + // // The temporary Cord returned by CordFactory has been destroyed! + // } + // } + CharRange Chars() const; + + // Cord::operator[] + // + // Gets the "i"th character of the Cord and returns it, provided that + // 0 <= i < Cord.size(). + // + // NOTE: This routine is reasonably efficient. It is roughly + // logarithmic based on the number of chunks that make up the cord. Still, + // if you need to iterate over the contents of a cord, you should + // use a CharIterator/ChunkIterator rather than call operator[] or Get() + // repeatedly in a loop. + char operator[](size_t i) const; + + // Cord::TryFlat() + // + // If this cord's representation is a single flat array, returns a + // string_view referencing that array. Otherwise returns nullopt. + y_absl::optional<y_absl::string_view> TryFlat() const; + + // Cord::Flatten() + // + // Flattens the cord into a single array and returns a view of the data. + // + // If the cord was already flat, the contents are not modified. + y_absl::string_view Flatten(); + + // Supports y_absl::Cord as a sink object for y_absl::Format(). + friend void AbslFormatFlush(y_absl::Cord* cord, y_absl::string_view part) { + cord->Append(part); + } + + template <typename H> + friend H AbslHashValue(H hash_state, const y_absl::Cord& c) { + y_absl::optional<y_absl::string_view> maybe_flat = c.TryFlat(); + if (maybe_flat.has_value()) { + return H::combine(std::move(hash_state), *maybe_flat); + } + return c.HashFragmented(std::move(hash_state)); + } + + // Create a Cord with the contents of StringConstant<T>::value. + // No allocations will be done and no data will be copied. + // This is an INTERNAL API and subject to change or removal. This API can only + // be used by spelling y_absl::strings_internal::MakeStringConstant, which is + // also an internal API. + template <typename T> + explicit constexpr Cord(strings_internal::StringConstant<T>); + + private: + using CordRep = y_absl::cord_internal::CordRep; + using CordRepFlat = y_absl::cord_internal::CordRepFlat; + using CordzInfo = cord_internal::CordzInfo; + using CordzUpdateScope = cord_internal::CordzUpdateScope; + using CordzUpdateTracker = cord_internal::CordzUpdateTracker; + using InlineData = cord_internal::InlineData; + using MethodIdentifier = CordzUpdateTracker::MethodIdentifier; + + // Creates a cord instance with `method` representing the originating + // public API call causing the cord to be created. + explicit Cord(y_absl::string_view src, MethodIdentifier method); + + friend class CordTestPeer; + friend bool operator==(const Cord& lhs, const Cord& rhs); + friend bool operator==(const Cord& lhs, y_absl::string_view rhs); + + friend const CordzInfo* GetCordzInfoForTesting(const Cord& cord); + + // Calls the provided function once for each cord chunk, in order. Unlike + // Chunks(), this API will not allocate memory. + void ForEachChunk(y_absl::FunctionRef<void(y_absl::string_view)>) const; + + // Allocates new contiguous storage for the contents of the cord. This is + // called by Flatten() when the cord was not already flat. + y_absl::string_view FlattenSlowPath(); + + // Actual cord contents are hidden inside the following simple + // class so that we can isolate the bulk of cord.cc from changes + // to the representation. + // + // InlineRep holds either a tree pointer, or an array of kMaxInline bytes. + class InlineRep { + public: + static constexpr unsigned char kMaxInline = cord_internal::kMaxInline; + static_assert(kMaxInline >= sizeof(y_absl::cord_internal::CordRep*), ""); + + constexpr InlineRep() : data_() {} + explicit InlineRep(InlineData::DefaultInitType init) : data_(init) {} + InlineRep(const InlineRep& src); + InlineRep(InlineRep&& src); + InlineRep& operator=(const InlineRep& src); + InlineRep& operator=(InlineRep&& src) noexcept; + + explicit constexpr InlineRep(cord_internal::InlineData data); + + void Swap(InlineRep* rhs); + bool empty() const; + size_t size() const; + const char* data() const; // Returns nullptr if holding pointer + void set_data(const char* data, size_t n, + bool nullify_tail); // Discards pointer, if any + char* set_data(size_t n); // Write data to the result + // Returns nullptr if holding bytes + y_absl::cord_internal::CordRep* tree() const; + y_absl::cord_internal::CordRep* as_tree() const; + // Returns non-null iff was holding a pointer + y_absl::cord_internal::CordRep* clear(); + // Converts to pointer if necessary. + void reduce_size(size_t n); // REQUIRES: holding data + void remove_prefix(size_t n); // REQUIRES: holding data + void AppendArray(y_absl::string_view src, MethodIdentifier method); + y_absl::string_view FindFlatStartPiece() const; + + // Creates a CordRepFlat instance from the current inlined data with `extra' + // bytes of desired additional capacity. + CordRepFlat* MakeFlatWithExtraCapacity(size_t extra); + + // Sets the tree value for this instance. `rep` must not be null. + // Requires the current instance to hold a tree, and a lock to be held on + // any CordzInfo referenced by this instance. The latter is enforced through + // the CordzUpdateScope argument. If the current instance is sampled, then + // the CordzInfo instance is updated to reference the new `rep` value. + void SetTree(CordRep* rep, const CordzUpdateScope& scope); + + // Identical to SetTree(), except that `rep` is allowed to be null, in + // which case the current instance is reset to an empty value. + void SetTreeOrEmpty(CordRep* rep, const CordzUpdateScope& scope); + + // Sets the tree value for this instance, and randomly samples this cord. + // This function disregards existing contents in `data_`, and should be + // called when a Cord is 'promoted' from an 'uninitialized' or 'inlined' + // value to a non-inlined (tree / ring) value. + void EmplaceTree(CordRep* rep, MethodIdentifier method); + + // Identical to EmplaceTree, except that it copies the parent stack from + // the provided `parent` data if the parent is sampled. + void EmplaceTree(CordRep* rep, const InlineData& parent, + MethodIdentifier method); + + // Commits the change of a newly created, or updated `rep` root value into + // this cord. `old_rep` indicates the old (inlined or tree) value of the + // cord, and determines if the commit invokes SetTree() or EmplaceTree(). + void CommitTree(const CordRep* old_rep, CordRep* rep, + const CordzUpdateScope& scope, MethodIdentifier method); + + void AppendTreeToInlined(CordRep* tree, MethodIdentifier method); + void AppendTreeToTree(CordRep* tree, MethodIdentifier method); + void AppendTree(CordRep* tree, MethodIdentifier method); + void PrependTreeToInlined(CordRep* tree, MethodIdentifier method); + void PrependTreeToTree(CordRep* tree, MethodIdentifier method); + void PrependTree(CordRep* tree, MethodIdentifier method); + + template <bool has_length> + void GetAppendRegion(char** region, size_t* size, size_t length); + + bool IsSame(const InlineRep& other) const { + return memcmp(&data_, &other.data_, sizeof(data_)) == 0; + } + int BitwiseCompare(const InlineRep& other) const { + uint64_t x, y; + // Use memcpy to avoid aliasing issues. + memcpy(&x, &data_, sizeof(x)); + memcpy(&y, &other.data_, sizeof(y)); + if (x == y) { + memcpy(&x, reinterpret_cast<const char*>(&data_) + 8, sizeof(x)); + memcpy(&y, reinterpret_cast<const char*>(&other.data_) + 8, sizeof(y)); + if (x == y) return 0; + } + return y_absl::big_endian::FromHost64(x) < y_absl::big_endian::FromHost64(y) + ? -1 + : 1; + } + void CopyTo(TString* dst) const { + // memcpy is much faster when operating on a known size. On most supported + // platforms, the small string optimization is large enough that resizing + // to 15 bytes does not cause a memory allocation. + y_absl::strings_internal::STLStringResizeUninitialized(dst, + sizeof(data_) - 1); + memcpy(&(*dst)[0], &data_, sizeof(data_) - 1); + // erase is faster than resize because the logic for memory allocation is + // not needed. + dst->erase(inline_size()); + } + + // Copies the inline contents into `dst`. Assumes the cord is not empty. + void CopyToArray(char* dst) const; + + bool is_tree() const { return data_.is_tree(); } + + // Returns true if the Cord is being profiled by cordz. + bool is_profiled() const { return data_.is_tree() && data_.is_profiled(); } + + // Returns the profiled CordzInfo, or nullptr if not sampled. + y_absl::cord_internal::CordzInfo* cordz_info() const { + return data_.cordz_info(); + } + + // Sets the profiled CordzInfo. `cordz_info` must not be null. + void set_cordz_info(cord_internal::CordzInfo* cordz_info) { + assert(cordz_info != nullptr); + data_.set_cordz_info(cordz_info); + } + + // Resets the current cordz_info to null / empty. + void clear_cordz_info() { data_.clear_cordz_info(); } + + private: + friend class Cord; + + void AssignSlow(const InlineRep& src); + // Unrefs the tree and stops profiling. + void UnrefTree(); + + void ResetToEmpty() { data_ = {}; } + + void set_inline_size(size_t size) { data_.set_inline_size(size); } + size_t inline_size() const { return data_.inline_size(); } + + cord_internal::InlineData data_; + }; + InlineRep contents_; + + // Helper for MemoryUsage(). + static size_t MemoryUsageAux(const y_absl::cord_internal::CordRep* rep); + + // Helper for GetFlat() and TryFlat(). + static bool GetFlatAux(y_absl::cord_internal::CordRep* rep, + y_absl::string_view* fragment); + + // Helper for ForEachChunk(). + static void ForEachChunkAux( + y_absl::cord_internal::CordRep* rep, + y_absl::FunctionRef<void(y_absl::string_view)> callback); + + // The destructor for non-empty Cords. + void DestroyCordSlow(); + + // Out-of-line implementation of slower parts of logic. + void CopyToArraySlowPath(char* dst) const; + int CompareSlowPath(y_absl::string_view rhs, size_t compared_size, + size_t size_to_compare) const; + int CompareSlowPath(const Cord& rhs, size_t compared_size, + size_t size_to_compare) const; + bool EqualsImpl(y_absl::string_view rhs, size_t size_to_compare) const; + bool EqualsImpl(const Cord& rhs, size_t size_to_compare) const; + int CompareImpl(const Cord& rhs) const; + + template <typename ResultType, typename RHS> + friend ResultType GenericCompare(const Cord& lhs, const RHS& rhs, + size_t size_to_compare); + static y_absl::string_view GetFirstChunk(const Cord& c); + static y_absl::string_view GetFirstChunk(y_absl::string_view sv); + + // Returns a new reference to contents_.tree(), or steals an existing + // reference if called on an rvalue. + y_absl::cord_internal::CordRep* TakeRep() const&; + y_absl::cord_internal::CordRep* TakeRep() &&; + + // Helper for Append(). + template <typename C> + void AppendImpl(C&& src); + + // Prepends the provided data to this instance. `method` contains the public + // API method for this action which is tracked for Cordz sampling purposes. + void PrependArray(y_absl::string_view src, MethodIdentifier method); + + // Assigns the value in 'src' to this instance, 'stealing' its contents. + // Requires src.length() > kMaxBytesToCopy. + Cord& AssignLargeString(TString&& src); + + // Helper for AbslHashValue(). + template <typename H> + H HashFragmented(H hash_state) const { + typename H::AbslInternalPiecewiseCombiner combiner; + ForEachChunk([&combiner, &hash_state](y_absl::string_view chunk) { + hash_state = combiner.add_buffer(std::move(hash_state), chunk.data(), + chunk.size()); + }); + return H::combine(combiner.finalize(std::move(hash_state)), size()); + } +}; + +ABSL_NAMESPACE_END +} // namespace y_absl + +namespace y_absl { +ABSL_NAMESPACE_BEGIN + +// allow a Cord to be logged +extern std::ostream& operator<<(std::ostream& out, const Cord& cord); + +// ------------------------------------------------------------------ +// Internal details follow. Clients should ignore. + +namespace cord_internal { + +// Fast implementation of memmove for up to 15 bytes. This implementation is +// safe for overlapping regions. If nullify_tail is true, the destination is +// padded with '\0' up to 16 bytes. +inline void SmallMemmove(char* dst, const char* src, size_t n, + bool nullify_tail = false) { + if (n >= 8) { + assert(n <= 16); + uint64_t buf1; + uint64_t buf2; + memcpy(&buf1, src, 8); + memcpy(&buf2, src + n - 8, 8); + if (nullify_tail) { + memset(dst + 8, 0, 8); + } + memcpy(dst, &buf1, 8); + memcpy(dst + n - 8, &buf2, 8); + } else if (n >= 4) { + uint32_t buf1; + uint32_t buf2; + memcpy(&buf1, src, 4); + memcpy(&buf2, src + n - 4, 4); + if (nullify_tail) { + memset(dst + 4, 0, 4); + memset(dst + 8, 0, 8); + } + memcpy(dst, &buf1, 4); + memcpy(dst + n - 4, &buf2, 4); + } else { + if (n != 0) { + dst[0] = src[0]; + dst[n / 2] = src[n / 2]; + dst[n - 1] = src[n - 1]; + } + if (nullify_tail) { + memset(dst + 8, 0, 8); + memset(dst + n, 0, 8); + } + } +} + +// Does non-template-specific `CordRepExternal` initialization. +// Expects `data` to be non-empty. +void InitializeCordRepExternal(y_absl::string_view data, CordRepExternal* rep); + +// Creates a new `CordRep` that owns `data` and `releaser` and returns a pointer +// to it, or `nullptr` if `data` was empty. +template <typename Releaser> +// NOLINTNEXTLINE - suppress clang-tidy raw pointer return. +CordRep* NewExternalRep(y_absl::string_view data, Releaser&& releaser) { + using ReleaserType = y_absl::decay_t<Releaser>; + if (data.empty()) { + // Never create empty external nodes. + InvokeReleaser(Rank0{}, ReleaserType(std::forward<Releaser>(releaser)), + data); + return nullptr; + } + + CordRepExternal* rep = new CordRepExternalImpl<ReleaserType>( + std::forward<Releaser>(releaser), 0); + InitializeCordRepExternal(data, rep); + return rep; +} + +// Overload for function reference types that dispatches using a function +// pointer because there are no `alignof()` or `sizeof()` a function reference. +// NOLINTNEXTLINE - suppress clang-tidy raw pointer return. +inline CordRep* NewExternalRep(y_absl::string_view data, + void (&releaser)(y_absl::string_view)) { + return NewExternalRep(data, &releaser); +} + +} // namespace cord_internal + +template <typename Releaser> +Cord MakeCordFromExternal(y_absl::string_view data, Releaser&& releaser) { + Cord cord; + if (auto* rep = ::y_absl::cord_internal::NewExternalRep( + data, std::forward<Releaser>(releaser))) { + cord.contents_.EmplaceTree(rep, + Cord::MethodIdentifier::kMakeCordFromExternal); + } + return cord; +} + +constexpr Cord::InlineRep::InlineRep(cord_internal::InlineData data) + : data_(data) {} + +inline Cord::InlineRep::InlineRep(const Cord::InlineRep& src) + : data_(InlineData::kDefaultInit) { + if (CordRep* tree = src.tree()) { + EmplaceTree(CordRep::Ref(tree), src.data_, + CordzUpdateTracker::kConstructorCord); + } else { + data_ = src.data_; + } +} + +inline Cord::InlineRep::InlineRep(Cord::InlineRep&& src) : data_(src.data_) { + src.ResetToEmpty(); +} + +inline Cord::InlineRep& Cord::InlineRep::operator=(const Cord::InlineRep& src) { + if (this == &src) { + return *this; + } + if (!is_tree() && !src.is_tree()) { + data_ = src.data_; + return *this; + } + AssignSlow(src); + return *this; +} + +inline Cord::InlineRep& Cord::InlineRep::operator=( + Cord::InlineRep&& src) noexcept { + if (is_tree()) { + UnrefTree(); + } + data_ = src.data_; + src.ResetToEmpty(); + return *this; +} + +inline void Cord::InlineRep::Swap(Cord::InlineRep* rhs) { + if (rhs == this) { + return; + } + std::swap(data_, rhs->data_); +} + +inline const char* Cord::InlineRep::data() const { + return is_tree() ? nullptr : data_.as_chars(); +} + +inline y_absl::cord_internal::CordRep* Cord::InlineRep::as_tree() const { + assert(data_.is_tree()); + return data_.as_tree(); +} + +inline y_absl::cord_internal::CordRep* Cord::InlineRep::tree() const { + if (is_tree()) { + return as_tree(); + } else { + return nullptr; + } +} + +inline bool Cord::InlineRep::empty() const { return data_.is_empty(); } + +inline size_t Cord::InlineRep::size() const { + return is_tree() ? as_tree()->length : inline_size(); +} + +inline cord_internal::CordRepFlat* Cord::InlineRep::MakeFlatWithExtraCapacity( + size_t extra) { + static_assert(cord_internal::kMinFlatLength >= sizeof(data_), ""); + size_t len = data_.inline_size(); + auto* result = CordRepFlat::New(len + extra); + result->length = len; + memcpy(result->Data(), data_.as_chars(), sizeof(data_)); + return result; +} + +inline void Cord::InlineRep::EmplaceTree(CordRep* rep, + MethodIdentifier method) { + assert(rep); + data_.make_tree(rep); + CordzInfo::MaybeTrackCord(data_, method); +} + +inline void Cord::InlineRep::EmplaceTree(CordRep* rep, const InlineData& parent, + MethodIdentifier method) { + data_.make_tree(rep); + CordzInfo::MaybeTrackCord(data_, parent, method); +} + +inline void Cord::InlineRep::SetTree(CordRep* rep, + const CordzUpdateScope& scope) { + assert(rep); + assert(data_.is_tree()); + data_.set_tree(rep); + scope.SetCordRep(rep); +} + +inline void Cord::InlineRep::SetTreeOrEmpty(CordRep* rep, + const CordzUpdateScope& scope) { + assert(data_.is_tree()); + if (rep) { + data_.set_tree(rep); + } else { + data_ = {}; + } + scope.SetCordRep(rep); +} + +inline void Cord::InlineRep::CommitTree(const CordRep* old_rep, CordRep* rep, + const CordzUpdateScope& scope, + MethodIdentifier method) { + if (old_rep) { + SetTree(rep, scope); + } else { + EmplaceTree(rep, method); + } +} + +inline y_absl::cord_internal::CordRep* Cord::InlineRep::clear() { + if (is_tree()) { + CordzInfo::MaybeUntrackCord(cordz_info()); + } + y_absl::cord_internal::CordRep* result = tree(); + ResetToEmpty(); + return result; +} + +inline void Cord::InlineRep::CopyToArray(char* dst) const { + assert(!is_tree()); + size_t n = inline_size(); + assert(n != 0); + cord_internal::SmallMemmove(dst, data_.as_chars(), n); +} + +constexpr inline Cord::Cord() noexcept {} + +inline Cord::Cord(y_absl::string_view src) + : Cord(src, CordzUpdateTracker::kConstructorString) {} + +template <typename T> +constexpr Cord::Cord(strings_internal::StringConstant<T>) + : contents_(strings_internal::StringConstant<T>::value.size() <= + cord_internal::kMaxInline + ? cord_internal::InlineData( + strings_internal::StringConstant<T>::value) + : cord_internal::InlineData( + &cord_internal::ConstInitExternalStorage< + strings_internal::StringConstant<T>>::value)) {} + +inline Cord& Cord::operator=(const Cord& x) { + contents_ = x.contents_; + return *this; +} + +template <typename T, Cord::EnableIfString<T>> +Cord& Cord::operator=(T&& src) { + if (src.size() <= cord_internal::kMaxBytesToCopy) { + return operator=(y_absl::string_view(src)); + } else { + return AssignLargeString(std::forward<T>(src)); + } +} + +inline Cord::Cord(const Cord& src) : contents_(src.contents_) {} + +inline Cord::Cord(Cord&& src) noexcept : contents_(std::move(src.contents_)) {} + +inline void Cord::swap(Cord& other) noexcept { + contents_.Swap(&other.contents_); +} + +inline Cord& Cord::operator=(Cord&& x) noexcept { + contents_ = std::move(x.contents_); + return *this; +} + +extern template Cord::Cord(TString&& src); + +inline size_t Cord::size() const { + // Length is 1st field in str.rep_ + return contents_.size(); +} + +inline bool Cord::empty() const { return contents_.empty(); } + +inline size_t Cord::EstimatedMemoryUsage() const { + size_t result = sizeof(Cord); + if (const y_absl::cord_internal::CordRep* rep = contents_.tree()) { + result += MemoryUsageAux(rep); + } + return result; +} + +inline y_absl::optional<y_absl::string_view> Cord::TryFlat() const { + y_absl::cord_internal::CordRep* rep = contents_.tree(); + if (rep == nullptr) { + return y_absl::string_view(contents_.data(), contents_.size()); + } + y_absl::string_view fragment; + if (GetFlatAux(rep, &fragment)) { + return fragment; + } + return y_absl::nullopt; +} + +inline y_absl::string_view Cord::Flatten() { + y_absl::cord_internal::CordRep* rep = contents_.tree(); + if (rep == nullptr) { + return y_absl::string_view(contents_.data(), contents_.size()); + } else { + y_absl::string_view already_flat_contents; + if (GetFlatAux(rep, &already_flat_contents)) { + return already_flat_contents; + } + } + return FlattenSlowPath(); +} + +inline void Cord::Append(y_absl::string_view src) { + contents_.AppendArray(src, CordzUpdateTracker::kAppendString); +} + +inline void Cord::Prepend(y_absl::string_view src) { + PrependArray(src, CordzUpdateTracker::kPrependString); +} + +extern template void Cord::Append(TString&& src); +extern template void Cord::Prepend(TString&& src); + +inline int Cord::Compare(const Cord& rhs) const { + if (!contents_.is_tree() && !rhs.contents_.is_tree()) { + return contents_.BitwiseCompare(rhs.contents_); + } + + return CompareImpl(rhs); +} + +// Does 'this' cord start/end with rhs +inline bool Cord::StartsWith(const Cord& rhs) const { + if (contents_.IsSame(rhs.contents_)) return true; + size_t rhs_size = rhs.size(); + if (size() < rhs_size) return false; + return EqualsImpl(rhs, rhs_size); +} + +inline bool Cord::StartsWith(y_absl::string_view rhs) const { + size_t rhs_size = rhs.size(); + if (size() < rhs_size) return false; + return EqualsImpl(rhs, rhs_size); +} + +inline void Cord::ChunkIterator::InitTree(cord_internal::CordRep* tree) { + if (tree->tag == cord_internal::BTREE) { + current_chunk_ = btree_reader_.Init(tree->btree()); + return; + } + + stack_of_right_children_.push_back(tree); + operator++(); +} + +inline Cord::ChunkIterator::ChunkIterator(cord_internal::CordRep* tree) + : bytes_remaining_(tree->length) { + InitTree(tree); +} + +inline Cord::ChunkIterator::ChunkIterator(const Cord* cord) + : bytes_remaining_(cord->size()) { + if (cord->contents_.is_tree()) { + InitTree(cord->contents_.as_tree()); + } else { + current_chunk_ = + y_absl::string_view(cord->contents_.data(), bytes_remaining_); + } +} + +inline Cord::ChunkIterator& Cord::ChunkIterator::AdvanceBtree() { + current_chunk_ = btree_reader_.Next(); + return *this; +} + +inline void Cord::ChunkIterator::AdvanceBytesBtree(size_t n) { + assert(n >= current_chunk_.size()); + bytes_remaining_ -= n; + if (bytes_remaining_) { + if (n == current_chunk_.size()) { + current_chunk_ = btree_reader_.Next(); + } else { + size_t offset = btree_reader_.length() - bytes_remaining_; + current_chunk_ = btree_reader_.Seek(offset); + } + } else { + current_chunk_ = {}; + } +} + +inline Cord::ChunkIterator& Cord::ChunkIterator::operator++() { + ABSL_HARDENING_ASSERT(bytes_remaining_ > 0 && + "Attempted to iterate past `end()`"); + assert(bytes_remaining_ >= current_chunk_.size()); + bytes_remaining_ -= current_chunk_.size(); + if (bytes_remaining_ > 0) { + return btree_reader_ ? AdvanceBtree() : AdvanceStack(); + } else { + current_chunk_ = {}; + } + return *this; +} + +inline Cord::ChunkIterator Cord::ChunkIterator::operator++(int) { + ChunkIterator tmp(*this); + operator++(); + return tmp; +} + +inline bool Cord::ChunkIterator::operator==(const ChunkIterator& other) const { + return bytes_remaining_ == other.bytes_remaining_; +} + +inline bool Cord::ChunkIterator::operator!=(const ChunkIterator& other) const { + return !(*this == other); +} + +inline Cord::ChunkIterator::reference Cord::ChunkIterator::operator*() const { + ABSL_HARDENING_ASSERT(bytes_remaining_ != 0); + return current_chunk_; +} + +inline Cord::ChunkIterator::pointer Cord::ChunkIterator::operator->() const { + ABSL_HARDENING_ASSERT(bytes_remaining_ != 0); + return ¤t_chunk_; +} + +inline void Cord::ChunkIterator::RemoveChunkPrefix(size_t n) { + assert(n < current_chunk_.size()); + current_chunk_.remove_prefix(n); + bytes_remaining_ -= n; +} + +inline void Cord::ChunkIterator::AdvanceBytes(size_t n) { + assert(bytes_remaining_ >= n); + if (ABSL_PREDICT_TRUE(n < current_chunk_.size())) { + RemoveChunkPrefix(n); + } else if (n != 0) { + btree_reader_ ? AdvanceBytesBtree(n) : AdvanceBytesSlowPath(n); + } +} + +inline Cord::ChunkIterator Cord::chunk_begin() const { + return ChunkIterator(this); +} + +inline Cord::ChunkIterator Cord::chunk_end() const { return ChunkIterator(); } + +inline Cord::ChunkIterator Cord::ChunkRange::begin() const { + return cord_->chunk_begin(); +} + +inline Cord::ChunkIterator Cord::ChunkRange::end() const { + return cord_->chunk_end(); +} + +inline Cord::ChunkRange Cord::Chunks() const { return ChunkRange(this); } + +inline Cord::CharIterator& Cord::CharIterator::operator++() { + if (ABSL_PREDICT_TRUE(chunk_iterator_->size() > 1)) { + chunk_iterator_.RemoveChunkPrefix(1); + } else { + ++chunk_iterator_; + } + return *this; +} + +inline Cord::CharIterator Cord::CharIterator::operator++(int) { + CharIterator tmp(*this); + operator++(); + return tmp; +} + +inline bool Cord::CharIterator::operator==(const CharIterator& other) const { + return chunk_iterator_ == other.chunk_iterator_; +} + +inline bool Cord::CharIterator::operator!=(const CharIterator& other) const { + return !(*this == other); +} + +inline Cord::CharIterator::reference Cord::CharIterator::operator*() const { + return *chunk_iterator_->data(); +} + +inline Cord::CharIterator::pointer Cord::CharIterator::operator->() const { + return chunk_iterator_->data(); +} + +inline Cord Cord::AdvanceAndRead(CharIterator* it, size_t n_bytes) { + assert(it != nullptr); + return it->chunk_iterator_.AdvanceAndReadBytes(n_bytes); +} + +inline void Cord::Advance(CharIterator* it, size_t n_bytes) { + assert(it != nullptr); + it->chunk_iterator_.AdvanceBytes(n_bytes); +} + +inline y_absl::string_view Cord::ChunkRemaining(const CharIterator& it) { + return *it.chunk_iterator_; +} + +inline Cord::CharIterator Cord::char_begin() const { + return CharIterator(this); +} + +inline Cord::CharIterator Cord::char_end() const { return CharIterator(); } + +inline Cord::CharIterator Cord::CharRange::begin() const { + return cord_->char_begin(); +} + +inline Cord::CharIterator Cord::CharRange::end() const { + return cord_->char_end(); +} + +inline Cord::CharRange Cord::Chars() const { return CharRange(this); } + +inline void Cord::ForEachChunk( + y_absl::FunctionRef<void(y_absl::string_view)> callback) const { + y_absl::cord_internal::CordRep* rep = contents_.tree(); + if (rep == nullptr) { + callback(y_absl::string_view(contents_.data(), contents_.size())); + } else { + return ForEachChunkAux(rep, callback); + } +} + +// Nonmember Cord-to-Cord relational operarators. +inline bool operator==(const Cord& lhs, const Cord& rhs) { + if (lhs.contents_.IsSame(rhs.contents_)) return true; + size_t rhs_size = rhs.size(); + if (lhs.size() != rhs_size) return false; + return lhs.EqualsImpl(rhs, rhs_size); +} + +inline bool operator!=(const Cord& x, const Cord& y) { return !(x == y); } +inline bool operator<(const Cord& x, const Cord& y) { return x.Compare(y) < 0; } +inline bool operator>(const Cord& x, const Cord& y) { return x.Compare(y) > 0; } +inline bool operator<=(const Cord& x, const Cord& y) { + return x.Compare(y) <= 0; +} +inline bool operator>=(const Cord& x, const Cord& y) { + return x.Compare(y) >= 0; +} + +// Nonmember Cord-to-y_absl::string_view relational operators. +// +// Due to implicit conversions, these also enable comparisons of Cord with +// with TString, ::string, and const char*. +inline bool operator==(const Cord& lhs, y_absl::string_view rhs) { + size_t lhs_size = lhs.size(); + size_t rhs_size = rhs.size(); + if (lhs_size != rhs_size) return false; + return lhs.EqualsImpl(rhs, rhs_size); +} + +inline bool operator==(y_absl::string_view x, const Cord& y) { return y == x; } +inline bool operator!=(const Cord& x, y_absl::string_view y) { return !(x == y); } +inline bool operator!=(y_absl::string_view x, const Cord& y) { return !(x == y); } +inline bool operator<(const Cord& x, y_absl::string_view y) { + return x.Compare(y) < 0; +} +inline bool operator<(y_absl::string_view x, const Cord& y) { + return y.Compare(x) > 0; +} +inline bool operator>(const Cord& x, y_absl::string_view y) { return y < x; } +inline bool operator>(y_absl::string_view x, const Cord& y) { return y < x; } +inline bool operator<=(const Cord& x, y_absl::string_view y) { return !(y < x); } +inline bool operator<=(y_absl::string_view x, const Cord& y) { return !(y < x); } +inline bool operator>=(const Cord& x, y_absl::string_view y) { return !(x < y); } +inline bool operator>=(y_absl::string_view x, const Cord& y) { return !(x < y); } + +// Some internals exposed to test code. +namespace strings_internal { +class CordTestAccess { + public: + static size_t FlatOverhead(); + static size_t MaxFlatLength(); + static size_t SizeofCordRepConcat(); + static size_t SizeofCordRepExternal(); + static size_t SizeofCordRepSubstring(); + static size_t FlatTagToLength(uint8_t tag); + static uint8_t LengthToTag(size_t s); +}; +} // namespace strings_internal +ABSL_NAMESPACE_END +} // namespace y_absl + +#endif // ABSL_STRINGS_CORD_H_ diff --git a/contrib/restricted/abseil-cpp-tstring/y_absl/strings/cord/ya.make b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/cord/ya.make new file mode 100644 index 0000000000..8f3ec1f20f --- /dev/null +++ b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/cord/ya.make @@ -0,0 +1,58 @@ +# Generated by devtools/yamaker. + +LIBRARY() + +WITHOUT_LICENSE_TEXTS() + +OWNER( + somov + g:cpp-contrib +) + +LICENSE(Apache-2.0) + +PEERDIR( + contrib/restricted/abseil-cpp-tstring/y_absl/base + contrib/restricted/abseil-cpp-tstring/y_absl/base/internal/low_level_alloc + contrib/restricted/abseil-cpp-tstring/y_absl/base/internal/raw_logging + contrib/restricted/abseil-cpp-tstring/y_absl/base/internal/spinlock_wait + contrib/restricted/abseil-cpp-tstring/y_absl/base/internal/throw_delegate + contrib/restricted/abseil-cpp-tstring/y_absl/base/log_severity + contrib/restricted/abseil-cpp-tstring/y_absl/debugging + contrib/restricted/abseil-cpp-tstring/y_absl/debugging/stacktrace + contrib/restricted/abseil-cpp-tstring/y_absl/debugging/symbolize + contrib/restricted/abseil-cpp-tstring/y_absl/demangle + contrib/restricted/abseil-cpp-tstring/y_absl/numeric + contrib/restricted/abseil-cpp-tstring/y_absl/profiling/internal/exponential_biased + contrib/restricted/abseil-cpp-tstring/y_absl/strings + contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/absl_cord_internal + contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/absl_strings_internal + contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/cordz_functions + contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/cordz_handle + contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/cordz_info + contrib/restricted/abseil-cpp-tstring/y_absl/synchronization + contrib/restricted/abseil-cpp-tstring/y_absl/synchronization/internal + contrib/restricted/abseil-cpp-tstring/y_absl/time + contrib/restricted/abseil-cpp-tstring/y_absl/time/civil_time + contrib/restricted/abseil-cpp-tstring/y_absl/time/time_zone + contrib/restricted/abseil-cpp-tstring/y_absl/types/bad_optional_access + contrib/restricted/abseil-cpp-tstring/y_absl/algorithm + contrib/restricted/abseil-cpp-tstring/y_absl/container + contrib/restricted/abseil-cpp-tstring/y_absl/functional + contrib/restricted/abseil-cpp-tstring/y_absl/types + contrib/restricted/abseil-cpp-tstring/y_absl/utility +) + +ADDINCL( + GLOBAL contrib/restricted/abseil-cpp-tstring +) + +NO_COMPILER_WARNINGS() + +SRCDIR(contrib/restricted/abseil-cpp-tstring/y_absl/strings) + +SRCS( + cord.cc +) + +END() diff --git a/contrib/restricted/abseil-cpp-tstring/y_absl/strings/cord_test_helpers.h b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/cord_test_helpers.h new file mode 100644 index 0000000000..8dd7c05751 --- /dev/null +++ b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/cord_test_helpers.h @@ -0,0 +1,122 @@ +// +// Copyright 2018 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +#ifndef ABSL_STRINGS_CORD_TEST_HELPERS_H_ +#define ABSL_STRINGS_CORD_TEST_HELPERS_H_ + +#include <cstdint> +#include <iostream> +#include <util/generic/string.h> + +#include "y_absl/base/config.h" +#include "y_absl/strings/cord.h" +#include "y_absl/strings/internal/cord_internal.h" +#include "y_absl/strings/string_view.h" + +namespace y_absl { +ABSL_NAMESPACE_BEGIN + +// Cord sizes relevant for testing +enum class TestCordSize { + // An empty value + kEmpty = 0, + + // An inlined string value + kInlined = cord_internal::kMaxInline / 2 + 1, + + // 'Well known' SSO lengths (excluding terminating zero). + // libstdcxx has a maximum SSO of 15, libc++ has a maximum SSO of 22. + kStringSso1 = 15, + kStringSso2 = 22, + + // A string value which is too large to fit in inlined data, but small enough + // such that Cord prefers copying the value if possible, i.e.: not stealing + // TString inputs, or referencing existing CordReps on Append, etc. + kSmall = cord_internal::kMaxBytesToCopy / 2 + 1, + + // A string value large enough that Cord prefers to reference or steal from + // existing inputs rather than copying contents of the input. + kMedium = cord_internal::kMaxFlatLength / 2 + 1, + + // A string value large enough to cause it to be stored in mutliple flats. + kLarge = cord_internal::kMaxFlatLength * 4 +}; + +// To string helper +inline y_absl::string_view ToString(TestCordSize size) { + switch (size) { + case TestCordSize::kEmpty: + return "Empty"; + case TestCordSize::kInlined: + return "Inlined"; + case TestCordSize::kSmall: + return "Small"; + case TestCordSize::kStringSso1: + return "StringSso1"; + case TestCordSize::kStringSso2: + return "StringSso2"; + case TestCordSize::kMedium: + return "Medium"; + case TestCordSize::kLarge: + return "Large"; + } + return "???"; +} + +// Returns the length matching the specified size +inline size_t Length(TestCordSize size) { return static_cast<size_t>(size); } + +// Stream output helper +inline std::ostream& operator<<(std::ostream& stream, TestCordSize size) { + return stream << ToString(size); +} + +// Creates a multi-segment Cord from an iterable container of strings. The +// resulting Cord is guaranteed to have one segment for every string in the +// container. This allows code to be unit tested with multi-segment Cord +// inputs. +// +// Example: +// +// y_absl::Cord c = y_absl::MakeFragmentedCord({"A ", "fragmented ", "Cord"}); +// EXPECT_FALSE(c.GetFlat(&unused)); +// +// The mechanism by which this Cord is created is an implementation detail. Any +// implementation that produces a multi-segment Cord may produce a flat Cord in +// the future as new optimizations are added to the Cord class. +// MakeFragmentedCord will, however, always be updated to return a multi-segment +// Cord. +template <typename Container> +Cord MakeFragmentedCord(const Container& c) { + Cord result; + for (const auto& s : c) { + auto* external = new TString(s); + Cord tmp = y_absl::MakeCordFromExternal( + *external, [external](y_absl::string_view) { delete external; }); + tmp.Prepend(result); + result = tmp; + } + return result; +} + +inline Cord MakeFragmentedCord(std::initializer_list<y_absl::string_view> list) { + return MakeFragmentedCord<std::initializer_list<y_absl::string_view>>(list); +} + +ABSL_NAMESPACE_END +} // namespace y_absl + +#endif // ABSL_STRINGS_CORD_TEST_HELPERS_H_ diff --git a/contrib/restricted/abseil-cpp-tstring/y_absl/strings/cordz_test_helpers.h b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/cordz_test_helpers.h new file mode 100644 index 0000000000..bbb952f073 --- /dev/null +++ b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/cordz_test_helpers.h @@ -0,0 +1,151 @@ +// Copyright 2021 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ABSL_STRINGS_CORDZ_TEST_HELPERS_H_ +#define ABSL_STRINGS_CORDZ_TEST_HELPERS_H_ + +#include <utility> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "y_absl/base/config.h" +#include "y_absl/base/macros.h" +#include "y_absl/strings/cord.h" +#include "y_absl/strings/internal/cord_internal.h" +#include "y_absl/strings/internal/cordz_info.h" +#include "y_absl/strings/internal/cordz_sample_token.h" +#include "y_absl/strings/internal/cordz_statistics.h" +#include "y_absl/strings/internal/cordz_update_tracker.h" +#include "y_absl/strings/str_cat.h" + +namespace y_absl { +ABSL_NAMESPACE_BEGIN + +// Returns the CordzInfo for the cord, or nullptr if the cord is not sampled. +inline const cord_internal::CordzInfo* GetCordzInfoForTesting( + const Cord& cord) { + if (!cord.contents_.is_tree()) return nullptr; + return cord.contents_.cordz_info(); +} + +// Returns true if the provided cordz_info is in the list of sampled cords. +inline bool CordzInfoIsListed(const cord_internal::CordzInfo* cordz_info, + cord_internal::CordzSampleToken token = {}) { + for (const cord_internal::CordzInfo& info : token) { + if (cordz_info == &info) return true; + } + return false; +} + +// Matcher on Cord that verifies all of: +// - the cord is sampled +// - the CordzInfo of the cord is listed / discoverable. +// - the reported CordzStatistics match the cord's actual properties +// - the cord has an (initial) UpdateTracker count of 1 for `method` +MATCHER_P(HasValidCordzInfoOf, method, "CordzInfo matches cord") { + const cord_internal::CordzInfo* cord_info = GetCordzInfoForTesting(arg); + if (cord_info == nullptr) { + *result_listener << "cord is not sampled"; + return false; + } + if (!CordzInfoIsListed(cord_info)) { + *result_listener << "cord is sampled, but not listed"; + return false; + } + cord_internal::CordzStatistics stat = cord_info->GetCordzStatistics(); + if (stat.size != arg.size()) { + *result_listener << "cordz size " << stat.size + << " does not match cord size " << arg.size(); + return false; + } + if (stat.update_tracker.Value(method) != 1) { + *result_listener << "Expected method count 1 for " << method << ", found " + << stat.update_tracker.Value(method); + return false; + } + return true; +} + +// Matcher on Cord that verifies that the cord is sampled and that the CordzInfo +// update tracker has 'method' with a call count of 'n' +MATCHER_P2(CordzMethodCountEq, method, n, + y_absl::StrCat("CordzInfo method count equals ", n)) { + const cord_internal::CordzInfo* cord_info = GetCordzInfoForTesting(arg); + if (cord_info == nullptr) { + *result_listener << "cord is not sampled"; + return false; + } + cord_internal::CordzStatistics stat = cord_info->GetCordzStatistics(); + if (stat.update_tracker.Value(method) != n) { + *result_listener << "Expected method count " << n << " for " << method + << ", found " << stat.update_tracker.Value(method); + return false; + } + return true; +} + +// Cordz will only update with a new rate once the previously scheduled event +// has fired. When we disable Cordz, a long delay takes place where we won't +// consider profiling new Cords. CordzSampleIntervalHelper will burn through +// that interval and allow for testing that assumes that the average sampling +// interval is a particular value. +class CordzSamplingIntervalHelper { + public: + explicit CordzSamplingIntervalHelper(int32_t interval) + : orig_mean_interval_(y_absl::cord_internal::get_cordz_mean_interval()) { + y_absl::cord_internal::set_cordz_mean_interval(interval); + y_absl::cord_internal::cordz_set_next_sample_for_testing(interval); + } + + ~CordzSamplingIntervalHelper() { + y_absl::cord_internal::set_cordz_mean_interval(orig_mean_interval_); + y_absl::cord_internal::cordz_set_next_sample_for_testing(orig_mean_interval_); + } + + private: + int32_t orig_mean_interval_; +}; + +// Wrapper struct managing a small CordRep `rep` +struct TestCordRep { + cord_internal::CordRepFlat* rep; + + TestCordRep() { + rep = cord_internal::CordRepFlat::New(100); + rep->length = 100; + memset(rep->Data(), 1, 100); + } + ~TestCordRep() { cord_internal::CordRep::Unref(rep); } +}; + +// Wrapper struct managing a small CordRep `rep`, and +// an InlineData `data` initialized with that CordRep. +struct TestCordData { + TestCordRep rep; + cord_internal::InlineData data{rep.rep}; +}; + +// Creates a Cord that is not sampled +template <typename... Args> +Cord UnsampledCord(Args... args) { + CordzSamplingIntervalHelper never(9999); + Cord cord(std::forward<Args>(args)...); + ABSL_ASSERT(GetCordzInfoForTesting(cord) == nullptr); + return cord; +} + +ABSL_NAMESPACE_END +} // namespace y_absl + +#endif // ABSL_STRINGS_CORDZ_TEST_HELPERS_H_ diff --git a/contrib/restricted/abseil-cpp-tstring/y_absl/strings/escaping.cc b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/escaping.cc new file mode 100644 index 0000000000..8c82740608 --- /dev/null +++ b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/escaping.cc @@ -0,0 +1,949 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "y_absl/strings/escaping.h" + +#include <algorithm> +#include <cassert> +#include <cstdint> +#include <cstring> +#include <iterator> +#include <limits> +#include <util/generic/string.h> + +#include "y_absl/base/internal/endian.h" +#include "y_absl/base/internal/raw_logging.h" +#include "y_absl/base/internal/unaligned_access.h" +#include "y_absl/strings/internal/char_map.h" +#include "y_absl/strings/internal/escaping.h" +#include "y_absl/strings/internal/resize_uninitialized.h" +#include "y_absl/strings/internal/utf8.h" +#include "y_absl/strings/str_cat.h" +#include "y_absl/strings/str_join.h" +#include "y_absl/strings/string_view.h" + +namespace y_absl { +ABSL_NAMESPACE_BEGIN +namespace { + +// These are used for the leave_nulls_escaped argument to CUnescapeInternal(). +constexpr bool kUnescapeNulls = false; + +inline bool is_octal_digit(char c) { return ('0' <= c) && (c <= '7'); } + +inline int hex_digit_to_int(char c) { + static_assert('0' == 0x30 && 'A' == 0x41 && 'a' == 0x61, + "Character set must be ASCII."); + assert(y_absl::ascii_isxdigit(c)); + int x = static_cast<unsigned char>(c); + if (x > '9') { + x += 9; + } + return x & 0xf; +} + +inline bool IsSurrogate(char32_t c, y_absl::string_view src, TString* error) { + if (c >= 0xD800 && c <= 0xDFFF) { + if (error) { + *error = y_absl::StrCat("invalid surrogate character (0xD800-DFFF): \\", + src); + } + return true; + } + return false; +} + +// ---------------------------------------------------------------------- +// CUnescapeInternal() +// Implements both CUnescape() and CUnescapeForNullTerminatedString(). +// +// Unescapes C escape sequences and is the reverse of CEscape(). +// +// If 'source' is valid, stores the unescaped string and its size in +// 'dest' and 'dest_len' respectively, and returns true. Otherwise +// returns false and optionally stores the error description in +// 'error'. Set 'error' to nullptr to disable error reporting. +// +// 'dest' should point to a buffer that is at least as big as 'source'. +// 'source' and 'dest' may be the same. +// +// NOTE: any changes to this function must also be reflected in the older +// UnescapeCEscapeSequences(). +// ---------------------------------------------------------------------- +bool CUnescapeInternal(y_absl::string_view source, bool leave_nulls_escaped, + char* dest, ptrdiff_t* dest_len, TString* error) { + char* d = dest; + const char* p = source.data(); + const char* end = p + source.size(); + const char* last_byte = end - 1; + + // Small optimization for case where source = dest and there's no escaping + while (p == d && p < end && *p != '\\') p++, d++; + + while (p < end) { + if (*p != '\\') { + *d++ = *p++; + } else { + if (++p > last_byte) { // skip past the '\\' + if (error) *error = "String cannot end with \\"; + return false; + } + switch (*p) { + case 'a': *d++ = '\a'; break; + case 'b': *d++ = '\b'; break; + case 'f': *d++ = '\f'; break; + case 'n': *d++ = '\n'; break; + case 'r': *d++ = '\r'; break; + case 't': *d++ = '\t'; break; + case 'v': *d++ = '\v'; break; + case '\\': *d++ = '\\'; break; + case '?': *d++ = '\?'; break; // \? Who knew? + case '\'': *d++ = '\''; break; + case '"': *d++ = '\"'; break; + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': { + // octal digit: 1 to 3 digits + const char* octal_start = p; + unsigned int ch = *p - '0'; + if (p < last_byte && is_octal_digit(p[1])) ch = ch * 8 + *++p - '0'; + if (p < last_byte && is_octal_digit(p[1])) + ch = ch * 8 + *++p - '0'; // now points at last digit + if (ch > 0xff) { + if (error) { + *error = "Value of \\" + + TString(octal_start, p + 1 - octal_start) + + " exceeds 0xff"; + } + return false; + } + if ((ch == 0) && leave_nulls_escaped) { + // Copy the escape sequence for the null character + const ptrdiff_t octal_size = p + 1 - octal_start; + *d++ = '\\'; + memmove(d, octal_start, octal_size); + d += octal_size; + break; + } + *d++ = ch; + break; + } + case 'x': + case 'X': { + if (p >= last_byte) { + if (error) *error = "String cannot end with \\x"; + return false; + } else if (!y_absl::ascii_isxdigit(p[1])) { + if (error) *error = "\\x cannot be followed by a non-hex digit"; + return false; + } + unsigned int ch = 0; + const char* hex_start = p; + while (p < last_byte && y_absl::ascii_isxdigit(p[1])) + // Arbitrarily many hex digits + ch = (ch << 4) + hex_digit_to_int(*++p); + if (ch > 0xFF) { + if (error) { + *error = "Value of \\" + + TString(hex_start, p + 1 - hex_start) + + " exceeds 0xff"; + } + return false; + } + if ((ch == 0) && leave_nulls_escaped) { + // Copy the escape sequence for the null character + const ptrdiff_t hex_size = p + 1 - hex_start; + *d++ = '\\'; + memmove(d, hex_start, hex_size); + d += hex_size; + break; + } + *d++ = ch; + break; + } + case 'u': { + // \uhhhh => convert 4 hex digits to UTF-8 + char32_t rune = 0; + const char* hex_start = p; + if (p + 4 >= end) { + if (error) { + *error = "\\u must be followed by 4 hex digits: \\" + + TString(hex_start, p + 1 - hex_start); + } + return false; + } + for (int i = 0; i < 4; ++i) { + // Look one char ahead. + if (y_absl::ascii_isxdigit(p[1])) { + rune = (rune << 4) + hex_digit_to_int(*++p); // Advance p. + } else { + if (error) { + *error = "\\u must be followed by 4 hex digits: \\" + + TString(hex_start, p + 1 - hex_start); + } + return false; + } + } + if ((rune == 0) && leave_nulls_escaped) { + // Copy the escape sequence for the null character + *d++ = '\\'; + memmove(d, hex_start, 5); // u0000 + d += 5; + break; + } + if (IsSurrogate(rune, y_absl::string_view(hex_start, 5), error)) { + return false; + } + d += strings_internal::EncodeUTF8Char(d, rune); + break; + } + case 'U': { + // \Uhhhhhhhh => convert 8 hex digits to UTF-8 + char32_t rune = 0; + const char* hex_start = p; + if (p + 8 >= end) { + if (error) { + *error = "\\U must be followed by 8 hex digits: \\" + + TString(hex_start, p + 1 - hex_start); + } + return false; + } + for (int i = 0; i < 8; ++i) { + // Look one char ahead. + if (y_absl::ascii_isxdigit(p[1])) { + // Don't change rune until we're sure this + // is within the Unicode limit, but do advance p. + uint32_t newrune = (rune << 4) + hex_digit_to_int(*++p); + if (newrune > 0x10FFFF) { + if (error) { + *error = "Value of \\" + + TString(hex_start, p + 1 - hex_start) + + " exceeds Unicode limit (0x10FFFF)"; + } + return false; + } else { + rune = newrune; + } + } else { + if (error) { + *error = "\\U must be followed by 8 hex digits: \\" + + TString(hex_start, p + 1 - hex_start); + } + return false; + } + } + if ((rune == 0) && leave_nulls_escaped) { + // Copy the escape sequence for the null character + *d++ = '\\'; + memmove(d, hex_start, 9); // U00000000 + d += 9; + break; + } + if (IsSurrogate(rune, y_absl::string_view(hex_start, 9), error)) { + return false; + } + d += strings_internal::EncodeUTF8Char(d, rune); + break; + } + default: { + if (error) *error = TString("Unknown escape sequence: \\") + *p; + return false; + } + } + p++; // read past letter we escaped + } + } + *dest_len = d - dest; + return true; +} + +// ---------------------------------------------------------------------- +// CUnescapeInternal() +// +// Same as above but uses a TString for output. 'source' and 'dest' +// may be the same. +// ---------------------------------------------------------------------- +bool CUnescapeInternal(y_absl::string_view source, bool leave_nulls_escaped, + TString* dest, TString* error) { + strings_internal::STLStringResizeUninitialized(dest, source.size()); + + ptrdiff_t dest_size; + if (!CUnescapeInternal(source, + leave_nulls_escaped, + &(*dest)[0], + &dest_size, + error)) { + return false; + } + dest->erase(dest_size); + return true; +} + +// ---------------------------------------------------------------------- +// CEscape() +// CHexEscape() +// Utf8SafeCEscape() +// Utf8SafeCHexEscape() +// Escapes 'src' using C-style escape sequences. This is useful for +// preparing query flags. The 'Hex' version uses hexadecimal rather than +// octal sequences. The 'Utf8Safe' version does not touch UTF-8 bytes. +// +// Escaped chars: \n, \r, \t, ", ', \, and !y_absl::ascii_isprint(). +// ---------------------------------------------------------------------- +TString CEscapeInternal(y_absl::string_view src, bool use_hex, + bool utf8_safe) { + TString dest; + bool last_hex_escape = false; // true if last output char was \xNN. + + for (unsigned char c : src) { + bool is_hex_escape = false; + switch (c) { + case '\n': dest.append("\\" "n"); break; + case '\r': dest.append("\\" "r"); break; + case '\t': dest.append("\\" "t"); break; + case '\"': dest.append("\\" "\""); break; + case '\'': dest.append("\\" "'"); break; + case '\\': dest.append("\\" "\\"); break; + default: + // Note that if we emit \xNN and the src character after that is a hex + // digit then that digit must be escaped too to prevent it being + // interpreted as part of the character code by C. + if ((!utf8_safe || c < 0x80) && + (!y_absl::ascii_isprint(c) || + (last_hex_escape && y_absl::ascii_isxdigit(c)))) { + if (use_hex) { + dest.append("\\" "x"); + dest.push_back(numbers_internal::kHexChar[c / 16]); + dest.push_back(numbers_internal::kHexChar[c % 16]); + is_hex_escape = true; + } else { + dest.append("\\"); + dest.push_back(numbers_internal::kHexChar[c / 64]); + dest.push_back(numbers_internal::kHexChar[(c % 64) / 8]); + dest.push_back(numbers_internal::kHexChar[c % 8]); + } + } else { + dest.push_back(c); + break; + } + } + last_hex_escape = is_hex_escape; + } + + return dest; +} + +/* clang-format off */ +constexpr char c_escaped_len[256] = { + 4, 4, 4, 4, 4, 4, 4, 4, 4, 2, 2, 4, 4, 2, 4, 4, // \t, \n, \r + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 1, 1, 2, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, // ", ' + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // '0'..'9' + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 'A'..'O' + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, // 'P'..'Z', '\' + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 'a'..'o' + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4, // 'p'..'z', DEL + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, +}; +/* clang-format on */ + +// Calculates the length of the C-style escaped version of 'src'. +// Assumes that non-printable characters are escaped using octal sequences, and +// that UTF-8 bytes are not handled specially. +inline size_t CEscapedLength(y_absl::string_view src) { + size_t escaped_len = 0; + for (unsigned char c : src) escaped_len += c_escaped_len[c]; + return escaped_len; +} + +void CEscapeAndAppendInternal(y_absl::string_view src, TString* dest) { + size_t escaped_len = CEscapedLength(src); + if (escaped_len == src.size()) { + dest->append(src.data(), src.size()); + return; + } + + size_t cur_dest_len = dest->size(); + strings_internal::STLStringResizeUninitialized(dest, + cur_dest_len + escaped_len); + char* append_ptr = &(*dest)[cur_dest_len]; + + for (unsigned char c : src) { + int char_len = c_escaped_len[c]; + if (char_len == 1) { + *append_ptr++ = c; + } else if (char_len == 2) { + switch (c) { + case '\n': + *append_ptr++ = '\\'; + *append_ptr++ = 'n'; + break; + case '\r': + *append_ptr++ = '\\'; + *append_ptr++ = 'r'; + break; + case '\t': + *append_ptr++ = '\\'; + *append_ptr++ = 't'; + break; + case '\"': + *append_ptr++ = '\\'; + *append_ptr++ = '\"'; + break; + case '\'': + *append_ptr++ = '\\'; + *append_ptr++ = '\''; + break; + case '\\': + *append_ptr++ = '\\'; + *append_ptr++ = '\\'; + break; + } + } else { + *append_ptr++ = '\\'; + *append_ptr++ = '0' + c / 64; + *append_ptr++ = '0' + (c % 64) / 8; + *append_ptr++ = '0' + c % 8; + } + } +} + +bool Base64UnescapeInternal(const char* src_param, size_t szsrc, char* dest, + size_t szdest, const signed char* unbase64, + size_t* len) { + static const char kPad64Equals = '='; + static const char kPad64Dot = '.'; + + size_t destidx = 0; + int decode = 0; + int state = 0; + unsigned int ch = 0; + unsigned int temp = 0; + + // If "char" is signed by default, using *src as an array index results in + // accessing negative array elements. Treat the input as a pointer to + // unsigned char to avoid this. + const unsigned char* src = reinterpret_cast<const unsigned char*>(src_param); + + // The GET_INPUT macro gets the next input character, skipping + // over any whitespace, and stopping when we reach the end of the + // string or when we read any non-data character. The arguments are + // an arbitrary identifier (used as a label for goto) and the number + // of data bytes that must remain in the input to avoid aborting the + // loop. +#define GET_INPUT(label, remain) \ + label: \ + --szsrc; \ + ch = *src++; \ + decode = unbase64[ch]; \ + if (decode < 0) { \ + if (y_absl::ascii_isspace(ch) && szsrc >= remain) goto label; \ + state = 4 - remain; \ + break; \ + } + + // if dest is null, we're just checking to see if it's legal input + // rather than producing output. (I suspect this could just be done + // with a regexp...). We duplicate the loop so this test can be + // outside it instead of in every iteration. + + if (dest) { + // This loop consumes 4 input bytes and produces 3 output bytes + // per iteration. We can't know at the start that there is enough + // data left in the string for a full iteration, so the loop may + // break out in the middle; if so 'state' will be set to the + // number of input bytes read. + + while (szsrc >= 4) { + // We'll start by optimistically assuming that the next four + // bytes of the string (src[0..3]) are four good data bytes + // (that is, no nulls, whitespace, padding chars, or illegal + // chars). We need to test src[0..2] for nulls individually + // before constructing temp to preserve the property that we + // never read past a null in the string (no matter how long + // szsrc claims the string is). + + if (!src[0] || !src[1] || !src[2] || + ((temp = ((unsigned(unbase64[src[0]]) << 18) | + (unsigned(unbase64[src[1]]) << 12) | + (unsigned(unbase64[src[2]]) << 6) | + (unsigned(unbase64[src[3]])))) & + 0x80000000)) { + // Iff any of those four characters was bad (null, illegal, + // whitespace, padding), then temp's high bit will be set + // (because unbase64[] is -1 for all bad characters). + // + // We'll back up and resort to the slower decoder, which knows + // how to handle those cases. + + GET_INPUT(first, 4); + temp = decode; + GET_INPUT(second, 3); + temp = (temp << 6) | decode; + GET_INPUT(third, 2); + temp = (temp << 6) | decode; + GET_INPUT(fourth, 1); + temp = (temp << 6) | decode; + } else { + // We really did have four good data bytes, so advance four + // characters in the string. + + szsrc -= 4; + src += 4; + } + + // temp has 24 bits of input, so write that out as three bytes. + + if (destidx + 3 > szdest) return false; + dest[destidx + 2] = temp; + temp >>= 8; + dest[destidx + 1] = temp; + temp >>= 8; + dest[destidx] = temp; + destidx += 3; + } + } else { + while (szsrc >= 4) { + if (!src[0] || !src[1] || !src[2] || + ((temp = ((unsigned(unbase64[src[0]]) << 18) | + (unsigned(unbase64[src[1]]) << 12) | + (unsigned(unbase64[src[2]]) << 6) | + (unsigned(unbase64[src[3]])))) & + 0x80000000)) { + GET_INPUT(first_no_dest, 4); + GET_INPUT(second_no_dest, 3); + GET_INPUT(third_no_dest, 2); + GET_INPUT(fourth_no_dest, 1); + } else { + szsrc -= 4; + src += 4; + } + destidx += 3; + } + } + +#undef GET_INPUT + + // if the loop terminated because we read a bad character, return + // now. + if (decode < 0 && ch != kPad64Equals && ch != kPad64Dot && + !y_absl::ascii_isspace(ch)) + return false; + + if (ch == kPad64Equals || ch == kPad64Dot) { + // if we stopped by hitting an '=' or '.', un-read that character -- we'll + // look at it again when we count to check for the proper number of + // equals signs at the end. + ++szsrc; + --src; + } else { + // This loop consumes 1 input byte per iteration. It's used to + // clean up the 0-3 input bytes remaining when the first, faster + // loop finishes. 'temp' contains the data from 'state' input + // characters read by the first loop. + while (szsrc > 0) { + --szsrc; + ch = *src++; + decode = unbase64[ch]; + if (decode < 0) { + if (y_absl::ascii_isspace(ch)) { + continue; + } else if (ch == kPad64Equals || ch == kPad64Dot) { + // back up one character; we'll read it again when we check + // for the correct number of pad characters at the end. + ++szsrc; + --src; + break; + } else { + return false; + } + } + + // Each input character gives us six bits of output. + temp = (temp << 6) | decode; + ++state; + if (state == 4) { + // If we've accumulated 24 bits of output, write that out as + // three bytes. + if (dest) { + if (destidx + 3 > szdest) return false; + dest[destidx + 2] = temp; + temp >>= 8; + dest[destidx + 1] = temp; + temp >>= 8; + dest[destidx] = temp; + } + destidx += 3; + state = 0; + temp = 0; + } + } + } + + // Process the leftover data contained in 'temp' at the end of the input. + int expected_equals = 0; + switch (state) { + case 0: + // Nothing left over; output is a multiple of 3 bytes. + break; + + case 1: + // Bad input; we have 6 bits left over. + return false; + + case 2: + // Produce one more output byte from the 12 input bits we have left. + if (dest) { + if (destidx + 1 > szdest) return false; + temp >>= 4; + dest[destidx] = temp; + } + ++destidx; + expected_equals = 2; + break; + + case 3: + // Produce two more output bytes from the 18 input bits we have left. + if (dest) { + if (destidx + 2 > szdest) return false; + temp >>= 2; + dest[destidx + 1] = temp; + temp >>= 8; + dest[destidx] = temp; + } + destidx += 2; + expected_equals = 1; + break; + + default: + // state should have no other values at this point. + ABSL_RAW_LOG(FATAL, "This can't happen; base64 decoder state = %d", + state); + } + + // The remainder of the string should be all whitespace, mixed with + // exactly 0 equals signs, or exactly 'expected_equals' equals + // signs. (Always accepting 0 equals signs is an Abseil extension + // not covered in the RFC, as is accepting dot as the pad character.) + + int equals = 0; + while (szsrc > 0) { + if (*src == kPad64Equals || *src == kPad64Dot) + ++equals; + else if (!y_absl::ascii_isspace(*src)) + return false; + --szsrc; + ++src; + } + + const bool ok = (equals == 0 || equals == expected_equals); + if (ok) *len = destidx; + return ok; +} + +// The arrays below were generated by the following code +// #include <sys/time.h> +// #include <stdlib.h> +// #include <string.h> +// main() +// { +// static const char Base64[] = +// "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; +// char* pos; +// int idx, i, j; +// printf(" "); +// for (i = 0; i < 255; i += 8) { +// for (j = i; j < i + 8; j++) { +// pos = strchr(Base64, j); +// if ((pos == nullptr) || (j == 0)) +// idx = -1; +// else +// idx = pos - Base64; +// if (idx == -1) +// printf(" %2d, ", idx); +// else +// printf(" %2d/*%c*/,", idx, j); +// } +// printf("\n "); +// } +// } +// +// where the value of "Base64[]" was replaced by one of the base-64 conversion +// tables from the functions below. +/* clang-format off */ +constexpr signed char kUnBase64[] = { + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, 62/*+*/, -1, -1, -1, 63/*/ */, + 52/*0*/, 53/*1*/, 54/*2*/, 55/*3*/, 56/*4*/, 57/*5*/, 58/*6*/, 59/*7*/, + 60/*8*/, 61/*9*/, -1, -1, -1, -1, -1, -1, + -1, 0/*A*/, 1/*B*/, 2/*C*/, 3/*D*/, 4/*E*/, 5/*F*/, 6/*G*/, + 07/*H*/, 8/*I*/, 9/*J*/, 10/*K*/, 11/*L*/, 12/*M*/, 13/*N*/, 14/*O*/, + 15/*P*/, 16/*Q*/, 17/*R*/, 18/*S*/, 19/*T*/, 20/*U*/, 21/*V*/, 22/*W*/, + 23/*X*/, 24/*Y*/, 25/*Z*/, -1, -1, -1, -1, -1, + -1, 26/*a*/, 27/*b*/, 28/*c*/, 29/*d*/, 30/*e*/, 31/*f*/, 32/*g*/, + 33/*h*/, 34/*i*/, 35/*j*/, 36/*k*/, 37/*l*/, 38/*m*/, 39/*n*/, 40/*o*/, + 41/*p*/, 42/*q*/, 43/*r*/, 44/*s*/, 45/*t*/, 46/*u*/, 47/*v*/, 48/*w*/, + 49/*x*/, 50/*y*/, 51/*z*/, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1 +}; + +constexpr signed char kUnWebSafeBase64[] = { + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, 62/*-*/, -1, -1, + 52/*0*/, 53/*1*/, 54/*2*/, 55/*3*/, 56/*4*/, 57/*5*/, 58/*6*/, 59/*7*/, + 60/*8*/, 61/*9*/, -1, -1, -1, -1, -1, -1, + -1, 0/*A*/, 1/*B*/, 2/*C*/, 3/*D*/, 4/*E*/, 5/*F*/, 6/*G*/, + 07/*H*/, 8/*I*/, 9/*J*/, 10/*K*/, 11/*L*/, 12/*M*/, 13/*N*/, 14/*O*/, + 15/*P*/, 16/*Q*/, 17/*R*/, 18/*S*/, 19/*T*/, 20/*U*/, 21/*V*/, 22/*W*/, + 23/*X*/, 24/*Y*/, 25/*Z*/, -1, -1, -1, -1, 63/*_*/, + -1, 26/*a*/, 27/*b*/, 28/*c*/, 29/*d*/, 30/*e*/, 31/*f*/, 32/*g*/, + 33/*h*/, 34/*i*/, 35/*j*/, 36/*k*/, 37/*l*/, 38/*m*/, 39/*n*/, 40/*o*/, + 41/*p*/, 42/*q*/, 43/*r*/, 44/*s*/, 45/*t*/, 46/*u*/, 47/*v*/, 48/*w*/, + 49/*x*/, 50/*y*/, 51/*z*/, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1 +}; +/* clang-format on */ + +constexpr char kWebSafeBase64Chars[] = + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_"; + +template <typename String> +bool Base64UnescapeInternal(const char* src, size_t slen, String* dest, + const signed char* unbase64) { + // Determine the size of the output string. Base64 encodes every 3 bytes into + // 4 characters. any leftover chars are added directly for good measure. + // This is documented in the base64 RFC: http://tools.ietf.org/html/rfc3548 + const size_t dest_len = 3 * (slen / 4) + (slen % 4); + + strings_internal::STLStringResizeUninitialized(dest, dest_len); + + // We are getting the destination buffer by getting the beginning of the + // string and converting it into a char *. + size_t len; + const bool ok = + Base64UnescapeInternal(src, slen, &(*dest)[0], dest_len, unbase64, &len); + if (!ok) { + dest->clear(); + return false; + } + + // could be shorter if there was padding + assert(len <= dest_len); + dest->erase(len); + + return true; +} + +/* clang-format off */ +constexpr char kHexValueLenient[256] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 0, 0, 0, 0, 0, // '0'..'9' + 0, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 'A'..'F' + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 'a'..'f' + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; + +/* clang-format on */ + +// This is a templated function so that T can be either a char* +// or a string. This works because we use the [] operator to access +// individual characters at a time. +template <typename T> +void HexStringToBytesInternal(const char* from, T to, ptrdiff_t num) { + for (int i = 0; i < num; i++) { + to[i] = (kHexValueLenient[from[i * 2] & 0xFF] << 4) + + (kHexValueLenient[from[i * 2 + 1] & 0xFF]); + } +} + +// This is a templated function so that T can be either a char* or a +// TString. +template <typename T> +void BytesToHexStringInternal(const unsigned char* src, T dest, ptrdiff_t num) { + auto dest_ptr = &dest[0]; + for (auto src_ptr = src; src_ptr != (src + num); ++src_ptr, dest_ptr += 2) { + const char* hex_p = &numbers_internal::kHexTable[*src_ptr * 2]; + std::copy(hex_p, hex_p + 2, dest_ptr); + } +} + +} // namespace + +// ---------------------------------------------------------------------- +// CUnescape() +// +// See CUnescapeInternal() for implementation details. +// ---------------------------------------------------------------------- +bool CUnescape(y_absl::string_view source, TString* dest, + TString* error) { + return CUnescapeInternal(source, kUnescapeNulls, dest, error); +} + +TString CEscape(y_absl::string_view src) { + TString dest; + CEscapeAndAppendInternal(src, &dest); + return dest; +} + +TString CHexEscape(y_absl::string_view src) { + return CEscapeInternal(src, true, false); +} + +TString Utf8SafeCEscape(y_absl::string_view src) { + return CEscapeInternal(src, false, true); +} + +TString Utf8SafeCHexEscape(y_absl::string_view src) { + return CEscapeInternal(src, true, true); +} + +// ---------------------------------------------------------------------- +// Base64Unescape() - base64 decoder +// Base64Escape() - base64 encoder +// WebSafeBase64Unescape() - Google's variation of base64 decoder +// WebSafeBase64Escape() - Google's variation of base64 encoder +// +// Check out +// http://tools.ietf.org/html/rfc2045 for formal description, but what we +// care about is that... +// Take the encoded stuff in groups of 4 characters and turn each +// character into a code 0 to 63 thus: +// A-Z map to 0 to 25 +// a-z map to 26 to 51 +// 0-9 map to 52 to 61 +// +(- for WebSafe) maps to 62 +// /(_ for WebSafe) maps to 63 +// There will be four numbers, all less than 64 which can be represented +// by a 6 digit binary number (aaaaaa, bbbbbb, cccccc, dddddd respectively). +// Arrange the 6 digit binary numbers into three bytes as such: +// aaaaaabb bbbbcccc ccdddddd +// Equals signs (one or two) are used at the end of the encoded block to +// indicate that the text was not an integer multiple of three bytes long. +// ---------------------------------------------------------------------- + +bool Base64Unescape(y_absl::string_view src, TString* dest) { + return Base64UnescapeInternal(src.data(), src.size(), dest, kUnBase64); +} + +bool WebSafeBase64Unescape(y_absl::string_view src, TString* dest) { + return Base64UnescapeInternal(src.data(), src.size(), dest, kUnWebSafeBase64); +} + +void Base64Escape(y_absl::string_view src, TString* dest) { + strings_internal::Base64EscapeInternal( + reinterpret_cast<const unsigned char*>(src.data()), src.size(), dest, + true, strings_internal::kBase64Chars); +} + +void WebSafeBase64Escape(y_absl::string_view src, TString* dest) { + strings_internal::Base64EscapeInternal( + reinterpret_cast<const unsigned char*>(src.data()), src.size(), dest, + false, kWebSafeBase64Chars); +} + +TString Base64Escape(y_absl::string_view src) { + TString dest; + strings_internal::Base64EscapeInternal( + reinterpret_cast<const unsigned char*>(src.data()), src.size(), &dest, + true, strings_internal::kBase64Chars); + return dest; +} + +TString WebSafeBase64Escape(y_absl::string_view src) { + TString dest; + strings_internal::Base64EscapeInternal( + reinterpret_cast<const unsigned char*>(src.data()), src.size(), &dest, + false, kWebSafeBase64Chars); + return dest; +} + +TString HexStringToBytes(y_absl::string_view from) { + TString result; + const auto num = from.size() / 2; + strings_internal::STLStringResizeUninitialized(&result, num); + y_absl::HexStringToBytesInternal<TString&>(from.data(), result, num); + return result; +} + +TString BytesToHexString(y_absl::string_view from) { + TString result; + strings_internal::STLStringResizeUninitialized(&result, 2 * from.size()); + y_absl::BytesToHexStringInternal<TString&>( + reinterpret_cast<const unsigned char*>(from.data()), result, from.size()); + return result; +} + +ABSL_NAMESPACE_END +} // namespace y_absl diff --git a/contrib/restricted/abseil-cpp-tstring/y_absl/strings/escaping.h b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/escaping.h new file mode 100644 index 0000000000..8868b87879 --- /dev/null +++ b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/escaping.h @@ -0,0 +1,164 @@ +// +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// ----------------------------------------------------------------------------- +// File: escaping.h +// ----------------------------------------------------------------------------- +// +// This header file contains string utilities involved in escaping and +// unescaping strings in various ways. + +#ifndef ABSL_STRINGS_ESCAPING_H_ +#define ABSL_STRINGS_ESCAPING_H_ + +#include <cstddef> +#include <util/generic/string.h> +#include <vector> + +#include "y_absl/base/macros.h" +#include "y_absl/strings/ascii.h" +#include "y_absl/strings/str_join.h" +#include "y_absl/strings/string_view.h" + +namespace y_absl { +ABSL_NAMESPACE_BEGIN + +// CUnescape() +// +// Unescapes a `source` string and copies it into `dest`, rewriting C-style +// escape sequences (https://en.cppreference.com/w/cpp/language/escape) into +// their proper code point equivalents, returning `true` if successful. +// +// The following unescape sequences can be handled: +// +// * ASCII escape sequences ('\n','\r','\\', etc.) to their ASCII equivalents +// * Octal escape sequences ('\nnn') to byte nnn. The unescaped value must +// resolve to a single byte or an error will occur. E.g. values greater than +// 0xff will produce an error. +// * Hexadecimal escape sequences ('\xnn') to byte nn. While an arbitrary +// number of following digits are allowed, the unescaped value must resolve +// to a single byte or an error will occur. E.g. '\x0045' is equivalent to +// '\x45', but '\x1234' will produce an error. +// * Unicode escape sequences ('\unnnn' for exactly four hex digits or +// '\Unnnnnnnn' for exactly eight hex digits, which will be encoded in +// UTF-8. (E.g., `\u2019` unescapes to the three bytes 0xE2, 0x80, and +// 0x99). +// +// If any errors are encountered, this function returns `false`, leaving the +// `dest` output parameter in an unspecified state, and stores the first +// encountered error in `error`. To disable error reporting, set `error` to +// `nullptr` or use the overload with no error reporting below. +// +// Example: +// +// TString s = "foo\\rbar\\nbaz\\t"; +// TString unescaped_s; +// if (!y_absl::CUnescape(s, &unescaped_s) { +// ... +// } +// EXPECT_EQ(unescaped_s, "foo\rbar\nbaz\t"); +bool CUnescape(y_absl::string_view source, TString* dest, TString* error); + +// Overload of `CUnescape()` with no error reporting. +inline bool CUnescape(y_absl::string_view source, TString* dest) { + return CUnescape(source, dest, nullptr); +} + +// CEscape() +// +// Escapes a 'src' string using C-style escapes sequences +// (https://en.cppreference.com/w/cpp/language/escape), escaping other +// non-printable/non-whitespace bytes as octal sequences (e.g. "\377"). +// +// Example: +// +// TString s = "foo\rbar\tbaz\010\011\012\013\014\x0d\n"; +// TString escaped_s = y_absl::CEscape(s); +// EXPECT_EQ(escaped_s, "foo\\rbar\\tbaz\\010\\t\\n\\013\\014\\r\\n"); +TString CEscape(y_absl::string_view src); + +// CHexEscape() +// +// Escapes a 'src' string using C-style escape sequences, escaping +// other non-printable/non-whitespace bytes as hexadecimal sequences (e.g. +// "\xFF"). +// +// Example: +// +// TString s = "foo\rbar\tbaz\010\011\012\013\014\x0d\n"; +// TString escaped_s = y_absl::CHexEscape(s); +// EXPECT_EQ(escaped_s, "foo\\rbar\\tbaz\\x08\\t\\n\\x0b\\x0c\\r\\n"); +TString CHexEscape(y_absl::string_view src); + +// Utf8SafeCEscape() +// +// Escapes a 'src' string using C-style escape sequences, escaping bytes as +// octal sequences, and passing through UTF-8 characters without conversion. +// I.e., when encountering any bytes with their high bit set, this function +// will not escape those values, whether or not they are valid UTF-8. +TString Utf8SafeCEscape(y_absl::string_view src); + +// Utf8SafeCHexEscape() +// +// Escapes a 'src' string using C-style escape sequences, escaping bytes as +// hexadecimal sequences, and passing through UTF-8 characters without +// conversion. +TString Utf8SafeCHexEscape(y_absl::string_view src); + +// Base64Unescape() +// +// Converts a `src` string encoded in Base64 to its binary equivalent, writing +// it to a `dest` buffer, returning `true` on success. If `src` contains invalid +// characters, `dest` is cleared and returns `false`. +bool Base64Unescape(y_absl::string_view src, TString* dest); + +// WebSafeBase64Unescape() +// +// Converts a `src` string encoded in Base64 to its binary equivalent, writing +// it to a `dest` buffer, but using '-' instead of '+', and '_' instead of '/'. +// If `src` contains invalid characters, `dest` is cleared and returns `false`. +bool WebSafeBase64Unescape(y_absl::string_view src, TString* dest); + +// Base64Escape() +// +// Encodes a `src` string into a base64-encoded string, with padding characters. +// This function conforms with RFC 4648 section 4 (base64). +void Base64Escape(y_absl::string_view src, TString* dest); +TString Base64Escape(y_absl::string_view src); + +// WebSafeBase64Escape() +// +// Encodes a `src` string into a base64-like string, using '-' instead of '+' +// and '_' instead of '/', and without padding. This function conforms with RFC +// 4648 section 5 (base64url). +void WebSafeBase64Escape(y_absl::string_view src, TString* dest); +TString WebSafeBase64Escape(y_absl::string_view src); + +// HexStringToBytes() +// +// Converts an ASCII hex string into bytes, returning binary data of length +// `from.size()/2`. +TString HexStringToBytes(y_absl::string_view from); + +// BytesToHexString() +// +// Converts binary data into an ASCII text string, returning a string of size +// `2*from.size()`. +TString BytesToHexString(y_absl::string_view from); + +ABSL_NAMESPACE_END +} // namespace y_absl + +#endif // ABSL_STRINGS_ESCAPING_H_ diff --git a/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/absl_cord_internal/ya.make b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/absl_cord_internal/ya.make new file mode 100644 index 0000000000..42b7b6cd5e --- /dev/null +++ b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/absl_cord_internal/ya.make @@ -0,0 +1,42 @@ +# Generated by devtools/yamaker. + +LIBRARY() + +WITHOUT_LICENSE_TEXTS() + +OWNER( + somov + g:cpp-contrib +) + +LICENSE(Apache-2.0) + +PEERDIR( + contrib/restricted/abseil-cpp-tstring/y_absl/base + contrib/restricted/abseil-cpp-tstring/y_absl/base/internal/raw_logging + contrib/restricted/abseil-cpp-tstring/y_absl/base/internal/spinlock_wait + contrib/restricted/abseil-cpp-tstring/y_absl/base/internal/throw_delegate + contrib/restricted/abseil-cpp-tstring/y_absl/base/log_severity + contrib/restricted/abseil-cpp-tstring/y_absl/numeric + contrib/restricted/abseil-cpp-tstring/y_absl/strings + contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/absl_strings_internal +) + +ADDINCL( + GLOBAL contrib/restricted/abseil-cpp-tstring +) + +NO_COMPILER_WARNINGS() + +SRCDIR(contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal) + +SRCS( + cord_internal.cc + cord_rep_btree.cc + cord_rep_btree_navigator.cc + cord_rep_btree_reader.cc + cord_rep_consume.cc + cord_rep_ring.cc +) + +END() diff --git a/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/absl_strings_internal/ya.make b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/absl_strings_internal/ya.make new file mode 100644 index 0000000000..4e57fc75f6 --- /dev/null +++ b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/absl_strings_internal/ya.make @@ -0,0 +1,35 @@ +# Generated by devtools/yamaker. + +LIBRARY() + +WITHOUT_LICENSE_TEXTS() + +OWNER( + somov + g:cpp-contrib +) + +LICENSE(Apache-2.0) + +PEERDIR( + contrib/restricted/abseil-cpp-tstring/y_absl/base + contrib/restricted/abseil-cpp-tstring/y_absl/base/internal/raw_logging + contrib/restricted/abseil-cpp-tstring/y_absl/base/internal/spinlock_wait + contrib/restricted/abseil-cpp-tstring/y_absl/base/log_severity +) + +ADDINCL( + GLOBAL contrib/restricted/abseil-cpp-tstring +) + +NO_COMPILER_WARNINGS() + +SRCDIR(contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal) + +SRCS( + escaping.cc + ostringstream.cc + utf8.cc +) + +END() diff --git a/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/char_map.h b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/char_map.h new file mode 100644 index 0000000000..25428e304c --- /dev/null +++ b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/char_map.h @@ -0,0 +1,156 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Character Map Class +// +// A fast, bit-vector map for 8-bit unsigned characters. +// This class is useful for non-character purposes as well. + +#ifndef ABSL_STRINGS_INTERNAL_CHAR_MAP_H_ +#define ABSL_STRINGS_INTERNAL_CHAR_MAP_H_ + +#include <cstddef> +#include <cstdint> +#include <cstring> + +#include "y_absl/base/macros.h" +#include "y_absl/base/port.h" + +namespace y_absl { +ABSL_NAMESPACE_BEGIN +namespace strings_internal { + +class Charmap { + public: + constexpr Charmap() : m_() {} + + // Initializes with a given char*. Note that NUL is not treated as + // a terminator, but rather a char to be flicked. + Charmap(const char* str, int len) : m_() { + while (len--) SetChar(*str++); + } + + // Initializes with a given char*. NUL is treated as a terminator + // and will not be in the charmap. + explicit Charmap(const char* str) : m_() { + while (*str) SetChar(*str++); + } + + constexpr bool contains(unsigned char c) const { + return (m_[c / 64] >> (c % 64)) & 0x1; + } + + // Returns true if and only if a character exists in both maps. + bool IntersectsWith(const Charmap& c) const { + for (size_t i = 0; i < ABSL_ARRAYSIZE(m_); ++i) { + if ((m_[i] & c.m_[i]) != 0) return true; + } + return false; + } + + bool IsZero() const { + for (uint64_t c : m_) { + if (c != 0) return false; + } + return true; + } + + // Containing only a single specified char. + static constexpr Charmap Char(char x) { + return Charmap(CharMaskForWord(x, 0), CharMaskForWord(x, 1), + CharMaskForWord(x, 2), CharMaskForWord(x, 3)); + } + + // Containing all the chars in the C-string 's'. + // Note that this is expensively recursive because of the C++11 constexpr + // formulation. Use only in constexpr initializers. + static constexpr Charmap FromString(const char* s) { + return *s == 0 ? Charmap() : (Char(*s) | FromString(s + 1)); + } + + // Containing all the chars in the closed interval [lo,hi]. + static constexpr Charmap Range(char lo, char hi) { + return Charmap(RangeForWord(lo, hi, 0), RangeForWord(lo, hi, 1), + RangeForWord(lo, hi, 2), RangeForWord(lo, hi, 3)); + } + + friend constexpr Charmap operator&(const Charmap& a, const Charmap& b) { + return Charmap(a.m_[0] & b.m_[0], a.m_[1] & b.m_[1], a.m_[2] & b.m_[2], + a.m_[3] & b.m_[3]); + } + + friend constexpr Charmap operator|(const Charmap& a, const Charmap& b) { + return Charmap(a.m_[0] | b.m_[0], a.m_[1] | b.m_[1], a.m_[2] | b.m_[2], + a.m_[3] | b.m_[3]); + } + + friend constexpr Charmap operator~(const Charmap& a) { + return Charmap(~a.m_[0], ~a.m_[1], ~a.m_[2], ~a.m_[3]); + } + + private: + constexpr Charmap(uint64_t b0, uint64_t b1, uint64_t b2, uint64_t b3) + : m_{b0, b1, b2, b3} {} + + static constexpr uint64_t RangeForWord(unsigned char lo, unsigned char hi, + uint64_t word) { + return OpenRangeFromZeroForWord(hi + 1, word) & + ~OpenRangeFromZeroForWord(lo, word); + } + + // All the chars in the specified word of the range [0, upper). + static constexpr uint64_t OpenRangeFromZeroForWord(uint64_t upper, + uint64_t word) { + return (upper <= 64 * word) + ? 0 + : (upper >= 64 * (word + 1)) + ? ~static_cast<uint64_t>(0) + : (~static_cast<uint64_t>(0) >> (64 - upper % 64)); + } + + static constexpr uint64_t CharMaskForWord(unsigned char x, uint64_t word) { + return (x / 64 == word) ? (static_cast<uint64_t>(1) << (x % 64)) : 0; + } + + private: + void SetChar(unsigned char c) { + m_[c / 64] |= static_cast<uint64_t>(1) << (c % 64); + } + + uint64_t m_[4]; +}; + +// Mirror the char-classifying predicates in <cctype> +constexpr Charmap UpperCharmap() { return Charmap::Range('A', 'Z'); } +constexpr Charmap LowerCharmap() { return Charmap::Range('a', 'z'); } +constexpr Charmap DigitCharmap() { return Charmap::Range('0', '9'); } +constexpr Charmap AlphaCharmap() { return LowerCharmap() | UpperCharmap(); } +constexpr Charmap AlnumCharmap() { return DigitCharmap() | AlphaCharmap(); } +constexpr Charmap XDigitCharmap() { + return DigitCharmap() | Charmap::Range('A', 'F') | Charmap::Range('a', 'f'); +} +constexpr Charmap PrintCharmap() { return Charmap::Range(0x20, 0x7e); } +constexpr Charmap SpaceCharmap() { return Charmap::FromString("\t\n\v\f\r "); } +constexpr Charmap CntrlCharmap() { + return Charmap::Range(0, 0x7f) & ~PrintCharmap(); +} +constexpr Charmap BlankCharmap() { return Charmap::FromString("\t "); } +constexpr Charmap GraphCharmap() { return PrintCharmap() & ~SpaceCharmap(); } +constexpr Charmap PunctCharmap() { return GraphCharmap() & ~AlnumCharmap(); } + +} // namespace strings_internal +ABSL_NAMESPACE_END +} // namespace y_absl + +#endif // ABSL_STRINGS_INTERNAL_CHAR_MAP_H_ diff --git a/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/charconv_bigint.cc b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/charconv_bigint.cc new file mode 100644 index 0000000000..72a4fa188b --- /dev/null +++ b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/charconv_bigint.cc @@ -0,0 +1,359 @@ +// Copyright 2018 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "y_absl/strings/internal/charconv_bigint.h" + +#include <algorithm> +#include <cassert> +#include <util/generic/string.h> + +namespace y_absl { +ABSL_NAMESPACE_BEGIN +namespace strings_internal { + +namespace { + +// Table containing some large powers of 5, for fast computation. + +// Constant step size for entries in the kLargePowersOfFive table. Each entry +// is larger than the previous entry by a factor of 5**kLargePowerOfFiveStep +// (or 5**27). +// +// In other words, the Nth entry in the table is 5**(27*N). +// +// 5**27 is the largest power of 5 that fits in 64 bits. +constexpr int kLargePowerOfFiveStep = 27; + +// The largest legal index into the kLargePowersOfFive table. +// +// In other words, the largest precomputed power of 5 is 5**(27*20). +constexpr int kLargestPowerOfFiveIndex = 20; + +// Table of powers of (5**27), up to (5**27)**20 == 5**540. +// +// Used to generate large powers of 5 while limiting the number of repeated +// multiplications required. +// +// clang-format off +const uint32_t kLargePowersOfFive[] = { +// 5**27 (i=1), start=0, end=2 + 0xfa10079dU, 0x6765c793U, +// 5**54 (i=2), start=2, end=6 + 0x97d9f649U, 0x6664242dU, 0x29939b14U, 0x29c30f10U, +// 5**81 (i=3), start=6, end=12 + 0xc4f809c5U, 0x7bf3f22aU, 0x67bdae34U, 0xad340517U, 0x369d1b5fU, 0x10de1593U, +// 5**108 (i=4), start=12, end=20 + 0x92b260d1U, 0x9efff7c7U, 0x81de0ec6U, 0xaeba5d56U, 0x410664a4U, 0x4f40737aU, + 0x20d3846fU, 0x06d00f73U, +// 5**135 (i=5), start=20, end=30 + 0xff1b172dU, 0x13a1d71cU, 0xefa07617U, 0x7f682d3dU, 0xff8c90c0U, 0x3f0131e7U, + 0x3fdcb9feU, 0x917b0177U, 0x16c407a7U, 0x02c06b9dU, +// 5**162 (i=6), start=30, end=42 + 0x960f7199U, 0x056667ecU, 0xe07aefd8U, 0x80f2b9ccU, 0x8273f5e3U, 0xeb9a214aU, + 0x40b38005U, 0x0e477ad4U, 0x277d08e6U, 0xfa28b11eU, 0xd3f7d784U, 0x011c835bU, +// 5**189 (i=7), start=42, end=56 + 0xf723d9d5U, 0x3282d3f3U, 0xe00857d1U, 0x69659d25U, 0x2cf117cfU, 0x24da6d07U, + 0x954d1417U, 0x3e5d8cedU, 0x7a8bb766U, 0xfd785ae6U, 0x645436d2U, 0x40c78b34U, + 0x94151217U, 0x0072e9f7U, +// 5**216 (i=8), start=56, end=72 + 0x2b416aa1U, 0x7893c5a7U, 0xe37dc6d4U, 0x2bad2beaU, 0xf0fc846cU, 0x7575ae4bU, + 0x62587b14U, 0x83b67a34U, 0x02110cdbU, 0xf7992f55U, 0x00deb022U, 0xa4a23becU, + 0x8af5c5cdU, 0xb85b654fU, 0x818df38bU, 0x002e69d2U, +// 5**243 (i=9), start=72, end=90 + 0x3518cbbdU, 0x20b0c15fU, 0x38756c2fU, 0xfb5dc3ddU, 0x22ad2d94U, 0xbf35a952U, + 0xa699192aU, 0x9a613326U, 0xad2a9cedU, 0xd7f48968U, 0xe87dfb54U, 0xc8f05db6U, + 0x5ef67531U, 0x31c1ab49U, 0xe202ac9fU, 0x9b2957b5U, 0xa143f6d3U, 0x0012bf07U, +// 5**270 (i=10), start=90, end=110 + 0x8b971de9U, 0x21aba2e1U, 0x63944362U, 0x57172336U, 0xd9544225U, 0xfb534166U, + 0x08c563eeU, 0x14640ee2U, 0x24e40d31U, 0x02b06537U, 0x03887f14U, 0x0285e533U, + 0xb744ef26U, 0x8be3a6c4U, 0x266979b4U, 0x6761ece2U, 0xd9cb39e4U, 0xe67de319U, + 0x0d39e796U, 0x00079250U, +// 5**297 (i=11), start=110, end=132 + 0x260eb6e5U, 0xf414a796U, 0xee1a7491U, 0xdb9368ebU, 0xf50c105bU, 0x59157750U, + 0x9ed2fb5cU, 0xf6e56d8bU, 0xeaee8d23U, 0x0f319f75U, 0x2aa134d6U, 0xac2908e9U, + 0xd4413298U, 0x02f02a55U, 0x989d5a7aU, 0x70dde184U, 0xba8040a7U, 0x03200981U, + 0xbe03b11cU, 0x3c1c2a18U, 0xd60427a1U, 0x00030ee0U, +// 5**324 (i=12), start=132, end=156 + 0xce566d71U, 0xf1c4aa25U, 0x4e93ca53U, 0xa72283d0U, 0x551a73eaU, 0x3d0538e2U, + 0x8da4303fU, 0x6a58de60U, 0x0e660221U, 0x49cf61a6U, 0x8d058fc1U, 0xb9d1a14cU, + 0x4bab157dU, 0xc85c6932U, 0x518c8b9eU, 0x9b92b8d0U, 0x0d8a0e21U, 0xbd855df9U, + 0xb3ea59a1U, 0x8da29289U, 0x4584d506U, 0x3752d80fU, 0xb72569c6U, 0x00013c33U, +// 5**351 (i=13), start=156, end=182 + 0x190f354dU, 0x83695cfeU, 0xe5a4d0c7U, 0xb60fb7e8U, 0xee5bbcc4U, 0xb922054cU, + 0xbb4f0d85U, 0x48394028U, 0x1d8957dbU, 0x0d7edb14U, 0x4ecc7587U, 0x505e9e02U, + 0x4c87f36bU, 0x99e66bd6U, 0x44b9ed35U, 0x753037d4U, 0xe5fe5f27U, 0x2742c203U, + 0x13b2ed2bU, 0xdc525d2cU, 0xe6fde59aU, 0x77ffb18fU, 0x13c5752cU, 0x08a84bccU, + 0x859a4940U, 0x00007fb6U, +// 5**378 (i=14), start=182, end=210 + 0x4f98cb39U, 0xa60edbbcU, 0x83b5872eU, 0xa501acffU, 0x9cc76f78U, 0xbadd4c73U, + 0x43e989faU, 0xca7acf80U, 0x2e0c824fU, 0xb19f4ffcU, 0x092fd81cU, 0xe4eb645bU, + 0xa1ff84c2U, 0x8a5a83baU, 0xa8a1fae9U, 0x1db43609U, 0xb0fed50bU, 0x0dd7d2bdU, + 0x7d7accd8U, 0x91fa640fU, 0x37dcc6c5U, 0x1c417fd5U, 0xe4d462adU, 0xe8a43399U, + 0x131bf9a5U, 0x8df54d29U, 0x36547dc1U, 0x00003395U, +// 5**405 (i=15), start=210, end=240 + 0x5bd330f5U, 0x77d21967U, 0x1ac481b7U, 0x6be2f7ceU, 0x7f4792a9U, 0xe84c2c52U, + 0x84592228U, 0x9dcaf829U, 0xdab44ce1U, 0x3d0c311bU, 0x532e297dU, 0x4704e8b4U, + 0x9cdc32beU, 0x41e64d9dU, 0x7717bea1U, 0xa824c00dU, 0x08f50b27U, 0x0f198d77U, + 0x49bbfdf0U, 0x025c6c69U, 0xd4e55cd3U, 0xf083602bU, 0xb9f0fecdU, 0xc0864aeaU, + 0x9cb98681U, 0xaaf620e9U, 0xacb6df30U, 0x4faafe66U, 0x8af13c3bU, 0x000014d5U, +// 5**432 (i=16), start=240, end=272 + 0x682bb941U, 0x89a9f297U, 0xcba75d7bU, 0x404217b1U, 0xb4e519e9U, 0xa1bc162bU, + 0xf7f5910aU, 0x98715af5U, 0x2ff53e57U, 0xe3ef118cU, 0x490c4543U, 0xbc9b1734U, + 0x2affbe4dU, 0x4cedcb4cU, 0xfb14e99eU, 0x35e34212U, 0xece39c24U, 0x07673ab3U, + 0xe73115ddU, 0xd15d38e7U, 0x093eed3bU, 0xf8e7eac5U, 0x78a8cc80U, 0x25227aacU, + 0x3f590551U, 0x413da1cbU, 0xdf643a55U, 0xab65ad44U, 0xd70b23d7U, 0xc672cd76U, + 0x3364ea62U, 0x0000086aU, +// 5**459 (i=17), start=272, end=306 + 0x22f163ddU, 0x23cf07acU, 0xbe2af6c2U, 0xf412f6f6U, 0xc3ff541eU, 0x6eeaf7deU, + 0xa47047e0U, 0x408cda92U, 0x0f0eeb08U, 0x56deba9dU, 0xcfc6b090U, 0x8bbbdf04U, + 0x3933cdb3U, 0x9e7bb67dU, 0x9f297035U, 0x38946244U, 0xee1d37bbU, 0xde898174U, + 0x63f3559dU, 0x705b72fbU, 0x138d27d9U, 0xf8603a78U, 0x735eec44U, 0xe30987d5U, + 0xc6d38070U, 0x9cfe548eU, 0x9ff01422U, 0x7c564aa8U, 0x91cc60baU, 0xcbc3565dU, + 0x7550a50bU, 0x6909aeadU, 0x13234c45U, 0x00000366U, +// 5**486 (i=18), start=306, end=342 + 0x17954989U, 0x3a7d7709U, 0x98042de5U, 0xa9011443U, 0x45e723c2U, 0x269ffd6fU, + 0x58852a46U, 0xaaa1042aU, 0x2eee8153U, 0xb2b6c39eU, 0xaf845b65U, 0xf6c365d7U, + 0xe4cffb2bU, 0xc840e90cU, 0xabea8abbU, 0x5c58f8d2U, 0x5c19fa3aU, 0x4670910aU, + 0x4449f21cU, 0xefa645b3U, 0xcc427decU, 0x083c3d73U, 0x467cb413U, 0x6fe10ae4U, + 0x3caffc72U, 0x9f8da55eU, 0x5e5c8ea7U, 0x490594bbU, 0xf0871b0bU, 0xdd89816cU, + 0x8e931df8U, 0xe85ce1c9U, 0xcca090a5U, 0x575fa16bU, 0x6b9f106cU, 0x0000015fU, +// 5**513 (i=19), start=342, end=380 + 0xee20d805U, 0x57bc3c07U, 0xcdea624eU, 0xd3f0f52dU, 0x9924b4f4U, 0xcf968640U, + 0x61d41962U, 0xe87fb464U, 0xeaaf51c7U, 0x564c8b60U, 0xccda4028U, 0x529428bbU, + 0x313a1fa8U, 0x96bd0f94U, 0x7a82ebaaU, 0xad99e7e9U, 0xf2668cd4U, 0xbe33a45eU, + 0xfd0db669U, 0x87ee369fU, 0xd3ec20edU, 0x9c4d7db7U, 0xdedcf0d8U, 0x7cd2ca64U, + 0xe25a6577U, 0x61003fd4U, 0xe56f54ccU, 0x10b7c748U, 0x40526e5eU, 0x7300ae87U, + 0x5c439261U, 0x2c0ff469U, 0xbf723f12U, 0xb2379b61U, 0xbf59b4f5U, 0xc91b1c3fU, + 0xf0046d27U, 0x0000008dU, +// 5**540 (i=20), start=380, end=420 + 0x525c9e11U, 0xf4e0eb41U, 0xebb2895dU, 0x5da512f9U, 0x7d9b29d4U, 0x452f4edcU, + 0x0b90bc37U, 0x341777cbU, 0x63d269afU, 0x1da77929U, 0x0a5c1826U, 0x77991898U, + 0x5aeddf86U, 0xf853a877U, 0x538c31ccU, 0xe84896daU, 0xb7a0010bU, 0x17ef4de5U, + 0xa52a2adeU, 0x029fd81cU, 0x987ce701U, 0x27fefd77U, 0xdb46c66fU, 0x5d301900U, + 0x496998c0U, 0xbb6598b9U, 0x5eebb607U, 0xe547354aU, 0xdf4a2f7eU, 0xf06c4955U, + 0x96242ffaU, 0x1775fb27U, 0xbecc58ceU, 0xebf2a53bU, 0x3eaad82aU, 0xf41137baU, + 0x573e6fbaU, 0xfb4866b8U, 0x54002148U, 0x00000039U, +}; +// clang-format on + +// Returns a pointer to the big integer data for (5**27)**i. i must be +// between 1 and 20, inclusive. +const uint32_t* LargePowerOfFiveData(int i) { + return kLargePowersOfFive + i * (i - 1); +} + +// Returns the size of the big integer data for (5**27)**i, in words. i must be +// between 1 and 20, inclusive. +int LargePowerOfFiveSize(int i) { return 2 * i; } +} // namespace + +ABSL_DLL const uint32_t kFiveToNth[14] = { + 1, 5, 25, 125, 625, 3125, 15625, + 78125, 390625, 1953125, 9765625, 48828125, 244140625, 1220703125, +}; + +ABSL_DLL const uint32_t kTenToNth[10] = { + 1, 10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000, 1000000000, +}; + +template <int max_words> +int BigUnsigned<max_words>::ReadFloatMantissa(const ParsedFloat& fp, + int significant_digits) { + SetToZero(); + assert(fp.type == FloatType::kNumber); + + if (fp.subrange_begin == nullptr) { + // We already exactly parsed the mantissa, so no more work is necessary. + words_[0] = fp.mantissa & 0xffffffffu; + words_[1] = fp.mantissa >> 32; + if (words_[1]) { + size_ = 2; + } else if (words_[0]) { + size_ = 1; + } + return fp.exponent; + } + int exponent_adjust = + ReadDigits(fp.subrange_begin, fp.subrange_end, significant_digits); + return fp.literal_exponent + exponent_adjust; +} + +template <int max_words> +int BigUnsigned<max_words>::ReadDigits(const char* begin, const char* end, + int significant_digits) { + assert(significant_digits <= Digits10() + 1); + SetToZero(); + + bool after_decimal_point = false; + // Discard any leading zeroes before the decimal point + while (begin < end && *begin == '0') { + ++begin; + } + int dropped_digits = 0; + // Discard any trailing zeroes. These may or may not be after the decimal + // point. + while (begin < end && *std::prev(end) == '0') { + --end; + ++dropped_digits; + } + if (begin < end && *std::prev(end) == '.') { + // If the string ends in '.', either before or after dropping zeroes, then + // drop the decimal point and look for more digits to drop. + dropped_digits = 0; + --end; + while (begin < end && *std::prev(end) == '0') { + --end; + ++dropped_digits; + } + } else if (dropped_digits) { + // We dropped digits, and aren't sure if they're before or after the decimal + // point. Figure that out now. + const char* dp = std::find(begin, end, '.'); + if (dp != end) { + // The dropped trailing digits were after the decimal point, so don't + // count them. + dropped_digits = 0; + } + } + // Any non-fraction digits we dropped need to be accounted for in our exponent + // adjustment. + int exponent_adjust = dropped_digits; + + uint32_t queued = 0; + int digits_queued = 0; + for (; begin != end && significant_digits > 0; ++begin) { + if (*begin == '.') { + after_decimal_point = true; + continue; + } + if (after_decimal_point) { + // For each fractional digit we emit in our parsed integer, adjust our + // decimal exponent to compensate. + --exponent_adjust; + } + int digit = (*begin - '0'); + --significant_digits; + if (significant_digits == 0 && std::next(begin) != end && + (digit == 0 || digit == 5)) { + // If this is the very last significant digit, but insignificant digits + // remain, we know that the last of those remaining significant digits is + // nonzero. (If it wasn't, we would have stripped it before we got here.) + // So if this final digit is a 0 or 5, adjust it upward by 1. + // + // This adjustment is what allows incredibly large mantissas ending in + // 500000...000000000001 to correctly round up, rather than to nearest. + ++digit; + } + queued = 10 * queued + digit; + ++digits_queued; + if (digits_queued == kMaxSmallPowerOfTen) { + MultiplyBy(kTenToNth[kMaxSmallPowerOfTen]); + AddWithCarry(0, queued); + queued = digits_queued = 0; + } + } + // Encode any remaining digits. + if (digits_queued) { + MultiplyBy(kTenToNth[digits_queued]); + AddWithCarry(0, queued); + } + + // If any insignificant digits remain, we will drop them. But if we have not + // yet read the decimal point, then we have to adjust the exponent to account + // for the dropped digits. + if (begin < end && !after_decimal_point) { + // This call to std::find will result in a pointer either to the decimal + // point, or to the end of our buffer if there was none. + // + // Either way, [begin, decimal_point) will contain the set of dropped digits + // that require an exponent adjustment. + const char* decimal_point = std::find(begin, end, '.'); + exponent_adjust += (decimal_point - begin); + } + return exponent_adjust; +} + +template <int max_words> +/* static */ BigUnsigned<max_words> BigUnsigned<max_words>::FiveToTheNth( + int n) { + BigUnsigned answer(1u); + + // Seed from the table of large powers, if possible. + bool first_pass = true; + while (n >= kLargePowerOfFiveStep) { + int big_power = + std::min(n / kLargePowerOfFiveStep, kLargestPowerOfFiveIndex); + if (first_pass) { + // just copy, rather than multiplying by 1 + std::copy( + LargePowerOfFiveData(big_power), + LargePowerOfFiveData(big_power) + LargePowerOfFiveSize(big_power), + answer.words_); + answer.size_ = LargePowerOfFiveSize(big_power); + first_pass = false; + } else { + answer.MultiplyBy(LargePowerOfFiveSize(big_power), + LargePowerOfFiveData(big_power)); + } + n -= kLargePowerOfFiveStep * big_power; + } + answer.MultiplyByFiveToTheNth(n); + return answer; +} + +template <int max_words> +void BigUnsigned<max_words>::MultiplyStep(int original_size, + const uint32_t* other_words, + int other_size, int step) { + int this_i = std::min(original_size - 1, step); + int other_i = step - this_i; + + uint64_t this_word = 0; + uint64_t carry = 0; + for (; this_i >= 0 && other_i < other_size; --this_i, ++other_i) { + uint64_t product = words_[this_i]; + product *= other_words[other_i]; + this_word += product; + carry += (this_word >> 32); + this_word &= 0xffffffff; + } + AddWithCarry(step + 1, carry); + words_[step] = this_word & 0xffffffff; + if (this_word > 0 && size_ <= step) { + size_ = step + 1; + } +} + +template <int max_words> +TString BigUnsigned<max_words>::ToString() const { + BigUnsigned<max_words> copy = *this; + TString result; + // Build result in reverse order + while (copy.size() > 0) { + int next_digit = copy.DivMod<10>(); + result.push_back('0' + next_digit); + } + if (result.empty()) { + result.push_back('0'); + } + std::reverse(result.begin(), result.vend()); + return result; +} + +template class BigUnsigned<4>; +template class BigUnsigned<84>; + +} // namespace strings_internal +ABSL_NAMESPACE_END +} // namespace y_absl diff --git a/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/charconv_bigint.h b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/charconv_bigint.h new file mode 100644 index 0000000000..a77aab14dd --- /dev/null +++ b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/charconv_bigint.h @@ -0,0 +1,423 @@ +// Copyright 2018 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ABSL_STRINGS_INTERNAL_CHARCONV_BIGINT_H_ +#define ABSL_STRINGS_INTERNAL_CHARCONV_BIGINT_H_ + +#include <algorithm> +#include <cstdint> +#include <iostream> +#include <util/generic/string.h> + +#include "y_absl/base/config.h" +#include "y_absl/strings/ascii.h" +#include "y_absl/strings/internal/charconv_parse.h" +#include "y_absl/strings/string_view.h" + +namespace y_absl { +ABSL_NAMESPACE_BEGIN +namespace strings_internal { + +// The largest power that 5 that can be raised to, and still fit in a uint32_t. +constexpr int kMaxSmallPowerOfFive = 13; +// The largest power that 10 that can be raised to, and still fit in a uint32_t. +constexpr int kMaxSmallPowerOfTen = 9; + +ABSL_DLL extern const uint32_t + kFiveToNth[kMaxSmallPowerOfFive + 1]; +ABSL_DLL extern const uint32_t kTenToNth[kMaxSmallPowerOfTen + 1]; + +// Large, fixed-width unsigned integer. +// +// Exact rounding for decimal-to-binary floating point conversion requires very +// large integer math, but a design goal of y_absl::from_chars is to avoid +// allocating memory. The integer precision needed for decimal-to-binary +// conversions is large but bounded, so a huge fixed-width integer class +// suffices. +// +// This is an intentionally limited big integer class. Only needed operations +// are implemented. All storage lives in an array data member, and all +// arithmetic is done in-place, to avoid requiring separate storage for operand +// and result. +// +// This is an internal class. Some methods live in the .cc file, and are +// instantiated only for the values of max_words we need. +template <int max_words> +class BigUnsigned { + public: + static_assert(max_words == 4 || max_words == 84, + "unsupported max_words value"); + + BigUnsigned() : size_(0), words_{} {} + explicit constexpr BigUnsigned(uint64_t v) + : size_((v >> 32) ? 2 : v ? 1 : 0), + words_{static_cast<uint32_t>(v & 0xffffffffu), + static_cast<uint32_t>(v >> 32)} {} + + // Constructs a BigUnsigned from the given string_view containing a decimal + // value. If the input string is not a decimal integer, constructs a 0 + // instead. + explicit BigUnsigned(y_absl::string_view sv) : size_(0), words_{} { + // Check for valid input, returning a 0 otherwise. This is reasonable + // behavior only because this constructor is for unit tests. + if (std::find_if_not(sv.begin(), sv.end(), ascii_isdigit) != sv.end() || + sv.empty()) { + return; + } + int exponent_adjust = + ReadDigits(sv.data(), sv.data() + sv.size(), Digits10() + 1); + if (exponent_adjust > 0) { + MultiplyByTenToTheNth(exponent_adjust); + } + } + + // Loads the mantissa value of a previously-parsed float. + // + // Returns the associated decimal exponent. The value of the parsed float is + // exactly *this * 10**exponent. + int ReadFloatMantissa(const ParsedFloat& fp, int significant_digits); + + // Returns the number of decimal digits of precision this type provides. All + // numbers with this many decimal digits or fewer are representable by this + // type. + // + // Analagous to std::numeric_limits<BigUnsigned>::digits10. + static constexpr int Digits10() { + // 9975007/1035508 is very slightly less than log10(2**32). + return static_cast<uint64_t>(max_words) * 9975007 / 1035508; + } + + // Shifts left by the given number of bits. + void ShiftLeft(int count) { + if (count > 0) { + const int word_shift = count / 32; + if (word_shift >= max_words) { + SetToZero(); + return; + } + size_ = (std::min)(size_ + word_shift, max_words); + count %= 32; + if (count == 0) { + std::copy_backward(words_, words_ + size_ - word_shift, words_ + size_); + } else { + for (int i = (std::min)(size_, max_words - 1); i > word_shift; --i) { + words_[i] = (words_[i - word_shift] << count) | + (words_[i - word_shift - 1] >> (32 - count)); + } + words_[word_shift] = words_[0] << count; + // Grow size_ if necessary. + if (size_ < max_words && words_[size_]) { + ++size_; + } + } + std::fill(words_, words_ + word_shift, 0u); + } + } + + + // Multiplies by v in-place. + void MultiplyBy(uint32_t v) { + if (size_ == 0 || v == 1) { + return; + } + if (v == 0) { + SetToZero(); + return; + } + const uint64_t factor = v; + uint64_t window = 0; + for (int i = 0; i < size_; ++i) { + window += factor * words_[i]; + words_[i] = window & 0xffffffff; + window >>= 32; + } + // If carry bits remain and there's space for them, grow size_. + if (window && size_ < max_words) { + words_[size_] = window & 0xffffffff; + ++size_; + } + } + + void MultiplyBy(uint64_t v) { + uint32_t words[2]; + words[0] = static_cast<uint32_t>(v); + words[1] = static_cast<uint32_t>(v >> 32); + if (words[1] == 0) { + MultiplyBy(words[0]); + } else { + MultiplyBy(2, words); + } + } + + // Multiplies in place by 5 to the power of n. n must be non-negative. + void MultiplyByFiveToTheNth(int n) { + while (n >= kMaxSmallPowerOfFive) { + MultiplyBy(kFiveToNth[kMaxSmallPowerOfFive]); + n -= kMaxSmallPowerOfFive; + } + if (n > 0) { + MultiplyBy(kFiveToNth[n]); + } + } + + // Multiplies in place by 10 to the power of n. n must be non-negative. + void MultiplyByTenToTheNth(int n) { + if (n > kMaxSmallPowerOfTen) { + // For large n, raise to a power of 5, then shift left by the same amount. + // (10**n == 5**n * 2**n.) This requires fewer multiplications overall. + MultiplyByFiveToTheNth(n); + ShiftLeft(n); + } else if (n > 0) { + // We can do this more quickly for very small N by using a single + // multiplication. + MultiplyBy(kTenToNth[n]); + } + } + + // Returns the value of 5**n, for non-negative n. This implementation uses + // a lookup table, and is faster then seeding a BigUnsigned with 1 and calling + // MultiplyByFiveToTheNth(). + static BigUnsigned FiveToTheNth(int n); + + // Multiplies by another BigUnsigned, in-place. + template <int M> + void MultiplyBy(const BigUnsigned<M>& other) { + MultiplyBy(other.size(), other.words()); + } + + void SetToZero() { + std::fill(words_, words_ + size_, 0u); + size_ = 0; + } + + // Returns the value of the nth word of this BigUnsigned. This is + // range-checked, and returns 0 on out-of-bounds accesses. + uint32_t GetWord(int index) const { + if (index < 0 || index >= size_) { + return 0; + } + return words_[index]; + } + + // Returns this integer as a decimal string. This is not used in the decimal- + // to-binary conversion; it is intended to aid in testing. + TString ToString() const; + + int size() const { return size_; } + const uint32_t* words() const { return words_; } + + private: + // Reads the number between [begin, end), possibly containing a decimal point, + // into this BigUnsigned. + // + // Callers are required to ensure [begin, end) contains a valid number, with + // one or more decimal digits and at most one decimal point. This routine + // will behave unpredictably if these preconditions are not met. + // + // Only the first `significant_digits` digits are read. Digits beyond this + // limit are "sticky": If the final significant digit is 0 or 5, and if any + // dropped digit is nonzero, then that final significant digit is adjusted up + // to 1 or 6. This adjustment allows for precise rounding. + // + // Returns `exponent_adjustment`, a power-of-ten exponent adjustment to + // account for the decimal point and for dropped significant digits. After + // this function returns, + // actual_value_of_parsed_string ~= *this * 10**exponent_adjustment. + int ReadDigits(const char* begin, const char* end, int significant_digits); + + // Performs a step of big integer multiplication. This computes the full + // (64-bit-wide) values that should be added at the given index (step), and + // adds to that location in-place. + // + // Because our math all occurs in place, we must multiply starting from the + // highest word working downward. (This is a bit more expensive due to the + // extra carries involved.) + // + // This must be called in steps, for each word to be calculated, starting from + // the high end and working down to 0. The first value of `step` should be + // `std::min(original_size + other.size_ - 2, max_words - 1)`. + // The reason for this expression is that multiplying the i'th word from one + // multiplicand and the j'th word of another multiplicand creates a + // two-word-wide value to be stored at the (i+j)'th element. The highest + // word indices we will access are `original_size - 1` from this object, and + // `other.size_ - 1` from our operand. Therefore, + // `original_size + other.size_ - 2` is the first step we should calculate, + // but limited on an upper bound by max_words. + + // Working from high-to-low ensures that we do not overwrite the portions of + // the initial value of *this which are still needed for later steps. + // + // Once called with step == 0, *this contains the result of the + // multiplication. + // + // `original_size` is the size_ of *this before the first call to + // MultiplyStep(). `other_words` and `other_size` are the contents of our + // operand. `step` is the step to perform, as described above. + void MultiplyStep(int original_size, const uint32_t* other_words, + int other_size, int step); + + void MultiplyBy(int other_size, const uint32_t* other_words) { + const int original_size = size_; + const int first_step = + (std::min)(original_size + other_size - 2, max_words - 1); + for (int step = first_step; step >= 0; --step) { + MultiplyStep(original_size, other_words, other_size, step); + } + } + + // Adds a 32-bit value to the index'th word, with carry. + void AddWithCarry(int index, uint32_t value) { + if (value) { + while (index < max_words && value > 0) { + words_[index] += value; + // carry if we overflowed in this word: + if (value > words_[index]) { + value = 1; + ++index; + } else { + value = 0; + } + } + size_ = (std::min)(max_words, (std::max)(index + 1, size_)); + } + } + + void AddWithCarry(int index, uint64_t value) { + if (value && index < max_words) { + uint32_t high = value >> 32; + uint32_t low = value & 0xffffffff; + words_[index] += low; + if (words_[index] < low) { + ++high; + if (high == 0) { + // Carry from the low word caused our high word to overflow. + // Short circuit here to do the right thing. + AddWithCarry(index + 2, static_cast<uint32_t>(1)); + return; + } + } + if (high > 0) { + AddWithCarry(index + 1, high); + } else { + // Normally 32-bit AddWithCarry() sets size_, but since we don't call + // it when `high` is 0, do it ourselves here. + size_ = (std::min)(max_words, (std::max)(index + 1, size_)); + } + } + } + + // Divide this in place by a constant divisor. Returns the remainder of the + // division. + template <uint32_t divisor> + uint32_t DivMod() { + uint64_t accumulator = 0; + for (int i = size_ - 1; i >= 0; --i) { + accumulator <<= 32; + accumulator += words_[i]; + // accumulator / divisor will never overflow an int32_t in this loop + words_[i] = static_cast<uint32_t>(accumulator / divisor); + accumulator = accumulator % divisor; + } + while (size_ > 0 && words_[size_ - 1] == 0) { + --size_; + } + return static_cast<uint32_t>(accumulator); + } + + // The number of elements in words_ that may carry significant values. + // All elements beyond this point are 0. + // + // When size_ is 0, this BigUnsigned stores the value 0. + // When size_ is nonzero, is *not* guaranteed that words_[size_ - 1] is + // nonzero. This can occur due to overflow truncation. + // In particular, x.size_ != y.size_ does *not* imply x != y. + int size_; + uint32_t words_[max_words]; +}; + +// Compares two big integer instances. +// +// Returns -1 if lhs < rhs, 0 if lhs == rhs, and 1 if lhs > rhs. +template <int N, int M> +int Compare(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) { + int limit = (std::max)(lhs.size(), rhs.size()); + for (int i = limit - 1; i >= 0; --i) { + const uint32_t lhs_word = lhs.GetWord(i); + const uint32_t rhs_word = rhs.GetWord(i); + if (lhs_word < rhs_word) { + return -1; + } else if (lhs_word > rhs_word) { + return 1; + } + } + return 0; +} + +template <int N, int M> +bool operator==(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) { + int limit = (std::max)(lhs.size(), rhs.size()); + for (int i = 0; i < limit; ++i) { + if (lhs.GetWord(i) != rhs.GetWord(i)) { + return false; + } + } + return true; +} + +template <int N, int M> +bool operator!=(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) { + return !(lhs == rhs); +} + +template <int N, int M> +bool operator<(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) { + return Compare(lhs, rhs) == -1; +} + +template <int N, int M> +bool operator>(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) { + return rhs < lhs; +} +template <int N, int M> +bool operator<=(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) { + return !(rhs < lhs); +} +template <int N, int M> +bool operator>=(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) { + return !(lhs < rhs); +} + +// Output operator for BigUnsigned, for testing purposes only. +template <int N> +std::ostream& operator<<(std::ostream& os, const BigUnsigned<N>& num) { + return os << num.ToString(); +} + +// Explicit instantiation declarations for the sizes of BigUnsigned that we +// are using. +// +// For now, the choices of 4 and 84 are arbitrary; 4 is a small value that is +// still bigger than an int128, and 84 is a large value we will want to use +// in the from_chars implementation. +// +// Comments justifying the use of 84 belong in the from_chars implementation, +// and will be added in a follow-up CL. +extern template class BigUnsigned<4>; +extern template class BigUnsigned<84>; + +} // namespace strings_internal +ABSL_NAMESPACE_END +} // namespace y_absl + +#endif // ABSL_STRINGS_INTERNAL_CHARCONV_BIGINT_H_ diff --git a/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/charconv_parse.cc b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/charconv_parse.cc new file mode 100644 index 0000000000..f0f78eb68c --- /dev/null +++ b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/charconv_parse.cc @@ -0,0 +1,504 @@ +// Copyright 2018 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "y_absl/strings/internal/charconv_parse.h" +#include "y_absl/strings/charconv.h" + +#include <cassert> +#include <cstdint> +#include <limits> + +#include "y_absl/strings/internal/memutil.h" + +namespace y_absl { +ABSL_NAMESPACE_BEGIN +namespace { + +// ParseFloat<10> will read the first 19 significant digits of the mantissa. +// This number was chosen for multiple reasons. +// +// (a) First, for whatever integer type we choose to represent the mantissa, we +// want to choose the largest possible number of decimal digits for that integer +// type. We are using uint64_t, which can express any 19-digit unsigned +// integer. +// +// (b) Second, we need to parse enough digits that the binary value of any +// mantissa we capture has more bits of resolution than the mantissa +// representation in the target float. Our algorithm requires at least 3 bits +// of headway, but 19 decimal digits give a little more than that. +// +// The following static assertions verify the above comments: +constexpr int kDecimalMantissaDigitsMax = 19; + +static_assert(std::numeric_limits<uint64_t>::digits10 == + kDecimalMantissaDigitsMax, + "(a) above"); + +// IEEE doubles, which we assume in Abseil, have 53 binary bits of mantissa. +static_assert(std::numeric_limits<double>::is_iec559, "IEEE double assumed"); +static_assert(std::numeric_limits<double>::radix == 2, "IEEE double fact"); +static_assert(std::numeric_limits<double>::digits == 53, "IEEE double fact"); + +// The lowest valued 19-digit decimal mantissa we can read still contains +// sufficient information to reconstruct a binary mantissa. +static_assert(1000000000000000000u > (uint64_t{1} << (53 + 3)), "(b) above"); + +// ParseFloat<16> will read the first 15 significant digits of the mantissa. +// +// Because a base-16-to-base-2 conversion can be done exactly, we do not need +// to maximize the number of scanned hex digits to improve our conversion. What +// is required is to scan two more bits than the mantissa can represent, so that +// we always round correctly. +// +// (One extra bit does not suffice to perform correct rounding, since a number +// exactly halfway between two representable floats has unique rounding rules, +// so we need to differentiate between a "halfway between" number and a "closer +// to the larger value" number.) +constexpr int kHexadecimalMantissaDigitsMax = 15; + +// The minimum number of significant bits that will be read from +// kHexadecimalMantissaDigitsMax hex digits. We must subtract by three, since +// the most significant digit can be a "1", which only contributes a single +// significant bit. +constexpr int kGuaranteedHexadecimalMantissaBitPrecision = + 4 * kHexadecimalMantissaDigitsMax - 3; + +static_assert(kGuaranteedHexadecimalMantissaBitPrecision > + std::numeric_limits<double>::digits + 2, + "kHexadecimalMantissaDigitsMax too small"); + +// We also impose a limit on the number of significant digits we will read from +// an exponent, to avoid having to deal with integer overflow. We use 9 for +// this purpose. +// +// If we read a 9 digit exponent, the end result of the conversion will +// necessarily be infinity or zero, depending on the sign of the exponent. +// Therefore we can just drop extra digits on the floor without any extra +// logic. +constexpr int kDecimalExponentDigitsMax = 9; +static_assert(std::numeric_limits<int>::digits10 >= kDecimalExponentDigitsMax, + "int type too small"); + +// To avoid incredibly large inputs causing integer overflow for our exponent, +// we impose an arbitrary but very large limit on the number of significant +// digits we will accept. The implementation refuses to match a string with +// more consecutive significant mantissa digits than this. +constexpr int kDecimalDigitLimit = 50000000; + +// Corresponding limit for hexadecimal digit inputs. This is one fourth the +// amount of kDecimalDigitLimit, since each dropped hexadecimal digit requires +// a binary exponent adjustment of 4. +constexpr int kHexadecimalDigitLimit = kDecimalDigitLimit / 4; + +// The largest exponent we can read is 999999999 (per +// kDecimalExponentDigitsMax), and the largest exponent adjustment we can get +// from dropped mantissa digits is 2 * kDecimalDigitLimit, and the sum of these +// comfortably fits in an integer. +// +// We count kDecimalDigitLimit twice because there are independent limits for +// numbers before and after the decimal point. (In the case where there are no +// significant digits before the decimal point, there are independent limits for +// post-decimal-point leading zeroes and for significant digits.) +static_assert(999999999 + 2 * kDecimalDigitLimit < + std::numeric_limits<int>::max(), + "int type too small"); +static_assert(999999999 + 2 * (4 * kHexadecimalDigitLimit) < + std::numeric_limits<int>::max(), + "int type too small"); + +// Returns true if the provided bitfield allows parsing an exponent value +// (e.g., "1.5e100"). +bool AllowExponent(chars_format flags) { + bool fixed = (flags & chars_format::fixed) == chars_format::fixed; + bool scientific = + (flags & chars_format::scientific) == chars_format::scientific; + return scientific || !fixed; +} + +// Returns true if the provided bitfield requires an exponent value be present. +bool RequireExponent(chars_format flags) { + bool fixed = (flags & chars_format::fixed) == chars_format::fixed; + bool scientific = + (flags & chars_format::scientific) == chars_format::scientific; + return scientific && !fixed; +} + +const int8_t kAsciiToInt[256] = { + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, + 9, -1, -1, -1, -1, -1, -1, -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1}; + +// Returns true if `ch` is a digit in the given base +template <int base> +bool IsDigit(char ch); + +// Converts a valid `ch` to its digit value in the given base. +template <int base> +unsigned ToDigit(char ch); + +// Returns true if `ch` is the exponent delimiter for the given base. +template <int base> +bool IsExponentCharacter(char ch); + +// Returns the maximum number of significant digits we will read for a float +// in the given base. +template <int base> +constexpr int MantissaDigitsMax(); + +// Returns the largest consecutive run of digits we will accept when parsing a +// number in the given base. +template <int base> +constexpr int DigitLimit(); + +// Returns the amount the exponent must be adjusted by for each dropped digit. +// (For decimal this is 1, since the digits are in base 10 and the exponent base +// is also 10, but for hexadecimal this is 4, since the digits are base 16 but +// the exponent base is 2.) +template <int base> +constexpr int DigitMagnitude(); + +template <> +bool IsDigit<10>(char ch) { + return ch >= '0' && ch <= '9'; +} +template <> +bool IsDigit<16>(char ch) { + return kAsciiToInt[static_cast<unsigned char>(ch)] >= 0; +} + +template <> +unsigned ToDigit<10>(char ch) { + return ch - '0'; +} +template <> +unsigned ToDigit<16>(char ch) { + return kAsciiToInt[static_cast<unsigned char>(ch)]; +} + +template <> +bool IsExponentCharacter<10>(char ch) { + return ch == 'e' || ch == 'E'; +} + +template <> +bool IsExponentCharacter<16>(char ch) { + return ch == 'p' || ch == 'P'; +} + +template <> +constexpr int MantissaDigitsMax<10>() { + return kDecimalMantissaDigitsMax; +} +template <> +constexpr int MantissaDigitsMax<16>() { + return kHexadecimalMantissaDigitsMax; +} + +template <> +constexpr int DigitLimit<10>() { + return kDecimalDigitLimit; +} +template <> +constexpr int DigitLimit<16>() { + return kHexadecimalDigitLimit; +} + +template <> +constexpr int DigitMagnitude<10>() { + return 1; +} +template <> +constexpr int DigitMagnitude<16>() { + return 4; +} + +// Reads decimal digits from [begin, end) into *out. Returns the number of +// digits consumed. +// +// After max_digits has been read, keeps consuming characters, but no longer +// adjusts *out. If a nonzero digit is dropped this way, *dropped_nonzero_digit +// is set; otherwise, it is left unmodified. +// +// If no digits are matched, returns 0 and leaves *out unchanged. +// +// ConsumeDigits does not protect against overflow on *out; max_digits must +// be chosen with respect to type T to avoid the possibility of overflow. +template <int base, typename T> +int ConsumeDigits(const char* begin, const char* end, int max_digits, T* out, + bool* dropped_nonzero_digit) { + if (base == 10) { + assert(max_digits <= std::numeric_limits<T>::digits10); + } else if (base == 16) { + assert(max_digits * 4 <= std::numeric_limits<T>::digits); + } + const char* const original_begin = begin; + + // Skip leading zeros, but only if *out is zero. + // They don't cause an overflow so we don't have to count them for + // `max_digits`. + while (!*out && end != begin && *begin == '0') ++begin; + + T accumulator = *out; + const char* significant_digits_end = + (end - begin > max_digits) ? begin + max_digits : end; + while (begin < significant_digits_end && IsDigit<base>(*begin)) { + // Do not guard against *out overflow; max_digits was chosen to avoid this. + // Do assert against it, to detect problems in debug builds. + auto digit = static_cast<T>(ToDigit<base>(*begin)); + assert(accumulator * base >= accumulator); + accumulator *= base; + assert(accumulator + digit >= accumulator); + accumulator += digit; + ++begin; + } + bool dropped_nonzero = false; + while (begin < end && IsDigit<base>(*begin)) { + dropped_nonzero = dropped_nonzero || (*begin != '0'); + ++begin; + } + if (dropped_nonzero && dropped_nonzero_digit != nullptr) { + *dropped_nonzero_digit = true; + } + *out = accumulator; + return static_cast<int>(begin - original_begin); +} + +// Returns true if `v` is one of the chars allowed inside parentheses following +// a NaN. +bool IsNanChar(char v) { + return (v == '_') || (v >= '0' && v <= '9') || (v >= 'a' && v <= 'z') || + (v >= 'A' && v <= 'Z'); +} + +// Checks the range [begin, end) for a strtod()-formatted infinity or NaN. If +// one is found, sets `out` appropriately and returns true. +bool ParseInfinityOrNan(const char* begin, const char* end, + strings_internal::ParsedFloat* out) { + if (end - begin < 3) { + return false; + } + switch (*begin) { + case 'i': + case 'I': { + // An infinity string consists of the characters "inf" or "infinity", + // case insensitive. + if (strings_internal::memcasecmp(begin + 1, "nf", 2) != 0) { + return false; + } + out->type = strings_internal::FloatType::kInfinity; + if (end - begin >= 8 && + strings_internal::memcasecmp(begin + 3, "inity", 5) == 0) { + out->end = begin + 8; + } else { + out->end = begin + 3; + } + return true; + } + case 'n': + case 'N': { + // A NaN consists of the characters "nan", case insensitive, optionally + // followed by a parenthesized sequence of zero or more alphanumeric + // characters and/or underscores. + if (strings_internal::memcasecmp(begin + 1, "an", 2) != 0) { + return false; + } + out->type = strings_internal::FloatType::kNan; + out->end = begin + 3; + // NaN is allowed to be followed by a parenthesized string, consisting of + // only the characters [a-zA-Z0-9_]. Match that if it's present. + begin += 3; + if (begin < end && *begin == '(') { + const char* nan_begin = begin + 1; + while (nan_begin < end && IsNanChar(*nan_begin)) { + ++nan_begin; + } + if (nan_begin < end && *nan_begin == ')') { + // We found an extra NaN specifier range + out->subrange_begin = begin + 1; + out->subrange_end = nan_begin; + out->end = nan_begin + 1; + } + } + return true; + } + default: + return false; + } +} +} // namespace + +namespace strings_internal { + +template <int base> +strings_internal::ParsedFloat ParseFloat(const char* begin, const char* end, + chars_format format_flags) { + strings_internal::ParsedFloat result; + + // Exit early if we're given an empty range. + if (begin == end) return result; + + // Handle the infinity and NaN cases. + if (ParseInfinityOrNan(begin, end, &result)) { + return result; + } + + const char* const mantissa_begin = begin; + while (begin < end && *begin == '0') { + ++begin; // skip leading zeros + } + uint64_t mantissa = 0; + + int exponent_adjustment = 0; + bool mantissa_is_inexact = false; + int pre_decimal_digits = ConsumeDigits<base>( + begin, end, MantissaDigitsMax<base>(), &mantissa, &mantissa_is_inexact); + begin += pre_decimal_digits; + int digits_left; + if (pre_decimal_digits >= DigitLimit<base>()) { + // refuse to parse pathological inputs + return result; + } else if (pre_decimal_digits > MantissaDigitsMax<base>()) { + // We dropped some non-fraction digits on the floor. Adjust our exponent + // to compensate. + exponent_adjustment = + static_cast<int>(pre_decimal_digits - MantissaDigitsMax<base>()); + digits_left = 0; + } else { + digits_left = + static_cast<int>(MantissaDigitsMax<base>() - pre_decimal_digits); + } + if (begin < end && *begin == '.') { + ++begin; + if (mantissa == 0) { + // If we haven't seen any nonzero digits yet, keep skipping zeros. We + // have to adjust the exponent to reflect the changed place value. + const char* begin_zeros = begin; + while (begin < end && *begin == '0') { + ++begin; + } + int zeros_skipped = static_cast<int>(begin - begin_zeros); + if (zeros_skipped >= DigitLimit<base>()) { + // refuse to parse pathological inputs + return result; + } + exponent_adjustment -= static_cast<int>(zeros_skipped); + } + int post_decimal_digits = ConsumeDigits<base>( + begin, end, digits_left, &mantissa, &mantissa_is_inexact); + begin += post_decimal_digits; + + // Since `mantissa` is an integer, each significant digit we read after + // the decimal point requires an adjustment to the exponent. "1.23e0" will + // be stored as `mantissa` == 123 and `exponent` == -2 (that is, + // "123e-2"). + if (post_decimal_digits >= DigitLimit<base>()) { + // refuse to parse pathological inputs + return result; + } else if (post_decimal_digits > digits_left) { + exponent_adjustment -= digits_left; + } else { + exponent_adjustment -= post_decimal_digits; + } + } + // If we've found no mantissa whatsoever, this isn't a number. + if (mantissa_begin == begin) { + return result; + } + // A bare "." doesn't count as a mantissa either. + if (begin - mantissa_begin == 1 && *mantissa_begin == '.') { + return result; + } + + if (mantissa_is_inexact) { + // We dropped significant digits on the floor. Handle this appropriately. + if (base == 10) { + // If we truncated significant decimal digits, store the full range of the + // mantissa for future big integer math for exact rounding. + result.subrange_begin = mantissa_begin; + result.subrange_end = begin; + } else if (base == 16) { + // If we truncated hex digits, reflect this fact by setting the low + // ("sticky") bit. This allows for correct rounding in all cases. + mantissa |= 1; + } + } + result.mantissa = mantissa; + + const char* const exponent_begin = begin; + result.literal_exponent = 0; + bool found_exponent = false; + if (AllowExponent(format_flags) && begin < end && + IsExponentCharacter<base>(*begin)) { + bool negative_exponent = false; + ++begin; + if (begin < end && *begin == '-') { + negative_exponent = true; + ++begin; + } else if (begin < end && *begin == '+') { + ++begin; + } + const char* const exponent_digits_begin = begin; + // Exponent is always expressed in decimal, even for hexadecimal floats. + begin += ConsumeDigits<10>(begin, end, kDecimalExponentDigitsMax, + &result.literal_exponent, nullptr); + if (begin == exponent_digits_begin) { + // there were no digits where we expected an exponent. We failed to read + // an exponent and should not consume the 'e' after all. Rewind 'begin'. + found_exponent = false; + begin = exponent_begin; + } else { + found_exponent = true; + if (negative_exponent) { + result.literal_exponent = -result.literal_exponent; + } + } + } + + if (!found_exponent && RequireExponent(format_flags)) { + // Provided flags required an exponent, but none was found. This results + // in a failure to scan. + return result; + } + + // Success! + result.type = strings_internal::FloatType::kNumber; + if (result.mantissa > 0) { + result.exponent = result.literal_exponent + + (DigitMagnitude<base>() * exponent_adjustment); + } else { + result.exponent = 0; + } + result.end = begin; + return result; +} + +template ParsedFloat ParseFloat<10>(const char* begin, const char* end, + chars_format format_flags); +template ParsedFloat ParseFloat<16>(const char* begin, const char* end, + chars_format format_flags); + +} // namespace strings_internal +ABSL_NAMESPACE_END +} // namespace y_absl diff --git a/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/charconv_parse.h b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/charconv_parse.h new file mode 100644 index 0000000000..3f942cd4cb --- /dev/null +++ b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/charconv_parse.h @@ -0,0 +1,99 @@ +// Copyright 2018 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ABSL_STRINGS_INTERNAL_CHARCONV_PARSE_H_ +#define ABSL_STRINGS_INTERNAL_CHARCONV_PARSE_H_ + +#include <cstdint> + +#include "y_absl/base/config.h" +#include "y_absl/strings/charconv.h" + +namespace y_absl { +ABSL_NAMESPACE_BEGIN +namespace strings_internal { + +// Enum indicating whether a parsed float is a number or special value. +enum class FloatType { kNumber, kInfinity, kNan }; + +// The decomposed parts of a parsed `float` or `double`. +struct ParsedFloat { + // Representation of the parsed mantissa, with the decimal point adjusted to + // make it an integer. + // + // During decimal scanning, this contains 19 significant digits worth of + // mantissa value. If digits beyond this point are found, they + // are truncated, and if any of these dropped digits are nonzero, then + // `mantissa` is inexact, and the full mantissa is stored in [subrange_begin, + // subrange_end). + // + // During hexadecimal scanning, this contains 15 significant hex digits worth + // of mantissa value. Digits beyond this point are sticky -- they are + // truncated, but if any dropped digits are nonzero, the low bit of mantissa + // will be set. (This allows for precise rounding, and avoids the need + // to store the full mantissa in [subrange_begin, subrange_end).) + uint64_t mantissa = 0; + + // Floating point expontent. This reflects any decimal point adjustments and + // any truncated digits from the mantissa. The absolute value of the parsed + // number is represented by mantissa * (base ** exponent), where base==10 for + // decimal floats, and base==2 for hexadecimal floats. + int exponent = 0; + + // The literal exponent value scanned from the input, or 0 if none was + // present. This does not reflect any adjustments applied to mantissa. + int literal_exponent = 0; + + // The type of number scanned. + FloatType type = FloatType::kNumber; + + // When non-null, [subrange_begin, subrange_end) marks a range of characters + // that require further processing. The meaning is dependent on float type. + // If type == kNumber and this is set, this is a "wide input": the input + // mantissa contained more than 19 digits. The range contains the full + // mantissa. It plus `literal_exponent` need to be examined to find the best + // floating point match. + // If type == kNan and this is set, the range marks the contents of a + // matched parenthesized character region after the NaN. + const char* subrange_begin = nullptr; + const char* subrange_end = nullptr; + + // One-past-the-end of the successfully parsed region, or nullptr if no + // matching pattern was found. + const char* end = nullptr; +}; + +// Read the floating point number in the provided range, and populate +// ParsedFloat accordingly. +// +// format_flags is a bitmask value specifying what patterns this API will match. +// `scientific` and `fixed` are honored per std::from_chars rules +// ([utility.from.chars], C++17): if exactly one of these bits is set, then an +// exponent is required, or dislallowed, respectively. +// +// Template parameter `base` must be either 10 or 16. For base 16, a "0x" is +// *not* consumed. The `hex` bit from format_flags is ignored by ParseFloat. +template <int base> +ParsedFloat ParseFloat(const char* begin, const char* end, + y_absl::chars_format format_flags); + +extern template ParsedFloat ParseFloat<10>(const char* begin, const char* end, + y_absl::chars_format format_flags); +extern template ParsedFloat ParseFloat<16>(const char* begin, const char* end, + y_absl::chars_format format_flags); + +} // namespace strings_internal +ABSL_NAMESPACE_END +} // namespace y_absl +#endif // ABSL_STRINGS_INTERNAL_CHARCONV_PARSE_H_ diff --git a/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/cord_internal.cc b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/cord_internal.cc new file mode 100644 index 0000000000..6fc39985d8 --- /dev/null +++ b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/cord_internal.cc @@ -0,0 +1,89 @@ +// Copyright 2020 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include "y_absl/strings/internal/cord_internal.h" + +#include <atomic> +#include <cassert> +#include <memory> + +#include "y_absl/container/inlined_vector.h" +#include "y_absl/strings/internal/cord_rep_btree.h" +#include "y_absl/strings/internal/cord_rep_flat.h" +#include "y_absl/strings/internal/cord_rep_ring.h" + +namespace y_absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { + +ABSL_CONST_INIT std::atomic<bool> cord_btree_enabled(kCordEnableBtreeDefault); +ABSL_CONST_INIT std::atomic<bool> cord_ring_buffer_enabled( + kCordEnableRingBufferDefault); +ABSL_CONST_INIT std::atomic<bool> shallow_subcords_enabled( + kCordShallowSubcordsDefault); +ABSL_CONST_INIT std::atomic<bool> cord_btree_exhaustive_validation(false); + +void CordRep::Destroy(CordRep* rep) { + assert(rep != nullptr); + + y_absl::InlinedVector<CordRep*, Constants::kInlinedVectorSize> pending; + while (true) { + assert(!rep->refcount.IsImmortal()); + if (rep->tag == CONCAT) { + CordRepConcat* rep_concat = rep->concat(); + CordRep* right = rep_concat->right; + if (!right->refcount.Decrement()) { + pending.push_back(right); + } + CordRep* left = rep_concat->left; + delete rep_concat; + rep = nullptr; + if (!left->refcount.Decrement()) { + rep = left; + continue; + } + } else if (rep->tag == BTREE) { + CordRepBtree::Destroy(rep->btree()); + rep = nullptr; + } else if (rep->tag == RING) { + CordRepRing::Destroy(rep->ring()); + rep = nullptr; + } else if (rep->tag == EXTERNAL) { + CordRepExternal::Delete(rep); + rep = nullptr; + } else if (rep->tag == SUBSTRING) { + CordRepSubstring* rep_substring = rep->substring(); + CordRep* child = rep_substring->child; + delete rep_substring; + rep = nullptr; + if (!child->refcount.Decrement()) { + rep = child; + continue; + } + } else { + CordRepFlat::Delete(rep); + rep = nullptr; + } + + if (!pending.empty()) { + rep = pending.back(); + pending.pop_back(); + } else { + break; + } + } +} + +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace y_absl diff --git a/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/cord_internal.h b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/cord_internal.h new file mode 100644 index 0000000000..82f5ac7b81 --- /dev/null +++ b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/cord_internal.h @@ -0,0 +1,620 @@ +// Copyright 2021 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ABSL_STRINGS_INTERNAL_CORD_INTERNAL_H_ +#define ABSL_STRINGS_INTERNAL_CORD_INTERNAL_H_ + +#include <atomic> +#include <cassert> +#include <cstddef> +#include <cstdint> +#include <type_traits> + +#include "y_absl/base/config.h" +#include "y_absl/base/internal/endian.h" +#include "y_absl/base/internal/invoke.h" +#include "y_absl/base/optimization.h" +#include "y_absl/container/internal/compressed_tuple.h" +#include "y_absl/meta/type_traits.h" +#include "y_absl/strings/string_view.h" + +namespace y_absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { + +class CordzInfo; + +// Default feature enable states for cord ring buffers +enum CordFeatureDefaults { + kCordEnableBtreeDefault = true, + kCordEnableRingBufferDefault = false, + kCordShallowSubcordsDefault = false +}; + +extern std::atomic<bool> cord_btree_enabled; +extern std::atomic<bool> cord_ring_buffer_enabled; +extern std::atomic<bool> shallow_subcords_enabled; + +// `cord_btree_exhaustive_validation` can be set to force exhaustive validation +// in debug assertions, and code that calls `IsValid()` explicitly. By default, +// assertions should be relatively cheap and AssertValid() can easily lead to +// O(n^2) complexity as recursive / full tree validation is O(n). +extern std::atomic<bool> cord_btree_exhaustive_validation; + +inline void enable_cord_btree(bool enable) { + cord_btree_enabled.store(enable, std::memory_order_relaxed); +} + +inline void enable_cord_ring_buffer(bool enable) { + cord_ring_buffer_enabled.store(enable, std::memory_order_relaxed); +} + +inline void enable_shallow_subcords(bool enable) { + shallow_subcords_enabled.store(enable, std::memory_order_relaxed); +} + +enum Constants { + // The inlined size to use with y_absl::InlinedVector. + // + // Note: The InlinedVectors in this file (and in cord.h) do not need to use + // the same value for their inlined size. The fact that they do is historical. + // It may be desirable for each to use a different inlined size optimized for + // that InlinedVector's usage. + // + // TODO(jgm): Benchmark to see if there's a more optimal value than 47 for + // the inlined vector size (47 exists for backward compatibility). + kInlinedVectorSize = 47, + + // Prefer copying blocks of at most this size, otherwise reference count. + kMaxBytesToCopy = 511 +}; + +// Compact class for tracking the reference count and state flags for CordRep +// instances. Data is stored in an atomic int32_t for compactness and speed. +class RefcountAndFlags { + public: + constexpr RefcountAndFlags() : count_{kRefIncrement} {} + struct Immortal {}; + explicit constexpr RefcountAndFlags(Immortal) : count_(kImmortalFlag) {} + struct WithCrc {}; + explicit constexpr RefcountAndFlags(WithCrc) + : count_(kCrcFlag | kRefIncrement) {} + + // Increments the reference count. Imposes no memory ordering. + inline void Increment() { + count_.fetch_add(kRefIncrement, std::memory_order_relaxed); + } + + // Asserts that the current refcount is greater than 0. If the refcount is + // greater than 1, decrements the reference count. + // + // Returns false if there are no references outstanding; true otherwise. + // Inserts barriers to ensure that state written before this method returns + // false will be visible to a thread that just observed this method returning + // false. Always returns false when the immortal bit is set. + inline bool Decrement() { + int32_t refcount = count_.load(std::memory_order_acquire) & kRefcountMask; + assert(refcount > 0 || refcount & kImmortalFlag); + return refcount != kRefIncrement && + (count_.fetch_sub(kRefIncrement, std::memory_order_acq_rel) & + kRefcountMask) != kRefIncrement; + } + + // Same as Decrement but expect that refcount is greater than 1. + inline bool DecrementExpectHighRefcount() { + int32_t refcount = + count_.fetch_sub(kRefIncrement, std::memory_order_acq_rel) & + kRefcountMask; + assert(refcount > 0 || refcount & kImmortalFlag); + return refcount != kRefIncrement; + } + + // Returns the current reference count using acquire semantics. + inline int32_t Get() const { + return count_.load(std::memory_order_acquire) >> kNumFlags; + } + + // Returns true if the referenced object carries a CRC value. + bool HasCrc() const { + return (count_.load(std::memory_order_relaxed) & kCrcFlag) != 0; + } + + // Returns true iff the atomic integer is 1 and this node does not store + // a CRC. When both these conditions are met, the current thread owns + // the reference and no other thread shares it, so its contents may be + // safely mutated. + // + // If the referenced item is shared, carries a CRC, or is immortal, + // it should not be modified in-place, and this function returns false. + // + // This call performs the memory barrier needed for the owning thread + // to act on the object, so that if it returns true, it may safely + // assume exclusive access to the object. + inline bool IsMutable() { + return (count_.load(std::memory_order_acquire)) == kRefIncrement; + } + + // Returns whether the atomic integer is 1. Similar to IsMutable(), + // but does not check for a stored CRC. (An unshared node with a CRC is not + // mutable, because changing its data would invalidate the CRC.) + // + // When this returns true, there are no other references, and data sinks + // may safely adopt the children of the CordRep. + inline bool IsOne() { + return (count_.load(std::memory_order_acquire) & kRefcountMask) == + kRefIncrement; + } + + bool IsImmortal() const { + return (count_.load(std::memory_order_relaxed) & kImmortalFlag) != 0; + } + + private: + // We reserve the bottom bits for flags. + // kImmortalBit indicates that this entity should never be collected; it is + // used for the StringConstant constructor to avoid collecting immutable + // constant cords. + // kReservedFlag is reserved for future use. + enum { + kNumFlags = 2, + + kImmortalFlag = 0x1, + kCrcFlag = 0x2, + kRefIncrement = (1 << kNumFlags), + + // Bitmask to use when checking refcount by equality. This masks out + // all flags except kImmortalFlag, which is part of the refcount for + // purposes of equality. (A refcount of 0 or 1 does not count as 0 or 1 + // if the immortal bit is set.) + kRefcountMask = ~kCrcFlag, + }; + + std::atomic<int32_t> count_; +}; + +// The overhead of a vtable is too much for Cord, so we roll our own subclasses +// using only a single byte to differentiate classes from each other - the "tag" +// byte. Define the subclasses first so we can provide downcasting helper +// functions in the base class. + +struct CordRepConcat; +struct CordRepExternal; +struct CordRepFlat; +struct CordRepSubstring; +class CordRepRing; +class CordRepBtree; + +// Various representations that we allow +enum CordRepKind { + CONCAT = 0, + SUBSTRING = 1, + BTREE = 2, + RING = 3, + EXTERNAL = 4, + + // We have different tags for different sized flat arrays, + // starting with FLAT, and limited to MAX_FLAT_TAG. The 225 value is based on + // the current 'size to tag' encoding of 8 / 32 bytes. If a new tag is needed + // in the future, then 'FLAT' and 'MAX_FLAT_TAG' should be adjusted as well + // as the Tag <---> Size logic so that FLAT stil represents the minimum flat + // allocation size. (32 bytes as of now). + FLAT = 5, + MAX_FLAT_TAG = 225 +}; + +// There are various locations where we want to check if some rep is a 'plain' +// data edge, i.e. an external or flat rep. By having FLAT == EXTERNAL + 1, we +// can perform this check in a single branch as 'tag >= EXTERNAL' +// Likewise, we have some locations where we check for 'ring or external/flat', +// so likewise align RING to EXTERNAL. +// Note that we can leave this optimization to the compiler. The compiler will +// DTRT when it sees a condition like `tag == EXTERNAL || tag >= FLAT`. +static_assert(RING == BTREE + 1, "BTREE and RING not consecutive"); +static_assert(EXTERNAL == RING + 1, "BTREE and EXTERNAL not consecutive"); +static_assert(FLAT == EXTERNAL + 1, "EXTERNAL and FLAT not consecutive"); + +struct CordRep { + CordRep() = default; + constexpr CordRep(RefcountAndFlags::Immortal immortal, size_t l) + : length(l), refcount(immortal), tag(EXTERNAL), storage{} {} + + // The following three fields have to be less than 32 bytes since + // that is the smallest supported flat node size. + size_t length; + RefcountAndFlags refcount; + // If tag < FLAT, it represents CordRepKind and indicates the type of node. + // Otherwise, the node type is CordRepFlat and the tag is the encoded size. + uint8_t tag; + + // `storage` provides two main purposes: + // - the starting point for FlatCordRep.Data() [flexible-array-member] + // - 3 bytes of additional storage for use by derived classes. + // The latter is used by CordrepConcat and CordRepBtree. CordRepConcat stores + // a 'depth' value in storage[0], and the (future) CordRepBtree class stores + // `height`, `begin` and `end` in the 3 entries. Otherwise we would need to + // allocate room for these in the derived class, as not all compilers reuse + // padding space from the base class (clang and gcc do, MSVC does not, etc) + uint8_t storage[3]; + + // Returns true if this instance's tag matches the requested type. + constexpr bool IsRing() const { return tag == RING; } + constexpr bool IsConcat() const { return tag == CONCAT; } + constexpr bool IsSubstring() const { return tag == SUBSTRING; } + constexpr bool IsExternal() const { return tag == EXTERNAL; } + constexpr bool IsFlat() const { return tag >= FLAT; } + constexpr bool IsBtree() const { return tag == BTREE; } + + inline CordRepRing* ring(); + inline const CordRepRing* ring() const; + inline CordRepConcat* concat(); + inline const CordRepConcat* concat() const; + inline CordRepSubstring* substring(); + inline const CordRepSubstring* substring() const; + inline CordRepExternal* external(); + inline const CordRepExternal* external() const; + inline CordRepFlat* flat(); + inline const CordRepFlat* flat() const; + inline CordRepBtree* btree(); + inline const CordRepBtree* btree() const; + + // -------------------------------------------------------------------- + // Memory management + + // Destroys the provided `rep`. + static void Destroy(CordRep* rep); + + // Increments the reference count of `rep`. + // Requires `rep` to be a non-null pointer value. + static inline CordRep* Ref(CordRep* rep); + + // Decrements the reference count of `rep`. Destroys rep if count reaches + // zero. Requires `rep` to be a non-null pointer value. + static inline void Unref(CordRep* rep); +}; + +struct CordRepConcat : public CordRep { + CordRep* left; + CordRep* right; + + uint8_t depth() const { return storage[0]; } + void set_depth(uint8_t depth) { storage[0] = depth; } +}; + +struct CordRepSubstring : public CordRep { + size_t start; // Starting offset of substring in child + CordRep* child; +}; + +// Type for function pointer that will invoke the releaser function and also +// delete the `CordRepExternalImpl` corresponding to the passed in +// `CordRepExternal`. +using ExternalReleaserInvoker = void (*)(CordRepExternal*); + +// External CordReps are allocated together with a type erased releaser. The +// releaser is stored in the memory directly following the CordRepExternal. +struct CordRepExternal : public CordRep { + CordRepExternal() = default; + explicit constexpr CordRepExternal(y_absl::string_view str) + : CordRep(RefcountAndFlags::Immortal{}, str.size()), + base(str.data()), + releaser_invoker(nullptr) {} + + const char* base; + // Pointer to function that knows how to call and destroy the releaser. + ExternalReleaserInvoker releaser_invoker; + + // Deletes (releases) the external rep. + // Requires rep != nullptr and rep->IsExternal() + static void Delete(CordRep* rep); +}; + +struct Rank1 {}; +struct Rank0 : Rank1 {}; + +template <typename Releaser, typename = ::y_absl::base_internal::invoke_result_t< + Releaser, y_absl::string_view>> +void InvokeReleaser(Rank0, Releaser&& releaser, y_absl::string_view data) { + ::y_absl::base_internal::invoke(std::forward<Releaser>(releaser), data); +} + +template <typename Releaser, + typename = ::y_absl::base_internal::invoke_result_t<Releaser>> +void InvokeReleaser(Rank1, Releaser&& releaser, y_absl::string_view) { + ::y_absl::base_internal::invoke(std::forward<Releaser>(releaser)); +} + +// We use CompressedTuple so that we can benefit from EBCO. +template <typename Releaser> +struct CordRepExternalImpl + : public CordRepExternal, + public ::y_absl::container_internal::CompressedTuple<Releaser> { + // The extra int arg is so that we can avoid interfering with copy/move + // constructors while still benefitting from perfect forwarding. + template <typename T> + CordRepExternalImpl(T&& releaser, int) + : CordRepExternalImpl::CompressedTuple(std::forward<T>(releaser)) { + this->releaser_invoker = &Release; + } + + ~CordRepExternalImpl() { + InvokeReleaser(Rank0{}, std::move(this->template get<0>()), + y_absl::string_view(base, length)); + } + + static void Release(CordRepExternal* rep) { + delete static_cast<CordRepExternalImpl*>(rep); + } +}; + +inline void CordRepExternal::Delete(CordRep* rep) { + assert(rep != nullptr && rep->IsExternal()); + auto* rep_external = static_cast<CordRepExternal*>(rep); + assert(rep_external->releaser_invoker != nullptr); + rep_external->releaser_invoker(rep_external); +} + +template <typename Str> +struct ConstInitExternalStorage { + ABSL_CONST_INIT static CordRepExternal value; +}; + +template <typename Str> +CordRepExternal ConstInitExternalStorage<Str>::value(Str::value); + +enum { + kMaxInline = 15, +}; + +constexpr char GetOrNull(y_absl::string_view data, size_t pos) { + return pos < data.size() ? data[pos] : '\0'; +} + +// We store cordz_info as 64 bit pointer value in big endian format. This +// guarantees that the least significant byte of cordz_info matches the last +// byte of the inline data representation in as_chars_, which holds the inlined +// size or the 'is_tree' bit. +using cordz_info_t = int64_t; + +// Assert that the `cordz_info` pointer value perfectly overlaps the last half +// of `as_chars_` and can hold a pointer value. +static_assert(sizeof(cordz_info_t) * 2 == kMaxInline + 1, ""); +static_assert(sizeof(cordz_info_t) >= sizeof(intptr_t), ""); + +// BigEndianByte() creates a big endian representation of 'value', i.e.: a big +// endian value where the last byte in the host's representation holds 'value`, +// with all other bytes being 0. +static constexpr cordz_info_t BigEndianByte(unsigned char value) { +#if defined(ABSL_IS_BIG_ENDIAN) + return value; +#else + return static_cast<cordz_info_t>(value) << ((sizeof(cordz_info_t) - 1) * 8); +#endif +} + +class InlineData { + public: + // DefaultInitType forces the use of the default initialization constructor. + enum DefaultInitType { kDefaultInit }; + + // kNullCordzInfo holds the big endian representation of intptr_t(1) + // This is the 'null' / initial value of 'cordz_info'. The null value + // is specifically big endian 1 as with 64-bit pointers, the last + // byte of cordz_info overlaps with the last byte holding the tag. + static constexpr cordz_info_t kNullCordzInfo = BigEndianByte(1); + + constexpr InlineData() : as_chars_{0} {} + explicit InlineData(DefaultInitType) {} + explicit constexpr InlineData(CordRep* rep) : as_tree_(rep) {} + explicit constexpr InlineData(y_absl::string_view chars) + : as_chars_{ + GetOrNull(chars, 0), GetOrNull(chars, 1), + GetOrNull(chars, 2), GetOrNull(chars, 3), + GetOrNull(chars, 4), GetOrNull(chars, 5), + GetOrNull(chars, 6), GetOrNull(chars, 7), + GetOrNull(chars, 8), GetOrNull(chars, 9), + GetOrNull(chars, 10), GetOrNull(chars, 11), + GetOrNull(chars, 12), GetOrNull(chars, 13), + GetOrNull(chars, 14), static_cast<char>((chars.size() << 1))} {} + + // Returns true if the current instance is empty. + // The 'empty value' is an inlined data value of zero length. + bool is_empty() const { return tag() == 0; } + + // Returns true if the current instance holds a tree value. + bool is_tree() const { return (tag() & 1) != 0; } + + // Returns true if the current instance holds a cordz_info value. + // Requires the current instance to hold a tree value. + bool is_profiled() const { + assert(is_tree()); + return as_tree_.cordz_info != kNullCordzInfo; + } + + // Returns true if either of the provided instances hold a cordz_info value. + // This method is more efficient than the equivalent `data1.is_profiled() || + // data2.is_profiled()`. Requires both arguments to hold a tree. + static bool is_either_profiled(const InlineData& data1, + const InlineData& data2) { + assert(data1.is_tree() && data2.is_tree()); + return (data1.as_tree_.cordz_info | data2.as_tree_.cordz_info) != + kNullCordzInfo; + } + + // Returns the cordz_info sampling instance for this instance, or nullptr + // if the current instance is not sampled and does not have CordzInfo data. + // Requires the current instance to hold a tree value. + CordzInfo* cordz_info() const { + assert(is_tree()); + intptr_t info = + static_cast<intptr_t>(y_absl::big_endian::ToHost64(as_tree_.cordz_info)); + assert(info & 1); + return reinterpret_cast<CordzInfo*>(info - 1); + } + + // Sets the current cordz_info sampling instance for this instance, or nullptr + // if the current instance is not sampled and does not have CordzInfo data. + // Requires the current instance to hold a tree value. + void set_cordz_info(CordzInfo* cordz_info) { + assert(is_tree()); + intptr_t info = reinterpret_cast<intptr_t>(cordz_info) | 1; + as_tree_.cordz_info = y_absl::big_endian::FromHost64(info); + } + + // Resets the current cordz_info to null / empty. + void clear_cordz_info() { + assert(is_tree()); + as_tree_.cordz_info = kNullCordzInfo; + } + + // Returns a read only pointer to the character data inside this instance. + // Requires the current instance to hold inline data. + const char* as_chars() const { + assert(!is_tree()); + return as_chars_; + } + + // Returns a mutable pointer to the character data inside this instance. + // Should be used for 'write only' operations setting an inlined value. + // Applications can set the value of inlined data either before or after + // setting the inlined size, i.e., both of the below are valid: + // + // // Set inlined data and inline size + // memcpy(data_.as_chars(), data, size); + // data_.set_inline_size(size); + // + // // Set inlined size and inline data + // data_.set_inline_size(size); + // memcpy(data_.as_chars(), data, size); + // + // It's an error to read from the returned pointer without a preceding write + // if the current instance does not hold inline data, i.e.: is_tree() == true. + char* as_chars() { return as_chars_; } + + // Returns the tree value of this value. + // Requires the current instance to hold a tree value. + CordRep* as_tree() const { + assert(is_tree()); + return as_tree_.rep; + } + + // Initialize this instance to holding the tree value `rep`, + // initializing the cordz_info to null, i.e.: 'not profiled'. + void make_tree(CordRep* rep) { + as_tree_.rep = rep; + as_tree_.cordz_info = kNullCordzInfo; + } + + // Set the tree value of this instance to 'rep`. + // Requires the current instance to already hold a tree value. + // Does not affect the value of cordz_info. + void set_tree(CordRep* rep) { + assert(is_tree()); + as_tree_.rep = rep; + } + + // Returns the size of the inlined character data inside this instance. + // Requires the current instance to hold inline data. + size_t inline_size() const { + assert(!is_tree()); + return tag() >> 1; + } + + // Sets the size of the inlined character data inside this instance. + // Requires `size` to be <= kMaxInline. + // See the documentation on 'as_chars()' for more information and examples. + void set_inline_size(size_t size) { + ABSL_ASSERT(size <= kMaxInline); + tag() = static_cast<char>(size << 1); + } + + private: + // See cordz_info_t for forced alignment and size of `cordz_info` details. + struct AsTree { + explicit constexpr AsTree(y_absl::cord_internal::CordRep* tree) + : rep(tree), cordz_info(kNullCordzInfo) {} + // This union uses up extra space so that whether rep is 32 or 64 bits, + // cordz_info will still start at the eighth byte, and the last + // byte of cordz_info will still be the last byte of InlineData. + union { + y_absl::cord_internal::CordRep* rep; + cordz_info_t unused_aligner; + }; + cordz_info_t cordz_info; + }; + + char& tag() { return reinterpret_cast<char*>(this)[kMaxInline]; } + char tag() const { return reinterpret_cast<const char*>(this)[kMaxInline]; } + + // If the data has length <= kMaxInline, we store it in `as_chars_`, and + // store the size in the last char of `as_chars_` shifted left + 1. + // Else we store it in a tree and store a pointer to that tree in + // `as_tree_.rep` and store a tag in `tagged_size`. + union { + char as_chars_[kMaxInline + 1]; + AsTree as_tree_; + }; +}; + +static_assert(sizeof(InlineData) == kMaxInline + 1, ""); + +inline CordRepConcat* CordRep::concat() { + assert(IsConcat()); + return static_cast<CordRepConcat*>(this); +} + +inline const CordRepConcat* CordRep::concat() const { + assert(IsConcat()); + return static_cast<const CordRepConcat*>(this); +} + +inline CordRepSubstring* CordRep::substring() { + assert(IsSubstring()); + return static_cast<CordRepSubstring*>(this); +} + +inline const CordRepSubstring* CordRep::substring() const { + assert(IsSubstring()); + return static_cast<const CordRepSubstring*>(this); +} + +inline CordRepExternal* CordRep::external() { + assert(IsExternal()); + return static_cast<CordRepExternal*>(this); +} + +inline const CordRepExternal* CordRep::external() const { + assert(IsExternal()); + return static_cast<const CordRepExternal*>(this); +} + +inline CordRep* CordRep::Ref(CordRep* rep) { + assert(rep != nullptr); + rep->refcount.Increment(); + return rep; +} + +inline void CordRep::Unref(CordRep* rep) { + assert(rep != nullptr); + // Expect refcount to be 0. Avoiding the cost of an atomic decrement should + // typically outweigh the cost of an extra branch checking for ref == 1. + if (ABSL_PREDICT_FALSE(!rep->refcount.DecrementExpectHighRefcount())) { + Destroy(rep); + } +} + +} // namespace cord_internal + +ABSL_NAMESPACE_END +} // namespace y_absl +#endif // ABSL_STRINGS_INTERNAL_CORD_INTERNAL_H_ diff --git a/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/cord_rep_btree.cc b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/cord_rep_btree.cc new file mode 100644 index 0000000000..93121c9958 --- /dev/null +++ b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/cord_rep_btree.cc @@ -0,0 +1,1128 @@ +// Copyright 2021 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "y_absl/strings/internal/cord_rep_btree.h" + +#include <cassert> +#include <cstdint> +#include <iostream> +#include <util/generic/string.h> + +#include "y_absl/base/attributes.h" +#include "y_absl/base/config.h" +#include "y_absl/base/internal/raw_logging.h" +#include "y_absl/strings/internal/cord_internal.h" +#include "y_absl/strings/internal/cord_rep_consume.h" +#include "y_absl/strings/internal/cord_rep_flat.h" +#include "y_absl/strings/str_cat.h" +#include "y_absl/strings/string_view.h" + +namespace y_absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { + +constexpr size_t CordRepBtree::kMaxCapacity; // NOLINT: needed for c++ < c++17 + +namespace { + +using NodeStack = CordRepBtree * [CordRepBtree::kMaxDepth]; +using EdgeType = CordRepBtree::EdgeType; +using OpResult = CordRepBtree::OpResult; +using CopyResult = CordRepBtree::CopyResult; + +constexpr auto kFront = CordRepBtree::kFront; +constexpr auto kBack = CordRepBtree::kBack; + +inline bool exhaustive_validation() { + return cord_btree_exhaustive_validation.load(std::memory_order_relaxed); +} + +// Implementation of the various 'Dump' functions. +// Prints the entire tree structure or 'rep'. External callers should +// not specify 'depth' and leave it to its default (0) value. +// Rep may be a CordRepBtree tree, or a SUBSTRING / EXTERNAL / FLAT node. +void DumpAll(const CordRep* rep, bool include_contents, std::ostream& stream, + int depth = 0) { + // Allow for full height trees + substring -> flat / external nodes. + assert(depth <= CordRepBtree::kMaxDepth + 2); + TString sharing = const_cast<CordRep*>(rep)->refcount.IsOne() + ? TString("Private") + : y_absl::StrCat("Shared(", rep->refcount.Get(), ")"); + TString sptr = y_absl::StrCat("0x", y_absl::Hex(rep)); + + // Dumps the data contents of `rep` if `include_contents` is true. + // Always emits a new line character. + auto maybe_dump_data = [&stream, include_contents](const CordRep* r) { + if (include_contents) { + // Allow for up to 60 wide display of content data, which with some + // indentation and prefix / labels keeps us within roughly 80-100 wide. + constexpr size_t kMaxDataLength = 60; + stream << ", data = \"" + << CordRepBtree::EdgeData(r).substr(0, kMaxDataLength) + << (r->length > kMaxDataLength ? "\"..." : "\""); + } + stream << '\n'; + }; + + // For each level, we print the 'shared/private' state and the rep pointer, + // indented by two spaces per recursive depth. + stream << TString(depth * 2, ' ') << sharing << " (" << sptr << ") "; + + if (rep->IsBtree()) { + const CordRepBtree* node = rep->btree(); + TString label = + node->height() ? y_absl::StrCat("Node(", node->height(), ")") : "Leaf"; + stream << label << ", len = " << node->length + << ", begin = " << node->begin() << ", end = " << node->end() + << "\n"; + for (CordRep* edge : node->Edges()) { + DumpAll(edge, include_contents, stream, depth + 1); + } + } else if (rep->tag == SUBSTRING) { + const CordRepSubstring* substring = rep->substring(); + stream << "Substring, len = " << rep->length + << ", start = " << substring->start; + maybe_dump_data(rep); + DumpAll(substring->child, include_contents, stream, depth + 1); + } else if (rep->tag >= FLAT) { + stream << "Flat, len = " << rep->length + << ", cap = " << rep->flat()->Capacity(); + maybe_dump_data(rep); + } else if (rep->tag == EXTERNAL) { + stream << "Extn, len = " << rep->length; + maybe_dump_data(rep); + } +} + +// TODO(b/192061034): add 'bytes to copy' logic to avoid large slop on substring +// small data out of large reps, and general efficiency of 'always copy small +// data'. Consider making this a cord rep internal library function. +CordRepSubstring* CreateSubstring(CordRep* rep, size_t offset, size_t n) { + assert(n != 0); + assert(offset + n <= rep->length); + assert(offset != 0 || n != rep->length); + + if (rep->tag == SUBSTRING) { + CordRepSubstring* substring = rep->substring(); + offset += substring->start; + rep = CordRep::Ref(substring->child); + CordRep::Unref(substring); + } + CordRepSubstring* substring = new CordRepSubstring(); + substring->length = n; + substring->tag = SUBSTRING; + substring->start = offset; + substring->child = rep; + return substring; +} + +// TODO(b/192061034): consider making this a cord rep library function. +inline CordRep* MakeSubstring(CordRep* rep, size_t offset, size_t n) { + if (n == rep->length) return rep; + if (n == 0) return CordRep::Unref(rep), nullptr; + return CreateSubstring(rep, offset, n); +} + +// TODO(b/192061034): consider making this a cord rep library function. +inline CordRep* MakeSubstring(CordRep* rep, size_t offset) { + if (offset == 0) return rep; + return CreateSubstring(rep, offset, rep->length - offset); +} + +// Resizes `edge` to the provided `length`. Adopts a reference on `edge`. +// This method directly returns `edge` if `length` equals `edge->length`. +// If `is_mutable` is set to true, this function may return `edge` with +// `edge->length` set to the new length depending on the type and size of +// `edge`. Otherwise, this function returns a new CordRepSubstring value. +// Requires `length > 0 && length <= edge->length`. +CordRep* ResizeEdge(CordRep* edge, size_t length, bool is_mutable) { + assert(length > 0); + assert(length <= edge->length); + assert(CordRepBtree::IsDataEdge(edge)); + if (length >= edge->length) return edge; + + if (is_mutable && (edge->tag >= FLAT || edge->tag == SUBSTRING)) { + edge->length = length; + return edge; + } + + return CreateSubstring(edge, 0, length); +} + +template <EdgeType edge_type> +inline y_absl::string_view Consume(y_absl::string_view s, size_t n) { + return edge_type == kBack ? s.substr(n) : s.substr(0, s.size() - n); +} + +template <EdgeType edge_type> +inline y_absl::string_view Consume(char* dst, y_absl::string_view s, size_t n) { + if (edge_type == kBack) { + memcpy(dst, s.data(), n); + return s.substr(n); + } else { + const size_t offset = s.size() - n; + memcpy(dst, s.data() + offset, n); + return s.substr(0, offset); + } +} + +// Known issue / optimization weirdness: the store associated with the +// decrement introduces traffic between cpus (even if the result of that +// traffic does nothing), making this faster than a single call to +// refcount.Decrement() checking the zero refcount condition. +template <typename R, typename Fn> +inline void FastUnref(R* r, Fn&& fn) { + if (r->refcount.IsOne()) { + fn(r); + } else if (!r->refcount.DecrementExpectHighRefcount()) { + fn(r); + } +} + +// Deletes a leaf node data edge. Requires `rep` to be an EXTERNAL or FLAT +// node, or a SUBSTRING of an EXTERNAL or FLAT node. +void DeleteLeafEdge(CordRep* rep) { + for (;;) { + if (rep->tag >= FLAT) { + CordRepFlat::Delete(rep->flat()); + return; + } + if (rep->tag == EXTERNAL) { + CordRepExternal::Delete(rep->external()); + return; + } + assert(rep->tag == SUBSTRING); + CordRepSubstring* substring = rep->substring(); + rep = substring->child; + assert(rep->tag == EXTERNAL || rep->tag >= FLAT); + delete substring; + if (rep->refcount.Decrement()) return; + } +} + +// StackOperations contains the logic to build a left-most or right-most stack +// (leg) down to the leaf level of a btree, and 'unwind' / 'Finalize' methods to +// propagate node changes up the stack. +template <EdgeType edge_type> +struct StackOperations { + // Returns true if the node at 'depth' is mutable, i.e. has a refcount + // of one, carries no CRC, and all of its parent nodes have a refcount of one. + inline bool owned(int depth) const { return depth < share_depth; } + + // Returns the node at 'depth'. + inline CordRepBtree* node(int depth) const { return stack[depth]; } + + // Builds a `depth` levels deep stack starting at `tree` recording which nodes + // are private in the form of the 'share depth' where nodes are shared. + inline CordRepBtree* BuildStack(CordRepBtree* tree, int depth) { + assert(depth <= tree->height()); + int current_depth = 0; + while (current_depth < depth && tree->refcount.IsMutable()) { + stack[current_depth++] = tree; + tree = tree->Edge(edge_type)->btree(); + } + share_depth = current_depth + (tree->refcount.IsMutable() ? 1 : 0); + while (current_depth < depth) { + stack[current_depth++] = tree; + tree = tree->Edge(edge_type)->btree(); + } + return tree; + } + + // Builds a stack with the invariant that all nodes are private owned / not + // shared and carry no CRC data. This is used in iterative updates where a + // previous propagation guaranteed all nodes have this property. + inline void BuildOwnedStack(CordRepBtree* tree, int height) { + assert(height <= CordRepBtree::kMaxHeight); + int depth = 0; + while (depth < height) { + assert(tree->refcount.IsMutable()); + stack[depth++] = tree; + tree = tree->Edge(edge_type)->btree(); + } + assert(tree->refcount.IsMutable()); + share_depth = depth + 1; + } + + // Processes the final 'top level' result action for the tree. + // See the 'Action' enum for the various action implications. + static inline CordRepBtree* Finalize(CordRepBtree* tree, OpResult result) { + switch (result.action) { + case CordRepBtree::kPopped: + tree = edge_type == kBack ? CordRepBtree::New(tree, result.tree) + : CordRepBtree::New(result.tree, tree); + if (ABSL_PREDICT_FALSE(tree->height() > CordRepBtree::kMaxHeight)) { + tree = CordRepBtree::Rebuild(tree); + ABSL_RAW_CHECK(tree->height() <= CordRepBtree::kMaxHeight, + "Max height exceeded"); + } + return tree; + case CordRepBtree::kCopied: + CordRep::Unref(tree); + ABSL_FALLTHROUGH_INTENDED; + case CordRepBtree::kSelf: + return result.tree; + } + ABSL_INTERNAL_UNREACHABLE; + return result.tree; + } + + // Propagate the action result in 'result' up into all nodes of the stack + // starting at depth 'depth'. 'length' contains the extra length of data that + // was added at the lowest level, and is updated into all nodes of the stack. + // See the 'Action' enum for the various action implications. + // If 'propagate' is true, then any copied node values are updated into the + // stack, which is used for iterative processing on the same stack. + template <bool propagate = false> + inline CordRepBtree* Unwind(CordRepBtree* tree, int depth, size_t length, + OpResult result) { + // TODO(mvels): revisit the below code to check if 3 loops with 3 + // (incremental) conditions is faster than 1 loop with a switch. + // Benchmarking and perf recordings indicate the loop with switch is + // fastest, likely because of indirect jumps on the tight case values and + // dense branches. But it's worth considering 3 loops, as the `action` + // transitions are mono directional. E.g.: + // while (action == kPopped) { + // ... + // } + // while (action == kCopied) { + // ... + // } + // ... + // We also found that an "if () do {}" loop here seems faster, possibly + // because it allows the branch predictor more granular heuristics on + // 'single leaf' (`depth` == 0) and 'single depth' (`depth` == 1) cases + // which appear to be the most common use cases. + if (depth != 0) { + do { + CordRepBtree* node = stack[--depth]; + const bool owned = depth < share_depth; + switch (result.action) { + case CordRepBtree::kPopped: + assert(!propagate); + result = node->AddEdge<edge_type>(owned, result.tree, length); + break; + case CordRepBtree::kCopied: + result = node->SetEdge<edge_type>(owned, result.tree, length); + if (propagate) stack[depth] = result.tree; + break; + case CordRepBtree::kSelf: + node->length += length; + while (depth > 0) { + node = stack[--depth]; + node->length += length; + } + return node; + } + } while (depth > 0); + } + return Finalize(tree, result); + } + + // Invokes `Unwind` with `propagate=true` to update the stack node values. + inline CordRepBtree* Propagate(CordRepBtree* tree, int depth, size_t length, + OpResult result) { + return Unwind</*propagate=*/true>(tree, depth, length, result); + } + + // `share_depth` contains the depth at which the nodes in the stack cannot + // be mutated. I.e., if the top most level is shared (i.e.: + // `!refcount.IsMutable()`), then `share_depth` is 0. If the 2nd node + // is shared (and implicitly all nodes below that) then `share_depth` is 1, + // etc. A `share_depth` greater than the depth of the stack indicates that + // none of the nodes in the stack are shared. + int share_depth; + + NodeStack stack; +}; + +} // namespace + +void CordRepBtree::Dump(const CordRep* rep, y_absl::string_view label, + bool include_contents, std::ostream& stream) { + stream << "===================================\n"; + if (!label.empty()) { + stream << label << '\n'; + stream << "-----------------------------------\n"; + } + if (rep) { + DumpAll(rep, include_contents, stream); + } else { + stream << "NULL\n"; + } +} + +void CordRepBtree::Dump(const CordRep* rep, y_absl::string_view label, + std::ostream& stream) { + Dump(rep, label, false, stream); +} + +void CordRepBtree::Dump(const CordRep* rep, std::ostream& stream) { + Dump(rep, y_absl::string_view(), false, stream); +} + +void CordRepBtree::DestroyLeaf(CordRepBtree* tree, size_t begin, size_t end) { + for (CordRep* edge : tree->Edges(begin, end)) { + FastUnref(edge, DeleteLeafEdge); + } + Delete(tree); +} + +void CordRepBtree::DestroyNonLeaf(CordRepBtree* tree, size_t begin, + size_t end) { + for (CordRep* edge : tree->Edges(begin, end)) { + FastUnref(edge->btree(), Destroy); + } + Delete(tree); +} + +bool CordRepBtree::IsValid(const CordRepBtree* tree, bool shallow) { +#define NODE_CHECK_VALID(x) \ + if (!(x)) { \ + ABSL_RAW_LOG(ERROR, "CordRepBtree::CheckValid() FAILED: %s", #x); \ + return false; \ + } +#define NODE_CHECK_EQ(x, y) \ + if ((x) != (y)) { \ + ABSL_RAW_LOG(ERROR, \ + "CordRepBtree::CheckValid() FAILED: %s != %s (%s vs %s)", #x, \ + #y, y_absl::StrCat(x).c_str(), y_absl::StrCat(y).c_str()); \ + return false; \ + } + + NODE_CHECK_VALID(tree != nullptr); + NODE_CHECK_VALID(tree->IsBtree()); + NODE_CHECK_VALID(tree->height() <= kMaxHeight); + NODE_CHECK_VALID(tree->begin() < tree->capacity()); + NODE_CHECK_VALID(tree->end() <= tree->capacity()); + NODE_CHECK_VALID(tree->begin() <= tree->end()); + size_t child_length = 0; + for (CordRep* edge : tree->Edges()) { + NODE_CHECK_VALID(edge != nullptr); + if (tree->height() > 0) { + NODE_CHECK_VALID(edge->IsBtree()); + NODE_CHECK_VALID(edge->btree()->height() == tree->height() - 1); + } else { + NODE_CHECK_VALID(IsDataEdge(edge)); + } + child_length += edge->length; + } + NODE_CHECK_EQ(child_length, tree->length); + if ((!shallow || exhaustive_validation()) && tree->height() > 0) { + for (CordRep* edge : tree->Edges()) { + if (!IsValid(edge->btree(), shallow)) return false; + } + } + return true; + +#undef NODE_CHECK_VALID +#undef NODE_CHECK_EQ +} + +#ifndef NDEBUG + +CordRepBtree* CordRepBtree::AssertValid(CordRepBtree* tree, bool shallow) { + if (!IsValid(tree, shallow)) { + Dump(tree, "CordRepBtree validation failed:", false, std::cout); + ABSL_RAW_LOG(FATAL, "CordRepBtree::CheckValid() FAILED"); + } + return tree; +} + +const CordRepBtree* CordRepBtree::AssertValid(const CordRepBtree* tree, + bool shallow) { + if (!IsValid(tree, shallow)) { + Dump(tree, "CordRepBtree validation failed:", false, std::cout); + ABSL_RAW_LOG(FATAL, "CordRepBtree::CheckValid() FAILED"); + } + return tree; +} + +#endif // NDEBUG + +template <EdgeType edge_type> +inline OpResult CordRepBtree::AddEdge(bool owned, CordRep* edge, size_t delta) { + if (size() >= kMaxCapacity) return {New(edge), kPopped}; + OpResult result = ToOpResult(owned); + result.tree->Add<edge_type>(edge); + result.tree->length += delta; + return result; +} + +template <EdgeType edge_type> +OpResult CordRepBtree::SetEdge(bool owned, CordRep* edge, size_t delta) { + OpResult result; + const size_t idx = index(edge_type); + if (owned) { + result = {this, kSelf}; + CordRep::Unref(edges_[idx]); + } else { + // Create a copy containing all unchanged edges. Unchanged edges are the + // open interval [begin, back) or [begin + 1, end) depending on `edge_type`. + // We conveniently cover both case using a constexpr `shift` being 0 or 1 + // as `end :== back + 1`. + result = {CopyRaw(), kCopied}; + constexpr int shift = edge_type == kFront ? 1 : 0; + for (CordRep* r : Edges(begin() + shift, back() + shift)) { + CordRep::Ref(r); + } + } + result.tree->edges_[idx] = edge; + result.tree->length += delta; + return result; +} + +template <EdgeType edge_type> +CordRepBtree* CordRepBtree::AddCordRep(CordRepBtree* tree, CordRep* rep) { + const int depth = tree->height(); + const size_t length = rep->length; + StackOperations<edge_type> ops; + CordRepBtree* leaf = ops.BuildStack(tree, depth); + const OpResult result = + leaf->AddEdge<edge_type>(ops.owned(depth), rep, length); + return ops.Unwind(tree, depth, length, result); +} + +template <> +CordRepBtree* CordRepBtree::NewLeaf<kBack>(y_absl::string_view data, + size_t extra) { + CordRepBtree* leaf = CordRepBtree::New(0); + size_t length = 0; + size_t end = 0; + const size_t cap = leaf->capacity(); + while (!data.empty() && end != cap) { + auto* flat = CordRepFlat::New(data.length() + extra); + flat->length = (std::min)(data.length(), flat->Capacity()); + length += flat->length; + leaf->edges_[end++] = flat; + data = Consume<kBack>(flat->Data(), data, flat->length); + } + leaf->length = length; + leaf->set_end(end); + return leaf; +} + +template <> +CordRepBtree* CordRepBtree::NewLeaf<kFront>(y_absl::string_view data, + size_t extra) { + CordRepBtree* leaf = CordRepBtree::New(0); + size_t length = 0; + size_t begin = leaf->capacity(); + leaf->set_end(leaf->capacity()); + while (!data.empty() && begin != 0) { + auto* flat = CordRepFlat::New(data.length() + extra); + flat->length = (std::min)(data.length(), flat->Capacity()); + length += flat->length; + leaf->edges_[--begin] = flat; + data = Consume<kFront>(flat->Data(), data, flat->length); + } + leaf->length = length; + leaf->set_begin(begin); + return leaf; +} + +template <> +y_absl::string_view CordRepBtree::AddData<kBack>(y_absl::string_view data, + size_t extra) { + assert(!data.empty()); + assert(size() < capacity()); + AlignBegin(); + const size_t cap = capacity(); + do { + CordRepFlat* flat = CordRepFlat::New(data.length() + extra); + const size_t n = (std::min)(data.length(), flat->Capacity()); + flat->length = n; + edges_[fetch_add_end(1)] = flat; + data = Consume<kBack>(flat->Data(), data, n); + } while (!data.empty() && end() != cap); + return data; +} + +template <> +y_absl::string_view CordRepBtree::AddData<kFront>(y_absl::string_view data, + size_t extra) { + assert(!data.empty()); + assert(size() < capacity()); + AlignEnd(); + do { + CordRepFlat* flat = CordRepFlat::New(data.length() + extra); + const size_t n = (std::min)(data.length(), flat->Capacity()); + flat->length = n; + edges_[sub_fetch_begin(1)] = flat; + data = Consume<kFront>(flat->Data(), data, n); + } while (!data.empty() && begin() != 0); + return data; +} + +template <EdgeType edge_type> +CordRepBtree* CordRepBtree::AddData(CordRepBtree* tree, y_absl::string_view data, + size_t extra) { + if (ABSL_PREDICT_FALSE(data.empty())) return tree; + + const size_t original_data_size = data.size(); + int depth = tree->height(); + StackOperations<edge_type> ops; + CordRepBtree* leaf = ops.BuildStack(tree, depth); + + // If there is capacity in the last edge, append as much data + // as possible into this last edge. + if (leaf->size() < leaf->capacity()) { + OpResult result = leaf->ToOpResult(ops.owned(depth)); + data = result.tree->AddData<edge_type>(data, extra); + if (data.empty()) { + result.tree->length += original_data_size; + return ops.Unwind(tree, depth, original_data_size, result); + } + + // We added some data into this leaf, but not all. Propagate the added + // length to the top most node, and rebuild the stack with any newly copied + // or updated nodes. From this point on, the path (leg) from the top most + // node to the right-most node towards the leaf node is privately owned. + size_t delta = original_data_size - data.size(); + assert(delta > 0); + result.tree->length += delta; + tree = ops.Propagate(tree, depth, delta, result); + ops.share_depth = depth + 1; + } + + // We were unable to append all data into the existing right-most leaf node. + // This means all remaining data must be put into (a) new leaf node(s) which + // we append to the tree. To make this efficient, we iteratively build full + // leaf nodes from `data` until the created leaf contains all remaining data. + // We utilize the `Unwind` method to merge the created leaf into the first + // level towards root that has capacity. On each iteration with remaining + // data, we rebuild the stack in the knowledge that right-most nodes are + // privately owned after the first `Unwind` completes. + for (;;) { + OpResult result = {CordRepBtree::NewLeaf<edge_type>(data, extra), kPopped}; + if (result.tree->length == data.size()) { + return ops.Unwind(tree, depth, result.tree->length, result); + } + data = Consume<edge_type>(data, result.tree->length); + tree = ops.Unwind(tree, depth, result.tree->length, result); + depth = tree->height(); + ops.BuildOwnedStack(tree, depth); + } +} + +template <EdgeType edge_type> +CordRepBtree* CordRepBtree::Merge(CordRepBtree* dst, CordRepBtree* src) { + assert(dst->height() >= src->height()); + + // Capture source length as we may consume / destroy `src`. + const size_t length = src->length; + + // We attempt to merge `src` at its corresponding height in `dst`. + const int depth = dst->height() - src->height(); + StackOperations<edge_type> ops; + CordRepBtree* merge_node = ops.BuildStack(dst, depth); + + // If there is enough space in `merge_node` for all edges from `src`, add all + // edges to this node, making a fresh copy as needed if not privately owned. + // If `merge_node` does not have capacity for `src`, we rely on `Unwind` and + // `Finalize` to merge `src` into the first level towards `root` where there + // is capacity for another edge, or create a new top level node. + OpResult result; + if (merge_node->size() + src->size() <= kMaxCapacity) { + result = merge_node->ToOpResult(ops.owned(depth)); + result.tree->Add<edge_type>(src->Edges()); + result.tree->length += src->length; + if (src->refcount.IsOne()) { + Delete(src); + } else { + for (CordRep* edge : src->Edges()) CordRep::Ref(edge); + CordRepBtree::Unref(src); + } + } else { + result = {src, kPopped}; + } + + // Unless we merged at the top level (i.e.: src and dst are equal height), + // unwind the result towards the top level, and finalize the result. + if (depth) { + return ops.Unwind(dst, depth, length, result); + } + return ops.Finalize(dst, result); +} + +CopyResult CordRepBtree::CopySuffix(size_t offset) { + assert(offset < this->length); + + // As long as `offset` starts inside the last edge, we can 'drop' the current + // depth. For the most extreme example: if offset references the last data + // edge in the tree, there is only a single edge / path from the top of the + // tree to that last edge, so we can drop all the nodes except that edge. + // The fast path check for this is `back->length >= length - offset`. + int height = this->height(); + CordRepBtree* node = this; + size_t len = node->length - offset; + CordRep* back = node->Edge(kBack); + while (back->length >= len) { + offset = back->length - len; + if (--height < 0) { + return {MakeSubstring(CordRep::Ref(back), offset), height}; + } + node = back->btree(); + back = node->Edge(kBack); + } + if (offset == 0) return {CordRep::Ref(node), height}; + + // Offset does not point into the last edge, so we span at least two edges. + // Find the index of offset with `IndexBeyond` which provides us the edge + // 'beyond' the offset if offset is not a clean starting point of an edge. + Position pos = node->IndexBeyond(offset); + CordRepBtree* sub = node->CopyToEndFrom(pos.index, len); + const CopyResult result = {sub, height}; + + // `pos.n` contains a non zero value if the offset is not an exact starting + // point of an edge. In this case, `pos.n` contains the 'trailing' amount of + // bytes of the edge preceding that in `pos.index`. We need to iteratively + // adjust the preceding edge with the 'broken' offset until we have a perfect + // start of the edge. + while (pos.n != 0) { + assert(pos.index >= 1); + const size_t begin = pos.index - 1; + sub->set_begin(begin); + CordRep* const edge = node->Edge(begin); + + len = pos.n; + offset = edge->length - len; + + if (--height < 0) { + sub->edges_[begin] = MakeSubstring(CordRep::Ref(edge), offset, len); + return result; + } + + node = edge->btree(); + pos = node->IndexBeyond(offset); + + CordRepBtree* nsub = node->CopyToEndFrom(pos.index, len); + sub->edges_[begin] = nsub; + sub = nsub; + } + sub->set_begin(pos.index); + return result; +} + +CopyResult CordRepBtree::CopyPrefix(size_t n, bool allow_folding) { + assert(n > 0); + assert(n <= this->length); + + // As long as `n` does not exceed the length of the first edge, we can 'drop' + // the current depth. For the most extreme example: if we'd copy a 1 byte + // prefix from a tree, there is only a single edge / path from the top of the + // tree to the single data edge containing this byte, so we can drop all the + // nodes except the data node. + int height = this->height(); + CordRepBtree* node = this; + CordRep* front = node->Edge(kFront); + if (allow_folding) { + while (front->length >= n) { + if (--height < 0) return {MakeSubstring(CordRep::Ref(front), 0, n), -1}; + node = front->btree(); + front = node->Edge(kFront); + } + } + if (node->length == n) return {CordRep::Ref(node), height}; + + // `n` spans at least two nodes, find the end point of the span. + Position pos = node->IndexOf(n); + + // Create a partial copy of the node up to `pos.index`, with a defined length + // of `n`. Any 'partial last edge' is added further below as needed. + CordRepBtree* sub = node->CopyBeginTo(pos.index, n); + const CopyResult result = {sub, height}; + + // `pos.n` contains the 'offset inside the edge for IndexOf(n)'. As long as + // this is not zero, we don't have a 'clean cut', so we need to make a + // (partial) copy of that last edge, and repeat this until pos.n is zero. + while (pos.n != 0) { + size_t end = pos.index; + n = pos.n; + + CordRep* edge = node->Edge(pos.index); + if (--height < 0) { + sub->edges_[end++] = MakeSubstring(CordRep::Ref(edge), 0, n); + sub->set_end(end); + AssertValid(result.edge->btree()); + return result; + } + + node = edge->btree(); + pos = node->IndexOf(n); + CordRepBtree* nsub = node->CopyBeginTo(pos.index, n); + sub->edges_[end++] = nsub; + sub->set_end(end); + sub = nsub; + } + sub->set_end(pos.index); + AssertValid(result.edge->btree()); + return result; +} + +CordRep* CordRepBtree::ExtractFront(CordRepBtree* tree) { + CordRep* front = tree->Edge(tree->begin()); + if (tree->refcount.IsMutable()) { + Unref(tree->Edges(tree->begin() + 1, tree->end())); + CordRepBtree::Delete(tree); + } else { + CordRep::Ref(front); + CordRep::Unref(tree); + } + return front; +} + +CordRepBtree* CordRepBtree::ConsumeBeginTo(CordRepBtree* tree, size_t end, + size_t new_length) { + assert(end <= tree->end()); + if (tree->refcount.IsMutable()) { + Unref(tree->Edges(end, tree->end())); + tree->set_end(end); + tree->length = new_length; + } else { + CordRepBtree* old = tree; + tree = tree->CopyBeginTo(end, new_length); + CordRep::Unref(old); + } + return tree; +} + +CordRep* CordRepBtree::RemoveSuffix(CordRepBtree* tree, size_t n) { + // Check input and deal with trivial cases 'Remove all/none' + assert(tree != nullptr); + assert(n <= tree->length); + const size_t len = tree->length; + if (ABSL_PREDICT_FALSE(n == 0)) { + return tree; + } + if (ABSL_PREDICT_FALSE(n >= len)) { + CordRepBtree::Unref(tree); + return nullptr; + } + + size_t length = len - n; + int height = tree->height(); + bool is_mutable = tree->refcount.IsMutable(); + + // Extract all top nodes which are reduced to size = 1 + Position pos = tree->IndexOfLength(length); + while (pos.index == tree->begin()) { + CordRep* edge = ExtractFront(tree); + is_mutable &= edge->refcount.IsMutable(); + if (height-- == 0) return ResizeEdge(edge, length, is_mutable); + tree = edge->btree(); + pos = tree->IndexOfLength(length); + } + + // Repeat the following sequence traversing down the tree: + // - Crop the top node to the 'last remaining edge' adjusting length. + // - Set the length for down edges to the partial length in that last edge. + // - Repeat this until the last edge is 'included in full' + // - If we hit the data edge level, resize and return the last data edge + CordRepBtree* top = tree = ConsumeBeginTo(tree, pos.index + 1, length); + CordRep* edge = tree->Edge(pos.index); + length = pos.n; + while (length != edge->length) { + // ConsumeBeginTo guarantees `tree` is a clean, privately owned copy. + assert(tree->refcount.IsMutable()); + const bool edge_is_mutable = edge->refcount.IsMutable(); + + if (height-- == 0) { + tree->edges_[pos.index] = ResizeEdge(edge, length, edge_is_mutable); + return AssertValid(top); + } + + if (!edge_is_mutable) { + // We can't 'in place' remove any suffixes down this edge. + // Replace this edge with a prefix copy instead. + tree->edges_[pos.index] = edge->btree()->CopyPrefix(length, false).edge; + CordRep::Unref(edge); + return AssertValid(top); + } + + // Move down one level, rinse repeat. + tree = edge->btree(); + pos = tree->IndexOfLength(length); + tree = ConsumeBeginTo(edge->btree(), pos.index + 1, length); + edge = tree->Edge(pos.index); + length = pos.n; + } + + return AssertValid(top); +} + +CordRep* CordRepBtree::SubTree(size_t offset, size_t n) { + assert(n <= this->length); + assert(offset <= this->length - n); + if (ABSL_PREDICT_FALSE(n == 0)) return nullptr; + + CordRepBtree* node = this; + int height = node->height(); + Position front = node->IndexOf(offset); + CordRep* left = node->edges_[front.index]; + while (front.n + n <= left->length) { + if (--height < 0) return MakeSubstring(CordRep::Ref(left), front.n, n); + node = left->btree(); + front = node->IndexOf(front.n); + left = node->edges_[front.index]; + } + + const Position back = node->IndexBefore(front, n); + CordRep* const right = node->edges_[back.index]; + assert(back.index > front.index); + + // Get partial suffix and prefix entries. + CopyResult prefix; + CopyResult suffix; + if (height > 0) { + // Copy prefix and suffix of the boundary nodes. + prefix = left->btree()->CopySuffix(front.n); + suffix = right->btree()->CopyPrefix(back.n); + + // If there is an edge between the prefix and suffix edges, then the tree + // must remain at its previous (full) height. If we have no edges between + // prefix and suffix edges, then the tree must be as high as either the + // suffix or prefix edges (which are collapsed to their minimum heights). + if (front.index + 1 == back.index) { + height = (std::max)(prefix.height, suffix.height) + 1; + } + + // Raise prefix and suffixes to the new tree height. + for (int h = prefix.height + 1; h < height; ++h) { + prefix.edge = CordRepBtree::New(prefix.edge); + } + for (int h = suffix.height + 1; h < height; ++h) { + suffix.edge = CordRepBtree::New(suffix.edge); + } + } else { + // Leaf node, simply take substrings for prefix and suffix. + prefix = CopyResult{MakeSubstring(CordRep::Ref(left), front.n), -1}; + suffix = CopyResult{MakeSubstring(CordRep::Ref(right), 0, back.n), -1}; + } + + // Compose resulting tree. + CordRepBtree* sub = CordRepBtree::New(height); + size_t end = 0; + sub->edges_[end++] = prefix.edge; + for (CordRep* r : node->Edges(front.index + 1, back.index)) { + sub->edges_[end++] = CordRep::Ref(r); + } + sub->edges_[end++] = suffix.edge; + sub->set_end(end); + sub->length = n; + return AssertValid(sub); +} + +CordRepBtree* CordRepBtree::MergeTrees(CordRepBtree* left, + CordRepBtree* right) { + return left->height() >= right->height() ? Merge<kBack>(left, right) + : Merge<kFront>(right, left); +} + +bool CordRepBtree::IsFlat(y_absl::string_view* fragment) const { + if (height() == 0 && size() == 1) { + if (fragment) *fragment = Data(begin()); + return true; + } + return false; +} + +bool CordRepBtree::IsFlat(size_t offset, const size_t n, + y_absl::string_view* fragment) const { + assert(n <= this->length); + assert(offset <= this->length - n); + if (ABSL_PREDICT_FALSE(n == 0)) return false; + int height = this->height(); + const CordRepBtree* node = this; + for (;;) { + const Position front = node->IndexOf(offset); + const CordRep* edge = node->Edge(front.index); + if (edge->length < front.n + n) return false; + if (--height < 0) { + if (fragment) *fragment = EdgeData(edge).substr(front.n, n); + return true; + } + offset = front.n; + node = node->Edge(front.index)->btree(); + } +} + +char CordRepBtree::GetCharacter(size_t offset) const { + assert(offset < length); + const CordRepBtree* node = this; + int height = node->height(); + for (;;) { + Position front = node->IndexOf(offset); + if (--height < 0) return node->Data(front.index)[front.n]; + offset = front.n; + node = node->Edge(front.index)->btree(); + } +} + +Span<char> CordRepBtree::GetAppendBufferSlow(size_t size) { + // The inlined version in `GetAppendBuffer()` deals with all heights <= 3. + assert(height() >= 4); + assert(refcount.IsMutable()); + + // Build a stack of nodes we may potentially need to update if we find a + // non-shared FLAT with capacity at the leaf level. + const int depth = height(); + CordRepBtree* node = this; + CordRepBtree* stack[kMaxDepth]; + for (int i = 0; i < depth; ++i) { + node = node->Edge(kBack)->btree(); + if (!node->refcount.IsMutable()) return {}; + stack[i] = node; + } + + // Must be a privately owned, mutable flat. + CordRep* const edge = node->Edge(kBack); + if (!edge->refcount.IsMutable() || edge->tag < FLAT) return {}; + + // Must have capacity. + const size_t avail = edge->flat()->Capacity() - edge->length; + if (avail == 0) return {}; + + // Build span on remaining capacity. + size_t delta = (std::min)(size, avail); + Span<char> span = {edge->flat()->Data() + edge->length, delta}; + edge->length += delta; + this->length += delta; + for (int i = 0; i < depth; ++i) { + stack[i]->length += delta; + } + return span; +} + +CordRepBtree* CordRepBtree::CreateSlow(CordRep* rep) { + if (rep->IsBtree()) return rep->btree(); + + CordRepBtree* node = nullptr; + auto consume = [&node](CordRep* r, size_t offset, size_t length) { + r = MakeSubstring(r, offset, length); + if (node == nullptr) { + node = New(r); + } else { + node = CordRepBtree::AddCordRep<kBack>(node, r); + } + }; + Consume(rep, consume); + return node; +} + +CordRepBtree* CordRepBtree::AppendSlow(CordRepBtree* tree, CordRep* rep) { + if (ABSL_PREDICT_TRUE(rep->IsBtree())) { + return MergeTrees(tree, rep->btree()); + } + auto consume = [&tree](CordRep* r, size_t offset, size_t length) { + r = MakeSubstring(r, offset, length); + tree = CordRepBtree::AddCordRep<kBack>(tree, r); + }; + Consume(rep, consume); + return tree; +} + +CordRepBtree* CordRepBtree::PrependSlow(CordRepBtree* tree, CordRep* rep) { + if (ABSL_PREDICT_TRUE(rep->IsBtree())) { + return MergeTrees(rep->btree(), tree); + } + auto consume = [&tree](CordRep* r, size_t offset, size_t length) { + r = MakeSubstring(r, offset, length); + tree = CordRepBtree::AddCordRep<kFront>(tree, r); + }; + ReverseConsume(rep, consume); + return tree; +} + +CordRepBtree* CordRepBtree::Append(CordRepBtree* tree, y_absl::string_view data, + size_t extra) { + return CordRepBtree::AddData<kBack>(tree, data, extra); +} + +CordRepBtree* CordRepBtree::Prepend(CordRepBtree* tree, y_absl::string_view data, + size_t extra) { + return CordRepBtree::AddData<kFront>(tree, data, extra); +} + +template CordRepBtree* CordRepBtree::AddCordRep<kFront>(CordRepBtree* tree, + CordRep* rep); +template CordRepBtree* CordRepBtree::AddCordRep<kBack>(CordRepBtree* tree, + CordRep* rep); +template CordRepBtree* CordRepBtree::AddData<kFront>(CordRepBtree* tree, + y_absl::string_view data, + size_t extra); +template CordRepBtree* CordRepBtree::AddData<kBack>(CordRepBtree* tree, + y_absl::string_view data, + size_t extra); + +void CordRepBtree::Rebuild(CordRepBtree** stack, CordRepBtree* tree, + bool consume) { + bool owned = consume && tree->refcount.IsOne(); + if (tree->height() == 0) { + for (CordRep* edge : tree->Edges()) { + if (!owned) edge = CordRep::Ref(edge); + size_t height = 0; + size_t length = edge->length; + CordRepBtree* node = stack[0]; + OpResult result = node->AddEdge<kBack>(true, edge, length); + while (result.action == CordRepBtree::kPopped) { + stack[height] = result.tree; + if (stack[++height] == nullptr) { + result.action = CordRepBtree::kSelf; + stack[height] = CordRepBtree::New(node, result.tree); + } else { + node = stack[height]; + result = node->AddEdge<kBack>(true, result.tree, length); + } + } + while (stack[++height] != nullptr) { + stack[height]->length += length; + } + } + } else { + for (CordRep* rep : tree->Edges()) { + Rebuild(stack, rep->btree(), owned); + } + } + if (consume) { + if (owned) { + CordRepBtree::Delete(tree); + } else { + CordRepBtree::Unref(tree); + } + } +} + +CordRepBtree* CordRepBtree::Rebuild(CordRepBtree* tree) { + // Set up initial stack with empty leaf node. + CordRepBtree* node = CordRepBtree::New(); + CordRepBtree* stack[CordRepBtree::kMaxDepth + 1] = {node}; + + // Recursively build the tree, consuming the input tree. + Rebuild(stack, tree, /* consume reference */ true); + + // Return top most node + for (CordRepBtree* parent : stack) { + if (parent == nullptr) return node; + node = parent; + } + + // Unreachable + assert(false); + return nullptr; +} + +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace y_absl diff --git a/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/cord_rep_btree.h b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/cord_rep_btree.h new file mode 100644 index 0000000000..3ad8097cc8 --- /dev/null +++ b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/cord_rep_btree.h @@ -0,0 +1,939 @@ +// Copyright 2021 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ABSL_STRINGS_INTERNAL_CORD_REP_BTREE_H_ +#define ABSL_STRINGS_INTERNAL_CORD_REP_BTREE_H_ + +#include <cassert> +#include <cstdint> +#include <iosfwd> + +#include "y_absl/base/config.h" +#include "y_absl/base/internal/raw_logging.h" +#include "y_absl/base/optimization.h" +#include "y_absl/strings/internal/cord_internal.h" +#include "y_absl/strings/internal/cord_rep_flat.h" +#include "y_absl/strings/string_view.h" +#include "y_absl/types/span.h" + +namespace y_absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { + +class CordRepBtreeNavigator; + +// CordRepBtree is as the name implies a btree implementation of a Cordrep tree. +// Data is stored at the leaf level only, non leaf nodes contain down pointers +// only. Allowed types of data edges are FLAT, EXTERNAL and SUBSTRINGs of FLAT +// or EXTERNAL nodes. The implementation allows for data to be added to either +// end of the tree only, it does not provide any 'insert' logic. This has the +// benefit that we can expect good fill ratios: all nodes except the outer +// 'legs' will have 100% fill ratios for trees built using Append/Prepend +// methods. Merged trees will typically have a fill ratio well above 50% as in a +// similar fashion, one side of the merged tree will typically have a 100% fill +// ratio, and the 'open' end will average 50%. All operations are O(log(n)) or +// better, and the tree never needs balancing. +// +// All methods accepting a CordRep* or CordRepBtree* adopt a reference on that +// input unless explicitly stated otherwise. All functions returning a CordRep* +// or CordRepBtree* instance transfer a reference back to the caller. +// Simplified, callers both 'donate' and 'consume' a reference count on each +// call, simplifying the API. An example of building a tree: +// +// CordRepBtree* tree = CordRepBtree::Create(MakeFlat("Hello")); +// tree = CordRepBtree::Append(tree, MakeFlat("world")); +// +// In the above example, all inputs are consumed, making each call affecting +// `tree` reference count neutral. The returned `tree` value can be different +// from the input if the input is shared with other threads, or if the tree +// grows in height, but callers typically never have to concern themselves with +// that and trust that all methods DTRT at all times. +class CordRepBtree : public CordRep { + public: + // EdgeType identifies `front` and `back` enum values. + // Various implementations in CordRepBtree such as `Add` and `Edge` are + // generic and templated on operating on either of the boundary edges. + // For more information on the possible edges contained in a CordRepBtree + // instance see the documentation for `edges_`. + enum class EdgeType { kFront, kBack }; + + // Convenience constants into `EdgeType` + static constexpr EdgeType kFront = EdgeType::kFront; + static constexpr EdgeType kBack = EdgeType::kBack; + + // Maximum number of edges: based on experiments and performance data, we can + // pick suitable values resulting in optimum cacheline aligned values. The + // preferred values are based on 64-bit systems where we aim to align this + // class onto 64 bytes, i.e.: 6 = 64 bytes, 14 = 128 bytes, etc. + // TODO(b/192061034): experiment with alternative sizes. + static constexpr size_t kMaxCapacity = 6; + + // Reasonable maximum height of the btree. We can expect a fill ratio of at + // least 50%: trees are always expanded at the front or back. Concatenating + // trees will then typically fold at the top most node, where the lower nodes + // are at least at capacity on one side of joined inputs. At a lower fill + // rate of 4 edges per node, we have capacity for ~16 million leaf nodes. + // We will fail / abort if an application ever exceeds this height, which + // should be extremely rare (near impossible) and be an indication of an + // application error: we do not assume it reasonable for any application to + // operate correctly with such monster trees. + // Another compelling reason for the number `12` is that any contextual stack + // required for navigation or insertion requires 12 words and 12 bytes, which + // fits inside 2 cache lines with some room to spare, and is reasonable as a + // local stack variable compared to Cord's current near 400 bytes stack use. + // The maximum `height` value of a node is then `kMaxDepth - 1` as node height + // values start with a value of 0 for leaf nodes. + static constexpr int kMaxDepth = 12; + static constexpr int kMaxHeight = kMaxDepth - 1; + + // `Action` defines the action for unwinding changes done at the btree's leaf + // level that need to be propagated up to the parent node(s). Each operation + // on a node has an effect / action defined as follows: + // - kSelf + // The operation (add / update, etc) was performed directly on the node as + // the node is private to the current thread (i.e.: not shared directly or + // indirectly through a refcount > 1). Changes can be propagated directly to + // all parent nodes as all parent nodes are also then private to the current + // thread. + // - kCopied + // The operation (add / update, etc) was performed on a copy of the original + // node, as the node is (potentially) directly or indirectly shared with + // other threads. Changes need to be propagated into the parent nodes where + // the old down pointer must be unreffed and replaced with this new copy. + // Such changes to parent nodes may themselves require a copy if the parent + // node is also shared. A kCopied action can propagate all the way to the + // top node where we then must unref the `tree` input provided by the + // caller, and return the new copy. + // - kPopped + // The operation (typically add) could not be satisfied due to insufficient + // capacity in the targeted node, and a new 'leg' was created that needs to + // be added into the parent node. For example, adding a FLAT inside a leaf + // node that is at capacity will create a new leaf node containing that + // FLAT, that needs to be 'popped' up the btree. Such 'pop' actions can + // cascade up the tree if parent nodes are also at capacity. A 'Popped' + // action propagating all the way to the top of the tree will result in + // the tree becoming one level higher than the current tree through a final + // `CordRepBtree::New(tree, popped)` call, resulting in a new top node + // referencing the old tree and the new (fully popped upwards) 'leg'. + enum Action { kSelf, kCopied, kPopped }; + + // Result of an operation on a node. See the `Action` enum for details. + struct OpResult { + CordRepBtree* tree; + Action action; + }; + + // Return value of the CopyPrefix and CopySuffix methods which can + // return a node or data edge at any height inside the tree. + // A height of 0 defines the lowest (leaf) node, a height of -1 identifies + // `edge` as being a plain data node: EXTERNAL / FLAT or SUBSTRING thereof. + struct CopyResult { + CordRep* edge; + int height; + }; + + // Logical position inside a node: + // - index: index of the edge. + // - n: size or offset value depending on context. + struct Position { + size_t index; + size_t n; + }; + + // Creates a btree from the given input. Adopts a ref of `rep`. + // If the input `rep` is itself a btree, i.e., `IsBtree()`, then this + // function immediately returns `rep->btree()`. If the input is a valid data + // edge (see IsDataEdge()), then a new leaf node is returned containing `rep` + // as the sole data edge. Else, the input is assumed to be a (legacy) concat + // tree, and the input is consumed and transformed into a btree(). + static CordRepBtree* Create(CordRep* rep); + + // Destroys the provided tree. Should only be called by cord internal API's, + // typically after a ref_count.Decrement() on the last reference count. + static void Destroy(CordRepBtree* tree); + + // Use CordRep::Unref() as we overload for y_absl::Span<CordRep* const>. + using CordRep::Unref; + + // Unrefs all edges in `edges` which are assumed to be 'likely one'. + static void Unref(y_absl::Span<CordRep* const> edges); + + // Appends / Prepends an existing CordRep instance to this tree. + // The below methods accept three types of input: + // 1) `rep` is a data node (See `IsDataNode` for valid data edges). + // `rep` is appended or prepended to this tree 'as is'. + // 2) `rep` is a BTREE. + // `rep` is merged into `tree` respecting the Append/Prepend order. + // 3) `rep` is some other (legacy) type. + // `rep` is converted in place and added to `tree` + // Requires `tree` and `rep` to be not null. + static CordRepBtree* Append(CordRepBtree* tree, CordRep* rep); + static CordRepBtree* Prepend(CordRepBtree* tree, CordRep* rep); + + // Append/Prepend the data in `data` to this tree. + // The `extra` parameter defines how much extra capacity should be allocated + // for any additional FLAT being allocated. This is an optimization hint from + // the caller. For example, a caller may need to add 2 string_views of data + // "abc" and "defghi" which are not consecutive. The caller can in this case + // invoke `AddData(tree, "abc", 6)`, and any newly added flat is allocated + // where possible with at least 6 bytes of extra capacity beyond `length`. + // This helps avoiding data getting fragmented over multiple flats. + // There is no limit on the size of `data`. If `data` can not be stored inside + // a single flat, then the function will iteratively add flats until all data + // has been consumed and appended or prepended to the tree. + static CordRepBtree* Append(CordRepBtree* tree, string_view data, + size_t extra = 0); + static CordRepBtree* Prepend(CordRepBtree* tree, string_view data, + size_t extra = 0); + + // Returns a new tree, containing `n` bytes of data from this instance + // starting at offset `offset`. Where possible, the returned tree shares + // (re-uses) data edges and nodes with this instance to minimize the + // combined memory footprint of both trees. + // Requires `offset + n <= length`. Returns `nullptr` if `n` is zero. + CordRep* SubTree(size_t offset, size_t n); + + // Removes `n` trailing bytes from `tree`, and returns the resulting tree + // or data edge. Returns `tree` if n is zero, and nullptr if n == length. + // This function is logically identical to: + // result = tree->SubTree(0, tree->length - n); + // Unref(tree); + // return result; + // However, the actual implementation will as much as possible perform 'in + // place' modifications on the tree on all nodes and edges that are mutable. + // For example, in a fully privately owned tree with the last edge being a + // flat of length 12, RemoveSuffix(1) will simply set the length of that data + // edge to 11, and reduce the length of all nodes on the edge path by 1. + static CordRep* RemoveSuffix(CordRepBtree* tree, size_t n); + + // Returns the character at the given offset. + char GetCharacter(size_t offset) const; + + // Returns true if this node holds a single data edge, and if so, sets + // `fragment` to reference the contained data. `fragment` is an optional + // output parameter and allowed to be null. + bool IsFlat(y_absl::string_view* fragment) const; + + // Returns true if the data of `n` bytes starting at offset `offset` + // is contained in a single data edge, and if so, sets fragment to reference + // the contained data. `fragment` is an optional output parameter and allowed + // to be null. + bool IsFlat(size_t offset, size_t n, y_absl::string_view* fragment) const; + + // Returns a span (mutable range of bytes) of up to `size` bytes into the + // last FLAT data edge inside this tree under the following conditions: + // - none of the nodes down into the FLAT node are shared. + // - the last data edge in this tree is a non-shared FLAT. + // - the referenced FLAT has additional capacity available. + // If all these conditions are met, a non-empty span is returned, and the + // length of the flat node and involved tree nodes have been increased by + // `span.length()`. The caller is responsible for immediately assigning values + // to all uninitialized data reference by the returned span. + // Requires `this->refcount.IsMutable()`: this function forces the + // caller to do this fast path check on the top level node, as this is the + // most commonly shared node of a cord tree. + Span<char> GetAppendBuffer(size_t size); + + // Returns the `height` of the tree. The height of a tree is limited to + // kMaxHeight. `height` is implemented as an `int` as in some places we + // use negative (-1) values for 'data edges'. + int height() const { return static_cast<int>(storage[0]); } + + // Properties: begin, back, end, front/back boundary indexes. + size_t begin() const { return static_cast<size_t>(storage[1]); } + size_t back() const { return static_cast<size_t>(storage[2]) - 1; } + size_t end() const { return static_cast<size_t>(storage[2]); } + size_t index(EdgeType edge) const { + return edge == kFront ? begin() : back(); + } + + // Properties: size and capacity. + // `capacity` contains the current capacity of this instance, where + // `kMaxCapacity` contains the maximum capacity of a btree node. + // For now, `capacity` and `kMaxCapacity` return the same value, but this may + // change in the future if we see benefit in dynamically sizing 'small' nodes + // to 'large' nodes for large data trees. + size_t size() const { return end() - begin(); } + size_t capacity() const { return kMaxCapacity; } + + // Edge access + inline CordRep* Edge(size_t index) const; + inline CordRep* Edge(EdgeType edge_type) const; + inline y_absl::Span<CordRep* const> Edges() const; + inline y_absl::Span<CordRep* const> Edges(size_t begin, size_t end) const; + + // Returns reference to the data edge at `index`. + // Requires this instance to be a leaf node, and `index` to be valid index. + inline y_absl::string_view Data(size_t index) const; + + static const char* EdgeDataPtr(const CordRep* r); + static y_absl::string_view EdgeData(const CordRep* r); + + // Returns true if the provided rep is a FLAT, EXTERNAL or a SUBSTRING node + // holding a FLAT or EXTERNAL child rep. + static bool IsDataEdge(const CordRep* rep); + + // Diagnostics: returns true if `tree` is valid and internally consistent. + // If `shallow` is false, then the provided top level node and all child nodes + // below it are recursively checked. If `shallow` is true, only the provided + // node in `tree` and the cumulative length, type and height of the direct + // child nodes of `tree` are checked. The value of `shallow` is ignored if the + // internal `cord_btree_exhaustive_validation` diagnostics variable is true, + // in which case the performed validations works as if `shallow` were false. + // This function is intended for debugging and testing purposes only. + static bool IsValid(const CordRepBtree* tree, bool shallow = false); + + // Diagnostics: asserts that the provided tree is valid. + // `AssertValid()` performs a shallow validation by default. `shallow` can be + // set to false in which case an exhaustive validation is performed. This + // function is implemented in terms of calling `IsValid()` and asserting the + // return value to be true. See `IsValid()` for more information. + // This function is intended for debugging and testing purposes only. + static CordRepBtree* AssertValid(CordRepBtree* tree, bool shallow = true); + static const CordRepBtree* AssertValid(const CordRepBtree* tree, + bool shallow = true); + + // Diagnostics: dump the contents of this tree to `stream`. + // This function is intended for debugging and testing purposes only. + static void Dump(const CordRep* rep, std::ostream& stream); + static void Dump(const CordRep* rep, y_absl::string_view label, + std::ostream& stream); + static void Dump(const CordRep* rep, y_absl::string_view label, + bool include_contents, std::ostream& stream); + + // Adds the edge `edge` to this node if possible. `owned` indicates if the + // current node is potentially shared or not with other threads. Returns: + // - {kSelf, <this>} + // The edge was directly added to this node. + // - {kCopied, <node>} + // The edge was added to a copy of this node. + // - {kPopped, New(edge, height())} + // A new leg with the edge was created as this node has no extra capacity. + template <EdgeType edge_type> + inline OpResult AddEdge(bool owned, CordRep* edge, size_t delta); + + // Replaces the front or back edge with the provided new edge. Returns: + // - {kSelf, <this>} + // The edge was directly set in this node. The old edge is unreffed. + // - {kCopied, <node>} + // A copy of this node was created with the new edge value. + // In both cases, the function adopts a reference on `edge`. + template <EdgeType edge_type> + OpResult SetEdge(bool owned, CordRep* edge, size_t delta); + + // Creates a new empty node at the specified height. + static CordRepBtree* New(int height = 0); + + // Creates a new node containing `rep`, with the height being computed + // automatically based on the type of `rep`. + static CordRepBtree* New(CordRep* rep); + + // Creates a new node containing both `front` and `back` at height + // `front.height() + 1`. Requires `back.height() == front.height()`. + static CordRepBtree* New(CordRepBtree* front, CordRepBtree* back); + + // Creates a fully balanced tree from the provided tree by rebuilding a new + // tree from all data edges in the input. This function is automatically + // invoked internally when the tree exceeds the maximum height. + static CordRepBtree* Rebuild(CordRepBtree* tree); + + private: + CordRepBtree() = default; + ~CordRepBtree() = default; + + // Initializes the main properties `tag`, `begin`, `end`, `height`. + inline void InitInstance(int height, size_t begin = 0, size_t end = 0); + + // Direct property access begin / end + void set_begin(size_t begin) { storage[1] = static_cast<uint8_t>(begin); } + void set_end(size_t end) { storage[2] = static_cast<uint8_t>(end); } + + // Decreases the value of `begin` by `n`, and returns the new value. Notice + // how this returns the new value unlike atomic::fetch_add which returns the + // old value. This is because this is used to prepend edges at 'begin - 1'. + size_t sub_fetch_begin(size_t n) { + storage[1] -= static_cast<uint8_t>(n); + return storage[1]; + } + + // Increases the value of `end` by `n`, and returns the previous value. This + // function is typically used to append edges at 'end'. + size_t fetch_add_end(size_t n) { + const uint8_t current = storage[2]; + storage[2] = static_cast<uint8_t>(current + n); + return current; + } + + // Returns the index of the last edge starting on, or before `offset`, with + // `n` containing the relative offset of `offset` inside that edge. + // Requires `offset` < length. + Position IndexOf(size_t offset) const; + + // Returns the index of the last edge starting before `offset`, with `n` + // containing the relative offset of `offset` inside that edge. + // This function is useful to find the edges for some span of bytes ending at + // `offset` (i.e., `n` bytes). For example: + // + // Position pos = IndexBefore(n) + // edges = Edges(begin(), pos.index) // All full edges (may be empty) + // last = Sub(Edge(pos.index), 0, pos.n) // Last partial edge (may be empty) + // + // Requires 0 < `offset` <= length. + Position IndexBefore(size_t offset) const; + + // Returns the index of the edge ending at (or on) length `length`, and the + // number of bytes inside that edge up to `length`. For example, if we have a + // Node with 2 edges, one of 10 and one of 20 long, then IndexOfLength(27) + // will return {1, 17}, and IndexOfLength(10) will return {0, 10}. + Position IndexOfLength(size_t n) const; + + // Identical to the above function except starting from the position `front`. + // This function is equivalent to `IndexBefore(front.n + offset)`, with + // the difference that this function is optimized to start at `front.index`. + Position IndexBefore(Position front, size_t offset) const; + + // Returns the index of the edge directly beyond the edge containing offset + // `offset`, with `n` containing the distance of that edge from `offset`. + // This function is useful for iteratively finding suffix nodes and remaining + // partial bytes in left-most suffix nodes as for example in CopySuffix. + // Requires `offset` < length. + Position IndexBeyond(size_t offset) const; + + // Destruction + static void DestroyLeaf(CordRepBtree* tree, size_t begin, size_t end); + static void DestroyNonLeaf(CordRepBtree* tree, size_t begin, size_t end); + static void DestroyTree(CordRepBtree* tree, size_t begin, size_t end); + static void Delete(CordRepBtree* tree) { delete tree; } + + // Creates a new leaf node containing as much data as possible from `data`. + // The data is added either forwards or reversed depending on `edge_type`. + // Callers must check the length of the returned node to determine if all data + // was copied or not. + // See the `Append/Prepend` function for the meaning and purpose of `extra`. + template <EdgeType edge_type> + static CordRepBtree* NewLeaf(y_absl::string_view data, size_t extra); + + // Creates a raw copy of this Btree node, copying all properties, but + // without adding any references to existing edges. + CordRepBtree* CopyRaw() const; + + // Creates a full copy of this Btree node, adding a reference on all edges. + CordRepBtree* Copy() const; + + // Creates a partial copy of this Btree node, copying all edges up to `end`, + // adding a reference on each copied edge, and sets the length of the newly + // created copy to `new_length`. + CordRepBtree* CopyBeginTo(size_t end, size_t new_length) const; + + // Returns a tree containing the edges [tree->begin(), end) and length + // of `new_length`. This method consumes a reference on the provided + // tree, and logically performs the following operation: + // result = tree->CopyBeginTo(end, new_length); + // CordRep::Unref(tree); + // return result; + static CordRepBtree* ConsumeBeginTo(CordRepBtree* tree, size_t end, + size_t new_length); + + // Creates a partial copy of this Btree node, copying all edges starting at + // `begin`, adding a reference on each copied edge, and sets the length of + // the newly created copy to `new_length`. + CordRepBtree* CopyToEndFrom(size_t begin, size_t new_length) const; + + // Extracts and returns the front edge from the provided tree. + // This method consumes a reference on the provided tree, and logically + // performs the following operation: + // edge = CordRep::Ref(tree->Edge(kFront)); + // CordRep::Unref(tree); + // return edge; + static CordRep* ExtractFront(CordRepBtree* tree); + + // Returns a tree containing the result of appending `right` to `left`. + static CordRepBtree* MergeTrees(CordRepBtree* left, CordRepBtree* right); + + // Fallback functions for `Create()`, `Append()` and `Prepend()` which + // deal with legacy / non conforming input, i.e.: CONCAT trees. + static CordRepBtree* CreateSlow(CordRep* rep); + static CordRepBtree* AppendSlow(CordRepBtree*, CordRep* rep); + static CordRepBtree* PrependSlow(CordRepBtree*, CordRep* rep); + + // Recursively rebuilds `tree` into `stack`. If 'consume` is set to true, the + // function will consume a reference on `tree`. `stack` is a null terminated + // array containing the new tree's state, with the current leaf node at + // stack[0], and parent nodes above that, or null for 'top of tree'. + static void Rebuild(CordRepBtree** stack, CordRepBtree* tree, bool consume); + + // Aligns existing edges to start at index 0, to allow for a new edge to be + // added to the back of the current edges. + inline void AlignBegin(); + + // Aligns existing edges to end at `capacity`, to allow for a new edge to be + // added in front of the current edges. + inline void AlignEnd(); + + // Adds the provided edge to this node. + // Requires this node to have capacity for the edge. Realigns / moves + // existing edges as needed to prepend or append the new edge. + template <EdgeType edge_type> + inline void Add(CordRep* rep); + + // Adds the provided edges to this node. + // Requires this node to have capacity for the edges. Realigns / moves + // existing edges as needed to prepend or append the new edges. + template <EdgeType edge_type> + inline void Add(y_absl::Span<CordRep* const>); + + // Adds data from `data` to this node until either all data has been consumed, + // or there is no more capacity for additional flat nodes inside this node. + // Requires the current node to be a leaf node, data to be non empty, and the + // current node to have capacity for at least one more data edge. + // Returns any remaining data from `data` that was not added, which is + // depending on the edge type (front / back) either the remaining prefix of + // suffix of the input. + // See the `Append/Prepend` function for the meaning and purpose of `extra`. + template <EdgeType edge_type> + y_absl::string_view AddData(y_absl::string_view data, size_t extra); + + // Replace the front or back edge with the provided value. + // Adopts a reference on `edge` and unrefs the old edge. + template <EdgeType edge_type> + inline void SetEdge(CordRep* edge); + + // Returns a partial copy of the current tree containing the first `n` bytes + // of data. `CopyResult` contains both the resulting edge and its height. The + // resulting tree may be less high than the current tree, or even be a single + // matching data edge if `allow_folding` is set to true. + // For example, if `n == 1`, then the result will be the single data edge, and + // height will be set to -1 (one below the owning leaf node). If n == 0, this + // function returns null. Requires `n <= length` + CopyResult CopyPrefix(size_t n, bool allow_folding = true); + + // Returns a partial copy of the current tree containing all data starting + // after `offset`. `CopyResult` contains both the resulting edge and its + // height. The resulting tree may be less high than the current tree, or even + // be a single matching data edge. For example, if `n == length - 1`, then the + // result will be a single data edge, and height will be set to -1 (one below + // the owning leaf node). + // Requires `offset < length` + CopyResult CopySuffix(size_t offset); + + // Returns a OpResult value of {this, kSelf} or {Copy(), kCopied} + // depending on the value of `owned`. + inline OpResult ToOpResult(bool owned); + + // Adds `rep` to the specified tree, returning the modified tree. + template <EdgeType edge_type> + static CordRepBtree* AddCordRep(CordRepBtree* tree, CordRep* rep); + + // Adds `data` to the specified tree, returning the modified tree. + // See the `Append/Prepend` function for the meaning and purpose of `extra`. + template <EdgeType edge_type> + static CordRepBtree* AddData(CordRepBtree* tree, y_absl::string_view data, + size_t extra = 0); + + // Merges `src` into `dst` with `src` being added either before (kFront) or + // after (kBack) `dst`. Requires the height of `dst` to be greater than or + // equal to the height of `src`. + template <EdgeType edge_type> + static CordRepBtree* Merge(CordRepBtree* dst, CordRepBtree* src); + + // Fallback version of GetAppendBuffer for large trees: GetAppendBuffer() + // implements an inlined version for trees of limited height (3 levels), + // GetAppendBufferSlow implements the logic for large trees. + Span<char> GetAppendBufferSlow(size_t size); + + // `edges_` contains all edges starting from this instance. + // These are explicitly `child` edges only, a cord btree (or any cord tree in + // that respect) does not store `parent` pointers anywhere: multiple trees / + // parents can reference the same shared child edge. The type of these edges + // depends on the height of the node. `Leaf nodes` (height == 0) contain `data + // edges` (external or flat nodes, or sub-strings thereof). All other nodes + // (height > 0) contain pointers to BTREE nodes with a height of `height - 1`. + CordRep* edges_[kMaxCapacity]; + + friend class CordRepBtreeTestPeer; + friend class CordRepBtreeNavigator; +}; + +inline CordRepBtree* CordRep::btree() { + assert(IsBtree()); + return static_cast<CordRepBtree*>(this); +} + +inline const CordRepBtree* CordRep::btree() const { + assert(IsBtree()); + return static_cast<const CordRepBtree*>(this); +} + +inline void CordRepBtree::InitInstance(int height, size_t begin, size_t end) { + tag = BTREE; + storage[0] = static_cast<uint8_t>(height); + storage[1] = static_cast<uint8_t>(begin); + storage[2] = static_cast<uint8_t>(end); +} + +inline CordRep* CordRepBtree::Edge(size_t index) const { + assert(index >= begin()); + assert(index < end()); + return edges_[index]; +} + +inline CordRep* CordRepBtree::Edge(EdgeType edge_type) const { + return edges_[edge_type == kFront ? begin() : back()]; +} + +inline y_absl::Span<CordRep* const> CordRepBtree::Edges() const { + return {edges_ + begin(), size()}; +} + +inline y_absl::Span<CordRep* const> CordRepBtree::Edges(size_t begin, + size_t end) const { + assert(begin <= end); + assert(begin >= this->begin()); + assert(end <= this->end()); + return {edges_ + begin, static_cast<size_t>(end - begin)}; +} + +inline const char* CordRepBtree::EdgeDataPtr(const CordRep* r) { + assert(IsDataEdge(r)); + size_t offset = 0; + if (r->tag == SUBSTRING) { + offset = r->substring()->start; + r = r->substring()->child; + } + return (r->tag >= FLAT ? r->flat()->Data() : r->external()->base) + offset; +} + +inline y_absl::string_view CordRepBtree::EdgeData(const CordRep* r) { + return y_absl::string_view(EdgeDataPtr(r), r->length); +} + +inline y_absl::string_view CordRepBtree::Data(size_t index) const { + assert(height() == 0); + return EdgeData(Edge(index)); +} + +inline bool CordRepBtree::IsDataEdge(const CordRep* rep) { + // The fast path is that `rep` is an EXTERNAL or FLAT node, making the below + // if a single, well predicted branch. We then repeat the FLAT or EXTERNAL + // check in the slow path the SUBSTRING check to optimize for the hot path. + if (rep->tag == EXTERNAL || rep->tag >= FLAT) return true; + if (rep->tag == SUBSTRING) rep = rep->substring()->child; + return rep->tag == EXTERNAL || rep->tag >= FLAT; +} + +inline CordRepBtree* CordRepBtree::New(int height) { + CordRepBtree* tree = new CordRepBtree; + tree->length = 0; + tree->InitInstance(height); + return tree; +} + +inline CordRepBtree* CordRepBtree::New(CordRep* rep) { + CordRepBtree* tree = new CordRepBtree; + int height = rep->IsBtree() ? rep->btree()->height() + 1 : 0; + tree->length = rep->length; + tree->InitInstance(height, /*begin=*/0, /*end=*/1); + tree->edges_[0] = rep; + return tree; +} + +inline CordRepBtree* CordRepBtree::New(CordRepBtree* front, + CordRepBtree* back) { + assert(front->height() == back->height()); + CordRepBtree* tree = new CordRepBtree; + tree->length = front->length + back->length; + tree->InitInstance(front->height() + 1, /*begin=*/0, /*end=*/2); + tree->edges_[0] = front; + tree->edges_[1] = back; + return tree; +} + +inline void CordRepBtree::DestroyTree(CordRepBtree* tree, size_t begin, + size_t end) { + if (tree->height() == 0) { + DestroyLeaf(tree, begin, end); + } else { + DestroyNonLeaf(tree, begin, end); + } +} + +inline void CordRepBtree::Destroy(CordRepBtree* tree) { + DestroyTree(tree, tree->begin(), tree->end()); +} + +inline void CordRepBtree::Unref(y_absl::Span<CordRep* const> edges) { + for (CordRep* edge : edges) { + if (ABSL_PREDICT_FALSE(!edge->refcount.Decrement())) { + CordRep::Destroy(edge); + } + } +} + +inline CordRepBtree* CordRepBtree::CopyRaw() const { + auto* tree = static_cast<CordRepBtree*>(::operator new(sizeof(CordRepBtree))); + memcpy(static_cast<void*>(tree), this, sizeof(CordRepBtree)); + new (&tree->refcount) RefcountAndFlags; + return tree; +} + +inline CordRepBtree* CordRepBtree::Copy() const { + CordRepBtree* tree = CopyRaw(); + for (CordRep* rep : Edges()) CordRep::Ref(rep); + return tree; +} + +inline CordRepBtree* CordRepBtree::CopyToEndFrom(size_t begin, + size_t new_length) const { + assert(begin >= this->begin()); + assert(begin <= this->end()); + CordRepBtree* tree = CopyRaw(); + tree->length = new_length; + tree->set_begin(begin); + for (CordRep* edge : tree->Edges()) CordRep::Ref(edge); + return tree; +} + +inline CordRepBtree* CordRepBtree::CopyBeginTo(size_t end, + size_t new_length) const { + assert(end <= capacity()); + assert(end >= this->begin()); + CordRepBtree* tree = CopyRaw(); + tree->length = new_length; + tree->set_end(end); + for (CordRep* edge : tree->Edges()) CordRep::Ref(edge); + return tree; +} + +inline void CordRepBtree::AlignBegin() { + // The below code itself does not need to be fast as typically we have + // mono-directional append/prepend calls, and `begin` / `end` are typically + // adjusted no more than once. But we want to avoid potential register clobber + // effects, making the compiler emit register save/store/spills, and minimize + // the size of code. + const size_t delta = begin(); + if (ABSL_PREDICT_FALSE(delta != 0)) { + const size_t new_end = end() - delta; + set_begin(0); + set_end(new_end); + // TODO(mvels): we can write this using 2 loads / 2 stores depending on + // total size for the kMaxCapacity = 6 case. I.e., we can branch (switch) on + // size, and then do overlapping load/store of up to 4 pointers (inlined as + // XMM, YMM or ZMM load/store) and up to 2 pointers (XMM / YMM), which is a) + // compact and b) not clobbering any registers. + ABSL_INTERNAL_ASSUME(new_end <= kMaxCapacity); +#ifdef __clang__ +#pragma unroll 1 +#endif + for (size_t i = 0; i < new_end; ++i) { + edges_[i] = edges_[i + delta]; + } + } +} + +inline void CordRepBtree::AlignEnd() { + // See comments in `AlignBegin` for motivation on the hand-rolled for loops. + const size_t delta = capacity() - end(); + if (delta != 0) { + const size_t new_begin = begin() + delta; + const size_t new_end = end() + delta; + set_begin(new_begin); + set_end(new_end); + ABSL_INTERNAL_ASSUME(new_end <= kMaxCapacity); +#ifdef __clang__ +#pragma unroll 1 +#endif + for (size_t i = new_end - 1; i >= new_begin; --i) { + edges_[i] = edges_[i - delta]; + } + } +} + +template <> +inline void CordRepBtree::Add<CordRepBtree::kBack>(CordRep* rep) { + AlignBegin(); + edges_[fetch_add_end(1)] = rep; +} + +template <> +inline void CordRepBtree::Add<CordRepBtree::kBack>( + y_absl::Span<CordRep* const> edges) { + AlignBegin(); + size_t new_end = end(); + for (CordRep* edge : edges) edges_[new_end++] = edge; + set_end(new_end); +} + +template <> +inline void CordRepBtree::Add<CordRepBtree::kFront>(CordRep* rep) { + AlignEnd(); + edges_[sub_fetch_begin(1)] = rep; +} + +template <> +inline void CordRepBtree::Add<CordRepBtree::kFront>( + y_absl::Span<CordRep* const> edges) { + AlignEnd(); + size_t new_begin = begin() - edges.size(); + set_begin(new_begin); + for (CordRep* edge : edges) edges_[new_begin++] = edge; +} + +template <CordRepBtree::EdgeType edge_type> +inline void CordRepBtree::SetEdge(CordRep* edge) { + const int idx = edge_type == kFront ? begin() : back(); + CordRep::Unref(edges_[idx]); + edges_[idx] = edge; +} + +inline CordRepBtree::OpResult CordRepBtree::ToOpResult(bool owned) { + return owned ? OpResult{this, kSelf} : OpResult{Copy(), kCopied}; +} + +inline CordRepBtree::Position CordRepBtree::IndexOf(size_t offset) const { + assert(offset < length); + size_t index = begin(); + while (offset >= edges_[index]->length) offset -= edges_[index++]->length; + return {index, offset}; +} + +inline CordRepBtree::Position CordRepBtree::IndexBefore(size_t offset) const { + assert(offset > 0); + assert(offset <= length); + size_t index = begin(); + while (offset > edges_[index]->length) offset -= edges_[index++]->length; + return {index, offset}; +} + +inline CordRepBtree::Position CordRepBtree::IndexBefore(Position front, + size_t offset) const { + size_t index = front.index; + offset = offset + front.n; + while (offset > edges_[index]->length) offset -= edges_[index++]->length; + return {index, offset}; +} + +inline CordRepBtree::Position CordRepBtree::IndexOfLength(size_t n) const { + assert(n <= length); + size_t index = back(); + size_t strip = length - n; + while (strip >= edges_[index]->length) strip -= edges_[index--]->length; + return {index, edges_[index]->length - strip}; +} + +inline CordRepBtree::Position CordRepBtree::IndexBeyond( + const size_t offset) const { + // We need to find the edge which `starting offset` is beyond (>=)`offset`. + // For this we can't use the `offset -= length` logic of IndexOf. Instead, we + // track the offset of the `current edge` in `off`, which we increase as we + // iterate over the edges until we find the matching edge. + size_t off = 0; + size_t index = begin(); + while (offset > off) off += edges_[index++]->length; + return {index, off - offset}; +} + +inline CordRepBtree* CordRepBtree::Create(CordRep* rep) { + if (IsDataEdge(rep)) return New(rep); + return CreateSlow(rep); +} + +inline Span<char> CordRepBtree::GetAppendBuffer(size_t size) { + assert(refcount.IsMutable()); + CordRepBtree* tree = this; + const int height = this->height(); + CordRepBtree* n1 = tree; + CordRepBtree* n2 = tree; + CordRepBtree* n3 = tree; + switch (height) { + case 3: + tree = tree->Edge(kBack)->btree(); + if (!tree->refcount.IsMutable()) return {}; + n2 = tree; + ABSL_FALLTHROUGH_INTENDED; + case 2: + tree = tree->Edge(kBack)->btree(); + if (!tree->refcount.IsMutable()) return {}; + n1 = tree; + ABSL_FALLTHROUGH_INTENDED; + case 1: + tree = tree->Edge(kBack)->btree(); + if (!tree->refcount.IsMutable()) return {}; + ABSL_FALLTHROUGH_INTENDED; + case 0: + CordRep* edge = tree->Edge(kBack); + if (!edge->refcount.IsMutable()) return {}; + if (edge->tag < FLAT) return {}; + size_t avail = edge->flat()->Capacity() - edge->length; + if (avail == 0) return {}; + size_t delta = (std::min)(size, avail); + Span<char> span = {edge->flat()->Data() + edge->length, delta}; + edge->length += delta; + switch (height) { + case 3: + n3->length += delta; + ABSL_FALLTHROUGH_INTENDED; + case 2: + n2->length += delta; + ABSL_FALLTHROUGH_INTENDED; + case 1: + n1->length += delta; + ABSL_FALLTHROUGH_INTENDED; + case 0: + tree->length += delta; + return span; + } + break; + } + return GetAppendBufferSlow(size); +} + +extern template CordRepBtree* CordRepBtree::AddCordRep<CordRepBtree::kBack>( + CordRepBtree* tree, CordRep* rep); + +extern template CordRepBtree* CordRepBtree::AddCordRep<CordRepBtree::kFront>( + CordRepBtree* tree, CordRep* rep); + +inline CordRepBtree* CordRepBtree::Append(CordRepBtree* tree, CordRep* rep) { + if (ABSL_PREDICT_TRUE(IsDataEdge(rep))) { + return CordRepBtree::AddCordRep<kBack>(tree, rep); + } + return AppendSlow(tree, rep); +} + +inline CordRepBtree* CordRepBtree::Prepend(CordRepBtree* tree, CordRep* rep) { + if (ABSL_PREDICT_TRUE(IsDataEdge(rep))) { + return CordRepBtree::AddCordRep<kFront>(tree, rep); + } + return PrependSlow(tree, rep); +} + +#ifdef NDEBUG + +inline CordRepBtree* CordRepBtree::AssertValid(CordRepBtree* tree, + bool /* shallow */) { + return tree; +} + +inline const CordRepBtree* CordRepBtree::AssertValid(const CordRepBtree* tree, + bool /* shallow */) { + return tree; +} + +#endif + +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace y_absl + +#endif // ABSL_STRINGS_INTERNAL_CORD_REP_BTREE_H_ diff --git a/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/cord_rep_btree_navigator.cc b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/cord_rep_btree_navigator.cc new file mode 100644 index 0000000000..6dae7bcd3e --- /dev/null +++ b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/cord_rep_btree_navigator.cc @@ -0,0 +1,185 @@ +// Copyright 2021 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "y_absl/strings/internal/cord_rep_btree_navigator.h" + +#include <cassert> + +#include "y_absl/strings/internal/cord_internal.h" +#include "y_absl/strings/internal/cord_rep_btree.h" + +namespace y_absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { + +using ReadResult = CordRepBtreeNavigator::ReadResult; + +namespace { + +// Returns a `CordRepSubstring` from `rep` starting at `offset` of size `n`. +// If `rep` is already a `CordRepSubstring` instance, an adjusted instance is +// created based on the old offset and new offset. +// Adopts a reference on `rep`. Rep must be a valid data edge. Returns +// nullptr if `n == 0`, `rep` if `n == rep->length`. +// Requires `offset < rep->length` and `offset + n <= rep->length`. +// TODO(192061034): move to utility library in internal and optimize for small +// substrings of larger reps. +inline CordRep* Substring(CordRep* rep, size_t offset, size_t n) { + assert(n <= rep->length); + assert(offset < rep->length); + assert(offset <= rep->length - n); + assert(CordRepBtree::IsDataEdge(rep)); + + if (n == 0) return nullptr; + if (n == rep->length) return CordRep::Ref(rep); + + if (rep->tag == SUBSTRING) { + offset += rep->substring()->start; + rep = rep->substring()->child; + } + + CordRepSubstring* substring = new CordRepSubstring(); + substring->length = n; + substring->tag = SUBSTRING; + substring->start = offset; + substring->child = CordRep::Ref(rep); + return substring; +} + +inline CordRep* Substring(CordRep* rep, size_t offset) { + return Substring(rep, offset, rep->length - offset); +} + +} // namespace + +CordRepBtreeNavigator::Position CordRepBtreeNavigator::Skip(size_t n) { + int height = 0; + size_t index = index_[0]; + CordRepBtree* node = node_[0]; + CordRep* edge = node->Edge(index); + + // Overall logic: Find an edge of at least the length we need to skip. + // We consume all edges which are smaller (i.e., must be 100% skipped). + // If we exhausted all edges on the current level, we move one level + // up the tree, and repeat until we either find the edge, or until we hit + // the top of the tree meaning the skip exceeds tree->length. + while (n >= edge->length) { + n -= edge->length; + while (++index == node->end()) { + if (++height > height_) return {nullptr, n}; + node = node_[height]; + index = index_[height]; + } + edge = node->Edge(index); + } + + // If we moved up the tree, descend down to the leaf level, consuming all + // edges that must be skipped. + while (height > 0) { + node = edge->btree(); + index_[height] = index; + node_[--height] = node; + index = node->begin(); + edge = node->Edge(index); + while (n >= edge->length) { + n -= edge->length; + ++index; + assert(index != node->end()); + edge = node->Edge(index); + } + } + index_[0] = index; + return {edge, n}; +} + +ReadResult CordRepBtreeNavigator::Read(size_t edge_offset, size_t n) { + int height = 0; + size_t length = edge_offset + n; + size_t index = index_[0]; + CordRepBtree* node = node_[0]; + CordRep* edge = node->Edge(index); + assert(edge_offset < edge->length); + + if (length < edge->length) { + return {Substring(edge, edge_offset, n), length}; + } + + // Similar to 'Skip', we consume all edges that are inside the 'length' of + // data that needs to be read. If we exhaust the current level, we move one + // level up the tree and repeat until we hit the final edge that must be + // (partially) read. We consume all edges into `subtree`. + CordRepBtree* subtree = CordRepBtree::New(Substring(edge, edge_offset)); + size_t subtree_end = 1; + do { + length -= edge->length; + while (++index == node->end()) { + index_[height] = index; + if (++height > height_) { + subtree->set_end(subtree_end); + if (length == 0) return {subtree, 0}; + CordRep::Unref(subtree); + return {nullptr, length}; + } + if (length != 0) { + subtree->set_end(subtree_end); + subtree = CordRepBtree::New(subtree); + subtree_end = 1; + } + node = node_[height]; + index = index_[height]; + } + edge = node->Edge(index); + if (length >= edge->length) { + subtree->length += edge->length; + subtree->edges_[subtree_end++] = CordRep::Ref(edge); + } + } while (length >= edge->length); + CordRepBtree* tree = subtree; + subtree->length += length; + + // If we moved up the tree, descend down to the leaf level, consuming all + // edges that must be read, adding 'down' nodes to `subtree`. + while (height > 0) { + node = edge->btree(); + index_[height] = index; + node_[--height] = node; + index = node->begin(); + edge = node->Edge(index); + + if (length != 0) { + CordRepBtree* right = CordRepBtree::New(height); + right->length = length; + subtree->edges_[subtree_end++] = right; + subtree->set_end(subtree_end); + subtree = right; + subtree_end = 0; + while (length >= edge->length) { + subtree->edges_[subtree_end++] = CordRep::Ref(edge); + length -= edge->length; + edge = node->Edge(++index); + } + } + } + // Add any (partial) edge still remaining at the leaf level. + if (length != 0) { + subtree->edges_[subtree_end++] = Substring(edge, 0, length); + } + subtree->set_end(subtree_end); + index_[0] = index; + return {tree, length}; +} + +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace y_absl diff --git a/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/cord_rep_btree_navigator.h b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/cord_rep_btree_navigator.h new file mode 100644 index 0000000000..40c58e3b3c --- /dev/null +++ b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/cord_rep_btree_navigator.h @@ -0,0 +1,265 @@ +// Copyright 2021 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ABSL_STRINGS_INTERNAL_CORD_REP_BTREE_NAVIGATOR_H_ +#define ABSL_STRINGS_INTERNAL_CORD_REP_BTREE_NAVIGATOR_H_ + +#include <cassert> +#include <iostream> + +#include "y_absl/strings/internal/cord_internal.h" +#include "y_absl/strings/internal/cord_rep_btree.h" + +namespace y_absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { + +// CordRepBtreeNavigator is a bi-directional navigator allowing callers to +// navigate all the (leaf) data edges in a CordRepBtree instance. +// +// A CordRepBtreeNavigator instance is by default empty. Callers initialize a +// navigator instance by calling one of `InitFirst()`, `InitLast()` or +// `InitOffset()`, which establishes a current position. Callers can then +// navigate using the `Next`, `Previous`, `Skip` and `Seek` methods. +// +// The navigator instance does not take or adopt a reference on the provided +// `tree` on any of the initialization calls. Callers are responsible for +// guaranteeing the lifecycle of the provided tree. A navigator instance can +// be reset to the empty state by calling `Reset`. +// +// A navigator only keeps positional state on the 'current data edge', it does +// explicitly not keep any 'offset' state. The class does accept and return +// offsets in the `Read()`, `Skip()` and 'Seek()` methods as these would +// otherwise put a big burden on callers. Callers are expected to maintain +// (returned) offset info if they require such granular state. +class CordRepBtreeNavigator { + public: + // The logical position as returned by the Seek() and Skip() functions. + // Returns the current leaf edge for the desired seek or skip position and + // the offset of that position inside that edge. + struct Position { + CordRep* edge; + size_t offset; + }; + + // The read result as returned by the Read() function. + // `tree` contains the resulting tree which is identical to the result + // of calling CordRepBtree::SubTree(...) on the tree being navigated. + // `n` contains the number of bytes used from the last navigated to + // edge of the tree. + struct ReadResult { + CordRep* tree; + size_t n; + }; + + // Returns true if this instance is not empty. + explicit operator bool() const; + + // Returns the tree for this instance or nullptr if empty. + CordRepBtree* btree() const; + + // Returns the data edge of the current position. + // Requires this instance to not be empty. + CordRep* Current() const; + + // Resets this navigator to `tree`, returning the first data edge in the tree. + CordRep* InitFirst(CordRepBtree* tree); + + // Resets this navigator to `tree`, returning the last data edge in the tree. + CordRep* InitLast(CordRepBtree* tree); + + // Resets this navigator to `tree` returning the data edge at position + // `offset` and the relative offset of `offset` into that data edge. + // Returns `Position.edge = nullptr` if the provided offset is greater + // than or equal to the length of the tree, in which case the state of + // the navigator instance remains unchanged. + Position InitOffset(CordRepBtree* tree, size_t offset); + + // Navigates to the next data edge. + // Returns the next data edge or nullptr if there is no next data edge, in + // which case the current position remains unchanged. + CordRep* Next(); + + // Navigates to the previous data edge. + // Returns the previous data edge or nullptr if there is no previous data + // edge, in which case the current position remains unchanged. + CordRep* Previous(); + + // Navigates to the data edge at position `offset`. Returns the navigated to + // data edge in `Position.edge` and the relative offset of `offset` into that + // data edge in `Position.offset`. Returns `Position.edge = nullptr` if the + // provide offset is greater than or equal to the tree's length. + Position Seek(size_t offset); + + // Reads `n` bytes of data starting at offset `edge_offset` of the current + // data edge, and returns the result in `ReadResult.tree`. `ReadResult.n` + // contains the 'bytes used` from the last / current data edge in the tree. + // This allows users that mix regular navigation (using string views) and + // 'read into cord' navigation to keep track of the current state, and which + // bytes have been consumed from a navigator. + // This function returns `ReadResult.tree = nullptr` if the requested length + // exceeds the length of the tree starting at the current data edge. + ReadResult Read(size_t edge_offset, size_t n); + + // Skips `n` bytes forward from the current data edge, returning the navigated + // to data edge in `Position.edge` and `Position.offset` containing the offset + // inside that data edge. Note that the state of the navigator is left + // unchanged if `n` is smaller than the length of the current data edge. + Position Skip(size_t n); + + // Resets this instance to the default / empty state. + void Reset(); + + private: + // Slow path for Next() if Next() reached the end of a leaf node. Backtracks + // up the stack until it finds a node that has a 'next' position available, + // and then does a 'front dive' towards the next leaf node. + CordRep* NextUp(); + + // Slow path for Previous() if Previous() reached the beginning of a leaf + // node. Backtracks up the stack until it finds a node that has a 'previous' + // position available, and then does a 'back dive' towards the previous leaf + // node. + CordRep* PreviousUp(); + + // Generic implementation of InitFirst() and InitLast(). + template <CordRepBtree::EdgeType edge_type> + CordRep* Init(CordRepBtree* tree); + + // `height_` contains the height of the current tree, or -1 if empty. + int height_ = -1; + + // `index_` and `node_` contain the navigation state as the 'path' to the + // current data edge which is at `node_[0]->Edge(index_[0])`. The contents + // of these are undefined until the instance is initialized (`height_ >= 0`). + uint8_t index_[CordRepBtree::kMaxHeight]; + CordRepBtree* node_[CordRepBtree::kMaxHeight]; +}; + +// Returns true if this instance is not empty. +inline CordRepBtreeNavigator::operator bool() const { return height_ >= 0; } + +inline CordRepBtree* CordRepBtreeNavigator::btree() const { + return height_ >= 0 ? node_[height_] : nullptr; +} + +inline CordRep* CordRepBtreeNavigator::Current() const { + assert(height_ >= 0); + return node_[0]->Edge(index_[0]); +} + +inline void CordRepBtreeNavigator::Reset() { height_ = -1; } + +inline CordRep* CordRepBtreeNavigator::InitFirst(CordRepBtree* tree) { + return Init<CordRepBtree::kFront>(tree); +} + +inline CordRep* CordRepBtreeNavigator::InitLast(CordRepBtree* tree) { + return Init<CordRepBtree::kBack>(tree); +} + +template <CordRepBtree::EdgeType edge_type> +inline CordRep* CordRepBtreeNavigator::Init(CordRepBtree* tree) { + assert(tree != nullptr); + assert(tree->size() > 0); + int height = height_ = tree->height(); + size_t index = tree->index(edge_type); + node_[height] = tree; + index_[height] = static_cast<uint8_t>(index); + while (--height >= 0) { + tree = tree->Edge(index)->btree(); + node_[height] = tree; + index = tree->index(edge_type); + index_[height] = static_cast<uint8_t>(index); + } + return node_[0]->Edge(index); +} + +inline CordRepBtreeNavigator::Position CordRepBtreeNavigator::Seek( + size_t offset) { + assert(btree() != nullptr); + int height = height_; + CordRepBtree* edge = node_[height]; + if (ABSL_PREDICT_FALSE(offset >= edge->length)) return {nullptr, 0}; + CordRepBtree::Position index = edge->IndexOf(offset); + index_[height] = static_cast<uint8_t>(index.index); + while (--height >= 0) { + edge = edge->Edge(index.index)->btree(); + node_[height] = edge; + index = edge->IndexOf(index.n); + index_[height] = static_cast<uint8_t>(index.index); + } + return {edge->Edge(index.index), index.n}; +} + +inline CordRepBtreeNavigator::Position CordRepBtreeNavigator::InitOffset( + CordRepBtree* tree, size_t offset) { + assert(tree != nullptr); + if (ABSL_PREDICT_FALSE(offset >= tree->length)) return {nullptr, 0}; + height_ = tree->height(); + node_[height_] = tree; + return Seek(offset); +} + +inline CordRep* CordRepBtreeNavigator::Next() { + CordRepBtree* edge = node_[0]; + return index_[0] == edge->back() ? NextUp() : edge->Edge(++index_[0]); +} + +inline CordRep* CordRepBtreeNavigator::Previous() { + CordRepBtree* edge = node_[0]; + return index_[0] == edge->begin() ? PreviousUp() : edge->Edge(--index_[0]); +} + +inline CordRep* CordRepBtreeNavigator::NextUp() { + assert(index_[0] == node_[0]->back()); + CordRepBtree* edge; + size_t index; + int height = 0; + do { + if (++height > height_) return nullptr; + edge = node_[height]; + index = index_[height] + 1; + } while (index == edge->end()); + index_[height] = static_cast<uint8_t>(index); + do { + node_[--height] = edge = edge->Edge(index)->btree(); + index_[height] = static_cast<uint8_t>(index = edge->begin()); + } while (height > 0); + return edge->Edge(index); +} + +inline CordRep* CordRepBtreeNavigator::PreviousUp() { + assert(index_[0] == node_[0]->begin()); + CordRepBtree* edge; + size_t index; + int height = 0; + do { + if (++height > height_) return nullptr; + edge = node_[height]; + index = index_[height]; + } while (index == edge->begin()); + index_[height] = static_cast<uint8_t>(--index); + do { + node_[--height] = edge = edge->Edge(index)->btree(); + index_[height] = static_cast<uint8_t>(index = edge->back()); + } while (height > 0); + return edge->Edge(index); +} + +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace y_absl + +#endif // ABSL_STRINGS_INTERNAL_CORD_REP_BTREE_NAVIGATOR_H_ diff --git a/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/cord_rep_btree_reader.cc b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/cord_rep_btree_reader.cc new file mode 100644 index 0000000000..0bc9dba2e6 --- /dev/null +++ b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/cord_rep_btree_reader.cc @@ -0,0 +1,68 @@ +// Copyright 2021 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "y_absl/strings/internal/cord_rep_btree_reader.h" + +#include <cassert> + +#include "y_absl/base/config.h" +#include "y_absl/strings/internal/cord_internal.h" +#include "y_absl/strings/internal/cord_rep_btree.h" +#include "y_absl/strings/internal/cord_rep_btree_navigator.h" +#include "y_absl/strings/internal/cord_rep_flat.h" + +namespace y_absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { + +y_absl::string_view CordRepBtreeReader::Read(size_t n, size_t chunk_size, + CordRep*& tree) { + assert(chunk_size <= navigator_.Current()->length); + + // If chunk_size is non-zero, we need to start inside last returned edge. + // Else we start reading at the next data edge of the tree. + CordRep* edge = chunk_size ? navigator_.Current() : navigator_.Next(); + const size_t offset = chunk_size ? edge->length - chunk_size : 0; + + // Read the sub tree and verify we got what we wanted. + ReadResult result = navigator_.Read(offset, n); + tree = result.tree; + + // If the data returned in `tree` was covered entirely by `chunk_size`, i.e., + // read from the 'previous' edge, we did not consume any additional data, and + // can directly return the substring into the current data edge as the next + // chunk. We can easily establish from the above code that `navigator_.Next()` + // has not been called as that requires `chunk_size` to be zero. + if (n < chunk_size) return CordRepBtree::EdgeData(edge).substr(result.n); + + // The amount of data taken from the last edge is `chunk_size` and `result.n` + // contains the offset into the current edge trailing the read data (which can + // be 0). As the call to `navigator_.Read()` could have consumed all remaining + // data, calling `navigator_.Current()` is not safe before checking if we + // already consumed all remaining data. + const size_t consumed_by_read = n - chunk_size - result.n; + if (consumed_by_read >= remaining_) { + remaining_ = 0; + return {}; + } + + // We did not read all data, return remaining data from current edge. + edge = navigator_.Current(); + remaining_ -= consumed_by_read + edge->length; + return CordRepBtree::EdgeData(edge).substr(result.n); +} + +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace y_absl diff --git a/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/cord_rep_btree_reader.h b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/cord_rep_btree_reader.h new file mode 100644 index 0000000000..00b2261f71 --- /dev/null +++ b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/cord_rep_btree_reader.h @@ -0,0 +1,211 @@ +// Copyright 2021 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ABSL_STRINGS_INTERNAL_CORD_REP_BTREE_READER_H_ +#define ABSL_STRINGS_INTERNAL_CORD_REP_BTREE_READER_H_ + +#include <cassert> + +#include "y_absl/base/config.h" +#include "y_absl/strings/internal/cord_internal.h" +#include "y_absl/strings/internal/cord_rep_btree.h" +#include "y_absl/strings/internal/cord_rep_btree_navigator.h" +#include "y_absl/strings/internal/cord_rep_flat.h" + +namespace y_absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { + +// CordRepBtreeReader implements logic to iterate over cord btrees. +// References to the underlying data are returned as y_absl::string_view values. +// The most typical use case is a forward only iteration over tree data. +// The class also provides `Skip()`, `Seek()` and `Read()` methods similar to +// CordRepBtreeNavigator that allow more advanced navigation. +// +// Example: iterate over all data inside a cord btree: +// +// CordRepBtreeReader reader; +// for (string_view sv = reader.Init(tree); !sv.Empty(); sv = sv.Next()) { +// DoSomethingWithDataIn(sv); +// } +// +// All navigation methods always return the next 'chunk' of data. The class +// assumes that all data is directly 'consumed' by the caller. For example: +// invoking `Skip()` will skip the desired number of bytes, and directly +// read and return the next chunk of data directly after the skipped bytes. +// +// Example: iterate over all data inside a btree skipping the first 100 bytes: +// +// CordRepBtreeReader reader; +// y_absl::string_view sv = reader.Init(tree); +// if (sv.length() > 100) { +// sv.RemovePrefix(100); +// } else { +// sv = reader.Skip(100 - sv.length()); +// } +// while (!sv.empty()) { +// DoSomethingWithDataIn(sv); +// y_absl::string_view sv = reader.Next(); +// } +// +// It is important to notice that `remaining` is based on the end position of +// the last data edge returned to the caller, not the cumulative data returned +// to the caller which can be less in cases of skipping or seeking over data. +// +// For example, consider a cord btree with five data edges: "abc", "def", "ghi", +// "jkl" and "mno": +// +// y_absl::string_view sv; +// CordRepBtreeReader reader; +// +// sv = reader.Init(tree); // sv = "abc", remaining = 12 +// sv = reader.Skip(4); // sv = "hi", remaining = 6 +// sv = reader.Skip(2); // sv = "l", remaining = 3 +// sv = reader.Next(); // sv = "mno", remaining = 0 +// sv = reader.Seek(1); // sv = "bc", remaining = 12 +// +class CordRepBtreeReader { + public: + using ReadResult = CordRepBtreeNavigator::ReadResult; + using Position = CordRepBtreeNavigator::Position; + + // Returns true if this instance is not empty. + explicit operator bool() const { return navigator_.btree() != nullptr; } + + // Returns the tree referenced by this instance or nullptr if empty. + CordRepBtree* btree() const { return navigator_.btree(); } + + // Returns the current data edge inside the referenced btree. + // Requires that the current instance is not empty. + CordRep* node() const { return navigator_.Current(); } + + // Returns the length of the referenced tree. + // Requires that the current instance is not empty. + size_t length() const; + + // Returns the number of remaining bytes available for iteration, which is the + // number of bytes directly following the end of the last chunk returned. + // This value will be zero if we iterated over the last edge in the bound + // tree, in which case any call to Next() or Skip() will return an empty + // string_view reflecting the EOF state. + // Note that a call to `Seek()` resets `remaining` to a value based on the + // end position of the chunk returned by that call. + size_t remaining() const { return remaining_; } + + // Resets this instance to an empty value. + void Reset() { navigator_.Reset(); } + + // Initializes this instance with `tree`. `tree` must not be null. + // Returns a reference to the first data edge of the provided tree. + y_absl::string_view Init(CordRepBtree* tree); + + // Navigates to and returns the next data edge of the referenced tree. + // Returns an empty string_view if an attempt is made to read beyond the end + // of the tree, i.e.: if `remaining()` is zero indicating an EOF condition. + // Requires that the current instance is not empty. + y_absl::string_view Next(); + + // Skips the provided amount of bytes and returns a reference to the data + // directly following the skipped bytes. + y_absl::string_view Skip(size_t skip); + + // Reads `n` bytes into `tree`. + // If `chunk_size` is zero, starts reading at the next data edge. If + // `chunk_size` is non zero, the read starts at the last `chunk_size` bytes of + // the last returned data edge. Effectively, this means that the read starts + // at offset `consumed() - chunk_size`. + // Requires that `chunk_size` is less than or equal to the length of the + // last returned data edge. The purpose of `chunk_size` is to simplify code + // partially consuming a returned chunk and wanting to include the remaining + // bytes in the Read call. For example, the below code will read 1000 bytes of + // data into a cord tree if the first chunk starts with "big:": + // + // CordRepBtreeReader reader; + // y_absl::string_view sv = reader.Init(tree); + // if (y_absl::StartsWith(sv, "big:")) { + // CordRepBtree tree; + // sv = reader.Read(1000, sv.size() - 4 /* "big:" */, &tree); + // } + // + // This method will return an empty string view if all remaining data was + // read. If `n` exceeded the amount of remaining data this function will + // return an empty string view and `tree` will be set to nullptr. + // In both cases, `consumed` will be set to `length`. + y_absl::string_view Read(size_t n, size_t chunk_size, CordRep*& tree); + + // Navigates to the chunk at offset `offset`. + // Returns a reference into the navigated to chunk, adjusted for the relative + // position of `offset` into that chunk. For example, calling `Seek(13)` on a + // cord tree containing 2 chunks of 10 and 20 bytes respectively will return + // a string view into the second chunk starting at offset 3 with a size of 17. + // Returns an empty string view if `offset` is equal to or greater than the + // length of the referenced tree. + y_absl::string_view Seek(size_t offset); + + private: + size_t remaining_ = 0; + CordRepBtreeNavigator navigator_; +}; + +inline size_t CordRepBtreeReader::length() const { + assert(btree() != nullptr); + return btree()->length; +} + +inline y_absl::string_view CordRepBtreeReader::Init(CordRepBtree* tree) { + assert(tree != nullptr); + const CordRep* edge = navigator_.InitFirst(tree); + remaining_ = tree->length - edge->length; + return CordRepBtree::EdgeData(edge); +} + +inline y_absl::string_view CordRepBtreeReader::Next() { + if (remaining_ == 0) return {}; + const CordRep* edge = navigator_.Next(); + assert(edge != nullptr); + remaining_ -= edge->length; + return CordRepBtree::EdgeData(edge); +} + +inline y_absl::string_view CordRepBtreeReader::Skip(size_t skip) { + // As we are always positioned on the last 'consumed' edge, we + // need to skip the current edge as well as `skip`. + const size_t edge_length = navigator_.Current()->length; + CordRepBtreeNavigator::Position pos = navigator_.Skip(skip + edge_length); + if (ABSL_PREDICT_FALSE(pos.edge == nullptr)) { + remaining_ = 0; + return {}; + } + // The combined length of all edges skipped before `pos.edge` is `skip - + // pos.offset`, all of which are 'consumed', as well as the current edge. + remaining_ -= skip - pos.offset + pos.edge->length; + return CordRepBtree::EdgeData(pos.edge).substr(pos.offset); +} + +inline y_absl::string_view CordRepBtreeReader::Seek(size_t offset) { + const CordRepBtreeNavigator::Position pos = navigator_.Seek(offset); + if (ABSL_PREDICT_FALSE(pos.edge == nullptr)) { + remaining_ = 0; + return {}; + } + y_absl::string_view chunk = CordRepBtree::EdgeData(pos.edge).substr(pos.offset); + remaining_ = length() - offset - chunk.length(); + return chunk; +} + +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace y_absl + +#endif // ABSL_STRINGS_INTERNAL_CORD_REP_BTREE_READER_H_ diff --git a/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/cord_rep_consume.cc b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/cord_rep_consume.cc new file mode 100644 index 0000000000..ffc0179e52 --- /dev/null +++ b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/cord_rep_consume.cc @@ -0,0 +1,129 @@ +// Copyright 2021 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "y_absl/strings/internal/cord_rep_consume.h" + +#include <array> +#include <utility> + +#include "y_absl/container/inlined_vector.h" +#include "y_absl/functional/function_ref.h" +#include "y_absl/strings/internal/cord_internal.h" + +namespace y_absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { + +namespace { + +// Unrefs the provided `substring`, and returns `substring->child` +// Adds or assumes a reference on `substring->child` +CordRep* ClipSubstring(CordRepSubstring* substring) { + CordRep* child = substring->child; + if (substring->refcount.IsOne()) { + delete substring; + } else { + CordRep::Ref(child); + CordRep::Unref(substring); + } + return child; +} + +// Unrefs the provided `concat`, and returns `{concat->left, concat->right}` +// Adds or assumes a reference on `concat->left` and `concat->right`. +// Returns an array of 2 elements containing the left and right nodes. +std::array<CordRep*, 2> ClipConcat(CordRepConcat* concat) { + std::array<CordRep*, 2> result{concat->left, concat->right}; + if (concat->refcount.IsOne()) { + delete concat; + } else { + CordRep::Ref(result[0]); + CordRep::Ref(result[1]); + CordRep::Unref(concat); + } + return result; +} + +void Consume(bool forward, CordRep* rep, ConsumeFn consume_fn) { + size_t offset = 0; + size_t length = rep->length; + struct Entry { + CordRep* rep; + size_t offset; + size_t length; + }; + y_absl::InlinedVector<Entry, 40> stack; + + for (;;) { + if (rep->tag == CONCAT) { + std::array<CordRep*, 2> res = ClipConcat(rep->concat()); + CordRep* left = res[0]; + CordRep* right = res[1]; + + if (left->length <= offset) { + // Don't need left node + offset -= left->length; + CordRep::Unref(left); + rep = right; + continue; + } + + size_t length_left = left->length - offset; + if (length_left >= length) { + // Don't need right node + CordRep::Unref(right); + rep = left; + continue; + } + + // Need both nodes + size_t length_right = length - length_left; + if (forward) { + stack.push_back({right, 0, length_right}); + rep = left; + length = length_left; + } else { + stack.push_back({left, offset, length_left}); + rep = right; + offset = 0; + length = length_right; + } + } else if (rep->tag == SUBSTRING) { + offset += rep->substring()->start; + rep = ClipSubstring(rep->substring()); + } else { + consume_fn(rep, offset, length); + if (stack.empty()) return; + + rep = stack.back().rep; + offset = stack.back().offset; + length = stack.back().length; + stack.pop_back(); + } + } +} + +} // namespace + +void Consume(CordRep* rep, ConsumeFn consume_fn) { + return Consume(true, rep, std::move(consume_fn)); +} + +void ReverseConsume(CordRep* rep, ConsumeFn consume_fn) { + return Consume(false, rep, std::move(consume_fn)); +} + +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace y_absl diff --git a/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/cord_rep_consume.h b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/cord_rep_consume.h new file mode 100644 index 0000000000..7f6e5584f4 --- /dev/null +++ b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/cord_rep_consume.h @@ -0,0 +1,50 @@ +// Copyright 2021 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ABSL_STRINGS_INTERNAL_CORD_REP_CONSUME_H_ +#define ABSL_STRINGS_INTERNAL_CORD_REP_CONSUME_H_ + +#include <functional> + +#include "y_absl/functional/function_ref.h" +#include "y_absl/strings/internal/cord_internal.h" + +namespace y_absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { + +// Functor for the Consume() and ReverseConsume() functions: +// void ConsumeFunc(CordRep* rep, size_t offset, size_t length); +// See the Consume() and ReverseConsume() function comments for documentation. +using ConsumeFn = FunctionRef<void(CordRep*, size_t, size_t)>; + +// Consume() and ReverseConsume() consume CONCAT based trees and invoke the +// provided functor with the contained nodes in the proper forward or reverse +// order, which is used to convert CONCAT trees into other tree or cord data. +// All CONCAT and SUBSTRING nodes are processed internally. The 'offset` +// parameter of the functor is non-zero for any nodes below SUBSTRING nodes. +// It's up to the caller to form these back into SUBSTRING nodes or otherwise +// store offset / prefix information. These functions are intended to be used +// only for migration / transitional code where due to factors such as ODR +// violations, we can not 100% guarantee that all code respects 'new format' +// settings and flags, so we need to be able to parse old data on the fly until +// all old code is deprecated / no longer the default format. +void Consume(CordRep* rep, ConsumeFn consume_fn); +void ReverseConsume(CordRep* rep, ConsumeFn consume_fn); + +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace y_absl + +#endif // ABSL_STRINGS_INTERNAL_CORD_REP_CONSUME_H_ diff --git a/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/cord_rep_flat.h b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/cord_rep_flat.h new file mode 100644 index 0000000000..976613031c --- /dev/null +++ b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/cord_rep_flat.h @@ -0,0 +1,146 @@ +// Copyright 2020 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ABSL_STRINGS_INTERNAL_CORD_REP_FLAT_H_ +#define ABSL_STRINGS_INTERNAL_CORD_REP_FLAT_H_ + +#include <cassert> +#include <cstddef> +#include <cstdint> +#include <memory> + +#include "y_absl/strings/internal/cord_internal.h" + +namespace y_absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { + +// Note: all constants below are never ODR used and internal to cord, we define +// these as static constexpr to avoid 'in struct' definition and usage clutter. + +// Largest and smallest flat node lengths we are willing to allocate +// Flat allocation size is stored in tag, which currently can encode sizes up +// to 4K, encoded as multiple of either 8 or 32 bytes. +// If we allow for larger sizes, we need to change this to 8/64, 16/128, etc. +// kMinFlatSize is bounded by tag needing to be at least FLAT * 8 bytes, and +// ideally a 'nice' size aligning with allocation and cacheline sizes like 32. +// kMaxFlatSize is bounded by the size resulting in a computed tag no greater +// than MAX_FLAT_TAG. MAX_FLAT_TAG provides for additional 'high' tag values. +static constexpr size_t kFlatOverhead = offsetof(CordRep, storage); +static constexpr size_t kMinFlatSize = 32; +static constexpr size_t kMaxFlatSize = 4096; +static constexpr size_t kMaxFlatLength = kMaxFlatSize - kFlatOverhead; +static constexpr size_t kMinFlatLength = kMinFlatSize - kFlatOverhead; + +constexpr uint8_t AllocatedSizeToTagUnchecked(size_t size) { + return static_cast<uint8_t>((size <= 1024) ? size / 8 + 1 + : 129 + size / 32 - 1024 / 32); +} + +static_assert(kMinFlatSize / 8 + 1 >= FLAT, ""); +static_assert(AllocatedSizeToTagUnchecked(kMaxFlatSize) <= MAX_FLAT_TAG, ""); + +// Helper functions for rounded div, and rounding to exact sizes. +constexpr size_t DivUp(size_t n, size_t m) { return (n + m - 1) / m; } +constexpr size_t RoundUp(size_t n, size_t m) { return DivUp(n, m) * m; } + +// Returns the size to the nearest equal or larger value that can be +// expressed exactly as a tag value. +inline size_t RoundUpForTag(size_t size) { + return RoundUp(size, (size <= 1024) ? 8 : 32); +} + +// Converts the allocated size to a tag, rounding down if the size +// does not exactly match a 'tag expressible' size value. The result is +// undefined if the size exceeds the maximum size that can be encoded in +// a tag, i.e., if size is larger than TagToAllocatedSize(<max tag>). +inline uint8_t AllocatedSizeToTag(size_t size) { + const uint8_t tag = AllocatedSizeToTagUnchecked(size); + assert(tag <= MAX_FLAT_TAG); + return tag; +} + +// Converts the provided tag to the corresponding allocated size +constexpr size_t TagToAllocatedSize(uint8_t tag) { + return (tag <= 129) ? ((tag - 1) * 8) : (1024 + (tag - 129) * 32); +} + +// Converts the provided tag to the corresponding available data length +constexpr size_t TagToLength(uint8_t tag) { + return TagToAllocatedSize(tag) - kFlatOverhead; +} + +// Enforce that kMaxFlatSize maps to a well-known exact tag value. +static_assert(TagToAllocatedSize(225) == kMaxFlatSize, "Bad tag logic"); + +struct CordRepFlat : public CordRep { + // Creates a new flat node. + static CordRepFlat* New(size_t len) { + if (len <= kMinFlatLength) { + len = kMinFlatLength; + } else if (len > kMaxFlatLength) { + len = kMaxFlatLength; + } + + // Round size up so it matches a size we can exactly express in a tag. + const size_t size = RoundUpForTag(len + kFlatOverhead); + void* const raw_rep = ::operator new(size); + CordRepFlat* rep = new (raw_rep) CordRepFlat(); + rep->tag = AllocatedSizeToTag(size); + return rep; + } + + // Deletes a CordRepFlat instance created previously through a call to New(). + // Flat CordReps are allocated and constructed with raw ::operator new and + // placement new, and must be destructed and deallocated accordingly. + static void Delete(CordRep*rep) { + assert(rep->tag >= FLAT && rep->tag <= MAX_FLAT_TAG); + +#if defined(__cpp_sized_deallocation) + size_t size = TagToAllocatedSize(rep->tag); + rep->~CordRep(); + ::operator delete(rep, size); +#else + rep->~CordRep(); + ::operator delete(rep); +#endif + } + + // Returns a pointer to the data inside this flat rep. + char* Data() { return reinterpret_cast<char*>(storage); } + const char* Data() const { return reinterpret_cast<const char*>(storage); } + + // Returns the maximum capacity (payload size) of this instance. + size_t Capacity() const { return TagToLength(tag); } + + // Returns the allocated size (payload + overhead) of this instance. + size_t AllocatedSize() const { return TagToAllocatedSize(tag); } +}; + +// Now that CordRepFlat is defined, we can define CordRep's helper casts: +inline CordRepFlat* CordRep::flat() { + assert(tag >= FLAT && tag <= MAX_FLAT_TAG); + return reinterpret_cast<CordRepFlat*>(this); +} + +inline const CordRepFlat* CordRep::flat() const { + assert(tag >= FLAT && tag <= MAX_FLAT_TAG); + return reinterpret_cast<const CordRepFlat*>(this); +} + +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace y_absl + +#endif // ABSL_STRINGS_INTERNAL_CORD_REP_FLAT_H_ diff --git a/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/cord_rep_ring.cc b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/cord_rep_ring.cc new file mode 100644 index 0000000000..06c7e75bd8 --- /dev/null +++ b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/cord_rep_ring.cc @@ -0,0 +1,771 @@ +// Copyright 2020 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include "y_absl/strings/internal/cord_rep_ring.h" + +#include <cassert> +#include <cstddef> +#include <cstdint> +#include <iostream> +#include <limits> +#include <memory> +#include <util/generic/string.h> + +#include "y_absl/base/internal/raw_logging.h" +#include "y_absl/base/internal/throw_delegate.h" +#include "y_absl/base/macros.h" +#include "y_absl/container/inlined_vector.h" +#include "y_absl/strings/internal/cord_internal.h" +#include "y_absl/strings/internal/cord_rep_consume.h" +#include "y_absl/strings/internal/cord_rep_flat.h" + +namespace y_absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { + +namespace { + +using index_type = CordRepRing::index_type; + +enum class Direction { kForward, kReversed }; + +inline bool IsFlatOrExternal(CordRep* rep) { + return rep->IsFlat() || rep->IsExternal(); +} + +// Verifies that n + extra <= kMaxCapacity: throws std::length_error otherwise. +inline void CheckCapacity(size_t n, size_t extra) { + if (ABSL_PREDICT_FALSE(extra > CordRepRing::kMaxCapacity - n)) { + base_internal::ThrowStdLengthError("Maximum capacity exceeded"); + } +} + +// Creates a flat from the provided string data, allocating up to `extra` +// capacity in the returned flat depending on kMaxFlatLength limitations. +// Requires `len` to be less or equal to `kMaxFlatLength` +CordRepFlat* CreateFlat(const char* s, size_t n, size_t extra = 0) { // NOLINT + assert(n <= kMaxFlatLength); + auto* rep = CordRepFlat::New(n + extra); + rep->length = n; + memcpy(rep->Data(), s, n); + return rep; +} + +// Unrefs the entries in `[head, tail)`. +// Requires all entries to be a FLAT or EXTERNAL node. +void UnrefEntries(const CordRepRing* rep, index_type head, index_type tail) { + rep->ForEach(head, tail, [rep](index_type ix) { + CordRep* child = rep->entry_child(ix); + if (!child->refcount.Decrement()) { + if (child->tag >= FLAT) { + CordRepFlat::Delete(child->flat()); + } else { + CordRepExternal::Delete(child->external()); + } + } + }); +} + +} // namespace + +std::ostream& operator<<(std::ostream& s, const CordRepRing& rep) { + // Note: 'pos' values are defined as size_t (for overflow reasons), but that + // prints really awkward for small prepended values such as -5. ssize_t is not + // portable (POSIX), so we use ptrdiff_t instead to cast to signed values. + s << " CordRepRing(" << &rep << ", length = " << rep.length + << ", head = " << rep.head_ << ", tail = " << rep.tail_ + << ", cap = " << rep.capacity_ << ", rc = " << rep.refcount.Get() + << ", begin_pos_ = " << static_cast<ptrdiff_t>(rep.begin_pos_) << ") {\n"; + CordRepRing::index_type head = rep.head(); + do { + CordRep* child = rep.entry_child(head); + s << " entry[" << head << "] length = " << rep.entry_length(head) + << ", child " << child << ", clen = " << child->length + << ", tag = " << static_cast<int>(child->tag) + << ", rc = " << child->refcount.Get() + << ", offset = " << rep.entry_data_offset(head) + << ", end_pos = " << static_cast<ptrdiff_t>(rep.entry_end_pos(head)) + << "\n"; + head = rep.advance(head); + } while (head != rep.tail()); + return s << "}\n"; +} + +void CordRepRing::AddDataOffset(index_type index, size_t n) { + entry_data_offset()[index] += static_cast<offset_type>(n); +} + +void CordRepRing::SubLength(index_type index, size_t n) { + entry_end_pos()[index] -= n; +} + +class CordRepRing::Filler { + public: + Filler(CordRepRing* rep, index_type pos) : rep_(rep), head_(pos), pos_(pos) {} + + index_type head() const { return head_; } + index_type pos() const { return pos_; } + + void Add(CordRep* child, size_t offset, pos_type end_pos) { + rep_->entry_end_pos()[pos_] = end_pos; + rep_->entry_child()[pos_] = child; + rep_->entry_data_offset()[pos_] = static_cast<offset_type>(offset); + pos_ = rep_->advance(pos_); + } + + private: + CordRepRing* rep_; + index_type head_; + index_type pos_; +}; + +constexpr size_t CordRepRing::kMaxCapacity; // NOLINT: needed for c++11 + +bool CordRepRing::IsValid(std::ostream& output) const { + if (capacity_ == 0) { + output << "capacity == 0"; + return false; + } + + if (head_ >= capacity_ || tail_ >= capacity_) { + output << "head " << head_ << " and/or tail " << tail_ << "exceed capacity " + << capacity_; + return false; + } + + const index_type back = retreat(tail_); + size_t pos_length = Distance(begin_pos_, entry_end_pos(back)); + if (pos_length != length) { + output << "length " << length << " does not match positional length " + << pos_length << " from begin_pos " << begin_pos_ << " and entry[" + << back << "].end_pos " << entry_end_pos(back); + return false; + } + + index_type head = head_; + pos_type begin_pos = begin_pos_; + do { + pos_type end_pos = entry_end_pos(head); + size_t entry_length = Distance(begin_pos, end_pos); + if (entry_length == 0) { + output << "entry[" << head << "] has an invalid length " << entry_length + << " from begin_pos " << begin_pos << " and end_pos " << end_pos; + return false; + } + + CordRep* child = entry_child(head); + if (child == nullptr) { + output << "entry[" << head << "].child == nullptr"; + return false; + } + if (child->tag < FLAT && child->tag != EXTERNAL) { + output << "entry[" << head << "].child has an invalid tag " + << static_cast<int>(child->tag); + return false; + } + + size_t offset = entry_data_offset(head); + if (offset >= child->length || entry_length > child->length - offset) { + output << "entry[" << head << "] has offset " << offset + << " and entry length " << entry_length + << " which are outside of the child's length of " << child->length; + return false; + } + + begin_pos = end_pos; + head = advance(head); + } while (head != tail_); + + return true; +} + +#ifdef EXTRA_CORD_RING_VALIDATION +CordRepRing* CordRepRing::Validate(CordRepRing* rep, const char* file, + int line) { + if (!rep->IsValid(std::cerr)) { + std::cerr << "\nERROR: CordRepRing corrupted"; + if (line) std::cerr << " at line " << line; + if (file) std::cerr << " in file " << file; + std::cerr << "\nContent = " << *rep; + abort(); + } + return rep; +} +#endif // EXTRA_CORD_RING_VALIDATION + +CordRepRing* CordRepRing::New(size_t capacity, size_t extra) { + CheckCapacity(capacity, extra); + + size_t size = AllocSize(capacity += extra); + void* mem = ::operator new(size); + auto* rep = new (mem) CordRepRing(static_cast<index_type>(capacity)); + rep->tag = RING; + rep->capacity_ = static_cast<index_type>(capacity); + rep->begin_pos_ = 0; + return rep; +} + +void CordRepRing::SetCapacityForTesting(size_t capacity) { + // Adjust for the changed layout + assert(capacity <= capacity_); + assert(head() == 0 || head() < tail()); + memmove(Layout::Partial(capacity).Pointer<1>(data_) + head(), + Layout::Partial(capacity_).Pointer<1>(data_) + head(), + entries() * sizeof(Layout::ElementType<1>)); + memmove(Layout::Partial(capacity, capacity).Pointer<2>(data_) + head(), + Layout::Partial(capacity_, capacity_).Pointer<2>(data_) + head(), + entries() * sizeof(Layout::ElementType<2>)); + capacity_ = static_cast<index_type>(capacity); +} + +void CordRepRing::Delete(CordRepRing* rep) { + assert(rep != nullptr && rep->IsRing()); +#if defined(__cpp_sized_deallocation) + size_t size = AllocSize(rep->capacity_); + rep->~CordRepRing(); + ::operator delete(rep, size); +#else + rep->~CordRepRing(); + ::operator delete(rep); +#endif +} + +void CordRepRing::Destroy(CordRepRing* rep) { + UnrefEntries(rep, rep->head(), rep->tail()); + Delete(rep); +} + +template <bool ref> +void CordRepRing::Fill(const CordRepRing* src, index_type head, + index_type tail) { + this->length = src->length; + head_ = 0; + tail_ = advance(0, src->entries(head, tail)); + begin_pos_ = src->begin_pos_; + + // TODO(mvels): there may be opportunities here for large buffers. + auto* dst_pos = entry_end_pos(); + auto* dst_child = entry_child(); + auto* dst_offset = entry_data_offset(); + src->ForEach(head, tail, [&](index_type index) { + *dst_pos++ = src->entry_end_pos(index); + CordRep* child = src->entry_child(index); + *dst_child++ = ref ? CordRep::Ref(child) : child; + *dst_offset++ = src->entry_data_offset(index); + }); +} + +CordRepRing* CordRepRing::Copy(CordRepRing* rep, index_type head, + index_type tail, size_t extra) { + CordRepRing* newrep = CordRepRing::New(rep->entries(head, tail), extra); + newrep->Fill<true>(rep, head, tail); + CordRep::Unref(rep); + return newrep; +} + +CordRepRing* CordRepRing::Mutable(CordRepRing* rep, size_t extra) { + // Get current number of entries, and check for max capacity. + size_t entries = rep->entries(); + + if (!rep->refcount.IsMutable()) { + return Copy(rep, rep->head(), rep->tail(), extra); + } else if (entries + extra > rep->capacity()) { + const size_t min_grow = rep->capacity() + rep->capacity() / 2; + const size_t min_extra = (std::max)(extra, min_grow - entries); + CordRepRing* newrep = CordRepRing::New(entries, min_extra); + newrep->Fill<false>(rep, rep->head(), rep->tail()); + CordRepRing::Delete(rep); + return newrep; + } else { + return rep; + } +} + +Span<char> CordRepRing::GetAppendBuffer(size_t size) { + assert(refcount.IsMutable()); + index_type back = retreat(tail_); + CordRep* child = entry_child(back); + if (child->tag >= FLAT && child->refcount.IsMutable()) { + size_t capacity = child->flat()->Capacity(); + pos_type end_pos = entry_end_pos(back); + size_t data_offset = entry_data_offset(back); + size_t entry_length = Distance(entry_begin_pos(back), end_pos); + size_t used = data_offset + entry_length; + if (size_t n = (std::min)(capacity - used, size)) { + child->length = data_offset + entry_length + n; + entry_end_pos()[back] = end_pos + n; + this->length += n; + return {child->flat()->Data() + used, n}; + } + } + return {nullptr, 0}; +} + +Span<char> CordRepRing::GetPrependBuffer(size_t size) { + assert(refcount.IsMutable()); + CordRep* child = entry_child(head_); + size_t data_offset = entry_data_offset(head_); + if (data_offset && child->refcount.IsMutable() && child->tag >= FLAT) { + size_t n = (std::min)(data_offset, size); + this->length += n; + begin_pos_ -= n; + data_offset -= n; + entry_data_offset()[head_] = static_cast<offset_type>(data_offset); + return {child->flat()->Data() + data_offset, n}; + } + return {nullptr, 0}; +} + +CordRepRing* CordRepRing::CreateFromLeaf(CordRep* child, size_t offset, + size_t len, size_t extra) { + CordRepRing* rep = CordRepRing::New(1, extra); + rep->head_ = 0; + rep->tail_ = rep->advance(0); + rep->length = len; + rep->entry_end_pos()[0] = len; + rep->entry_child()[0] = child; + rep->entry_data_offset()[0] = static_cast<offset_type>(offset); + return Validate(rep); +} + +CordRepRing* CordRepRing::CreateSlow(CordRep* child, size_t extra) { + CordRepRing* rep = nullptr; + Consume(child, [&](CordRep* child_arg, size_t offset, size_t len) { + if (IsFlatOrExternal(child_arg)) { + rep = rep ? AppendLeaf(rep, child_arg, offset, len) + : CreateFromLeaf(child_arg, offset, len, extra); + } else if (rep) { + rep = AddRing<AddMode::kAppend>(rep, child_arg->ring(), offset, len); + } else if (offset == 0 && child_arg->length == len) { + rep = Mutable(child_arg->ring(), extra); + } else { + rep = SubRing(child_arg->ring(), offset, len, extra); + } + }); + return Validate(rep, nullptr, __LINE__); +} + +CordRepRing* CordRepRing::Create(CordRep* child, size_t extra) { + size_t length = child->length; + if (IsFlatOrExternal(child)) { + return CreateFromLeaf(child, 0, length, extra); + } + if (child->IsRing()) { + return Mutable(child->ring(), extra); + } + return CreateSlow(child, extra); +} + +template <CordRepRing::AddMode mode> +CordRepRing* CordRepRing::AddRing(CordRepRing* rep, CordRepRing* ring, + size_t offset, size_t len) { + assert(offset < ring->length); + constexpr bool append = mode == AddMode::kAppend; + Position head = ring->Find(offset); + Position tail = ring->FindTail(head.index, offset + len); + const index_type entries = ring->entries(head.index, tail.index); + + rep = Mutable(rep, entries); + + // The delta for making ring[head].end_pos into 'len - offset' + const pos_type delta_length = + (append ? rep->begin_pos_ + rep->length : rep->begin_pos_ - len) - + ring->entry_begin_pos(head.index) - head.offset; + + // Start filling at `tail`, or `entries` before `head` + Filler filler(rep, append ? rep->tail_ : rep->retreat(rep->head_, entries)); + + if (ring->refcount.IsOne()) { + // Copy entries from source stealing the ref and adjusting the end position. + // Commit the filler as this is no-op. + ring->ForEach(head.index, tail.index, [&](index_type ix) { + filler.Add(ring->entry_child(ix), ring->entry_data_offset(ix), + ring->entry_end_pos(ix) + delta_length); + }); + + // Unref entries we did not copy over, and delete source. + if (head.index != ring->head_) UnrefEntries(ring, ring->head_, head.index); + if (tail.index != ring->tail_) UnrefEntries(ring, tail.index, ring->tail_); + CordRepRing::Delete(ring); + } else { + ring->ForEach(head.index, tail.index, [&](index_type ix) { + CordRep* child = ring->entry_child(ix); + filler.Add(child, ring->entry_data_offset(ix), + ring->entry_end_pos(ix) + delta_length); + CordRep::Ref(child); + }); + CordRepRing::Unref(ring); + } + + if (head.offset) { + // Increase offset of first 'source' entry appended or prepended. + // This is always the entry in `filler.head()` + rep->AddDataOffset(filler.head(), head.offset); + } + + if (tail.offset) { + // Reduce length of last 'source' entry appended or prepended. + // This is always the entry tailed by `filler.pos()` + rep->SubLength(rep->retreat(filler.pos()), tail.offset); + } + + // Commit changes + rep->length += len; + if (append) { + rep->tail_ = filler.pos(); + } else { + rep->head_ = filler.head(); + rep->begin_pos_ -= len; + } + + return Validate(rep); +} + +CordRepRing* CordRepRing::AppendSlow(CordRepRing* rep, CordRep* child) { + Consume(child, [&rep](CordRep* child_arg, size_t offset, size_t len) { + if (child_arg->IsRing()) { + rep = AddRing<AddMode::kAppend>(rep, child_arg->ring(), offset, len); + } else { + rep = AppendLeaf(rep, child_arg, offset, len); + } + }); + return rep; +} + +CordRepRing* CordRepRing::AppendLeaf(CordRepRing* rep, CordRep* child, + size_t offset, size_t len) { + rep = Mutable(rep, 1); + index_type back = rep->tail_; + const pos_type begin_pos = rep->begin_pos_ + rep->length; + rep->tail_ = rep->advance(rep->tail_); + rep->length += len; + rep->entry_end_pos()[back] = begin_pos + len; + rep->entry_child()[back] = child; + rep->entry_data_offset()[back] = static_cast<offset_type>(offset); + return Validate(rep, nullptr, __LINE__); +} + +CordRepRing* CordRepRing::Append(CordRepRing* rep, CordRep* child) { + size_t length = child->length; + if (IsFlatOrExternal(child)) { + return AppendLeaf(rep, child, 0, length); + } + if (child->IsRing()) { + return AddRing<AddMode::kAppend>(rep, child->ring(), 0, length); + } + return AppendSlow(rep, child); +} + +CordRepRing* CordRepRing::PrependSlow(CordRepRing* rep, CordRep* child) { + ReverseConsume(child, [&](CordRep* child_arg, size_t offset, size_t len) { + if (IsFlatOrExternal(child_arg)) { + rep = PrependLeaf(rep, child_arg, offset, len); + } else { + rep = AddRing<AddMode::kPrepend>(rep, child_arg->ring(), offset, len); + } + }); + return Validate(rep); +} + +CordRepRing* CordRepRing::PrependLeaf(CordRepRing* rep, CordRep* child, + size_t offset, size_t len) { + rep = Mutable(rep, 1); + index_type head = rep->retreat(rep->head_); + pos_type end_pos = rep->begin_pos_; + rep->head_ = head; + rep->length += len; + rep->begin_pos_ -= len; + rep->entry_end_pos()[head] = end_pos; + rep->entry_child()[head] = child; + rep->entry_data_offset()[head] = static_cast<offset_type>(offset); + return Validate(rep); +} + +CordRepRing* CordRepRing::Prepend(CordRepRing* rep, CordRep* child) { + size_t length = child->length; + if (IsFlatOrExternal(child)) { + return PrependLeaf(rep, child, 0, length); + } + if (child->IsRing()) { + return AddRing<AddMode::kPrepend>(rep, child->ring(), 0, length); + } + return PrependSlow(rep, child); +} + +CordRepRing* CordRepRing::Append(CordRepRing* rep, y_absl::string_view data, + size_t extra) { + if (rep->refcount.IsMutable()) { + Span<char> avail = rep->GetAppendBuffer(data.length()); + if (!avail.empty()) { + memcpy(avail.data(), data.data(), avail.length()); + data.remove_prefix(avail.length()); + } + } + if (data.empty()) return Validate(rep); + + const size_t flats = (data.length() - 1) / kMaxFlatLength + 1; + rep = Mutable(rep, flats); + + Filler filler(rep, rep->tail_); + pos_type pos = rep->begin_pos_ + rep->length; + + while (data.length() >= kMaxFlatLength) { + auto* flat = CreateFlat(data.data(), kMaxFlatLength); + filler.Add(flat, 0, pos += kMaxFlatLength); + data.remove_prefix(kMaxFlatLength); + } + + if (data.length()) { + auto* flat = CreateFlat(data.data(), data.length(), extra); + filler.Add(flat, 0, pos += data.length()); + } + + rep->length = pos - rep->begin_pos_; + rep->tail_ = filler.pos(); + + return Validate(rep); +} + +CordRepRing* CordRepRing::Prepend(CordRepRing* rep, y_absl::string_view data, + size_t extra) { + if (rep->refcount.IsMutable()) { + Span<char> avail = rep->GetPrependBuffer(data.length()); + if (!avail.empty()) { + const char* tail = data.data() + data.length() - avail.length(); + memcpy(avail.data(), tail, avail.length()); + data.remove_suffix(avail.length()); + } + } + if (data.empty()) return rep; + + const size_t flats = (data.length() - 1) / kMaxFlatLength + 1; + rep = Mutable(rep, flats); + pos_type pos = rep->begin_pos_; + Filler filler(rep, rep->retreat(rep->head_, static_cast<index_type>(flats))); + + size_t first_size = data.size() - (flats - 1) * kMaxFlatLength; + CordRepFlat* flat = CordRepFlat::New(first_size + extra); + flat->length = first_size + extra; + memcpy(flat->Data() + extra, data.data(), first_size); + data.remove_prefix(first_size); + filler.Add(flat, extra, pos); + pos -= first_size; + + while (!data.empty()) { + assert(data.size() >= kMaxFlatLength); + flat = CreateFlat(data.data(), kMaxFlatLength); + filler.Add(flat, 0, pos); + pos -= kMaxFlatLength; + data.remove_prefix(kMaxFlatLength); + } + + rep->head_ = filler.head(); + rep->length += rep->begin_pos_ - pos; + rep->begin_pos_ = pos; + + return Validate(rep); +} + +// 32 entries is 32 * sizeof(pos_type) = 4 cache lines on x86 +static constexpr index_type kBinarySearchThreshold = 32; +static constexpr index_type kBinarySearchEndCount = 8; + +template <bool wrap> +CordRepRing::index_type CordRepRing::FindBinary(index_type head, + index_type tail, + size_t offset) const { + index_type count = tail + (wrap ? capacity_ : 0) - head; + do { + count = (count - 1) / 2; + assert(count < entries(head, tail_)); + index_type mid = wrap ? advance(head, count) : head + count; + index_type after_mid = wrap ? advance(mid) : mid + 1; + bool larger = (offset >= entry_end_offset(mid)); + head = larger ? after_mid : head; + tail = larger ? tail : mid; + assert(head != tail); + } while (ABSL_PREDICT_TRUE(count > kBinarySearchEndCount)); + return head; +} + +CordRepRing::Position CordRepRing::FindSlow(index_type head, + size_t offset) const { + index_type tail = tail_; + + // Binary search until we are good for linear search + // Optimize for branchless / non wrapping ops + if (tail > head) { + index_type count = tail - head; + if (count > kBinarySearchThreshold) { + head = FindBinary<false>(head, tail, offset); + } + } else { + index_type count = capacity_ + tail - head; + if (count > kBinarySearchThreshold) { + head = FindBinary<true>(head, tail, offset); + } + } + + pos_type pos = entry_begin_pos(head); + pos_type end_pos = entry_end_pos(head); + while (offset >= Distance(begin_pos_, end_pos)) { + head = advance(head); + pos = end_pos; + end_pos = entry_end_pos(head); + } + + return {head, offset - Distance(begin_pos_, pos)}; +} + +CordRepRing::Position CordRepRing::FindTailSlow(index_type head, + size_t offset) const { + index_type tail = tail_; + const size_t tail_offset = offset - 1; + + // Binary search until we are good for linear search + // Optimize for branchless / non wrapping ops + if (tail > head) { + index_type count = tail - head; + if (count > kBinarySearchThreshold) { + head = FindBinary<false>(head, tail, tail_offset); + } + } else { + index_type count = capacity_ + tail - head; + if (count > kBinarySearchThreshold) { + head = FindBinary<true>(head, tail, tail_offset); + } + } + + size_t end_offset = entry_end_offset(head); + while (tail_offset >= end_offset) { + head = advance(head); + end_offset = entry_end_offset(head); + } + + return {advance(head), end_offset - offset}; +} + +char CordRepRing::GetCharacter(size_t offset) const { + assert(offset < length); + + Position pos = Find(offset); + size_t data_offset = entry_data_offset(pos.index) + pos.offset; + return GetRepData(entry_child(pos.index))[data_offset]; +} + +CordRepRing* CordRepRing::SubRing(CordRepRing* rep, size_t offset, + size_t len, size_t extra) { + assert(offset <= rep->length); + assert(offset <= rep->length - len); + + if (len == 0) { + CordRep::Unref(rep); + return nullptr; + } + + // Find position of first byte + Position head = rep->Find(offset); + Position tail = rep->FindTail(head.index, offset + len); + const size_t new_entries = rep->entries(head.index, tail.index); + + if (rep->refcount.IsMutable() && extra <= (rep->capacity() - new_entries)) { + // We adopt a privately owned rep and no extra entries needed. + if (head.index != rep->head_) UnrefEntries(rep, rep->head_, head.index); + if (tail.index != rep->tail_) UnrefEntries(rep, tail.index, rep->tail_); + rep->head_ = head.index; + rep->tail_ = tail.index; + } else { + // Copy subset to new rep + rep = Copy(rep, head.index, tail.index, extra); + head.index = rep->head_; + tail.index = rep->tail_; + } + + // Adjust begin_pos and length + rep->length = len; + rep->begin_pos_ += offset; + + // Adjust head and tail blocks + if (head.offset) { + rep->AddDataOffset(head.index, head.offset); + } + if (tail.offset) { + rep->SubLength(rep->retreat(tail.index), tail.offset); + } + + return Validate(rep); +} + +CordRepRing* CordRepRing::RemovePrefix(CordRepRing* rep, size_t len, + size_t extra) { + assert(len <= rep->length); + if (len == rep->length) { + CordRep::Unref(rep); + return nullptr; + } + + Position head = rep->Find(len); + if (rep->refcount.IsMutable()) { + if (head.index != rep->head_) UnrefEntries(rep, rep->head_, head.index); + rep->head_ = head.index; + } else { + rep = Copy(rep, head.index, rep->tail_, extra); + head.index = rep->head_; + } + + // Adjust begin_pos and length + rep->length -= len; + rep->begin_pos_ += len; + + // Adjust head block + if (head.offset) { + rep->AddDataOffset(head.index, head.offset); + } + + return Validate(rep); +} + +CordRepRing* CordRepRing::RemoveSuffix(CordRepRing* rep, size_t len, + size_t extra) { + assert(len <= rep->length); + + if (len == rep->length) { + CordRep::Unref(rep); + return nullptr; + } + + Position tail = rep->FindTail(rep->length - len); + if (rep->refcount.IsMutable()) { + // We adopt a privately owned rep, scrub. + if (tail.index != rep->tail_) UnrefEntries(rep, tail.index, rep->tail_); + rep->tail_ = tail.index; + } else { + // Copy subset to new rep + rep = Copy(rep, rep->head_, tail.index, extra); + tail.index = rep->tail_; + } + + // Adjust length + rep->length -= len; + + // Adjust tail block + if (tail.offset) { + rep->SubLength(rep->retreat(tail.index), tail.offset); + } + + return Validate(rep); +} + +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace y_absl diff --git a/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/cord_rep_ring.h b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/cord_rep_ring.h new file mode 100644 index 0000000000..5f9784d8da --- /dev/null +++ b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/cord_rep_ring.h @@ -0,0 +1,607 @@ +// Copyright 2020 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ABSL_STRINGS_INTERNAL_CORD_REP_RING_H_ +#define ABSL_STRINGS_INTERNAL_CORD_REP_RING_H_ + +#include <cassert> +#include <cstddef> +#include <cstdint> +#include <iosfwd> +#include <limits> +#include <memory> + +#include "y_absl/container/internal/layout.h" +#include "y_absl/strings/internal/cord_internal.h" +#include "y_absl/strings/internal/cord_rep_flat.h" + +namespace y_absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { + +// All operations modifying a ring buffer are implemented as static methods +// requiring a CordRepRing instance with a reference adopted by the method. +// +// The methods return the modified ring buffer, which may be equal to the input +// if the input was not shared, and having large enough capacity to accommodate +// any newly added node(s). Otherwise, a copy of the input rep with the new +// node(s) added is returned. +// +// Any modification on non shared ring buffers with enough capacity will then +// require minimum atomic operations. Caller should where possible provide +// reasonable `extra` hints for both anticipated extra `flat` byte space, as +// well as anticipated extra nodes required for complex operations. +// +// Example of code creating a ring buffer, adding some data to it, +// and discarding the buffer when done: +// +// void FunWithRings() { +// // Create ring with 3 flats +// CordRep* flat = CreateFlat("Hello"); +// CordRepRing* ring = CordRepRing::Create(flat, 2); +// ring = CordRepRing::Append(ring, CreateFlat(" ")); +// ring = CordRepRing::Append(ring, CreateFlat("world")); +// DoSomethingWithRing(ring); +// CordRep::Unref(ring); +// } +// +// Example of code Copying an existing ring buffer and modifying it: +// +// void MoreFunWithRings(CordRepRing* src) { +// CordRepRing* ring = CordRep::Ref(src)->ring(); +// ring = CordRepRing::Append(ring, CreateFlat("Hello")); +// ring = CordRepRing::Append(ring, CreateFlat(" ")); +// ring = CordRepRing::Append(ring, CreateFlat("world")); +// DoSomethingWithRing(ring); +// CordRep::Unref(ring); +// } +// +class CordRepRing : public CordRep { + public: + // `pos_type` represents a 'logical position'. A CordRepRing instance has a + // `begin_pos` (default 0), and each node inside the buffer will have an + // `end_pos` which is the `end_pos` of the previous node (or `begin_pos`) plus + // this node's length. The purpose is to allow for a binary search on this + // position, while allowing O(1) prepend and append operations. + using pos_type = size_t; + + // `index_type` is the type for the `head`, `tail` and `capacity` indexes. + // Ring buffers are limited to having no more than four billion entries. + using index_type = uint32_t; + + // `offset_type` is the type for the data offset inside a child rep's data. + using offset_type = uint32_t; + + // Position holds the node index and relative offset into the node for + // some physical offset in the contained data as returned by the Find() + // and FindTail() methods. + struct Position { + index_type index; + size_t offset; + }; + + // The maximum # of child nodes that can be hosted inside a CordRepRing. + static constexpr size_t kMaxCapacity = (std::numeric_limits<uint32_t>::max)(); + + // CordRepring can not be default constructed, moved, copied or assigned. + CordRepRing() = delete; + CordRepRing(const CordRepRing&) = delete; + CordRepRing& operator=(const CordRepRing&) = delete; + + // Returns true if this instance is valid, false if some or all of the + // invariants are broken. Intended for debug purposes only. + // `output` receives an explanation of the broken invariants. + bool IsValid(std::ostream& output) const; + + // Returns the size in bytes for a CordRepRing with `capacity' entries. + static constexpr size_t AllocSize(size_t capacity); + + // Returns the distance in bytes from `pos` to `end_pos`. + static constexpr size_t Distance(pos_type pos, pos_type end_pos); + + // Creates a new ring buffer from the provided `rep`. Adopts a reference + // on `rep`. The returned ring buffer has a capacity of at least `extra + 1` + static CordRepRing* Create(CordRep* child, size_t extra = 0); + + // `head`, `tail` and `capacity` indexes defining the ring buffer boundaries. + index_type head() const { return head_; } + index_type tail() const { return tail_; } + index_type capacity() const { return capacity_; } + + // Returns the number of entries in this instance. + index_type entries() const { return entries(head_, tail_); } + + // Returns the logical begin position of this instance. + pos_type begin_pos() const { return begin_pos_; } + + // Returns the number of entries for a given head-tail range. + // Requires `head` and `tail` values to be less than `capacity()`. + index_type entries(index_type head, index_type tail) const { + assert(head < capacity_ && tail < capacity_); + return tail - head + ((tail > head) ? 0 : capacity_); + } + + // Returns the logical end position of entry `index`. + pos_type const& entry_end_pos(index_type index) const { + assert(IsValidIndex(index)); + return Layout::Partial().Pointer<0>(data_)[index]; + } + + // Returns the child pointer of entry `index`. + CordRep* const& entry_child(index_type index) const { + assert(IsValidIndex(index)); + return Layout::Partial(capacity()).Pointer<1>(data_)[index]; + } + + // Returns the data offset of entry `index` + offset_type const& entry_data_offset(index_type index) const { + assert(IsValidIndex(index)); + return Layout::Partial(capacity(), capacity()).Pointer<2>(data_)[index]; + } + + // Appends the provided child node to the `rep` instance. + // Adopts a reference from `rep` and `child` which may not be null. + // If the provided child is a FLAT or EXTERNAL node, or a SUBSTRING node + // containing a FLAT or EXTERNAL node, then flat or external the node is added + // 'as is', with an offset added for the SUBSTRING case. + // If the provided child is a RING or CONCAT tree, or a SUBSTRING of a RING or + // CONCAT tree, then all child nodes not excluded by any start offset or + // length values are added recursively. + static CordRepRing* Append(CordRepRing* rep, CordRep* child); + + // Appends the provided string data to the `rep` instance. + // This function will attempt to utilize any remaining capacity in the last + // node of the input if that node is not shared (directly or indirectly), and + // of type FLAT. Remaining data will be added as one or more FLAT nodes. + // Any last node added to the ring buffer will be allocated with up to + // `extra` bytes of capacity for (anticipated) subsequent append actions. + static CordRepRing* Append(CordRepRing* rep, string_view data, + size_t extra = 0); + + // Prepends the provided child node to the `rep` instance. + // Adopts a reference from `rep` and `child` which may not be null. + // If the provided child is a FLAT or EXTERNAL node, or a SUBSTRING node + // containing a FLAT or EXTERNAL node, then flat or external the node is + // prepended 'as is', with an optional offset added for the SUBSTRING case. + // If the provided child is a RING or CONCAT tree, or a SUBSTRING of a RING + // or CONCAT tree, then all child nodes not excluded by any start offset or + // length values are added recursively. + static CordRepRing* Prepend(CordRepRing* rep, CordRep* child); + + // Prepends the provided string data to the `rep` instance. + // This function will attempt to utilize any remaining capacity in the first + // node of the input if that node is not shared (directly or indirectly), and + // of type FLAT. Remaining data will be added as one or more FLAT nodes. + // Any first node prepnded to the ring buffer will be allocated with up to + // `extra` bytes of capacity for (anticipated) subsequent prepend actions. + static CordRepRing* Prepend(CordRepRing* rep, string_view data, + size_t extra = 0); + + // Returns a span referencing potentially unused capacity in the last node. + // The returned span may be empty if no such capacity is available, or if the + // current instance is shared. Else, a span of size `n <= size` is returned. + // If non empty, the ring buffer is adjusted to the new length, with the newly + // added capacity left uninitialized. Callers should assign a value to the + // entire span before any other operations on this instance. + Span<char> GetAppendBuffer(size_t size); + + // Returns a span referencing potentially unused capacity in the first node. + // This function is identical to GetAppendBuffer except that it returns a span + // referencing up to `size` capacity directly before the existing data. + Span<char> GetPrependBuffer(size_t size); + + // Returns a cord ring buffer containing `len` bytes of data starting at + // `offset`. If the input is not shared, this function will remove all head + // and tail child nodes outside of the requested range, and adjust the new + // head and tail nodes as required. If the input is shared, this function + // returns a new instance sharing some or all of the nodes from the input. + static CordRepRing* SubRing(CordRepRing* r, size_t offset, size_t len, + size_t extra = 0); + + // Returns a cord ring buffer with the first `len` bytes removed. + // If the input is not shared, this function will remove all head child nodes + // fully inside the first `length` bytes, and adjust the new head as required. + // If the input is shared, this function returns a new instance sharing some + // or all of the nodes from the input. + static CordRepRing* RemoveSuffix(CordRepRing* r, size_t len, + size_t extra = 0); + + // Returns a cord ring buffer with the last `len` bytes removed. + // If the input is not shared, this function will remove all head child nodes + // fully inside the first `length` bytes, and adjust the new head as required. + // If the input is shared, this function returns a new instance sharing some + // or all of the nodes from the input. + static CordRepRing* RemovePrefix(CordRepRing* r, size_t len, + size_t extra = 0); + + // Returns the character at `offset`. Requires that `offset < length`. + char GetCharacter(size_t offset) const; + + // Returns true if this instance manages a single contiguous buffer, in which + // case the (optional) output parameter `fragment` is set. Otherwise, the + // function returns false, and `fragment` is left unchanged. + bool IsFlat(y_absl::string_view* fragment) const; + + // Returns true if the data starting at `offset` with length `len` is + // managed by this instance inside a single contiguous buffer, in which case + // the (optional) output parameter `fragment` is set to the contiguous memory + // starting at offset `offset` with length `length`. Otherwise, the function + // returns false, and `fragment` is left unchanged. + bool IsFlat(size_t offset, size_t len, y_absl::string_view* fragment) const; + + // Testing only: set capacity to requested capacity. + void SetCapacityForTesting(size_t capacity); + + // Returns the CordRep data pointer for the provided CordRep. + // Requires that the provided `rep` is either a FLAT or EXTERNAL CordRep. + static const char* GetLeafData(const CordRep* rep); + + // Returns the CordRep data pointer for the provided CordRep. + // Requires that `rep` is either a FLAT, EXTERNAL, or SUBSTRING CordRep. + static const char* GetRepData(const CordRep* rep); + + // Advances the provided position, wrapping around capacity as needed. + // Requires `index` < capacity() + inline index_type advance(index_type index) const; + + // Advances the provided position by 'n`, wrapping around capacity as needed. + // Requires `index` < capacity() and `n` <= capacity. + inline index_type advance(index_type index, index_type n) const; + + // Retreats the provided position, wrapping around 0 as needed. + // Requires `index` < capacity() + inline index_type retreat(index_type index) const; + + // Retreats the provided position by 'n', wrapping around 0 as needed. + // Requires `index` < capacity() + inline index_type retreat(index_type index, index_type n) const; + + // Returns the logical begin position of entry `index` + pos_type const& entry_begin_pos(index_type index) const { + return (index == head_) ? begin_pos_ : entry_end_pos(retreat(index)); + } + + // Returns the physical start offset of entry `index` + size_t entry_start_offset(index_type index) const { + return Distance(begin_pos_, entry_begin_pos(index)); + } + + // Returns the physical end offset of entry `index` + size_t entry_end_offset(index_type index) const { + return Distance(begin_pos_, entry_end_pos(index)); + } + + // Returns the data length for entry `index` + size_t entry_length(index_type index) const { + return Distance(entry_begin_pos(index), entry_end_pos(index)); + } + + // Returns the data for entry `index` + y_absl::string_view entry_data(index_type index) const; + + // Returns the position for `offset` as {index, prefix}. `index` holds the + // index of the entry at the specified offset and `prefix` holds the relative + // offset inside that entry. + // Requires `offset` < length. + // + // For example we can implement GetCharacter(offset) as: + // char GetCharacter(size_t offset) { + // Position pos = this->Find(offset); + // return this->entry_data(pos.pos)[pos.offset]; + // } + inline Position Find(size_t offset) const; + + // Find starting at `head` + inline Position Find(index_type head, size_t offset) const; + + // Returns the tail position for `offset` as {tail index, suffix}. + // `tail index` holds holds the index of the entry holding the offset directly + // before 'offset` advanced by one. 'suffix` holds the relative offset from + // that relative offset in the entry to the end of the entry. + // For example, FindTail(length) will return {tail(), 0}, FindTail(length - 5) + // will return {retreat(tail), 5)} provided the preceding entry contains at + // least 5 bytes of data. + // Requires offset >= 1 && offset <= length. + // + // This function is very useful in functions that need to clip the end of some + // ring buffer such as 'RemovePrefix'. + // For example, we could implement RemovePrefix for non shared instances as: + // void RemoveSuffix(size_t n) { + // Position pos = FindTail(length - n); + // UnrefEntries(pos.pos, this->tail_); + // this->tail_ = pos.pos; + // entry(retreat(pos.pos)).end_pos -= pos.offset; + // } + inline Position FindTail(size_t offset) const; + + // Find tail starting at `head` + inline Position FindTail(index_type head, size_t offset) const; + + // Invokes f(index_type index) for each entry inside the range [head, tail> + template <typename F> + void ForEach(index_type head, index_type tail, F&& f) const { + index_type n1 = (tail > head) ? tail : capacity_; + for (index_type i = head; i < n1; ++i) f(i); + if (tail <= head) { + for (index_type i = 0; i < tail; ++i) f(i); + } + } + + // Invokes f(index_type index) for each entry inside this instance. + template <typename F> + void ForEach(F&& f) const { + ForEach(head_, tail_, std::forward<F>(f)); + } + + // Dump this instance's data tp stream `s` in human readable format, excluding + // the actual data content itself. Intended for debug purposes only. + friend std::ostream& operator<<(std::ostream& s, const CordRepRing& rep); + + private: + enum class AddMode { kAppend, kPrepend }; + + using Layout = container_internal::Layout<pos_type, CordRep*, offset_type>; + + class Filler; + class Transaction; + class CreateTransaction; + + static constexpr size_t kLayoutAlignment = Layout::Partial().Alignment(); + + // Creates a new CordRepRing. + explicit CordRepRing(index_type capacity) : capacity_(capacity) {} + + // Returns true if `index` is a valid index into this instance. + bool IsValidIndex(index_type index) const; + + // Debug use only: validates the provided CordRepRing invariants. + // Verification of all CordRepRing methods can be enabled by defining + // EXTRA_CORD_RING_VALIDATION, i.e.: `--copts=-DEXTRA_CORD_RING_VALIDATION` + // Verification is VERY expensive, so only do it for debugging purposes. + static CordRepRing* Validate(CordRepRing* rep, const char* file = nullptr, + int line = 0); + + // Allocates a CordRepRing large enough to hold `capacity + extra' entries. + // The returned capacity may be larger if the allocated memory allows for it. + // The maximum capacity of a CordRepRing is capped at kMaxCapacity. + // Throws `std::length_error` if `capacity + extra' exceeds kMaxCapacity. + static CordRepRing* New(size_t capacity, size_t extra); + + // Deallocates (but does not destroy) the provided ring buffer. + static void Delete(CordRepRing* rep); + + // Destroys the provided ring buffer, decrementing the reference count of all + // contained child CordReps. The provided 1\`rep` should have a ref count of + // one (pre decrement destroy call observing `refcount.IsOne()`) or zero + // (post decrement destroy call observing `!refcount.Decrement()`). + static void Destroy(CordRepRing* rep); + + // Returns a mutable reference to the logical end position array. + pos_type* entry_end_pos() { + return Layout::Partial().Pointer<0>(data_); + } + + // Returns a mutable reference to the child pointer array. + CordRep** entry_child() { + return Layout::Partial(capacity()).Pointer<1>(data_); + } + + // Returns a mutable reference to the data offset array. + offset_type* entry_data_offset() { + return Layout::Partial(capacity(), capacity()).Pointer<2>(data_); + } + + // Find implementations for the non fast path 0 / length cases. + Position FindSlow(index_type head, size_t offset) const; + Position FindTailSlow(index_type head, size_t offset) const; + + // Finds the index of the first node that is inside a reasonable distance + // of the node at `offset` from which we can continue with a linear search. + template <bool wrap> + index_type FindBinary(index_type head, index_type tail, size_t offset) const; + + // Fills the current (initialized) instance from the provided source, copying + // entries [head, tail). Adds a reference to copied entries if `ref` is true. + template <bool ref> + void Fill(const CordRepRing* src, index_type head, index_type tail); + + // Create a copy of 'rep', copying all entries [head, tail), allocating room + // for `extra` entries. Adds a reference on all copied entries. + static CordRepRing* Copy(CordRepRing* rep, index_type head, index_type tail, + size_t extra = 0); + + // Returns a Mutable CordRepRing reference from `rep` with room for at least + // `extra` additional nodes. Adopts a reference count from `rep`. + // This function will return `rep` if, and only if: + // - rep.entries + extra <= rep.capacity + // - rep.refcount == 1 + // Otherwise, this function will create a new copy of `rep` with additional + // capacity to satisfy `extra` extra nodes, and unref the old `rep` instance. + // + // If a new CordRepRing can not be allocated, or the new capacity would exceed + // the maxmimum capacity, then the input is consumed only, and an exception is + // thrown. + static CordRepRing* Mutable(CordRepRing* rep, size_t extra); + + // Slow path for Append(CordRepRing* rep, CordRep* child). This function is + // exercised if the provided `child` in Append() is not a leaf node, i.e., a + // ring buffer or old (concat) cord tree. + static CordRepRing* AppendSlow(CordRepRing* rep, CordRep* child); + + // Appends the provided leaf node. Requires `child` to be FLAT or EXTERNAL. + static CordRepRing* AppendLeaf(CordRepRing* rep, CordRep* child, + size_t offset, size_t length); + + // Prepends the provided leaf node. Requires `child` to be FLAT or EXTERNAL. + static CordRepRing* PrependLeaf(CordRepRing* rep, CordRep* child, + size_t offset, size_t length); + + // Slow path for Prepend(CordRepRing* rep, CordRep* child). This function is + // exercised if the provided `child` in Prepend() is not a leaf node, i.e., a + // ring buffer or old (concat) cord tree. + static CordRepRing* PrependSlow(CordRepRing* rep, CordRep* child); + + // Slow path for Create(CordRep* child, size_t extra). This function is + // exercised if the provided `child` in Prepend() is not a leaf node, i.e., a + // ring buffer or old (concat) cord tree. + static CordRepRing* CreateSlow(CordRep* child, size_t extra); + + // Creates a new ring buffer from the provided `child` leaf node. Requires + // `child` to be FLAT or EXTERNAL. on `rep`. + // The returned ring buffer has a capacity of at least `1 + extra` + static CordRepRing* CreateFromLeaf(CordRep* child, size_t offset, + size_t length, size_t extra); + + // Appends or prepends (depending on AddMode) the ring buffer in `ring' to + // `rep` starting at `offset` with length `len`. + template <AddMode mode> + static CordRepRing* AddRing(CordRepRing* rep, CordRepRing* ring, + size_t offset, size_t len); + + // Increases the data offset for entry `index` by `n`. + void AddDataOffset(index_type index, size_t n); + + // Descreases the length for entry `index` by `n`. + void SubLength(index_type index, size_t n); + + index_type head_; + index_type tail_; + index_type capacity_; + pos_type begin_pos_; + + alignas(kLayoutAlignment) char data_[kLayoutAlignment]; + + friend struct CordRep; +}; + +constexpr size_t CordRepRing::AllocSize(size_t capacity) { + return sizeof(CordRepRing) - sizeof(data_) + + Layout(capacity, capacity, capacity).AllocSize(); +} + +inline constexpr size_t CordRepRing::Distance(pos_type pos, pos_type end_pos) { + return (end_pos - pos); +} + +inline const char* CordRepRing::GetLeafData(const CordRep* rep) { + return rep->tag != EXTERNAL ? rep->flat()->Data() : rep->external()->base; +} + +inline const char* CordRepRing::GetRepData(const CordRep* rep) { + if (rep->tag >= FLAT) return rep->flat()->Data(); + if (rep->tag == EXTERNAL) return rep->external()->base; + return GetLeafData(rep->substring()->child) + rep->substring()->start; +} + +inline CordRepRing::index_type CordRepRing::advance(index_type index) const { + assert(index < capacity_); + return ++index == capacity_ ? 0 : index; +} + +inline CordRepRing::index_type CordRepRing::advance(index_type index, + index_type n) const { + assert(index < capacity_ && n <= capacity_); + return (index += n) >= capacity_ ? index - capacity_ : index; +} + +inline CordRepRing::index_type CordRepRing::retreat(index_type index) const { + assert(index < capacity_); + return (index > 0 ? index : capacity_) - 1; +} + +inline CordRepRing::index_type CordRepRing::retreat(index_type index, + index_type n) const { + assert(index < capacity_ && n <= capacity_); + return index >= n ? index - n : capacity_ - n + index; +} + +inline y_absl::string_view CordRepRing::entry_data(index_type index) const { + size_t data_offset = entry_data_offset(index); + return {GetRepData(entry_child(index)) + data_offset, entry_length(index)}; +} + +inline bool CordRepRing::IsValidIndex(index_type index) const { + if (index >= capacity_) return false; + return (tail_ > head_) ? (index >= head_ && index < tail_) + : (index >= head_ || index < tail_); +} + +#ifndef EXTRA_CORD_RING_VALIDATION +inline CordRepRing* CordRepRing::Validate(CordRepRing* rep, + const char* /*file*/, int /*line*/) { + return rep; +} +#endif + +inline CordRepRing::Position CordRepRing::Find(size_t offset) const { + assert(offset < length); + return (offset == 0) ? Position{head_, 0} : FindSlow(head_, offset); +} + +inline CordRepRing::Position CordRepRing::Find(index_type head, + size_t offset) const { + assert(offset < length); + assert(IsValidIndex(head) && offset >= entry_start_offset(head)); + return (offset == 0) ? Position{head_, 0} : FindSlow(head, offset); +} + +inline CordRepRing::Position CordRepRing::FindTail(size_t offset) const { + assert(offset > 0 && offset <= length); + return (offset == length) ? Position{tail_, 0} : FindTailSlow(head_, offset); +} + +inline CordRepRing::Position CordRepRing::FindTail(index_type head, + size_t offset) const { + assert(offset > 0 && offset <= length); + assert(IsValidIndex(head) && offset >= entry_start_offset(head) + 1); + return (offset == length) ? Position{tail_, 0} : FindTailSlow(head, offset); +} + +// Now that CordRepRing is defined, we can define CordRep's helper casts: +inline CordRepRing* CordRep::ring() { + assert(IsRing()); + return static_cast<CordRepRing*>(this); +} + +inline const CordRepRing* CordRep::ring() const { + assert(IsRing()); + return static_cast<const CordRepRing*>(this); +} + +inline bool CordRepRing::IsFlat(y_absl::string_view* fragment) const { + if (entries() == 1) { + if (fragment) *fragment = entry_data(head()); + return true; + } + return false; +} + +inline bool CordRepRing::IsFlat(size_t offset, size_t len, + y_absl::string_view* fragment) const { + const Position pos = Find(offset); + const y_absl::string_view data = entry_data(pos.index); + if (data.length() >= len && data.length() - len >= pos.offset) { + if (fragment) *fragment = data.substr(pos.offset, len); + return true; + } + return false; +} + +std::ostream& operator<<(std::ostream& s, const CordRepRing& rep); + +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace y_absl + +#endif // ABSL_STRINGS_INTERNAL_CORD_REP_RING_H_ diff --git a/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/cord_rep_ring_reader.h b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/cord_rep_ring_reader.h new file mode 100644 index 0000000000..3f64d04fae --- /dev/null +++ b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/cord_rep_ring_reader.h @@ -0,0 +1,118 @@ +// Copyright 2021 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ABSL_STRINGS_INTERNAL_CORD_REP_RING_READER_H_ +#define ABSL_STRINGS_INTERNAL_CORD_REP_RING_READER_H_ + +#include <cassert> +#include <cstddef> +#include <cstdint> + +#include "y_absl/strings/internal/cord_internal.h" +#include "y_absl/strings/internal/cord_rep_ring.h" +#include "y_absl/strings/string_view.h" + +namespace y_absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { + +// CordRepRingReader provides basic navigation over CordRepRing data. +class CordRepRingReader { + public: + // Returns true if this instance is not empty. + explicit operator bool() const { return ring_ != nullptr; } + + // Returns the ring buffer reference for this instance, or nullptr if empty. + CordRepRing* ring() const { return ring_; } + + // Returns the current node index inside the ring buffer for this instance. + // The returned value is undefined if this instance is empty. + CordRepRing::index_type index() const { return index_; } + + // Returns the current node inside the ring buffer for this instance. + // The returned value is undefined if this instance is empty. + CordRep* node() const { return ring_->entry_child(index_); } + + // Returns the length of the referenced ring buffer. + // Requires the current instance to be non empty. + size_t length() const { + assert(ring_); + return ring_->length; + } + + // Returns the end offset of the last navigated-to chunk, which represents the + // total bytes 'consumed' relative to the start of the ring. The returned + // value is never zero. For example, initializing a reader with a ring buffer + // with a first chunk of 19 bytes will return consumed() = 19. + // Requires the current instance to be non empty. + size_t consumed() const { + assert(ring_); + return ring_->entry_end_offset(index_); + } + + // Returns the number of bytes remaining beyond the last navigated-to chunk. + // Requires the current instance to be non empty. + size_t remaining() const { + assert(ring_); + return length() - consumed(); + } + + // Resets this instance to an empty value + void Reset() { ring_ = nullptr; } + + // Resets this instance to the start of `ring`. `ring` must not be null. + // Returns a reference into the first chunk of the provided ring. + y_absl::string_view Reset(CordRepRing* ring) { + assert(ring); + ring_ = ring; + index_ = ring_->head(); + return ring_->entry_data(index_); + } + + // Navigates to the next chunk inside the reference ring buffer. + // Returns a reference into the navigated-to chunk. + // Requires remaining() to be non zero. + y_absl::string_view Next() { + assert(remaining()); + index_ = ring_->advance(index_); + return ring_->entry_data(index_); + } + + // Navigates to the chunk at offset `offset`. + // Returns a reference into the navigated-to chunk, adjusted for the relative + // position of `offset` into that chunk. For example, calling Seek(13) on a + // ring buffer containing 2 chunks of 10 and 20 bytes respectively will return + // a string view into the second chunk starting at offset 3 with a size of 17. + // Requires `offset` to be less than `length()` + y_absl::string_view Seek(size_t offset) { + assert(offset < length()); + size_t current = ring_->entry_end_offset(index_); + CordRepRing::index_type hint = (offset >= current) ? index_ : ring_->head(); + const CordRepRing::Position head = ring_->Find(hint, offset); + index_ = head.index; + auto data = ring_->entry_data(head.index); + data.remove_prefix(head.offset); + return data; + } + + private: + CordRepRing* ring_ = nullptr; + CordRepRing::index_type index_; +}; + +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace y_absl + +#endif // ABSL_STRINGS_INTERNAL_CORD_REP_RING_READER_H_ diff --git a/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/cord_rep_test_util.h b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/cord_rep_test_util.h new file mode 100644 index 0000000000..98dcc0d649 --- /dev/null +++ b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/cord_rep_test_util.h @@ -0,0 +1,220 @@ +// Copyright 2021 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ABSL_STRINGS_INTERNAL_CORD_REP_TEST_UTIL_H_ +#define ABSL_STRINGS_INTERNAL_CORD_REP_TEST_UTIL_H_ + +#include <cassert> +#include <memory> +#include <random> +#include <util/generic/string.h> +#include <vector> + +#include "y_absl/base/config.h" +#include "y_absl/base/internal/raw_logging.h" +#include "y_absl/strings/internal/cord_internal.h" +#include "y_absl/strings/internal/cord_rep_btree.h" +#include "y_absl/strings/internal/cord_rep_flat.h" +#include "y_absl/strings/string_view.h" + +namespace y_absl { +ABSL_NAMESPACE_BEGIN +namespace cordrep_testing { + +inline cord_internal::CordRepSubstring* MakeSubstring( + size_t start, size_t len, cord_internal::CordRep* rep) { + auto* sub = new cord_internal::CordRepSubstring; + sub->tag = cord_internal::SUBSTRING; + sub->start = start; + sub->length = len <= 0 ? rep->length - start + len : len; + sub->child = rep; + return sub; +} + +inline cord_internal::CordRepConcat* MakeConcat(cord_internal::CordRep* left, + cord_internal::CordRep* right, + int depth = 0) { + auto* concat = new cord_internal::CordRepConcat; + concat->tag = cord_internal::CONCAT; + concat->length = left->length + right->length; + concat->left = left; + concat->right = right; + concat->set_depth(depth); + return concat; +} + +inline cord_internal::CordRepFlat* MakeFlat(y_absl::string_view value) { + assert(value.length() <= cord_internal::kMaxFlatLength); + auto* flat = cord_internal::CordRepFlat::New(value.length()); + flat->length = value.length(); + memcpy(flat->Data(), value.data(), value.length()); + return flat; +} + +// Creates an external node for testing +inline cord_internal::CordRepExternal* MakeExternal(y_absl::string_view s) { + struct Rep : public cord_internal::CordRepExternal { + TString s; + explicit Rep(y_absl::string_view sv) : s(sv) { + this->tag = cord_internal::EXTERNAL; + this->base = s.data(); + this->length = s.length(); + this->releaser_invoker = [](cord_internal::CordRepExternal* self) { + delete static_cast<Rep*>(self); + }; + } + }; + return new Rep(s); +} + +inline TString CreateRandomString(size_t n) { + y_absl::string_view data = + "abcdefghijklmnopqrstuvwxyz" + "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "0123456789~!@#$%^&*()_+=-<>?:\"{}[]|"; + std::minstd_rand rnd; + std::uniform_int_distribution<size_t> dist(0, data.size() - 1); + TString s(n, ' '); + for (size_t i = 0; i < n; ++i) { + s[i] = data[dist(rnd)]; + } + return s; +} + +// Creates an array of flats from the provided string, chopping +// the provided string up into flats of size `chunk_size` characters +// resulting in roughly `data.size() / chunk_size` total flats. +inline std::vector<cord_internal::CordRep*> CreateFlatsFromString( + y_absl::string_view data, size_t chunk_size) { + assert(chunk_size > 0); + std::vector<cord_internal::CordRep*> flats; + for (y_absl::string_view s = data; !s.empty(); s.remove_prefix(chunk_size)) { + flats.push_back(MakeFlat(s.substr(0, chunk_size))); + } + return flats; +} + +inline cord_internal::CordRepBtree* CordRepBtreeFromFlats( + y_absl::Span<cord_internal::CordRep* const> flats) { + assert(!flats.empty()); + auto* node = cord_internal::CordRepBtree::Create(flats[0]); + for (size_t i = 1; i < flats.size(); ++i) { + node = cord_internal::CordRepBtree::Append(node, flats[i]); + } + return node; +} + +template <typename Fn> +inline void CordVisitReps(cord_internal::CordRep* rep, Fn&& fn) { + fn(rep); + while (rep->tag == cord_internal::SUBSTRING) { + rep = rep->substring()->child; + fn(rep); + } + if (rep->tag == cord_internal::BTREE) { + for (cord_internal::CordRep* edge : rep->btree()->Edges()) { + CordVisitReps(edge, fn); + } + } else if (rep->tag == cord_internal::CONCAT) { + CordVisitReps(rep->concat()->left, fn); + CordVisitReps(rep->concat()->right, fn); + } +} + +template <typename Predicate> +inline std::vector<cord_internal::CordRep*> CordCollectRepsIf( + Predicate&& predicate, cord_internal::CordRep* rep) { + std::vector<cord_internal::CordRep*> reps; + CordVisitReps(rep, [&reps, &predicate](cord_internal::CordRep* rep) { + if (predicate(rep)) reps.push_back(rep); + }); + return reps; +} + +inline std::vector<cord_internal::CordRep*> CordCollectReps( + cord_internal::CordRep* rep) { + std::vector<cord_internal::CordRep*> reps; + auto fn = [&reps](cord_internal::CordRep* rep) { reps.push_back(rep); }; + CordVisitReps(rep, fn); + return reps; +} + +inline void CordToString(cord_internal::CordRep* rep, TString& s) { + size_t offset = 0; + size_t length = rep->length; + while (rep->tag == cord_internal::SUBSTRING) { + offset += rep->substring()->start; + rep = rep->substring()->child; + } + if (rep->tag == cord_internal::BTREE) { + for (cord_internal::CordRep* edge : rep->btree()->Edges()) { + CordToString(edge, s); + } + } else if (rep->tag >= cord_internal::FLAT) { + s.append(rep->flat()->Data() + offset, length); + } else if (rep->tag == cord_internal::EXTERNAL) { + s.append(rep->external()->base + offset, length); + } else { + ABSL_RAW_LOG(FATAL, "Unsupported tag %d", rep->tag); + } +} + +inline TString CordToString(cord_internal::CordRep* rep) { + TString s; + s.reserve(rep->length); + CordToString(rep, s); + return s; +} + +// RAII Helper class to automatically unref reps on destruction. +class AutoUnref { + public: + ~AutoUnref() { + for (CordRep* rep : unrefs_) CordRep::Unref(rep); + } + + // Adds `rep` to the list of reps to be unreffed at destruction. + template <typename CordRepType> + CordRepType* Add(CordRepType* rep) { + unrefs_.push_back(rep); + return rep; + } + + // Increments the reference count of `rep` by one, and adds it to + // the list of reps to be unreffed at destruction. + template <typename CordRepType> + CordRepType* Ref(CordRepType* rep) { + unrefs_.push_back(CordRep::Ref(rep)); + return rep; + } + + // Increments the reference count of `rep` by one if `condition` is true, + // and adds it to the list of reps to be unreffed at destruction. + template <typename CordRepType> + CordRepType* RefIf(bool condition, CordRepType* rep) { + if (condition) unrefs_.push_back(CordRep::Ref(rep)); + return rep; + } + + private: + using CordRep = y_absl::cord_internal::CordRep; + + std::vector<CordRep*> unrefs_; +}; + +} // namespace cordrep_testing +ABSL_NAMESPACE_END +} // namespace y_absl + +#endif // ABSL_STRINGS_INTERNAL_CORD_REP_TEST_UTIL_H_ diff --git a/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/cordz_functions.cc b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/cordz_functions.cc new file mode 100644 index 0000000000..e9936f22fe --- /dev/null +++ b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/cordz_functions.cc @@ -0,0 +1,96 @@ +// Copyright 2019 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "y_absl/strings/internal/cordz_functions.h" + +#include <atomic> +#include <cmath> +#include <limits> +#include <random> + +#include "y_absl/base/attributes.h" +#include "y_absl/base/config.h" +#include "y_absl/base/internal/raw_logging.h" +#include "y_absl/profiling/internal/exponential_biased.h" + +namespace y_absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { +namespace { + +// The average interval until the next sample. A value of 0 disables profiling +// while a value of 1 will profile all Cords. +std::atomic<int> g_cordz_mean_interval(50000); + +} // namespace + +#ifdef ABSL_INTERNAL_CORDZ_ENABLED + +// Special negative 'not initialized' per thread value for cordz_next_sample. +static constexpr int64_t kInitCordzNextSample = -1; + +ABSL_CONST_INIT thread_local int64_t cordz_next_sample = kInitCordzNextSample; + +// kIntervalIfDisabled is the number of profile-eligible events need to occur +// before the code will confirm that cordz is still disabled. +constexpr int64_t kIntervalIfDisabled = 1 << 16; + +ABSL_ATTRIBUTE_NOINLINE bool cordz_should_profile_slow() { + + thread_local y_absl::profiling_internal::ExponentialBiased + exponential_biased_generator; + int32_t mean_interval = get_cordz_mean_interval(); + + // Check if we disabled profiling. If so, set the next sample to a "large" + // number to minimize the overhead of the should_profile codepath. + if (mean_interval <= 0) { + cordz_next_sample = kIntervalIfDisabled; + return false; + } + + // Check if we're always sampling. + if (mean_interval == 1) { + cordz_next_sample = 1; + return true; + } + + if (cordz_next_sample <= 0) { + // If first check on current thread, check cordz_should_profile() + // again using the created (initial) stride in cordz_next_sample. + const bool initialized = cordz_next_sample != kInitCordzNextSample; + cordz_next_sample = exponential_biased_generator.GetStride(mean_interval); + return initialized || cordz_should_profile(); + } + + --cordz_next_sample; + return false; +} + +void cordz_set_next_sample_for_testing(int64_t next_sample) { + cordz_next_sample = next_sample; +} + +#endif // ABSL_INTERNAL_CORDZ_ENABLED + +int32_t get_cordz_mean_interval() { + return g_cordz_mean_interval.load(std::memory_order_acquire); +} + +void set_cordz_mean_interval(int32_t mean_interval) { + g_cordz_mean_interval.store(mean_interval, std::memory_order_release); +} + +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace y_absl diff --git a/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/cordz_functions.h b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/cordz_functions.h new file mode 100644 index 0000000000..802efaa976 --- /dev/null +++ b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/cordz_functions.h @@ -0,0 +1,85 @@ +// Copyright 2019 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ABSL_STRINGS_CORDZ_FUNCTIONS_H_ +#define ABSL_STRINGS_CORDZ_FUNCTIONS_H_ + +#include <stdint.h> + +#include "y_absl/base/attributes.h" +#include "y_absl/base/config.h" +#include "y_absl/base/optimization.h" + +namespace y_absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { + +// Returns the current sample rate. This represents the average interval +// between samples. +int32_t get_cordz_mean_interval(); + +// Sets the sample rate with the average interval between samples. +void set_cordz_mean_interval(int32_t mean_interval); + +// Enable cordz unless any of the following applies: +// - no thread local support +// - MSVC build +// - Android build +// - Apple build +// - DLL build +// Hashtablez is turned off completely in opensource builds. +// MSVC's static atomics are dynamically initialized in debug mode, which breaks +// sampling. +#if defined(ABSL_HAVE_THREAD_LOCAL) && !defined(_MSC_VER) && \ + !defined(ABSL_BUILD_DLL) && !defined(ABSL_CONSUME_DLL) && \ + !defined(__ANDROID__) && !defined(__APPLE__) +#define ABSL_INTERNAL_CORDZ_ENABLED 1 +#endif + +#ifdef ABSL_INTERNAL_CORDZ_ENABLED + +// cordz_next_sample is the number of events until the next sample event. If +// the value is 1 or less, the code will check on the next event if cordz is +// enabled, and if so, will sample the Cord. cordz is only enabled when we can +// use thread locals. +ABSL_CONST_INIT extern thread_local int64_t cordz_next_sample; + +// Determines if the next sample should be profiled. If it is, the value pointed +// at by next_sample will be set with the interval until the next sample. +bool cordz_should_profile_slow(); + +// Returns true if the next cord should be sampled. +inline bool cordz_should_profile() { + if (ABSL_PREDICT_TRUE(cordz_next_sample > 1)) { + cordz_next_sample--; + return false; + } + return cordz_should_profile_slow(); +} + +// Sets the interval until the next sample (for testing only) +void cordz_set_next_sample_for_testing(int64_t next_sample); + +#else // ABSL_INTERNAL_CORDZ_ENABLED + +inline bool cordz_should_profile() { return false; } +inline void cordz_set_next_sample_for_testing(int64_t) {} + +#endif // ABSL_INTERNAL_CORDZ_ENABLED + +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace y_absl + +#endif // ABSL_STRINGS_CORDZ_FUNCTIONS_H_ diff --git a/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/cordz_functions/ya.make b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/cordz_functions/ya.make new file mode 100644 index 0000000000..06e99346da --- /dev/null +++ b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/cordz_functions/ya.make @@ -0,0 +1,32 @@ +# Generated by devtools/yamaker. + +LIBRARY() + +WITHOUT_LICENSE_TEXTS() + +OWNER( + somov + g:cpp-contrib +) + +LICENSE(Apache-2.0) + +PEERDIR( + contrib/restricted/abseil-cpp-tstring/y_absl/base/internal/raw_logging + contrib/restricted/abseil-cpp-tstring/y_absl/base/log_severity + contrib/restricted/abseil-cpp-tstring/y_absl/profiling/internal/exponential_biased +) + +ADDINCL( + GLOBAL contrib/restricted/abseil-cpp-tstring +) + +NO_COMPILER_WARNINGS() + +SRCDIR(contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal) + +SRCS( + cordz_functions.cc +) + +END() diff --git a/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/cordz_handle.cc b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/cordz_handle.cc new file mode 100644 index 0000000000..707c6d2a9b --- /dev/null +++ b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/cordz_handle.cc @@ -0,0 +1,139 @@ +// Copyright 2019 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include "y_absl/strings/internal/cordz_handle.h" + +#include <atomic> + +#include "y_absl/base/internal/raw_logging.h" // For ABSL_RAW_CHECK +#include "y_absl/base/internal/spinlock.h" + +namespace y_absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { + +using ::y_absl::base_internal::SpinLockHolder; + +ABSL_CONST_INIT CordzHandle::Queue CordzHandle::global_queue_(y_absl::kConstInit); + +CordzHandle::CordzHandle(bool is_snapshot) : is_snapshot_(is_snapshot) { + if (is_snapshot) { + SpinLockHolder lock(&queue_->mutex); + CordzHandle* dq_tail = queue_->dq_tail.load(std::memory_order_acquire); + if (dq_tail != nullptr) { + dq_prev_ = dq_tail; + dq_tail->dq_next_ = this; + } + queue_->dq_tail.store(this, std::memory_order_release); + } +} + +CordzHandle::~CordzHandle() { + ODRCheck(); + if (is_snapshot_) { + std::vector<CordzHandle*> to_delete; + { + SpinLockHolder lock(&queue_->mutex); + CordzHandle* next = dq_next_; + if (dq_prev_ == nullptr) { + // We were head of the queue, delete every CordzHandle until we reach + // either the end of the list, or a snapshot handle. + while (next && !next->is_snapshot_) { + to_delete.push_back(next); + next = next->dq_next_; + } + } else { + // Another CordzHandle existed before this one, don't delete anything. + dq_prev_->dq_next_ = next; + } + if (next) { + next->dq_prev_ = dq_prev_; + } else { + queue_->dq_tail.store(dq_prev_, std::memory_order_release); + } + } + for (CordzHandle* handle : to_delete) { + delete handle; + } + } +} + +bool CordzHandle::SafeToDelete() const { + return is_snapshot_ || queue_->IsEmpty(); +} + +void CordzHandle::Delete(CordzHandle* handle) { + assert(handle); + if (handle) { + handle->ODRCheck(); + Queue* const queue = handle->queue_; + if (!handle->SafeToDelete()) { + SpinLockHolder lock(&queue->mutex); + CordzHandle* dq_tail = queue->dq_tail.load(std::memory_order_acquire); + if (dq_tail != nullptr) { + handle->dq_prev_ = dq_tail; + dq_tail->dq_next_ = handle; + queue->dq_tail.store(handle, std::memory_order_release); + return; + } + } + delete handle; + } +} + +std::vector<const CordzHandle*> CordzHandle::DiagnosticsGetDeleteQueue() { + std::vector<const CordzHandle*> handles; + SpinLockHolder lock(&global_queue_.mutex); + CordzHandle* dq_tail = global_queue_.dq_tail.load(std::memory_order_acquire); + for (const CordzHandle* p = dq_tail; p; p = p->dq_prev_) { + handles.push_back(p); + } + return handles; +} + +bool CordzHandle::DiagnosticsHandleIsSafeToInspect( + const CordzHandle* handle) const { + ODRCheck(); + if (!is_snapshot_) return false; + if (handle == nullptr) return true; + if (handle->is_snapshot_) return false; + bool snapshot_found = false; + SpinLockHolder lock(&queue_->mutex); + for (const CordzHandle* p = queue_->dq_tail; p; p = p->dq_prev_) { + if (p == handle) return !snapshot_found; + if (p == this) snapshot_found = true; + } + ABSL_ASSERT(snapshot_found); // Assert that 'this' is in delete queue. + return true; +} + +std::vector<const CordzHandle*> +CordzHandle::DiagnosticsGetSafeToInspectDeletedHandles() { + ODRCheck(); + std::vector<const CordzHandle*> handles; + if (!is_snapshot()) { + return handles; + } + + SpinLockHolder lock(&queue_->mutex); + for (const CordzHandle* p = dq_next_; p != nullptr; p = p->dq_next_) { + if (!p->is_snapshot()) { + handles.push_back(p); + } + } + return handles; +} + +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace y_absl diff --git a/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/cordz_handle.h b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/cordz_handle.h new file mode 100644 index 0000000000..f181bc7d6b --- /dev/null +++ b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/cordz_handle.h @@ -0,0 +1,131 @@ +// Copyright 2019 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ABSL_STRINGS_CORDZ_HANDLE_H_ +#define ABSL_STRINGS_CORDZ_HANDLE_H_ + +#include <atomic> +#include <vector> + +#include "y_absl/base/config.h" +#include "y_absl/base/internal/raw_logging.h" +#include "y_absl/base/internal/spinlock.h" +#include "y_absl/synchronization/mutex.h" + +namespace y_absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { + +// This base class allows multiple types of object (CordzInfo and +// CordzSampleToken) to exist simultaneously on the delete queue (pointed to by +// global_dq_tail and traversed using dq_prev_ and dq_next_). The +// delete queue guarantees that once a profiler creates a CordzSampleToken and +// has gained visibility into a CordzInfo object, that CordzInfo object will not +// be deleted prematurely. This allows the profiler to inspect all CordzInfo +// objects that are alive without needing to hold a global lock. +class CordzHandle { + public: + CordzHandle() : CordzHandle(false) {} + + bool is_snapshot() const { return is_snapshot_; } + + // Returns true if this instance is safe to be deleted because it is either a + // snapshot, which is always safe to delete, or not included in the global + // delete queue and thus not included in any snapshot. + // Callers are responsible for making sure this instance can not be newly + // discovered by other threads. For example, CordzInfo instances first de-list + // themselves from the global CordzInfo list before determining if they are + // safe to be deleted directly. + // If SafeToDelete returns false, callers MUST use the Delete() method to + // safely queue CordzHandle instances for deletion. + bool SafeToDelete() const; + + // Deletes the provided instance, or puts it on the delete queue to be deleted + // once there are no more sample tokens (snapshot) instances potentially + // referencing the instance. `handle` should not be null. + static void Delete(CordzHandle* handle); + + // Returns the current entries in the delete queue in LIFO order. + static std::vector<const CordzHandle*> DiagnosticsGetDeleteQueue(); + + // Returns true if the provided handle is nullptr or guarded by this handle. + // Since the CordzSnapshot token is itself a CordzHandle, this method will + // allow tests to check if that token is keeping an arbitrary CordzHandle + // alive. + bool DiagnosticsHandleIsSafeToInspect(const CordzHandle* handle) const; + + // Returns the current entries in the delete queue, in LIFO order, that are + // protected by this. CordzHandle objects are only placed on the delete queue + // after CordzHandle::Delete is called with them as an argument. Only + // CordzHandle objects that are not also CordzSnapshot objects will be + // included in the return vector. For each of the handles in the return + // vector, the earliest that their memory can be freed is when this + // CordzSnapshot object is deleted. + std::vector<const CordzHandle*> DiagnosticsGetSafeToInspectDeletedHandles(); + + protected: + explicit CordzHandle(bool is_snapshot); + virtual ~CordzHandle(); + + private: + // Global queue data. CordzHandle stores a pointer to the global queue + // instance to harden against ODR violations. + struct Queue { + constexpr explicit Queue(y_absl::ConstInitType) + : mutex(y_absl::kConstInit, + y_absl::base_internal::SCHEDULE_COOPERATIVE_AND_KERNEL) {} + + y_absl::base_internal::SpinLock mutex; + std::atomic<CordzHandle*> dq_tail ABSL_GUARDED_BY(mutex){nullptr}; + + // Returns true if this delete queue is empty. This method does not acquire + // the lock, but does a 'load acquire' observation on the delete queue tail. + // It is used inside Delete() to check for the presence of a delete queue + // without holding the lock. The assumption is that the caller is in the + // state of 'being deleted', and can not be newly discovered by a concurrent + // 'being constructed' snapshot instance. Practically, this means that any + // such discovery (`find`, 'first' or 'next', etc) must have proper 'happens + // before / after' semantics and atomic fences. + bool IsEmpty() const ABSL_NO_THREAD_SAFETY_ANALYSIS { + return dq_tail.load(std::memory_order_acquire) == nullptr; + } + }; + + void ODRCheck() const { +#ifndef NDEBUG + ABSL_RAW_CHECK(queue_ == &global_queue_, "ODR violation in Cord"); +#endif + } + + ABSL_CONST_INIT static Queue global_queue_; + Queue* const queue_ = &global_queue_; + const bool is_snapshot_; + + // dq_prev_ and dq_next_ require the global queue mutex to be held. + // Unfortunately we can't use thread annotations such that the thread safety + // analysis understands that queue_ and global_queue_ are one and the same. + CordzHandle* dq_prev_ = nullptr; + CordzHandle* dq_next_ = nullptr; +}; + +class CordzSnapshot : public CordzHandle { + public: + CordzSnapshot() : CordzHandle(true) {} +}; + +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace y_absl + +#endif // ABSL_STRINGS_CORDZ_HANDLE_H_ diff --git a/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/cordz_handle/ya.make b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/cordz_handle/ya.make new file mode 100644 index 0000000000..e181217139 --- /dev/null +++ b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/cordz_handle/ya.make @@ -0,0 +1,47 @@ +# Generated by devtools/yamaker. + +LIBRARY() + +WITHOUT_LICENSE_TEXTS() + +OWNER( + somov + g:cpp-contrib +) + +LICENSE(Apache-2.0) + +PEERDIR( + contrib/restricted/abseil-cpp-tstring/y_absl/base + contrib/restricted/abseil-cpp-tstring/y_absl/base/internal/low_level_alloc + contrib/restricted/abseil-cpp-tstring/y_absl/base/internal/raw_logging + contrib/restricted/abseil-cpp-tstring/y_absl/base/internal/spinlock_wait + contrib/restricted/abseil-cpp-tstring/y_absl/base/internal/throw_delegate + contrib/restricted/abseil-cpp-tstring/y_absl/base/log_severity + contrib/restricted/abseil-cpp-tstring/y_absl/debugging + contrib/restricted/abseil-cpp-tstring/y_absl/debugging/stacktrace + contrib/restricted/abseil-cpp-tstring/y_absl/debugging/symbolize + contrib/restricted/abseil-cpp-tstring/y_absl/demangle + contrib/restricted/abseil-cpp-tstring/y_absl/numeric + contrib/restricted/abseil-cpp-tstring/y_absl/strings + contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/absl_strings_internal + contrib/restricted/abseil-cpp-tstring/y_absl/synchronization + contrib/restricted/abseil-cpp-tstring/y_absl/synchronization/internal + contrib/restricted/abseil-cpp-tstring/y_absl/time + contrib/restricted/abseil-cpp-tstring/y_absl/time/civil_time + contrib/restricted/abseil-cpp-tstring/y_absl/time/time_zone +) + +ADDINCL( + GLOBAL contrib/restricted/abseil-cpp-tstring +) + +NO_COMPILER_WARNINGS() + +SRCDIR(contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal) + +SRCS( + cordz_handle.cc +) + +END() diff --git a/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/cordz_info.cc b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/cordz_info.cc new file mode 100644 index 0000000000..e3849a0b49 --- /dev/null +++ b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/cordz_info.cc @@ -0,0 +1,445 @@ +// Copyright 2019 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "y_absl/strings/internal/cordz_info.h" + +#include "y_absl/base/config.h" +#include "y_absl/base/internal/spinlock.h" +#include "y_absl/container/inlined_vector.h" +#include "y_absl/debugging/stacktrace.h" +#include "y_absl/strings/internal/cord_internal.h" +#include "y_absl/strings/internal/cord_rep_btree.h" +#include "y_absl/strings/internal/cord_rep_ring.h" +#include "y_absl/strings/internal/cordz_handle.h" +#include "y_absl/strings/internal/cordz_statistics.h" +#include "y_absl/strings/internal/cordz_update_tracker.h" +#include "y_absl/synchronization/mutex.h" +#include "y_absl/types/span.h" + +namespace y_absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { + +using ::y_absl::base_internal::SpinLockHolder; + +constexpr int CordzInfo::kMaxStackDepth; + +ABSL_CONST_INIT CordzInfo::List CordzInfo::global_list_{y_absl::kConstInit}; + +namespace { + +// CordRepAnalyzer performs the analysis of a cord. +// +// It computes absolute node counts and total memory usage, and an 'estimated +// fair share memory usage` statistic. +// Conceptually, it divides the 'memory usage' at each location in the 'cord +// graph' by the cumulative reference count of that location. The cumulative +// reference count is the factored total of all edges leading into that node. +// +// The top level node is treated specially: we assume the current thread +// (typically called from the CordzHandler) to hold a reference purely to +// perform a safe analysis, and not being part of the application. So we +// substract 1 from the reference count of the top node to compute the +// 'application fair share' excluding the reference of the current thread. +// +// An example of fair sharing, and why we multiply reference counts: +// Assume we have 2 CordReps, both being a Substring referencing a Flat: +// CordSubstring A (refcount = 5) --> child Flat C (refcount = 2) +// CordSubstring B (refcount = 9) --> child Flat C (refcount = 2) +// +// Flat C has 2 incoming edges from the 2 substrings (refcount = 2) and is not +// referenced directly anywhere else. Translated into a 'fair share', we then +// attribute 50% of the memory (memory / refcount = 2) to each incoming edge. +// Rep A has a refcount of 5, so we attribute each incoming edge 1 / 5th of the +// memory cost below it, i.e.: the fair share of Rep A of the memory used by C +// is then 'memory C / (refcount C * refcount A) + (memory A / refcount A)'. +// It is also easy to see how all incoming edges add up to 100%. +class CordRepAnalyzer { + public: + // Creates an analyzer instance binding to `statistics`. + explicit CordRepAnalyzer(CordzStatistics& statistics) + : statistics_(statistics) {} + + // Analyzes the memory statistics and node counts for the provided `rep`, and + // adds the results to `statistics`. Note that node counts and memory sizes + // are not initialized, computed values are added to any existing values. + void AnalyzeCordRep(const CordRep* rep) { + // Process all linear nodes. + // As per the class comments, use refcout - 1 on the top level node, as the + // top level node is assumed to be referenced only for analysis purposes. + size_t refcount = rep->refcount.Get(); + RepRef repref{rep, (refcount > 1) ? refcount - 1 : 1}; + + // Process all top level linear nodes (substrings and flats). + repref = CountLinearReps(repref, memory_usage_); + + if (repref.rep != nullptr) { + if (repref.rep->tag == RING) { + AnalyzeRing(repref); + } else if (repref.rep->tag == BTREE) { + AnalyzeBtree(repref); + } else if (repref.rep->tag == CONCAT) { + AnalyzeConcat(repref); + } else { + // We should have either a concat, btree, or ring node if not null. + assert(false); + } + } + + // Adds values to output + statistics_.estimated_memory_usage += memory_usage_.total; + statistics_.estimated_fair_share_memory_usage += + static_cast<size_t>(memory_usage_.fair_share); + } + + private: + // RepRef identifies a CordRep* inside the Cord tree with its cumulative + // refcount including itself. For example, a tree consisting of a substring + // with a refcount of 3 and a child flat with a refcount of 4 will have RepRef + // refcounts of 3 and 12 respectively. + struct RepRef { + const CordRep* rep; + size_t refcount; + + // Returns a 'child' RepRef which contains the cumulative reference count of + // this instance multiplied by the child's reference count. + RepRef Child(const CordRep* child) const { + return RepRef{child, refcount * child->refcount.Get()}; + } + }; + + // Memory usage values + struct MemoryUsage { + size_t total = 0; + double fair_share = 0.0; + + // Adds 'size` memory usage to this class, with a cumulative (recursive) + // reference count of `refcount` + void Add(size_t size, size_t refcount) { + total += size; + fair_share += static_cast<double>(size) / refcount; + } + }; + + // Returns `rr` if `rr.rep` is not null and a CONCAT type. + // Asserts that `rr.rep` is a concat node or null. + static RepRef AssertConcat(RepRef repref) { + const CordRep* rep = repref.rep; + assert(rep == nullptr || rep->tag == CONCAT); + return (rep != nullptr && rep->tag == CONCAT) ? repref : RepRef{nullptr, 0}; + } + + // Counts a flat of the provide allocated size + void CountFlat(size_t size) { + statistics_.node_count++; + statistics_.node_counts.flat++; + if (size <= 64) { + statistics_.node_counts.flat_64++; + } else if (size <= 128) { + statistics_.node_counts.flat_128++; + } else if (size <= 256) { + statistics_.node_counts.flat_256++; + } else if (size <= 512) { + statistics_.node_counts.flat_512++; + } else if (size <= 1024) { + statistics_.node_counts.flat_1k++; + } + } + + // Processes 'linear' reps (substring, flat, external) not requiring iteration + // or recursion. Returns RefRep{null} if all reps were processed, else returns + // the top-most non-linear concat or ring cordrep. + // Node counts are updated into `statistics_`, memory usage is update into + // `memory_usage`, which typically references `memory_usage_` except for ring + // buffers where we count children unrounded. + RepRef CountLinearReps(RepRef rep, MemoryUsage& memory_usage) { + // Consume all substrings + while (rep.rep->tag == SUBSTRING) { + statistics_.node_count++; + statistics_.node_counts.substring++; + memory_usage.Add(sizeof(CordRepSubstring), rep.refcount); + rep = rep.Child(rep.rep->substring()->child); + } + + // Consume possible FLAT + if (rep.rep->tag >= FLAT) { + size_t size = rep.rep->flat()->AllocatedSize(); + CountFlat(size); + memory_usage.Add(size, rep.refcount); + return RepRef{nullptr, 0}; + } + + // Consume possible external + if (rep.rep->tag == EXTERNAL) { + statistics_.node_count++; + statistics_.node_counts.external++; + size_t size = rep.rep->length + sizeof(CordRepExternalImpl<intptr_t>); + memory_usage.Add(size, rep.refcount); + return RepRef{nullptr, 0}; + } + + return rep; + } + + // Analyzes the provided concat node in a flattened recursive way. + void AnalyzeConcat(RepRef rep) { + y_absl::InlinedVector<RepRef, 47> pending; + + while (rep.rep != nullptr) { + const CordRepConcat* concat = rep.rep->concat(); + RepRef left = rep.Child(concat->left); + RepRef right = rep.Child(concat->right); + + statistics_.node_count++; + statistics_.node_counts.concat++; + memory_usage_.Add(sizeof(CordRepConcat), rep.refcount); + + right = AssertConcat(CountLinearReps(right, memory_usage_)); + rep = AssertConcat(CountLinearReps(left, memory_usage_)); + if (rep.rep != nullptr) { + if (right.rep != nullptr) { + pending.push_back(right); + } + } else if (right.rep != nullptr) { + rep = right; + } else if (!pending.empty()) { + rep = pending.back(); + pending.pop_back(); + } + } + } + + // Analyzes the provided ring. + void AnalyzeRing(RepRef rep) { + statistics_.node_count++; + statistics_.node_counts.ring++; + const CordRepRing* ring = rep.rep->ring(); + memory_usage_.Add(CordRepRing::AllocSize(ring->capacity()), rep.refcount); + ring->ForEach([&](CordRepRing::index_type pos) { + CountLinearReps(rep.Child(ring->entry_child(pos)), memory_usage_); + }); + } + + // Analyzes the provided btree. + void AnalyzeBtree(RepRef rep) { + statistics_.node_count++; + statistics_.node_counts.btree++; + memory_usage_.Add(sizeof(CordRepBtree), rep.refcount); + const CordRepBtree* tree = rep.rep->btree(); + if (tree->height() > 0) { + for (CordRep* edge : tree->Edges()) { + AnalyzeBtree(rep.Child(edge)); + } + } else { + for (CordRep* edge : tree->Edges()) { + CountLinearReps(rep.Child(edge), memory_usage_); + } + } + } + + CordzStatistics& statistics_; + MemoryUsage memory_usage_; +}; + +} // namespace + +CordzInfo* CordzInfo::Head(const CordzSnapshot& snapshot) { + ABSL_ASSERT(snapshot.is_snapshot()); + + // We can do an 'unsafe' load of 'head', as we are guaranteed that the + // instance it points to is kept alive by the provided CordzSnapshot, so we + // can simply return the current value using an acquire load. + // We do enforce in DEBUG builds that the 'head' value is present in the + // delete queue: ODR violations may lead to 'snapshot' and 'global_list_' + // being in different libraries / modules. + CordzInfo* head = global_list_.head.load(std::memory_order_acquire); + ABSL_ASSERT(snapshot.DiagnosticsHandleIsSafeToInspect(head)); + return head; +} + +CordzInfo* CordzInfo::Next(const CordzSnapshot& snapshot) const { + ABSL_ASSERT(snapshot.is_snapshot()); + + // Similar to the 'Head()' function, we do not need a mutex here. + CordzInfo* next = ci_next_.load(std::memory_order_acquire); + ABSL_ASSERT(snapshot.DiagnosticsHandleIsSafeToInspect(this)); + ABSL_ASSERT(snapshot.DiagnosticsHandleIsSafeToInspect(next)); + return next; +} + +void CordzInfo::TrackCord(InlineData& cord, MethodIdentifier method) { + assert(cord.is_tree()); + assert(!cord.is_profiled()); + CordzInfo* cordz_info = new CordzInfo(cord.as_tree(), nullptr, method); + cord.set_cordz_info(cordz_info); + cordz_info->Track(); +} + +void CordzInfo::TrackCord(InlineData& cord, const InlineData& src, + MethodIdentifier method) { + assert(cord.is_tree()); + assert(src.is_tree()); + + // Unsample current as we the current cord is being replaced with 'src', + // so any method history is no longer relevant. + CordzInfo* cordz_info = cord.cordz_info(); + if (cordz_info != nullptr) cordz_info->Untrack(); + + // Start new cord sample + cordz_info = new CordzInfo(cord.as_tree(), src.cordz_info(), method); + cord.set_cordz_info(cordz_info); + cordz_info->Track(); +} + +void CordzInfo::MaybeTrackCordImpl(InlineData& cord, const InlineData& src, + MethodIdentifier method) { + if (src.is_profiled()) { + TrackCord(cord, src, method); + } else if (cord.is_profiled()) { + cord.cordz_info()->Untrack(); + cord.clear_cordz_info(); + } +} + +CordzInfo::MethodIdentifier CordzInfo::GetParentMethod(const CordzInfo* src) { + if (src == nullptr) return MethodIdentifier::kUnknown; + return src->parent_method_ != MethodIdentifier::kUnknown ? src->parent_method_ + : src->method_; +} + +int CordzInfo::FillParentStack(const CordzInfo* src, void** stack) { + assert(stack); + if (src == nullptr) return 0; + if (src->parent_stack_depth_) { + memcpy(stack, src->parent_stack_, src->parent_stack_depth_ * sizeof(void*)); + return src->parent_stack_depth_; + } + memcpy(stack, src->stack_, src->stack_depth_ * sizeof(void*)); + return src->stack_depth_; +} + +CordzInfo::CordzInfo(CordRep* rep, const CordzInfo* src, + MethodIdentifier method) + : rep_(rep), + stack_depth_(y_absl::GetStackTrace(stack_, /*max_depth=*/kMaxStackDepth, + /*skip_count=*/1)), + parent_stack_depth_(FillParentStack(src, parent_stack_)), + method_(method), + parent_method_(GetParentMethod(src)), + create_time_(y_absl::Now()) { + update_tracker_.LossyAdd(method); + if (src) { + // Copy parent counters. + update_tracker_.LossyAdd(src->update_tracker_); + } +} + +CordzInfo::~CordzInfo() { + // `rep_` is potentially kept alive if CordzInfo is included + // in a collection snapshot (which should be rare). + if (ABSL_PREDICT_FALSE(rep_)) { + CordRep::Unref(rep_); + } +} + +void CordzInfo::Track() { + SpinLockHolder l(&list_->mutex); + + CordzInfo* const head = list_->head.load(std::memory_order_acquire); + if (head != nullptr) { + head->ci_prev_.store(this, std::memory_order_release); + } + ci_next_.store(head, std::memory_order_release); + list_->head.store(this, std::memory_order_release); +} + +void CordzInfo::Untrack() { + ODRCheck(); + { + SpinLockHolder l(&list_->mutex); + + CordzInfo* const head = list_->head.load(std::memory_order_acquire); + CordzInfo* const next = ci_next_.load(std::memory_order_acquire); + CordzInfo* const prev = ci_prev_.load(std::memory_order_acquire); + + if (next) { + ABSL_ASSERT(next->ci_prev_.load(std::memory_order_acquire) == this); + next->ci_prev_.store(prev, std::memory_order_release); + } + if (prev) { + ABSL_ASSERT(head != this); + ABSL_ASSERT(prev->ci_next_.load(std::memory_order_acquire) == this); + prev->ci_next_.store(next, std::memory_order_release); + } else { + ABSL_ASSERT(head == this); + list_->head.store(next, std::memory_order_release); + } + } + + // We can no longer be discovered: perform a fast path check if we are not + // listed on any delete queue, so we can directly delete this instance. + if (SafeToDelete()) { + UnsafeSetCordRep(nullptr); + delete this; + return; + } + + // We are likely part of a snapshot, extend the life of the CordRep + { + y_absl::MutexLock lock(&mutex_); + if (rep_) CordRep::Ref(rep_); + } + CordzHandle::Delete(this); +} + +void CordzInfo::Lock(MethodIdentifier method) + ABSL_EXCLUSIVE_LOCK_FUNCTION(mutex_) { + mutex_.Lock(); + update_tracker_.LossyAdd(method); + assert(rep_); +} + +void CordzInfo::Unlock() ABSL_UNLOCK_FUNCTION(mutex_) { + bool tracked = rep_ != nullptr; + mutex_.Unlock(); + if (!tracked) { + Untrack(); + } +} + +y_absl::Span<void* const> CordzInfo::GetStack() const { + return y_absl::MakeConstSpan(stack_, stack_depth_); +} + +y_absl::Span<void* const> CordzInfo::GetParentStack() const { + return y_absl::MakeConstSpan(parent_stack_, parent_stack_depth_); +} + +CordzStatistics CordzInfo::GetCordzStatistics() const { + CordzStatistics stats; + stats.method = method_; + stats.parent_method = parent_method_; + stats.update_tracker = update_tracker_; + if (CordRep* rep = RefCordRep()) { + stats.size = rep->length; + CordRepAnalyzer analyzer(stats); + analyzer.AnalyzeCordRep(rep); + CordRep::Unref(rep); + } + return stats; +} + +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace y_absl diff --git a/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/cordz_info.h b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/cordz_info.h new file mode 100644 index 0000000000..e24214d259 --- /dev/null +++ b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/cordz_info.h @@ -0,0 +1,298 @@ +// Copyright 2019 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ABSL_STRINGS_CORDZ_INFO_H_ +#define ABSL_STRINGS_CORDZ_INFO_H_ + +#include <atomic> +#include <cstdint> +#include <functional> + +#include "y_absl/base/config.h" +#include "y_absl/base/internal/raw_logging.h" +#include "y_absl/base/internal/spinlock.h" +#include "y_absl/base/thread_annotations.h" +#include "y_absl/strings/internal/cord_internal.h" +#include "y_absl/strings/internal/cordz_functions.h" +#include "y_absl/strings/internal/cordz_handle.h" +#include "y_absl/strings/internal/cordz_statistics.h" +#include "y_absl/strings/internal/cordz_update_tracker.h" +#include "y_absl/synchronization/mutex.h" +#include "y_absl/types/span.h" + +namespace y_absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { + +// CordzInfo tracks a profiled Cord. Each of these objects can be in two places. +// If a Cord is alive, the CordzInfo will be in the global_cordz_infos map, and +// can also be retrieved via the linked list starting with +// global_cordz_infos_head and continued via the cordz_info_next() method. When +// a Cord has reached the end of its lifespan, the CordzInfo object will be +// migrated out of the global_cordz_infos list and the global_cordz_infos_map, +// and will either be deleted or appended to the global_delete_queue. If it is +// placed on the global_delete_queue, the CordzInfo object will be cleaned in +// the destructor of a CordzSampleToken object. +class ABSL_LOCKABLE CordzInfo : public CordzHandle { + public: + using MethodIdentifier = CordzUpdateTracker::MethodIdentifier; + + // TrackCord creates a CordzInfo instance which tracks important metrics of + // a sampled cord, and stores the created CordzInfo instance into `cord'. All + // CordzInfo instances are placed in a global list which is used to discover + // and snapshot all actively tracked cords. Callers are responsible for + // calling UntrackCord() before the tracked Cord instance is deleted, or to + // stop tracking the sampled Cord. Callers are also responsible for guarding + // changes to the 'tree' value of a Cord (InlineData.tree) through the Lock() + // and Unlock() calls. Any change resulting in a new tree value for the cord + // requires a call to SetCordRep() before the old tree has been unreffed + // and/or deleted. `method` identifies the Cord public API method initiating + // the cord to be sampled. + // Requires `cord` to hold a tree, and `cord.cordz_info()` to be null. + static void TrackCord(InlineData& cord, MethodIdentifier method); + + // Identical to TrackCord(), except that this function fills the + // `parent_stack` and `parent_method` properties of the returned CordzInfo + // instance from the provided `src` instance if `src` is sampled. + // This function should be used for sampling 'copy constructed' and 'copy + // assigned' cords. This function allows 'cord` to be already sampled, in + // which case the CordzInfo will be newly created from `src`. + static void TrackCord(InlineData& cord, const InlineData& src, + MethodIdentifier method); + + // Maybe sample the cord identified by 'cord' for method 'method'. + // Uses `cordz_should_profile` to randomly pick cords to be sampled, and if + // so, invokes `TrackCord` to start sampling `cord`. + static void MaybeTrackCord(InlineData& cord, MethodIdentifier method); + + // Maybe sample the cord identified by 'cord' for method 'method'. + // `src` identifies a 'parent' cord which is assigned to `cord`, typically the + // input cord for a copy constructor, or an assign method such as `operator=` + // `cord` will be sampled if (and only if) `src` is sampled. + // If `cord` is currently being sampled and `src` is not being sampled, then + // this function will stop sampling the cord and reset the cord's cordz_info. + // + // Previously this function defined that `cord` will be sampled if either + // `src` is sampled, or if `cord` is randomly picked for sampling. However, + // this can cause issues, as there may be paths where some cord is assigned an + // indirect copy of it's own value. As such a 'string of copies' would then + // remain sampled (`src.is_profiled`), then assigning such a cord back to + // 'itself' creates a cycle where the cord will converge to 'always sampled`. + // + // For example: + // + // Cord x; + // for (...) { + // // Copy ctor --> y.is_profiled := x.is_profiled | random(...) + // Cord y = x; + // ... + // // Assign x = y --> x.is_profiled = y.is_profiled | random(...) + // // ==> x.is_profiled |= random(...) + // // ==> x converges to 'always profiled' + // x = y; + // } + static void MaybeTrackCord(InlineData& cord, const InlineData& src, + MethodIdentifier method); + + // Stops tracking changes for a sampled cord, and deletes the provided info. + // This function must be called before the sampled cord instance is deleted, + // and before the root cordrep of the sampled cord is unreffed. + // This function may extend the lifetime of the cordrep in cases where the + // CordInfo instance is being held by a concurrent collection thread. + void Untrack(); + + // Invokes UntrackCord() on `info` if `info` is not null. + static void MaybeUntrackCord(CordzInfo* info); + + CordzInfo() = delete; + CordzInfo(const CordzInfo&) = delete; + CordzInfo& operator=(const CordzInfo&) = delete; + + // Retrieves the oldest existing CordzInfo. + static CordzInfo* Head(const CordzSnapshot& snapshot) + ABSL_NO_THREAD_SAFETY_ANALYSIS; + + // Retrieves the next oldest existing CordzInfo older than 'this' instance. + CordzInfo* Next(const CordzSnapshot& snapshot) const + ABSL_NO_THREAD_SAFETY_ANALYSIS; + + // Locks this instance for the update identified by `method`. + // Increases the count for `method` in `update_tracker`. + void Lock(MethodIdentifier method) ABSL_EXCLUSIVE_LOCK_FUNCTION(mutex_); + + // Unlocks this instance. If the contained `rep` has been set to null + // indicating the Cord has been cleared or is otherwise no longer sampled, + // then this method will delete this CordzInfo instance. + void Unlock() ABSL_UNLOCK_FUNCTION(mutex_); + + // Asserts that this CordzInfo instance is locked. + void AssertHeld() ABSL_ASSERT_EXCLUSIVE_LOCK(mutex_); + + // Updates the `rep` property of this instance. This methods is invoked by + // Cord logic each time the root node of a sampled Cord changes, and before + // the old root reference count is deleted. This guarantees that collection + // code can always safely take a reference on the tracked cord. + // Requires a lock to be held through the `Lock()` method. + // TODO(b/117940323): annotate with ABSL_EXCLUSIVE_LOCKS_REQUIRED once all + // Cord code is in a state where this can be proven true by the compiler. + void SetCordRep(CordRep* rep); + + // Returns the current `rep` property of this instance with a reference + // added, or null if this instance represents a cord that has since been + // deleted or untracked. + CordRep* RefCordRep() const ABSL_LOCKS_EXCLUDED(mutex_); + + // Returns the current value of `rep_` for testing purposes only. + CordRep* GetCordRepForTesting() const ABSL_NO_THREAD_SAFETY_ANALYSIS { + return rep_; + } + + // Sets the current value of `rep_` for testing purposes only. + void SetCordRepForTesting(CordRep* rep) ABSL_NO_THREAD_SAFETY_ANALYSIS { + rep_ = rep; + } + + // Returns the stack trace for where the cord was first sampled. Cords are + // potentially sampled when they promote from an inlined cord to a tree or + // ring representation, which is not necessarily the location where the cord + // was first created. Some cords are created as inlined cords, and only as + // data is added do they become a non-inlined cord. However, typically the + // location represents reasonably well where the cord is 'created'. + y_absl::Span<void* const> GetStack() const; + + // Returns the stack trace for a sampled cord's 'parent stack trace'. This + // value may be set if the cord is sampled (promoted) after being created + // from, or being assigned the value of an existing (sampled) cord. + y_absl::Span<void* const> GetParentStack() const; + + // Retrieves the CordzStatistics associated with this Cord. The statistics + // are only updated when a Cord goes through a mutation, such as an Append + // or RemovePrefix. + CordzStatistics GetCordzStatistics() const; + + private: + using SpinLock = y_absl::base_internal::SpinLock; + using SpinLockHolder = ::y_absl::base_internal::SpinLockHolder; + + // Global cordz info list. CordzInfo stores a pointer to the global list + // instance to harden against ODR violations. + struct List { + constexpr explicit List(y_absl::ConstInitType) + : mutex(y_absl::kConstInit, + y_absl::base_internal::SCHEDULE_COOPERATIVE_AND_KERNEL) {} + + SpinLock mutex; + std::atomic<CordzInfo*> head ABSL_GUARDED_BY(mutex){nullptr}; + }; + + static constexpr int kMaxStackDepth = 64; + + explicit CordzInfo(CordRep* rep, const CordzInfo* src, + MethodIdentifier method); + ~CordzInfo() override; + + // Sets `rep_` without holding a lock. + void UnsafeSetCordRep(CordRep* rep) ABSL_NO_THREAD_SAFETY_ANALYSIS; + + void Track(); + + // Returns the parent method from `src`, which is either `parent_method_` or + // `method_` depending on `parent_method_` being kUnknown. + // Returns kUnknown if `src` is null. + static MethodIdentifier GetParentMethod(const CordzInfo* src); + + // Fills the provided stack from `src`, copying either `parent_stack_` or + // `stack_` depending on `parent_stack_` being empty, returning the size of + // the parent stack. + // Returns 0 if `src` is null. + static int FillParentStack(const CordzInfo* src, void** stack); + + void ODRCheck() const { +#ifndef NDEBUG + ABSL_RAW_CHECK(list_ == &global_list_, "ODR violation in Cord"); +#endif + } + + // Non-inlined implementation of `MaybeTrackCord`, which is executed if + // either `src` is sampled or `cord` is sampled, and either untracks or + // tracks `cord` as documented per `MaybeTrackCord`. + static void MaybeTrackCordImpl(InlineData& cord, const InlineData& src, + MethodIdentifier method); + + ABSL_CONST_INIT static List global_list_; + List* const list_ = &global_list_; + + // ci_prev_ and ci_next_ require the global list mutex to be held. + // Unfortunately we can't use thread annotations such that the thread safety + // analysis understands that list_ and global_list_ are one and the same. + std::atomic<CordzInfo*> ci_prev_{nullptr}; + std::atomic<CordzInfo*> ci_next_{nullptr}; + + mutable y_absl::Mutex mutex_; + CordRep* rep_ ABSL_GUARDED_BY(mutex_); + + void* stack_[kMaxStackDepth]; + void* parent_stack_[kMaxStackDepth]; + const int stack_depth_; + const int parent_stack_depth_; + const MethodIdentifier method_; + const MethodIdentifier parent_method_; + CordzUpdateTracker update_tracker_; + const y_absl::Time create_time_; +}; + +inline ABSL_ATTRIBUTE_ALWAYS_INLINE void CordzInfo::MaybeTrackCord( + InlineData& cord, MethodIdentifier method) { + if (ABSL_PREDICT_FALSE(cordz_should_profile())) { + TrackCord(cord, method); + } +} + +inline ABSL_ATTRIBUTE_ALWAYS_INLINE void CordzInfo::MaybeTrackCord( + InlineData& cord, const InlineData& src, MethodIdentifier method) { + if (ABSL_PREDICT_FALSE(InlineData::is_either_profiled(cord, src))) { + MaybeTrackCordImpl(cord, src, method); + } +} + +inline ABSL_ATTRIBUTE_ALWAYS_INLINE void CordzInfo::MaybeUntrackCord( + CordzInfo* info) { + if (ABSL_PREDICT_FALSE(info)) { + info->Untrack(); + } +} + +inline void CordzInfo::AssertHeld() ABSL_ASSERT_EXCLUSIVE_LOCK(mutex_) { +#ifndef NDEBUG + mutex_.AssertHeld(); +#endif +} + +inline void CordzInfo::SetCordRep(CordRep* rep) { + AssertHeld(); + rep_ = rep; +} + +inline void CordzInfo::UnsafeSetCordRep(CordRep* rep) { rep_ = rep; } + +inline CordRep* CordzInfo::RefCordRep() const ABSL_LOCKS_EXCLUDED(mutex_) { + MutexLock lock(&mutex_); + return rep_ ? CordRep::Ref(rep_) : nullptr; +} + +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace y_absl + +#endif // ABSL_STRINGS_CORDZ_INFO_H_ diff --git a/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/cordz_info/ya.make b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/cordz_info/ya.make new file mode 100644 index 0000000000..930eaa8b05 --- /dev/null +++ b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/cordz_info/ya.make @@ -0,0 +1,51 @@ +# Generated by devtools/yamaker. + +LIBRARY() + +WITHOUT_LICENSE_TEXTS() + +OWNER( + somov + g:cpp-contrib +) + +LICENSE(Apache-2.0) + +PEERDIR( + contrib/restricted/abseil-cpp-tstring/y_absl/base + contrib/restricted/abseil-cpp-tstring/y_absl/base/internal/low_level_alloc + contrib/restricted/abseil-cpp-tstring/y_absl/base/internal/raw_logging + contrib/restricted/abseil-cpp-tstring/y_absl/base/internal/spinlock_wait + contrib/restricted/abseil-cpp-tstring/y_absl/base/internal/throw_delegate + contrib/restricted/abseil-cpp-tstring/y_absl/base/log_severity + contrib/restricted/abseil-cpp-tstring/y_absl/debugging + contrib/restricted/abseil-cpp-tstring/y_absl/debugging/stacktrace + contrib/restricted/abseil-cpp-tstring/y_absl/debugging/symbolize + contrib/restricted/abseil-cpp-tstring/y_absl/demangle + contrib/restricted/abseil-cpp-tstring/y_absl/numeric + contrib/restricted/abseil-cpp-tstring/y_absl/profiling/internal/exponential_biased + contrib/restricted/abseil-cpp-tstring/y_absl/strings + contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/absl_cord_internal + contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/absl_strings_internal + contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/cordz_functions + contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/cordz_handle + contrib/restricted/abseil-cpp-tstring/y_absl/synchronization + contrib/restricted/abseil-cpp-tstring/y_absl/synchronization/internal + contrib/restricted/abseil-cpp-tstring/y_absl/time + contrib/restricted/abseil-cpp-tstring/y_absl/time/civil_time + contrib/restricted/abseil-cpp-tstring/y_absl/time/time_zone +) + +ADDINCL( + GLOBAL contrib/restricted/abseil-cpp-tstring +) + +NO_COMPILER_WARNINGS() + +SRCDIR(contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal) + +SRCS( + cordz_info.cc +) + +END() diff --git a/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/cordz_sample_token.cc b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/cordz_sample_token.cc new file mode 100644 index 0000000000..f29678adb8 --- /dev/null +++ b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/cordz_sample_token.cc @@ -0,0 +1,64 @@ +// Copyright 2019 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "y_absl/strings/internal/cordz_sample_token.h" + +#include "y_absl/base/config.h" +#include "y_absl/strings/internal/cordz_handle.h" +#include "y_absl/strings/internal/cordz_info.h" + +namespace y_absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { + +CordzSampleToken::Iterator& CordzSampleToken::Iterator::operator++() { + if (current_) { + current_ = current_->Next(*token_); + } + return *this; +} + +CordzSampleToken::Iterator CordzSampleToken::Iterator::operator++(int) { + Iterator it(*this); + operator++(); + return it; +} + +bool operator==(const CordzSampleToken::Iterator& lhs, + const CordzSampleToken::Iterator& rhs) { + return lhs.current_ == rhs.current_ && + (lhs.current_ == nullptr || lhs.token_ == rhs.token_); +} + +bool operator!=(const CordzSampleToken::Iterator& lhs, + const CordzSampleToken::Iterator& rhs) { + return !(lhs == rhs); +} + +CordzSampleToken::Iterator::reference CordzSampleToken::Iterator::operator*() + const { + return *current_; +} + +CordzSampleToken::Iterator::pointer CordzSampleToken::Iterator::operator->() + const { + return current_; +} + +CordzSampleToken::Iterator::Iterator(const CordzSampleToken* token) + : token_(token), current_(CordzInfo::Head(*token)) {} + +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace y_absl diff --git a/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/cordz_sample_token.h b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/cordz_sample_token.h new file mode 100644 index 0000000000..85bed6dae8 --- /dev/null +++ b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/cordz_sample_token.h @@ -0,0 +1,97 @@ +// Copyright 2019 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "y_absl/base/config.h" +#include "y_absl/strings/internal/cordz_handle.h" +#include "y_absl/strings/internal/cordz_info.h" + +#ifndef ABSL_STRINGS_CORDZ_SAMPLE_TOKEN_H_ +#define ABSL_STRINGS_CORDZ_SAMPLE_TOKEN_H_ + +namespace y_absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { + +// The existence of a CordzSampleToken guarantees that a reader can traverse the +// global_cordz_infos_head linked-list without needing to hold a mutex. When a +// CordzSampleToken exists, all CordzInfo objects that would be destroyed are +// instead appended to a deletion queue. When the CordzSampleToken is destroyed, +// it will also clean up any of these CordzInfo objects. +// +// E.g., ST are CordzSampleToken objects and CH are CordzHandle objects. +// ST1 <- CH1 <- CH2 <- ST2 <- CH3 <- global_delete_queue_tail +// +// This list tracks that CH1 and CH2 were created after ST1, so the thread +// holding ST1 might have a referece to CH1, CH2, ST2, and CH3. However, ST2 was +// created later, so the thread holding the ST2 token cannot have a reference to +// ST1, CH1, or CH2. If ST1 is cleaned up first, that thread will delete ST1, +// CH1, and CH2. If instead ST2 is cleaned up first, that thread will only +// delete ST2. +// +// If ST1 is cleaned up first, the new list will be: +// ST2 <- CH3 <- global_delete_queue_tail +// +// If ST2 is cleaned up first, the new list will be: +// ST1 <- CH1 <- CH2 <- CH3 <- global_delete_queue_tail +// +// All new CordzHandle objects are appended to the list, so if a new thread +// comes along before either ST1 or ST2 are cleaned up, the new list will be: +// ST1 <- CH1 <- CH2 <- ST2 <- CH3 <- ST3 <- global_delete_queue_tail +// +// A thread must hold the global_delete_queue_mu mutex whenever it's altering +// this list. +// +// It is safe for thread that holds a CordzSampleToken to read +// global_cordz_infos at any time since the objects it is able to retrieve will +// not be deleted while the CordzSampleToken exists. +class CordzSampleToken : public CordzSnapshot { + public: + class Iterator { + public: + using iterator_category = std::input_iterator_tag; + using value_type = const CordzInfo&; + using difference_type = ptrdiff_t; + using pointer = const CordzInfo*; + using reference = value_type; + + Iterator() = default; + + Iterator& operator++(); + Iterator operator++(int); + friend bool operator==(const Iterator& lhs, const Iterator& rhs); + friend bool operator!=(const Iterator& lhs, const Iterator& rhs); + reference operator*() const; + pointer operator->() const; + + private: + friend class CordzSampleToken; + explicit Iterator(const CordzSampleToken* token); + + const CordzSampleToken* token_ = nullptr; + pointer current_ = nullptr; + }; + + CordzSampleToken() = default; + CordzSampleToken(const CordzSampleToken&) = delete; + CordzSampleToken& operator=(const CordzSampleToken&) = delete; + + Iterator begin() { return Iterator(this); } + Iterator end() { return Iterator(); } +}; + +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace y_absl + +#endif // ABSL_STRINGS_CORDZ_SAMPLE_TOKEN_H_ diff --git a/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/cordz_sample_token/ya.make b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/cordz_sample_token/ya.make new file mode 100644 index 0000000000..4d46274f4e --- /dev/null +++ b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/cordz_sample_token/ya.make @@ -0,0 +1,52 @@ +# Generated by devtools/yamaker. + +LIBRARY() + +WITHOUT_LICENSE_TEXTS() + +OWNER( + somov + g:cpp-contrib +) + +LICENSE(Apache-2.0) + +PEERDIR( + contrib/restricted/abseil-cpp-tstring/y_absl/base + contrib/restricted/abseil-cpp-tstring/y_absl/base/internal/low_level_alloc + contrib/restricted/abseil-cpp-tstring/y_absl/base/internal/raw_logging + contrib/restricted/abseil-cpp-tstring/y_absl/base/internal/spinlock_wait + contrib/restricted/abseil-cpp-tstring/y_absl/base/internal/throw_delegate + contrib/restricted/abseil-cpp-tstring/y_absl/base/log_severity + contrib/restricted/abseil-cpp-tstring/y_absl/debugging + contrib/restricted/abseil-cpp-tstring/y_absl/debugging/stacktrace + contrib/restricted/abseil-cpp-tstring/y_absl/debugging/symbolize + contrib/restricted/abseil-cpp-tstring/y_absl/demangle + contrib/restricted/abseil-cpp-tstring/y_absl/numeric + contrib/restricted/abseil-cpp-tstring/y_absl/profiling/internal/exponential_biased + contrib/restricted/abseil-cpp-tstring/y_absl/strings + contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/absl_cord_internal + contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/absl_strings_internal + contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/cordz_functions + contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/cordz_handle + contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/cordz_info + contrib/restricted/abseil-cpp-tstring/y_absl/synchronization + contrib/restricted/abseil-cpp-tstring/y_absl/synchronization/internal + contrib/restricted/abseil-cpp-tstring/y_absl/time + contrib/restricted/abseil-cpp-tstring/y_absl/time/civil_time + contrib/restricted/abseil-cpp-tstring/y_absl/time/time_zone +) + +ADDINCL( + GLOBAL contrib/restricted/abseil-cpp-tstring +) + +NO_COMPILER_WARNINGS() + +SRCDIR(contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal) + +SRCS( + cordz_sample_token.cc +) + +END() diff --git a/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/cordz_statistics.h b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/cordz_statistics.h new file mode 100644 index 0000000000..34e7c34bd8 --- /dev/null +++ b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/cordz_statistics.h @@ -0,0 +1,87 @@ +// Copyright 2019 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ABSL_STRINGS_INTERNAL_CORDZ_STATISTICS_H_ +#define ABSL_STRINGS_INTERNAL_CORDZ_STATISTICS_H_ + +#include <cstdint> + +#include "y_absl/base/config.h" +#include "y_absl/strings/internal/cordz_update_tracker.h" + +namespace y_absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { + +// CordzStatistics captures some meta information about a Cord's shape. +struct CordzStatistics { + using MethodIdentifier = CordzUpdateTracker::MethodIdentifier; + + // Node counts information + struct NodeCounts { + size_t flat = 0; // #flats + size_t flat_64 = 0; // #flats up to 64 bytes + size_t flat_128 = 0; // #flats up to 128 bytes + size_t flat_256 = 0; // #flats up to 256 bytes + size_t flat_512 = 0; // #flats up to 512 bytes + size_t flat_1k = 0; // #flats up to 1K bytes + size_t external = 0; // #external reps + size_t substring = 0; // #substring reps + size_t concat = 0; // #concat reps + size_t ring = 0; // #ring buffer reps + size_t btree = 0; // #btree reps + }; + + // The size of the cord in bytes. This matches the result of Cord::size(). + int64_t size = 0; + + // The estimated memory used by the sampled cord. This value matches the + // value as reported by Cord::EstimatedMemoryUsage(). + // A value of 0 implies the property has not been recorded. + int64_t estimated_memory_usage = 0; + + // The effective memory used by the sampled cord, inversely weighted by the + // effective indegree of each allocated node. This is a representation of the + // fair share of memory usage that should be attributed to the sampled cord. + // This value is more useful for cases where one or more nodes are referenced + // by multiple Cord instances, and for cases where a Cord includes the same + // node multiple times (either directly or indirectly). + // A value of 0 implies the property has not been recorded. + int64_t estimated_fair_share_memory_usage = 0; + + // The total number of nodes referenced by this cord. + // For ring buffer Cords, this includes the 'ring buffer' node. + // For btree Cords, this includes all 'CordRepBtree' tree nodes as well as all + // the substring, flat and external nodes referenced by the tree. + // A value of 0 implies the property has not been recorded. + int64_t node_count = 0; + + // Detailed node counts per type + NodeCounts node_counts; + + // The cord method responsible for sampling the cord. + MethodIdentifier method = MethodIdentifier::kUnknown; + + // The cord method responsible for sampling the parent cord if applicable. + MethodIdentifier parent_method = MethodIdentifier::kUnknown; + + // Update tracker tracking invocation count per cord method. + CordzUpdateTracker update_tracker; +}; + +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace y_absl + +#endif // ABSL_STRINGS_INTERNAL_CORDZ_STATISTICS_H_ diff --git a/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/cordz_update_scope.h b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/cordz_update_scope.h new file mode 100644 index 0000000000..66e0e8f51b --- /dev/null +++ b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/cordz_update_scope.h @@ -0,0 +1,71 @@ +// Copyright 2021 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ABSL_STRINGS_INTERNAL_CORDZ_UPDATE_SCOPE_H_ +#define ABSL_STRINGS_INTERNAL_CORDZ_UPDATE_SCOPE_H_ + +#include "y_absl/base/config.h" +#include "y_absl/base/optimization.h" +#include "y_absl/base/thread_annotations.h" +#include "y_absl/strings/internal/cord_internal.h" +#include "y_absl/strings/internal/cordz_info.h" +#include "y_absl/strings/internal/cordz_update_tracker.h" + +namespace y_absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { + +// CordzUpdateScope scopes an update to the provided CordzInfo. +// The class invokes `info->Lock(method)` and `info->Unlock()` to guard +// cordrep updates. This class does nothing if `info` is null. +// See also the 'Lock`, `Unlock` and `SetCordRep` methods in `CordzInfo`. +class ABSL_SCOPED_LOCKABLE CordzUpdateScope { + public: + CordzUpdateScope(CordzInfo* info, CordzUpdateTracker::MethodIdentifier method) + ABSL_EXCLUSIVE_LOCK_FUNCTION(info) + : info_(info) { + if (ABSL_PREDICT_FALSE(info_)) { + info->Lock(method); + } + } + + // CordzUpdateScope can not be copied or assigned to. + CordzUpdateScope(CordzUpdateScope&& rhs) = delete; + CordzUpdateScope(const CordzUpdateScope&) = delete; + CordzUpdateScope& operator=(CordzUpdateScope&& rhs) = delete; + CordzUpdateScope& operator=(const CordzUpdateScope&) = delete; + + ~CordzUpdateScope() ABSL_UNLOCK_FUNCTION() { + if (ABSL_PREDICT_FALSE(info_)) { + info_->Unlock(); + } + } + + void SetCordRep(CordRep* rep) const { + if (ABSL_PREDICT_FALSE(info_)) { + info_->SetCordRep(rep); + } + } + + CordzInfo* info() const { return info_; } + + private: + CordzInfo* info_; +}; + +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace y_absl + +#endif // ABSL_STRINGS_INTERNAL_CORDZ_UPDATE_SCOPE_H_ diff --git a/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/cordz_update_tracker.h b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/cordz_update_tracker.h new file mode 100644 index 0000000000..48a449b4bf --- /dev/null +++ b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/cordz_update_tracker.h @@ -0,0 +1,121 @@ +// Copyright 2021 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ABSL_STRINGS_INTERNAL_CORDZ_UPDATE_TRACKER_H_ +#define ABSL_STRINGS_INTERNAL_CORDZ_UPDATE_TRACKER_H_ + +#include <atomic> +#include <cstdint> + +#include "y_absl/base/config.h" + +namespace y_absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { + +// CordzUpdateTracker tracks counters for Cord update methods. +// +// The purpose of CordzUpdateTracker is to track the number of calls to methods +// updating Cord data for sampled cords. The class internally uses 'lossy' +// atomic operations: Cord is thread-compatible, so there is no need to +// synchronize updates. However, Cordz collection threads may call 'Value()' at +// any point, so the class needs to provide thread safe access. +// +// This class is thread-safe. But as per above comments, all non-const methods +// should be used single-threaded only: updates are thread-safe but lossy. +class CordzUpdateTracker { + public: + // Tracked update methods. + enum MethodIdentifier { + kUnknown, + kAppendBuffer, + kAppendCord, + kAppendExternalMemory, + kAppendString, + kAssignCord, + kAssignString, + kClear, + kConstructorCord, + kConstructorString, + kCordReader, + kFlatten, + kGetAppendRegion, + kMakeCordFromExternal, + kMoveAppendCord, + kMoveAssignCord, + kMovePrependCord, + kPrependBuffer, + kPrependCord, + kPrependString, + kRemovePrefix, + kRemoveSuffix, + kSubCord, + + // kNumMethods defines the number of entries: must be the last entry. + kNumMethods, + }; + + // Constructs a new instance. All counters are zero-initialized. + constexpr CordzUpdateTracker() noexcept : values_{} {} + + // Copy constructs a new instance. + CordzUpdateTracker(const CordzUpdateTracker& rhs) noexcept { *this = rhs; } + + // Assigns the provided value to this instance. + CordzUpdateTracker& operator=(const CordzUpdateTracker& rhs) noexcept { + for (int i = 0; i < kNumMethods; ++i) { + values_[i].store(rhs.values_[i].load(std::memory_order_relaxed), + std::memory_order_relaxed); + } + return *this; + } + + // Returns the value for the specified method. + int64_t Value(MethodIdentifier method) const { + return values_[method].load(std::memory_order_relaxed); + } + + // Increases the value for the specified method by `n` + void LossyAdd(MethodIdentifier method, int64_t n = 1) { + auto& value = values_[method]; + value.store(value.load(std::memory_order_relaxed) + n, + std::memory_order_relaxed); + } + + // Adds all the values from `src` to this instance + void LossyAdd(const CordzUpdateTracker& src) { + for (int i = 0; i < kNumMethods; ++i) { + MethodIdentifier method = static_cast<MethodIdentifier>(i); + if (int64_t value = src.Value(method)) { + LossyAdd(method, value); + } + } + } + + private: + // Until C++20 std::atomic is not constexpr default-constructible, so we need + // a wrapper for this class to be constexpr constructible. + class Counter : public std::atomic<int64_t> { + public: + constexpr Counter() noexcept : std::atomic<int64_t>(0) {} + }; + + Counter values_[kNumMethods]; +}; + +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace y_absl + +#endif // ABSL_STRINGS_INTERNAL_CORDZ_UPDATE_TRACKER_H_ diff --git a/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/escaping.cc b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/escaping.cc new file mode 100644 index 0000000000..01b8974983 --- /dev/null +++ b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/escaping.cc @@ -0,0 +1,180 @@ +// Copyright 2020 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "y_absl/strings/internal/escaping.h" + +#include "y_absl/base/internal/endian.h" +#include "y_absl/base/internal/raw_logging.h" + +namespace y_absl { +ABSL_NAMESPACE_BEGIN +namespace strings_internal { + +const char kBase64Chars[] = + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; + +size_t CalculateBase64EscapedLenInternal(size_t input_len, bool do_padding) { + // Base64 encodes three bytes of input at a time. If the input is not + // divisible by three, we pad as appropriate. + // + // (from https://tools.ietf.org/html/rfc3548) + // Special processing is performed if fewer than 24 bits are available + // at the end of the data being encoded. A full encoding quantum is + // always completed at the end of a quantity. When fewer than 24 input + // bits are available in an input group, zero bits are added (on the + // right) to form an integral number of 6-bit groups. Padding at the + // end of the data is performed using the '=' character. Since all base + // 64 input is an integral number of octets, only the following cases + // can arise: + + // Base64 encodes each three bytes of input into four bytes of output. + size_t len = (input_len / 3) * 4; + + if (input_len % 3 == 0) { + // (from https://tools.ietf.org/html/rfc3548) + // (1) the final quantum of encoding input is an integral multiple of 24 + // bits; here, the final unit of encoded output will be an integral + // multiple of 4 characters with no "=" padding, + } else if (input_len % 3 == 1) { + // (from https://tools.ietf.org/html/rfc3548) + // (2) the final quantum of encoding input is exactly 8 bits; here, the + // final unit of encoded output will be two characters followed by two + // "=" padding characters, or + len += 2; + if (do_padding) { + len += 2; + } + } else { // (input_len % 3 == 2) + // (from https://tools.ietf.org/html/rfc3548) + // (3) the final quantum of encoding input is exactly 16 bits; here, the + // final unit of encoded output will be three characters followed by one + // "=" padding character. + len += 3; + if (do_padding) { + len += 1; + } + } + + assert(len >= input_len); // make sure we didn't overflow + return len; +} + +size_t Base64EscapeInternal(const unsigned char* src, size_t szsrc, char* dest, + size_t szdest, const char* base64, + bool do_padding) { + static const char kPad64 = '='; + + if (szsrc * 4 > szdest * 3) return 0; + + char* cur_dest = dest; + const unsigned char* cur_src = src; + + char* const limit_dest = dest + szdest; + const unsigned char* const limit_src = src + szsrc; + + // Three bytes of data encodes to four characters of cyphertext. + // So we can pump through three-byte chunks atomically. + if (szsrc >= 3) { // "limit_src - 3" is UB if szsrc < 3. + while (cur_src < limit_src - 3) { // While we have >= 32 bits. + uint32_t in = y_absl::big_endian::Load32(cur_src) >> 8; + + cur_dest[0] = base64[in >> 18]; + in &= 0x3FFFF; + cur_dest[1] = base64[in >> 12]; + in &= 0xFFF; + cur_dest[2] = base64[in >> 6]; + in &= 0x3F; + cur_dest[3] = base64[in]; + + cur_dest += 4; + cur_src += 3; + } + } + // To save time, we didn't update szdest or szsrc in the loop. So do it now. + szdest = limit_dest - cur_dest; + szsrc = limit_src - cur_src; + + /* now deal with the tail (<=3 bytes) */ + switch (szsrc) { + case 0: + // Nothing left; nothing more to do. + break; + case 1: { + // One byte left: this encodes to two characters, and (optionally) + // two pad characters to round out the four-character cypherblock. + if (szdest < 2) return 0; + uint32_t in = cur_src[0]; + cur_dest[0] = base64[in >> 2]; + in &= 0x3; + cur_dest[1] = base64[in << 4]; + cur_dest += 2; + szdest -= 2; + if (do_padding) { + if (szdest < 2) return 0; + cur_dest[0] = kPad64; + cur_dest[1] = kPad64; + cur_dest += 2; + szdest -= 2; + } + break; + } + case 2: { + // Two bytes left: this encodes to three characters, and (optionally) + // one pad character to round out the four-character cypherblock. + if (szdest < 3) return 0; + uint32_t in = y_absl::big_endian::Load16(cur_src); + cur_dest[0] = base64[in >> 10]; + in &= 0x3FF; + cur_dest[1] = base64[in >> 4]; + in &= 0x00F; + cur_dest[2] = base64[in << 2]; + cur_dest += 3; + szdest -= 3; + if (do_padding) { + if (szdest < 1) return 0; + cur_dest[0] = kPad64; + cur_dest += 1; + szdest -= 1; + } + break; + } + case 3: { + // Three bytes left: same as in the big loop above. We can't do this in + // the loop because the loop above always reads 4 bytes, and the fourth + // byte is past the end of the input. + if (szdest < 4) return 0; + uint32_t in = (cur_src[0] << 16) + y_absl::big_endian::Load16(cur_src + 1); + cur_dest[0] = base64[in >> 18]; + in &= 0x3FFFF; + cur_dest[1] = base64[in >> 12]; + in &= 0xFFF; + cur_dest[2] = base64[in >> 6]; + in &= 0x3F; + cur_dest[3] = base64[in]; + cur_dest += 4; + szdest -= 4; + break; + } + default: + // Should not be reached: blocks of 4 bytes are handled + // in the while loop before this switch statement. + ABSL_RAW_LOG(FATAL, "Logic problem? szsrc = %zu", szsrc); + break; + } + return (cur_dest - dest); +} + +} // namespace strings_internal +ABSL_NAMESPACE_END +} // namespace y_absl diff --git a/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/escaping.h b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/escaping.h new file mode 100644 index 0000000000..d62fc0fbcb --- /dev/null +++ b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/escaping.h @@ -0,0 +1,58 @@ +// Copyright 2020 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ABSL_STRINGS_INTERNAL_ESCAPING_H_ +#define ABSL_STRINGS_INTERNAL_ESCAPING_H_ + +#include <cassert> + +#include "y_absl/strings/internal/resize_uninitialized.h" + +namespace y_absl { +ABSL_NAMESPACE_BEGIN +namespace strings_internal { + +ABSL_CONST_INIT extern const char kBase64Chars[]; + +// Calculates how long a string will be when it is base64 encoded given its +// length and whether or not the result should be padded. +size_t CalculateBase64EscapedLenInternal(size_t input_len, bool do_padding); + +// Base64-encodes `src` using the alphabet provided in `base64` and writes the +// result to `dest`. If `do_padding` is true, `dest` is padded with '=' chars +// until its length is a multiple of 3. Returns the length of `dest`. +size_t Base64EscapeInternal(const unsigned char* src, size_t szsrc, char* dest, + size_t szdest, const char* base64, bool do_padding); + +// Base64-encodes `src` using the alphabet provided in `base64` and writes the +// result to `dest`. If `do_padding` is true, `dest` is padded with '=' chars +// until its length is a multiple of 3. +template <typename String> +void Base64EscapeInternal(const unsigned char* src, size_t szsrc, String* dest, + bool do_padding, const char* base64_chars) { + const size_t calc_escaped_size = + CalculateBase64EscapedLenInternal(szsrc, do_padding); + STLStringResizeUninitialized(dest, calc_escaped_size); + + const size_t escaped_len = Base64EscapeInternal( + src, szsrc, &(*dest)[0], dest->size(), base64_chars, do_padding); + assert(calc_escaped_size == escaped_len); + dest->erase(escaped_len); +} + +} // namespace strings_internal +ABSL_NAMESPACE_END +} // namespace y_absl + +#endif // ABSL_STRINGS_INTERNAL_ESCAPING_H_ diff --git a/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/escaping_test_common.h b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/escaping_test_common.h new file mode 100644 index 0000000000..f145127225 --- /dev/null +++ b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/escaping_test_common.h @@ -0,0 +1,133 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// This test contains common things needed by both escaping_test.cc and +// escaping_benchmark.cc. + +#ifndef ABSL_STRINGS_INTERNAL_ESCAPING_TEST_COMMON_H_ +#define ABSL_STRINGS_INTERNAL_ESCAPING_TEST_COMMON_H_ + +#include <array> +#include "y_absl/strings/string_view.h" + +namespace y_absl { +ABSL_NAMESPACE_BEGIN +namespace strings_internal { + +struct base64_testcase { + y_absl::string_view plaintext; + y_absl::string_view cyphertext; +}; + +inline const std::array<base64_testcase, 5>& base64_strings() { + static const std::array<base64_testcase, 5> testcase{{ + // Some google quotes + // Cyphertext created with "uuencode (GNU sharutils) 4.6.3" + // (Note that we're testing the websafe encoding, though, so if + // you add messages, be sure to run "tr -- '+/' '-_'" on the output) + { "I was always good at math and science, and I never realized " + "that was unusual or somehow undesirable. So one of the things " + "I care a lot about is helping to remove that stigma, " + "to show girls that you can be feminine, you can like the things " + "that girls like, but you can also be really good at technology. " + "You can be really good at building things." + " - Marissa Meyer, Newsweek, 2010-12-22" "\n", + + "SSB3YXMgYWx3YXlzIGdvb2QgYXQgbWF0aCBhbmQgc2NpZW5jZSwgYW5kIEkg" + "bmV2ZXIgcmVhbGl6ZWQgdGhhdCB3YXMgdW51c3VhbCBvciBzb21laG93IHVu" + "ZGVzaXJhYmxlLiBTbyBvbmUgb2YgdGhlIHRoaW5ncyBJIGNhcmUgYSBsb3Qg" + "YWJvdXQgaXMgaGVscGluZyB0byByZW1vdmUgdGhhdCBzdGlnbWEsIHRvIHNo" + "b3cgZ2lybHMgdGhhdCB5b3UgY2FuIGJlIGZlbWluaW5lLCB5b3UgY2FuIGxp" + "a2UgdGhlIHRoaW5ncyB0aGF0IGdpcmxzIGxpa2UsIGJ1dCB5b3UgY2FuIGFs" + "c28gYmUgcmVhbGx5IGdvb2QgYXQgdGVjaG5vbG9neS4gWW91IGNhbiBiZSBy" + "ZWFsbHkgZ29vZCBhdCBidWlsZGluZyB0aGluZ3MuIC0gTWFyaXNzYSBNZXll" + "ciwgTmV3c3dlZWssIDIwMTAtMTItMjIK" }, + + { "Typical first year for a new cluster: " + "~0.5 overheating " + "~1 PDU failure " + "~1 rack-move " + "~1 network rewiring " + "~20 rack failures " + "~5 racks go wonky " + "~8 network maintenances " + "~12 router reloads " + "~3 router failures " + "~dozens of minor 30-second blips for dns " + "~1000 individual machine failures " + "~thousands of hard drive failures " + "slow disks, bad memory, misconfigured machines, flaky machines, etc." + " - Jeff Dean, The Joys of Real Hardware" "\n", + + "VHlwaWNhbCBmaXJzdCB5ZWFyIGZvciBhIG5ldyBjbHVzdGVyOiB-MC41IG92" + "ZXJoZWF0aW5nIH4xIFBEVSBmYWlsdXJlIH4xIHJhY2stbW92ZSB-MSBuZXR3" + "b3JrIHJld2lyaW5nIH4yMCByYWNrIGZhaWx1cmVzIH41IHJhY2tzIGdvIHdv" + "bmt5IH44IG5ldHdvcmsgbWFpbnRlbmFuY2VzIH4xMiByb3V0ZXIgcmVsb2Fk" + "cyB-MyByb3V0ZXIgZmFpbHVyZXMgfmRvemVucyBvZiBtaW5vciAzMC1zZWNv" + "bmQgYmxpcHMgZm9yIGRucyB-MTAwMCBpbmRpdmlkdWFsIG1hY2hpbmUgZmFp" + "bHVyZXMgfnRob3VzYW5kcyBvZiBoYXJkIGRyaXZlIGZhaWx1cmVzIHNsb3cg" + "ZGlza3MsIGJhZCBtZW1vcnksIG1pc2NvbmZpZ3VyZWQgbWFjaGluZXMsIGZs" + "YWt5IG1hY2hpbmVzLCBldGMuIC0gSmVmZiBEZWFuLCBUaGUgSm95cyBvZiBS" + "ZWFsIEhhcmR3YXJlCg" }, + + { "I'm the head of the webspam team at Google. " + "That means that if you type your name into Google and get porn back, " + "it's my fault. Unless you're a porn star, in which case porn is a " + "completely reasonable response." + " - Matt Cutts, Google Plus" "\n", + + "SSdtIHRoZSBoZWFkIG9mIHRoZSB3ZWJzcGFtIHRlYW0gYXQgR29vZ2xlLiAg" + "VGhhdCBtZWFucyB0aGF0IGlmIHlvdSB0eXBlIHlvdXIgbmFtZSBpbnRvIEdv" + "b2dsZSBhbmQgZ2V0IHBvcm4gYmFjaywgaXQncyBteSBmYXVsdC4gVW5sZXNz" + "IHlvdSdyZSBhIHBvcm4gc3RhciwgaW4gd2hpY2ggY2FzZSBwb3JuIGlzIGEg" + "Y29tcGxldGVseSByZWFzb25hYmxlIHJlc3BvbnNlLiAtIE1hdHQgQ3V0dHMs" + "IEdvb2dsZSBQbHVzCg" }, + + { "It will still be a long time before machines approach human " + "intelligence. " + "But luckily, machines don't actually have to be intelligent; " + "they just have to fake it. Access to a wealth of information, " + "combined with a rudimentary decision-making capacity, " + "can often be almost as useful. Of course, the results are better yet " + "when coupled with intelligence. A reference librarian with access to " + "a good search engine is a formidable tool." + " - Craig Silverstein, Siemens Pictures of the Future, Spring 2004" + "\n", + + "SXQgd2lsbCBzdGlsbCBiZSBhIGxvbmcgdGltZSBiZWZvcmUgbWFjaGluZXMg" + "YXBwcm9hY2ggaHVtYW4gaW50ZWxsaWdlbmNlLiBCdXQgbHVja2lseSwgbWFj" + "aGluZXMgZG9uJ3QgYWN0dWFsbHkgaGF2ZSB0byBiZSBpbnRlbGxpZ2VudDsg" + "dGhleSBqdXN0IGhhdmUgdG8gZmFrZSBpdC4gQWNjZXNzIHRvIGEgd2VhbHRo" + "IG9mIGluZm9ybWF0aW9uLCBjb21iaW5lZCB3aXRoIGEgcnVkaW1lbnRhcnkg" + "ZGVjaXNpb24tbWFraW5nIGNhcGFjaXR5LCBjYW4gb2Z0ZW4gYmUgYWxtb3N0" + "IGFzIHVzZWZ1bC4gT2YgY291cnNlLCB0aGUgcmVzdWx0cyBhcmUgYmV0dGVy" + "IHlldCB3aGVuIGNvdXBsZWQgd2l0aCBpbnRlbGxpZ2VuY2UuIEEgcmVmZXJl" + "bmNlIGxpYnJhcmlhbiB3aXRoIGFjY2VzcyB0byBhIGdvb2Qgc2VhcmNoIGVu" + "Z2luZSBpcyBhIGZvcm1pZGFibGUgdG9vbC4gLSBDcmFpZyBTaWx2ZXJzdGVp" + "biwgU2llbWVucyBQaWN0dXJlcyBvZiB0aGUgRnV0dXJlLCBTcHJpbmcgMjAw" + "NAo" }, + + // Degenerate edge case + { "", + "" }, + }}; + + return testcase; +} + +} // namespace strings_internal +ABSL_NAMESPACE_END +} // namespace y_absl + +#endif // ABSL_STRINGS_INTERNAL_ESCAPING_TEST_COMMON_H_ diff --git a/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/memutil.cc b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/memutil.cc new file mode 100644 index 0000000000..0ba6574fdb --- /dev/null +++ b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/memutil.cc @@ -0,0 +1,112 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "y_absl/strings/internal/memutil.h" + +#include <cstdlib> + +namespace y_absl { +ABSL_NAMESPACE_BEGIN +namespace strings_internal { + +int memcasecmp(const char* s1, const char* s2, size_t len) { + const unsigned char* us1 = reinterpret_cast<const unsigned char*>(s1); + const unsigned char* us2 = reinterpret_cast<const unsigned char*>(s2); + + for (size_t i = 0; i < len; i++) { + const int diff = + int{static_cast<unsigned char>(y_absl::ascii_tolower(us1[i]))} - + int{static_cast<unsigned char>(y_absl::ascii_tolower(us2[i]))}; + if (diff != 0) return diff; + } + return 0; +} + +char* memdup(const char* s, size_t slen) { + void* copy; + if ((copy = malloc(slen)) == nullptr) return nullptr; + memcpy(copy, s, slen); + return reinterpret_cast<char*>(copy); +} + +char* memrchr(const char* s, int c, size_t slen) { + for (const char* e = s + slen - 1; e >= s; e--) { + if (*e == c) return const_cast<char*>(e); + } + return nullptr; +} + +size_t memspn(const char* s, size_t slen, const char* accept) { + const char* p = s; + const char* spanp; + char c, sc; + +cont: + c = *p++; + if (slen-- == 0) return p - 1 - s; + for (spanp = accept; (sc = *spanp++) != '\0';) + if (sc == c) goto cont; + return p - 1 - s; +} + +size_t memcspn(const char* s, size_t slen, const char* reject) { + const char* p = s; + const char* spanp; + char c, sc; + + while (slen-- != 0) { + c = *p++; + for (spanp = reject; (sc = *spanp++) != '\0';) + if (sc == c) return p - 1 - s; + } + return p - s; +} + +char* mempbrk(const char* s, size_t slen, const char* accept) { + const char* scanp; + int sc; + + for (; slen; ++s, --slen) { + for (scanp = accept; (sc = *scanp++) != '\0';) + if (sc == *s) return const_cast<char*>(s); + } + return nullptr; +} + +// This is significantly faster for case-sensitive matches with very +// few possible matches. See unit test for benchmarks. +const char* memmatch(const char* phaystack, size_t haylen, const char* pneedle, + size_t neelen) { + if (0 == neelen) { + return phaystack; // even if haylen is 0 + } + if (haylen < neelen) return nullptr; + + const char* match; + const char* hayend = phaystack + haylen - neelen + 1; + // A static cast is used here to work around the fact that memchr returns + // a void* on Posix-compliant systems and const void* on Windows. + while ((match = static_cast<const char*>( + memchr(phaystack, pneedle[0], hayend - phaystack)))) { + if (memcmp(match, pneedle, neelen) == 0) + return match; + else + phaystack = match + 1; + } + return nullptr; +} + +} // namespace strings_internal +ABSL_NAMESPACE_END +} // namespace y_absl diff --git a/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/memutil.h b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/memutil.h new file mode 100644 index 0000000000..ee442fe25f --- /dev/null +++ b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/memutil.h @@ -0,0 +1,148 @@ +// +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +// These routines provide mem versions of standard C string routines, +// such as strpbrk. They function exactly the same as the str versions, +// so if you wonder what they are, replace the word "mem" by +// "str" and check out the man page. I could return void*, as the +// strutil.h mem*() routines tend to do, but I return char* instead +// since this is by far the most common way these functions are called. +// +// The difference between the mem and str versions is the mem version +// takes a pointer and a length, rather than a '\0'-terminated string. +// The memcase* routines defined here assume the locale is "C" +// (they use y_absl::ascii_tolower instead of tolower). +// +// These routines are based on the BSD library. +// +// Here's a list of routines from string.h, and their mem analogues. +// Functions in lowercase are defined in string.h; those in UPPERCASE +// are defined here: +// +// strlen -- +// strcat strncat MEMCAT +// strcpy strncpy memcpy +// -- memccpy (very cool function, btw) +// -- memmove +// -- memset +// strcmp strncmp memcmp +// strcasecmp strncasecmp MEMCASECMP +// strchr memchr +// strcoll -- +// strxfrm -- +// strdup strndup MEMDUP +// strrchr MEMRCHR +// strspn MEMSPN +// strcspn MEMCSPN +// strpbrk MEMPBRK +// strstr MEMSTR MEMMEM +// (g)strcasestr MEMCASESTR MEMCASEMEM +// strtok -- +// strprefix MEMPREFIX (strprefix is from strutil.h) +// strcaseprefix MEMCASEPREFIX (strcaseprefix is from strutil.h) +// strsuffix MEMSUFFIX (strsuffix is from strutil.h) +// strcasesuffix MEMCASESUFFIX (strcasesuffix is from strutil.h) +// -- MEMIS +// -- MEMCASEIS +// strcount MEMCOUNT (strcount is from strutil.h) + +#ifndef ABSL_STRINGS_INTERNAL_MEMUTIL_H_ +#define ABSL_STRINGS_INTERNAL_MEMUTIL_H_ + +#include <cstddef> +#include <cstring> + +#include "y_absl/base/port.h" // disable some warnings on Windows +#include "y_absl/strings/ascii.h" // for y_absl::ascii_tolower + +namespace y_absl { +ABSL_NAMESPACE_BEGIN +namespace strings_internal { + +inline char* memcat(char* dest, size_t destlen, const char* src, + size_t srclen) { + return reinterpret_cast<char*>(memcpy(dest + destlen, src, srclen)); +} + +int memcasecmp(const char* s1, const char* s2, size_t len); +char* memdup(const char* s, size_t slen); +char* memrchr(const char* s, int c, size_t slen); +size_t memspn(const char* s, size_t slen, const char* accept); +size_t memcspn(const char* s, size_t slen, const char* reject); +char* mempbrk(const char* s, size_t slen, const char* accept); + +// This is for internal use only. Don't call this directly +template <bool case_sensitive> +const char* int_memmatch(const char* haystack, size_t haylen, + const char* needle, size_t neelen) { + if (0 == neelen) { + return haystack; // even if haylen is 0 + } + const char* hayend = haystack + haylen; + const char* needlestart = needle; + const char* needleend = needlestart + neelen; + + for (; haystack < hayend; ++haystack) { + char hay = case_sensitive + ? *haystack + : y_absl::ascii_tolower(static_cast<unsigned char>(*haystack)); + char nee = case_sensitive + ? *needle + : y_absl::ascii_tolower(static_cast<unsigned char>(*needle)); + if (hay == nee) { + if (++needle == needleend) { + return haystack + 1 - neelen; + } + } else if (needle != needlestart) { + // must back up haystack in case a prefix matched (find "aab" in "aaab") + haystack -= needle - needlestart; // for loop will advance one more + needle = needlestart; + } + } + return nullptr; +} + +// These are the guys you can call directly +inline const char* memstr(const char* phaystack, size_t haylen, + const char* pneedle) { + return int_memmatch<true>(phaystack, haylen, pneedle, strlen(pneedle)); +} + +inline const char* memcasestr(const char* phaystack, size_t haylen, + const char* pneedle) { + return int_memmatch<false>(phaystack, haylen, pneedle, strlen(pneedle)); +} + +inline const char* memmem(const char* phaystack, size_t haylen, + const char* pneedle, size_t needlelen) { + return int_memmatch<true>(phaystack, haylen, pneedle, needlelen); +} + +inline const char* memcasemem(const char* phaystack, size_t haylen, + const char* pneedle, size_t needlelen) { + return int_memmatch<false>(phaystack, haylen, pneedle, needlelen); +} + +// This is significantly faster for case-sensitive matches with very +// few possible matches. See unit test for benchmarks. +const char* memmatch(const char* phaystack, size_t haylen, const char* pneedle, + size_t neelen); + +} // namespace strings_internal +ABSL_NAMESPACE_END +} // namespace y_absl + +#endif // ABSL_STRINGS_INTERNAL_MEMUTIL_H_ diff --git a/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/numbers_test_common.h b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/numbers_test_common.h new file mode 100644 index 0000000000..12aec3ac11 --- /dev/null +++ b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/numbers_test_common.h @@ -0,0 +1,184 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// This file contains common things needed by numbers_test.cc, +// numbers_legacy_test.cc and numbers_benchmark.cc. + +#ifndef ABSL_STRINGS_INTERNAL_NUMBERS_TEST_COMMON_H_ +#define ABSL_STRINGS_INTERNAL_NUMBERS_TEST_COMMON_H_ + +#include <array> +#include <cstdint> +#include <limits> +#include <util/generic/string.h> + +#include "y_absl/base/config.h" + +namespace y_absl { +ABSL_NAMESPACE_BEGIN +namespace strings_internal { + +template <typename IntType> +inline bool Itoa(IntType value, int base, TString* destination) { + destination->clear(); + if (base <= 1 || base > 36) { + return false; + } + + if (value == 0) { + destination->push_back('0'); + return true; + } + + bool negative = value < 0; + while (value != 0) { + const IntType next_value = value / base; + // Can't use std::abs here because of problems when IntType is unsigned. + int remainder = + static_cast<int>(value > next_value * base ? value - next_value * base + : next_value * base - value); + char c = remainder < 10 ? '0' + remainder : 'A' + remainder - 10; + destination->insert(0, 1, c); + value = next_value; + } + + if (negative) { + destination->insert(0, 1, '-'); + } + return true; +} + +struct uint32_test_case { + const char* str; + bool expect_ok; + int base; // base to pass to the conversion function + uint32_t expected; +}; + +inline const std::array<uint32_test_case, 27>& strtouint32_test_cases() { + static const std::array<uint32_test_case, 27> test_cases{{ + {"0xffffffff", true, 16, (std::numeric_limits<uint32_t>::max)()}, + {"0x34234324", true, 16, 0x34234324}, + {"34234324", true, 16, 0x34234324}, + {"0", true, 16, 0}, + {" \t\n 0xffffffff", true, 16, (std::numeric_limits<uint32_t>::max)()}, + {" \f\v 46", true, 10, 46}, // must accept weird whitespace + {" \t\n 72717222", true, 8, 072717222}, + {" \t\n 072717222", true, 8, 072717222}, + {" \t\n 072717228", false, 8, 07271722}, + {"0", true, 0, 0}, + + // Base-10 version. + {"34234324", true, 0, 34234324}, + {"4294967295", true, 0, (std::numeric_limits<uint32_t>::max)()}, + {"34234324 \n\t", true, 10, 34234324}, + + // Unusual base + {"0", true, 3, 0}, + {"2", true, 3, 2}, + {"11", true, 3, 4}, + + // Invalid uints. + {"", false, 0, 0}, + {" ", false, 0, 0}, + {"abc", false, 0, 0}, // would be valid hex, but prefix is missing + {"34234324a", false, 0, 34234324}, + {"34234.3", false, 0, 34234}, + {"-1", false, 0, 0}, + {" -123", false, 0, 0}, + {" \t\n -123", false, 0, 0}, + + // Out of bounds. + {"4294967296", false, 0, (std::numeric_limits<uint32_t>::max)()}, + {"0x100000000", false, 0, (std::numeric_limits<uint32_t>::max)()}, + {nullptr, false, 0, 0}, + }}; + return test_cases; +} + +struct uint64_test_case { + const char* str; + bool expect_ok; + int base; + uint64_t expected; +}; + +inline const std::array<uint64_test_case, 34>& strtouint64_test_cases() { + static const std::array<uint64_test_case, 34> test_cases{{ + {"0x3423432448783446", true, 16, int64_t{0x3423432448783446}}, + {"3423432448783446", true, 16, int64_t{0x3423432448783446}}, + + {"0", true, 16, 0}, + {"000", true, 0, 0}, + {"0", true, 0, 0}, + {" \t\n 0xffffffffffffffff", true, 16, + (std::numeric_limits<uint64_t>::max)()}, + + {"012345670123456701234", true, 8, int64_t{012345670123456701234}}, + {"12345670123456701234", true, 8, int64_t{012345670123456701234}}, + + {"12845670123456701234", false, 8, 0}, + + // Base-10 version. + {"34234324487834466", true, 0, int64_t{34234324487834466}}, + + {" \t\n 18446744073709551615", true, 0, + (std::numeric_limits<uint64_t>::max)()}, + + {"34234324487834466 \n\t ", true, 0, int64_t{34234324487834466}}, + + {" \f\v 46", true, 10, 46}, // must accept weird whitespace + + // Unusual base + {"0", true, 3, 0}, + {"2", true, 3, 2}, + {"11", true, 3, 4}, + + {"0", true, 0, 0}, + + // Invalid uints. + {"", false, 0, 0}, + {" ", false, 0, 0}, + {"abc", false, 0, 0}, + {"34234324487834466a", false, 0, 0}, + {"34234487834466.3", false, 0, 0}, + {"-1", false, 0, 0}, + {" -123", false, 0, 0}, + {" \t\n -123", false, 0, 0}, + + // Out of bounds. + {"18446744073709551616", false, 10, 0}, + {"18446744073709551616", false, 0, 0}, + {"0x10000000000000000", false, 16, + (std::numeric_limits<uint64_t>::max)()}, + {"0X10000000000000000", false, 16, + (std::numeric_limits<uint64_t>::max)()}, // 0X versus 0x. + {"0x10000000000000000", false, 0, (std::numeric_limits<uint64_t>::max)()}, + {"0X10000000000000000", false, 0, + (std::numeric_limits<uint64_t>::max)()}, // 0X versus 0x. + + {"0x1234", true, 16, 0x1234}, + + // Base-10 string version. + {"1234", true, 0, 1234}, + {nullptr, false, 0, 0}, + }}; + return test_cases; +} + +} // namespace strings_internal +ABSL_NAMESPACE_END +} // namespace y_absl + +#endif // ABSL_STRINGS_INTERNAL_NUMBERS_TEST_COMMON_H_ diff --git a/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/ostringstream.cc b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/ostringstream.cc new file mode 100644 index 0000000000..ba18857d83 --- /dev/null +++ b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/ostringstream.cc @@ -0,0 +1,36 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "y_absl/strings/internal/ostringstream.h" + +namespace y_absl { +ABSL_NAMESPACE_BEGIN +namespace strings_internal { + +OStringStream::Buf::int_type OStringStream::overflow(int c) { + assert(s_); + if (!Buf::traits_type::eq_int_type(c, Buf::traits_type::eof())) + s_->push_back(static_cast<char>(c)); + return 1; +} + +std::streamsize OStringStream::xsputn(const char* s, std::streamsize n) { + assert(s_); + s_->append(s, n); + return n; +} + +} // namespace strings_internal +ABSL_NAMESPACE_END +} // namespace y_absl diff --git a/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/ostringstream.h b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/ostringstream.h new file mode 100644 index 0000000000..d00cef9c23 --- /dev/null +++ b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/ostringstream.h @@ -0,0 +1,89 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ABSL_STRINGS_INTERNAL_OSTRINGSTREAM_H_ +#define ABSL_STRINGS_INTERNAL_OSTRINGSTREAM_H_ + +#include <cassert> +#include <ostream> +#include <streambuf> +#include <util/generic/string.h> + +#include "y_absl/base/port.h" + +namespace y_absl { +ABSL_NAMESPACE_BEGIN +namespace strings_internal { + +// The same as std::ostringstream but appends to a user-specified TString, +// and is faster. It is ~70% faster to create, ~50% faster to write to, and +// completely free to extract the result TString. +// +// TString s; +// OStringStream strm(&s); +// strm << 42 << ' ' << 3.14; // appends to `s` +// +// The stream object doesn't have to be named. Starting from C++11 operator<< +// works with rvalues of std::ostream. +// +// TString s; +// OStringStream(&s) << 42 << ' ' << 3.14; // appends to `s` +// +// OStringStream is faster to create than std::ostringstream but it's still +// relatively slow. Avoid creating multiple streams where a single stream will +// do. +// +// Creates unnecessary instances of OStringStream: slow. +// +// TString s; +// OStringStream(&s) << 42; +// OStringStream(&s) << ' '; +// OStringStream(&s) << 3.14; +// +// Creates a single instance of OStringStream and reuses it: fast. +// +// TString s; +// OStringStream strm(&s); +// strm << 42; +// strm << ' '; +// strm << 3.14; +// +// Note: flush() has no effect. No reason to call it. +class OStringStream : private std::basic_streambuf<char>, public std::ostream { + public: + // The argument can be null, in which case you'll need to call str(p) with a + // non-null argument before you can write to the stream. + // + // The destructor of OStringStream doesn't use the TString. It's OK to + // destroy the TString before the stream. + explicit OStringStream(TString* s) : std::ostream(this), s_(s) {} + + TString* str() { return s_; } + const TString* str() const { return s_; } + void str(TString* s) { s_ = s; } + + private: + using Buf = std::basic_streambuf<char>; + + Buf::int_type overflow(int c) override; + std::streamsize xsputn(const char* s, std::streamsize n) override; + + TString* s_; +}; + +} // namespace strings_internal +ABSL_NAMESPACE_END +} // namespace y_absl + +#endif // ABSL_STRINGS_INTERNAL_OSTRINGSTREAM_H_ diff --git a/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/pow10_helper.h b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/pow10_helper.h new file mode 100644 index 0000000000..e4d41d7e4e --- /dev/null +++ b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/pow10_helper.h @@ -0,0 +1,40 @@ +// +// Copyright 2018 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// This test helper library contains a table of powers of 10, to guarantee +// precise values are computed across the full range of doubles. We can't rely +// on the pow() function, because not all standard libraries ship a version +// that is precise. +#ifndef ABSL_STRINGS_INTERNAL_POW10_HELPER_H_ +#define ABSL_STRINGS_INTERNAL_POW10_HELPER_H_ + +#include <vector> + +#include "y_absl/base/config.h" + +namespace y_absl { +ABSL_NAMESPACE_BEGIN +namespace strings_internal { + +// Computes the precise value of 10^exp. (I.e. the nearest representable +// double to the exact value, rounding to nearest-even in the (single) case of +// being exactly halfway between.) +double Pow10(int exp); + +} // namespace strings_internal +ABSL_NAMESPACE_END +} // namespace y_absl + +#endif // ABSL_STRINGS_INTERNAL_POW10_HELPER_H_ diff --git a/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/resize_uninitialized.h b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/resize_uninitialized.h new file mode 100644 index 0000000000..14860bb237 --- /dev/null +++ b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/resize_uninitialized.h @@ -0,0 +1,119 @@ +// +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +#ifndef ABSL_STRINGS_INTERNAL_RESIZE_UNINITIALIZED_H_ +#define ABSL_STRINGS_INTERNAL_RESIZE_UNINITIALIZED_H_ + +#include <algorithm> +#include <util/generic/string.h> +#include <type_traits> +#include <utility> + +#include "y_absl/base/port.h" +#include "y_absl/meta/type_traits.h" // for void_t + +namespace y_absl { +ABSL_NAMESPACE_BEGIN +namespace strings_internal { + +// In this type trait, we look for a __resize_default_init member function, and +// we use it if available, otherwise, we use resize. We provide HasMember to +// indicate whether __resize_default_init is present. +template <typename string_type, typename = void> +struct ResizeUninitializedTraits { + using HasMember = std::false_type; + static void Resize(string_type* s, size_t new_size) { s->resize(new_size); } +}; + +// __resize_default_init is provided by libc++ >= 8.0 +template <typename string_type> +struct ResizeUninitializedTraits< + string_type, y_absl::void_t<decltype(std::declval<string_type&>() + .__resize_default_init(237))> > { + using HasMember = std::true_type; + static void Resize(string_type* s, size_t new_size) { + s->__resize_default_init(new_size); + } +}; + +// Returns true if the TString implementation supports a resize where +// the new characters added to the TString are left untouched. +// +// (A better name might be "STLStringSupportsUninitializedResize", alluding to +// the previous function.) +template <typename string_type> +inline constexpr bool STLStringSupportsNontrashingResize(string_type*) { + return ResizeUninitializedTraits<string_type>::HasMember::value; +} + +// Like str->resize(new_size), except any new characters added to "*str" as a +// result of resizing may be left uninitialized, rather than being filled with +// '0' bytes. Typically used when code is then going to overwrite the backing +// store of the TString with known data. +template <typename string_type, typename = void> +inline void STLStringResizeUninitialized(string_type* s, size_t new_size) { + ResizeUninitializedTraits<string_type>::Resize(s, new_size); +} + +// Used to ensure exponential growth so that the amortized complexity of +// increasing the string size by a small amount is O(1), in contrast to +// O(str->size()) in the case of precise growth. +template <typename string_type> +void STLStringReserveAmortized(string_type* s, size_t new_size) { + const size_t cap = s->capacity(); + if (new_size > cap) { + // Make sure to always grow by at least a factor of 2x. + s->reserve((std::max)(new_size, 2 * cap)); + } +} + +// In this type trait, we look for an __append_default_init member function, and +// we use it if available, otherwise, we use append. +template <typename string_type, typename = void> +struct AppendUninitializedTraits { + static void Append(string_type* s, size_t n) { + s->append(n, typename string_type::value_type()); + } +}; + +template <typename string_type> +struct AppendUninitializedTraits< + string_type, y_absl::void_t<decltype(std::declval<string_type&>() + .__append_default_init(237))> > { + static void Append(string_type* s, size_t n) { + s->__append_default_init(n); + } +}; + +// Like STLStringResizeUninitialized(str, new_size), except guaranteed to use +// exponential growth so that the amortized complexity of increasing the string +// size by a small amount is O(1), in contrast to O(str->size()) in the case of +// precise growth. +template <typename string_type> +void STLStringResizeUninitializedAmortized(string_type* s, size_t new_size) { + const size_t size = s->size(); + if (new_size > size) { + AppendUninitializedTraits<string_type>::Append(s, new_size - size); + } else { + s->erase(new_size); + } +} + +} // namespace strings_internal +ABSL_NAMESPACE_END +} // namespace y_absl + +#endif // ABSL_STRINGS_INTERNAL_RESIZE_UNINITIALIZED_H_ diff --git a/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/stl_type_traits.h b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/stl_type_traits.h new file mode 100644 index 0000000000..db8d4635d0 --- /dev/null +++ b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/stl_type_traits.h @@ -0,0 +1,248 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +// Thie file provides the IsStrictlyBaseOfAndConvertibleToSTLContainer type +// trait metafunction to assist in working with the _GLIBCXX_DEBUG debug +// wrappers of STL containers. +// +// DO NOT INCLUDE THIS FILE DIRECTLY. Use this file by including +// y_absl/strings/str_split.h. +// +// IWYU pragma: private, include "y_absl/strings/str_split.h" + +#ifndef ABSL_STRINGS_INTERNAL_STL_TYPE_TRAITS_H_ +#define ABSL_STRINGS_INTERNAL_STL_TYPE_TRAITS_H_ + +#include <array> +#include <bitset> +#include <deque> +#include <forward_list> +#include <list> +#include <map> +#include <set> +#include <type_traits> +#include <unordered_map> +#include <unordered_set> +#include <vector> + +#include "y_absl/meta/type_traits.h" + +namespace y_absl { +ABSL_NAMESPACE_BEGIN +namespace strings_internal { + +template <typename C, template <typename...> class T> +struct IsSpecializationImpl : std::false_type {}; +template <template <typename...> class T, typename... Args> +struct IsSpecializationImpl<T<Args...>, T> : std::true_type {}; +template <typename C, template <typename...> class T> +using IsSpecialization = IsSpecializationImpl<y_absl::decay_t<C>, T>; + +template <typename C> +struct IsArrayImpl : std::false_type {}; +template <template <typename, size_t> class A, typename T, size_t N> +struct IsArrayImpl<A<T, N>> : std::is_same<A<T, N>, std::array<T, N>> {}; +template <typename C> +using IsArray = IsArrayImpl<y_absl::decay_t<C>>; + +template <typename C> +struct IsBitsetImpl : std::false_type {}; +template <template <size_t> class B, size_t N> +struct IsBitsetImpl<B<N>> : std::is_same<B<N>, std::bitset<N>> {}; +template <typename C> +using IsBitset = IsBitsetImpl<y_absl::decay_t<C>>; + +template <typename C> +struct IsSTLContainer + : y_absl::disjunction< + IsArray<C>, IsBitset<C>, IsSpecialization<C, std::deque>, + IsSpecialization<C, std::forward_list>, + IsSpecialization<C, std::list>, IsSpecialization<C, std::map>, + IsSpecialization<C, std::multimap>, IsSpecialization<C, std::set>, + IsSpecialization<C, std::multiset>, + IsSpecialization<C, std::unordered_map>, + IsSpecialization<C, std::unordered_multimap>, + IsSpecialization<C, std::unordered_set>, + IsSpecialization<C, std::unordered_multiset>, + IsSpecialization<C, std::vector>> {}; + +template <typename C, template <typename...> class T, typename = void> +struct IsBaseOfSpecializationImpl : std::false_type {}; +// IsBaseOfSpecializationImpl needs multiple partial specializations to SFINAE +// on the existence of container dependent types and plug them into the STL +// template. +template <typename C, template <typename, typename> class T> +struct IsBaseOfSpecializationImpl< + C, T, y_absl::void_t<typename C::value_type, typename C::allocator_type>> + : std::is_base_of<C, + T<typename C::value_type, typename C::allocator_type>> {}; +template <typename C, template <typename, typename, typename> class T> +struct IsBaseOfSpecializationImpl< + C, T, + y_absl::void_t<typename C::key_type, typename C::key_compare, + typename C::allocator_type>> + : std::is_base_of<C, T<typename C::key_type, typename C::key_compare, + typename C::allocator_type>> {}; +template <typename C, template <typename, typename, typename, typename> class T> +struct IsBaseOfSpecializationImpl< + C, T, + y_absl::void_t<typename C::key_type, typename C::mapped_type, + typename C::key_compare, typename C::allocator_type>> + : std::is_base_of<C, + T<typename C::key_type, typename C::mapped_type, + typename C::key_compare, typename C::allocator_type>> { +}; +template <typename C, template <typename, typename, typename, typename> class T> +struct IsBaseOfSpecializationImpl< + C, T, + y_absl::void_t<typename C::key_type, typename C::hasher, + typename C::key_equal, typename C::allocator_type>> + : std::is_base_of<C, T<typename C::key_type, typename C::hasher, + typename C::key_equal, typename C::allocator_type>> { +}; +template <typename C, + template <typename, typename, typename, typename, typename> class T> +struct IsBaseOfSpecializationImpl< + C, T, + y_absl::void_t<typename C::key_type, typename C::mapped_type, + typename C::hasher, typename C::key_equal, + typename C::allocator_type>> + : std::is_base_of<C, T<typename C::key_type, typename C::mapped_type, + typename C::hasher, typename C::key_equal, + typename C::allocator_type>> {}; +template <typename C, template <typename...> class T> +using IsBaseOfSpecialization = IsBaseOfSpecializationImpl<y_absl::decay_t<C>, T>; + +template <typename C> +struct IsBaseOfArrayImpl : std::false_type {}; +template <template <typename, size_t> class A, typename T, size_t N> +struct IsBaseOfArrayImpl<A<T, N>> : std::is_base_of<A<T, N>, std::array<T, N>> { +}; +template <typename C> +using IsBaseOfArray = IsBaseOfArrayImpl<y_absl::decay_t<C>>; + +template <typename C> +struct IsBaseOfBitsetImpl : std::false_type {}; +template <template <size_t> class B, size_t N> +struct IsBaseOfBitsetImpl<B<N>> : std::is_base_of<B<N>, std::bitset<N>> {}; +template <typename C> +using IsBaseOfBitset = IsBaseOfBitsetImpl<y_absl::decay_t<C>>; + +template <typename C> +struct IsBaseOfSTLContainer + : y_absl::disjunction<IsBaseOfArray<C>, IsBaseOfBitset<C>, + IsBaseOfSpecialization<C, std::deque>, + IsBaseOfSpecialization<C, std::forward_list>, + IsBaseOfSpecialization<C, std::list>, + IsBaseOfSpecialization<C, std::map>, + IsBaseOfSpecialization<C, std::multimap>, + IsBaseOfSpecialization<C, std::set>, + IsBaseOfSpecialization<C, std::multiset>, + IsBaseOfSpecialization<C, std::unordered_map>, + IsBaseOfSpecialization<C, std::unordered_multimap>, + IsBaseOfSpecialization<C, std::unordered_set>, + IsBaseOfSpecialization<C, std::unordered_multiset>, + IsBaseOfSpecialization<C, std::vector>> {}; + +template <typename C, template <typename...> class T, typename = void> +struct IsConvertibleToSpecializationImpl : std::false_type {}; +// IsConvertibleToSpecializationImpl needs multiple partial specializations to +// SFINAE on the existence of container dependent types and plug them into the +// STL template. +template <typename C, template <typename, typename> class T> +struct IsConvertibleToSpecializationImpl< + C, T, y_absl::void_t<typename C::value_type, typename C::allocator_type>> + : std::is_convertible< + C, T<typename C::value_type, typename C::allocator_type>> {}; +template <typename C, template <typename, typename, typename> class T> +struct IsConvertibleToSpecializationImpl< + C, T, + y_absl::void_t<typename C::key_type, typename C::key_compare, + typename C::allocator_type>> + : std::is_convertible<C, T<typename C::key_type, typename C::key_compare, + typename C::allocator_type>> {}; +template <typename C, template <typename, typename, typename, typename> class T> +struct IsConvertibleToSpecializationImpl< + C, T, + y_absl::void_t<typename C::key_type, typename C::mapped_type, + typename C::key_compare, typename C::allocator_type>> + : std::is_convertible< + C, T<typename C::key_type, typename C::mapped_type, + typename C::key_compare, typename C::allocator_type>> {}; +template <typename C, template <typename, typename, typename, typename> class T> +struct IsConvertibleToSpecializationImpl< + C, T, + y_absl::void_t<typename C::key_type, typename C::hasher, + typename C::key_equal, typename C::allocator_type>> + : std::is_convertible< + C, T<typename C::key_type, typename C::hasher, typename C::key_equal, + typename C::allocator_type>> {}; +template <typename C, + template <typename, typename, typename, typename, typename> class T> +struct IsConvertibleToSpecializationImpl< + C, T, + y_absl::void_t<typename C::key_type, typename C::mapped_type, + typename C::hasher, typename C::key_equal, + typename C::allocator_type>> + : std::is_convertible<C, T<typename C::key_type, typename C::mapped_type, + typename C::hasher, typename C::key_equal, + typename C::allocator_type>> {}; +template <typename C, template <typename...> class T> +using IsConvertibleToSpecialization = + IsConvertibleToSpecializationImpl<y_absl::decay_t<C>, T>; + +template <typename C> +struct IsConvertibleToArrayImpl : std::false_type {}; +template <template <typename, size_t> class A, typename T, size_t N> +struct IsConvertibleToArrayImpl<A<T, N>> + : std::is_convertible<A<T, N>, std::array<T, N>> {}; +template <typename C> +using IsConvertibleToArray = IsConvertibleToArrayImpl<y_absl::decay_t<C>>; + +template <typename C> +struct IsConvertibleToBitsetImpl : std::false_type {}; +template <template <size_t> class B, size_t N> +struct IsConvertibleToBitsetImpl<B<N>> + : std::is_convertible<B<N>, std::bitset<N>> {}; +template <typename C> +using IsConvertibleToBitset = IsConvertibleToBitsetImpl<y_absl::decay_t<C>>; + +template <typename C> +struct IsConvertibleToSTLContainer + : y_absl::disjunction< + IsConvertibleToArray<C>, IsConvertibleToBitset<C>, + IsConvertibleToSpecialization<C, std::deque>, + IsConvertibleToSpecialization<C, std::forward_list>, + IsConvertibleToSpecialization<C, std::list>, + IsConvertibleToSpecialization<C, std::map>, + IsConvertibleToSpecialization<C, std::multimap>, + IsConvertibleToSpecialization<C, std::set>, + IsConvertibleToSpecialization<C, std::multiset>, + IsConvertibleToSpecialization<C, std::unordered_map>, + IsConvertibleToSpecialization<C, std::unordered_multimap>, + IsConvertibleToSpecialization<C, std::unordered_set>, + IsConvertibleToSpecialization<C, std::unordered_multiset>, + IsConvertibleToSpecialization<C, std::vector>> {}; + +template <typename C> +struct IsStrictlyBaseOfAndConvertibleToSTLContainer + : y_absl::conjunction<y_absl::negation<IsSTLContainer<C>>, + IsBaseOfSTLContainer<C>, + IsConvertibleToSTLContainer<C>> {}; + +} // namespace strings_internal +ABSL_NAMESPACE_END +} // namespace y_absl +#endif // ABSL_STRINGS_INTERNAL_STL_TYPE_TRAITS_H_ diff --git a/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/str_format/.yandex_meta/licenses.list.txt b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/str_format/.yandex_meta/licenses.list.txt new file mode 100644 index 0000000000..33d60b3d2b --- /dev/null +++ b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/str_format/.yandex_meta/licenses.list.txt @@ -0,0 +1,20 @@ +====================Apache-2.0==================== +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + + +====================COPYRIGHT==================== +// Copyright 2017 The Abseil Authors. + + +====================COPYRIGHT==================== +// Copyright 2020 The Abseil Authors. diff --git a/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/str_format/arg.cc b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/str_format/arg.cc new file mode 100644 index 0000000000..8d5c3b61ac --- /dev/null +++ b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/str_format/arg.cc @@ -0,0 +1,488 @@ +// Copyright 2020 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// +// POSIX spec: +// http://pubs.opengroup.org/onlinepubs/009695399/functions/fprintf.html +// +#include "y_absl/strings/internal/str_format/arg.h" + +#include <cassert> +#include <cerrno> +#include <cstdlib> +#include <util/generic/string.h> +#include <type_traits> + +#include "y_absl/base/port.h" +#include "y_absl/strings/internal/str_format/float_conversion.h" +#include "y_absl/strings/numbers.h" + +namespace y_absl { +ABSL_NAMESPACE_BEGIN +namespace str_format_internal { +namespace { + +// Reduce *capacity by s.size(), clipped to a 0 minimum. +void ReducePadding(string_view s, size_t *capacity) { + *capacity = Excess(s.size(), *capacity); +} + +// Reduce *capacity by n, clipped to a 0 minimum. +void ReducePadding(size_t n, size_t *capacity) { + *capacity = Excess(n, *capacity); +} + +template <typename T> +struct MakeUnsigned : std::make_unsigned<T> {}; +template <> +struct MakeUnsigned<y_absl::int128> { + using type = y_absl::uint128; +}; +template <> +struct MakeUnsigned<y_absl::uint128> { + using type = y_absl::uint128; +}; + +template <typename T> +struct IsSigned : std::is_signed<T> {}; +template <> +struct IsSigned<y_absl::int128> : std::true_type {}; +template <> +struct IsSigned<y_absl::uint128> : std::false_type {}; + +// Integral digit printer. +// Call one of the PrintAs* routines after construction once. +// Use with_neg_and_zero/without_neg_or_zero/is_negative to access the results. +class IntDigits { + public: + // Print the unsigned integer as octal. + // Supports unsigned integral types and uint128. + template <typename T> + void PrintAsOct(T v) { + static_assert(!IsSigned<T>::value, ""); + char *p = storage_ + sizeof(storage_); + do { + *--p = static_cast<char>('0' + (static_cast<size_t>(v) & 7)); + v >>= 3; + } while (v); + start_ = p; + size_ = storage_ + sizeof(storage_) - p; + } + + // Print the signed or unsigned integer as decimal. + // Supports all integral types. + template <typename T> + void PrintAsDec(T v) { + static_assert(std::is_integral<T>::value, ""); + start_ = storage_; + size_ = numbers_internal::FastIntToBuffer(v, storage_) - storage_; + } + + void PrintAsDec(int128 v) { + auto u = static_cast<uint128>(v); + bool add_neg = false; + if (v < 0) { + add_neg = true; + u = uint128{} - u; + } + PrintAsDec(u, add_neg); + } + + void PrintAsDec(uint128 v, bool add_neg = false) { + // This function can be sped up if needed. We can call FastIntToBuffer + // twice, or fix FastIntToBuffer to support uint128. + char *p = storage_ + sizeof(storage_); + do { + p -= 2; + numbers_internal::PutTwoDigits(static_cast<size_t>(v % 100), p); + v /= 100; + } while (v); + if (p[0] == '0') { + // We printed one too many hexits. + ++p; + } + if (add_neg) { + *--p = '-'; + } + size_ = storage_ + sizeof(storage_) - p; + start_ = p; + } + + // Print the unsigned integer as hex using lowercase. + // Supports unsigned integral types and uint128. + template <typename T> + void PrintAsHexLower(T v) { + static_assert(!IsSigned<T>::value, ""); + char *p = storage_ + sizeof(storage_); + + do { + p -= 2; + constexpr const char* table = numbers_internal::kHexTable; + std::memcpy(p, table + 2 * (static_cast<size_t>(v) & 0xFF), 2); + if (sizeof(T) == 1) break; + v >>= 8; + } while (v); + if (p[0] == '0') { + // We printed one too many digits. + ++p; + } + start_ = p; + size_ = storage_ + sizeof(storage_) - p; + } + + // Print the unsigned integer as hex using uppercase. + // Supports unsigned integral types and uint128. + template <typename T> + void PrintAsHexUpper(T v) { + static_assert(!IsSigned<T>::value, ""); + char *p = storage_ + sizeof(storage_); + + // kHexTable is only lowercase, so do it manually for uppercase. + do { + *--p = "0123456789ABCDEF"[static_cast<size_t>(v) & 15]; + v >>= 4; + } while (v); + start_ = p; + size_ = storage_ + sizeof(storage_) - p; + } + + // The printed value including the '-' sign if available. + // For inputs of value `0`, this will return "0" + string_view with_neg_and_zero() const { return {start_, size_}; } + + // The printed value not including the '-' sign. + // For inputs of value `0`, this will return "". + string_view without_neg_or_zero() const { + static_assert('-' < '0', "The check below verifies both."); + size_t advance = start_[0] <= '0' ? 1 : 0; + return {start_ + advance, size_ - advance}; + } + + bool is_negative() const { return start_[0] == '-'; } + + private: + const char *start_; + size_t size_; + // Max size: 128 bit value as octal -> 43 digits, plus sign char + char storage_[128 / 3 + 1 + 1]; +}; + +// Note: 'o' conversions do not have a base indicator, it's just that +// the '#' flag is specified to modify the precision for 'o' conversions. +string_view BaseIndicator(const IntDigits &as_digits, + const FormatConversionSpecImpl conv) { + // always show 0x for %p. + bool alt = conv.has_alt_flag() || + conv.conversion_char() == FormatConversionCharInternal::p; + bool hex = (conv.conversion_char() == FormatConversionCharInternal::x || + conv.conversion_char() == FormatConversionCharInternal::X || + conv.conversion_char() == FormatConversionCharInternal::p); + // From the POSIX description of '#' flag: + // "For x or X conversion specifiers, a non-zero result shall have + // 0x (or 0X) prefixed to it." + if (alt && hex && !as_digits.without_neg_or_zero().empty()) { + return conv.conversion_char() == FormatConversionCharInternal::X ? "0X" + : "0x"; + } + return {}; +} + +string_view SignColumn(bool neg, const FormatConversionSpecImpl conv) { + if (conv.conversion_char() == FormatConversionCharInternal::d || + conv.conversion_char() == FormatConversionCharInternal::i) { + if (neg) return "-"; + if (conv.has_show_pos_flag()) return "+"; + if (conv.has_sign_col_flag()) return " "; + } + return {}; +} + +bool ConvertCharImpl(unsigned char v, const FormatConversionSpecImpl conv, + FormatSinkImpl *sink) { + size_t fill = 0; + if (conv.width() >= 0) fill = conv.width(); + ReducePadding(1, &fill); + if (!conv.has_left_flag()) sink->Append(fill, ' '); + sink->Append(1, v); + if (conv.has_left_flag()) sink->Append(fill, ' '); + return true; +} + +bool ConvertIntImplInnerSlow(const IntDigits &as_digits, + const FormatConversionSpecImpl conv, + FormatSinkImpl *sink) { + // Print as a sequence of Substrings: + // [left_spaces][sign][base_indicator][zeroes][formatted][right_spaces] + size_t fill = 0; + if (conv.width() >= 0) fill = conv.width(); + + string_view formatted = as_digits.without_neg_or_zero(); + ReducePadding(formatted, &fill); + + string_view sign = SignColumn(as_digits.is_negative(), conv); + ReducePadding(sign, &fill); + + string_view base_indicator = BaseIndicator(as_digits, conv); + ReducePadding(base_indicator, &fill); + + int precision = conv.precision(); + bool precision_specified = precision >= 0; + if (!precision_specified) + precision = 1; + + if (conv.has_alt_flag() && + conv.conversion_char() == FormatConversionCharInternal::o) { + // From POSIX description of the '#' (alt) flag: + // "For o conversion, it increases the precision (if necessary) to + // force the first digit of the result to be zero." + if (formatted.empty() || *formatted.begin() != '0') { + int needed = static_cast<int>(formatted.size()) + 1; + precision = std::max(precision, needed); + } + } + + size_t num_zeroes = Excess(formatted.size(), precision); + ReducePadding(num_zeroes, &fill); + + size_t num_left_spaces = !conv.has_left_flag() ? fill : 0; + size_t num_right_spaces = conv.has_left_flag() ? fill : 0; + + // From POSIX description of the '0' (zero) flag: + // "For d, i, o, u, x, and X conversion specifiers, if a precision + // is specified, the '0' flag is ignored." + if (!precision_specified && conv.has_zero_flag()) { + num_zeroes += num_left_spaces; + num_left_spaces = 0; + } + + sink->Append(num_left_spaces, ' '); + sink->Append(sign); + sink->Append(base_indicator); + sink->Append(num_zeroes, '0'); + sink->Append(formatted); + sink->Append(num_right_spaces, ' '); + return true; +} + +template <typename T> +bool ConvertIntArg(T v, const FormatConversionSpecImpl conv, + FormatSinkImpl *sink) { + using U = typename MakeUnsigned<T>::type; + IntDigits as_digits; + + // This odd casting is due to a bug in -Wswitch behavior in gcc49 which causes + // it to complain about a switch/case type mismatch, even though both are + // FormatConverionChar. Likely this is because at this point + // FormatConversionChar is declared, but not defined. + switch (static_cast<uint8_t>(conv.conversion_char())) { + case static_cast<uint8_t>(FormatConversionCharInternal::c): + return ConvertCharImpl(static_cast<unsigned char>(v), conv, sink); + + case static_cast<uint8_t>(FormatConversionCharInternal::o): + as_digits.PrintAsOct(static_cast<U>(v)); + break; + + case static_cast<uint8_t>(FormatConversionCharInternal::x): + as_digits.PrintAsHexLower(static_cast<U>(v)); + break; + case static_cast<uint8_t>(FormatConversionCharInternal::X): + as_digits.PrintAsHexUpper(static_cast<U>(v)); + break; + + case static_cast<uint8_t>(FormatConversionCharInternal::u): + as_digits.PrintAsDec(static_cast<U>(v)); + break; + + case static_cast<uint8_t>(FormatConversionCharInternal::d): + case static_cast<uint8_t>(FormatConversionCharInternal::i): + as_digits.PrintAsDec(v); + break; + + case static_cast<uint8_t>(FormatConversionCharInternal::a): + case static_cast<uint8_t>(FormatConversionCharInternal::e): + case static_cast<uint8_t>(FormatConversionCharInternal::f): + case static_cast<uint8_t>(FormatConversionCharInternal::g): + case static_cast<uint8_t>(FormatConversionCharInternal::A): + case static_cast<uint8_t>(FormatConversionCharInternal::E): + case static_cast<uint8_t>(FormatConversionCharInternal::F): + case static_cast<uint8_t>(FormatConversionCharInternal::G): + return ConvertFloatImpl(static_cast<double>(v), conv, sink); + + default: + ABSL_INTERNAL_ASSUME(false); + } + + if (conv.is_basic()) { + sink->Append(as_digits.with_neg_and_zero()); + return true; + } + return ConvertIntImplInnerSlow(as_digits, conv, sink); +} + +template <typename T> +bool ConvertFloatArg(T v, const FormatConversionSpecImpl conv, + FormatSinkImpl *sink) { + return FormatConversionCharIsFloat(conv.conversion_char()) && + ConvertFloatImpl(v, conv, sink); +} + +inline bool ConvertStringArg(string_view v, const FormatConversionSpecImpl conv, + FormatSinkImpl *sink) { + if (conv.is_basic()) { + sink->Append(v); + return true; + } + return sink->PutPaddedString(v, conv.width(), conv.precision(), + conv.has_left_flag()); +} + +} // namespace + +// ==================== Strings ==================== +StringConvertResult FormatConvertImpl(const TString &v, + const FormatConversionSpecImpl conv, + FormatSinkImpl *sink) { + return {ConvertStringArg(v, conv, sink)}; +} + +StringConvertResult FormatConvertImpl(string_view v, + const FormatConversionSpecImpl conv, + FormatSinkImpl *sink) { + return {ConvertStringArg(v, conv, sink)}; +} + +ArgConvertResult<FormatConversionCharSetUnion( + FormatConversionCharSetInternal::s, FormatConversionCharSetInternal::p)> +FormatConvertImpl(const char *v, const FormatConversionSpecImpl conv, + FormatSinkImpl *sink) { + if (conv.conversion_char() == FormatConversionCharInternal::p) + return {FormatConvertImpl(VoidPtr(v), conv, sink).value}; + size_t len; + if (v == nullptr) { + len = 0; + } else if (conv.precision() < 0) { + len = std::strlen(v); + } else { + // If precision is set, we look for the NUL-terminator on the valid range. + len = std::find(v, v + conv.precision(), '\0') - v; + } + return {ConvertStringArg(string_view(v, len), conv, sink)}; +} + +// ==================== Raw pointers ==================== +ArgConvertResult<FormatConversionCharSetInternal::p> FormatConvertImpl( + VoidPtr v, const FormatConversionSpecImpl conv, FormatSinkImpl *sink) { + if (!v.value) { + sink->Append("(nil)"); + return {true}; + } + IntDigits as_digits; + as_digits.PrintAsHexLower(v.value); + return {ConvertIntImplInnerSlow(as_digits, conv, sink)}; +} + +// ==================== Floats ==================== +FloatingConvertResult FormatConvertImpl(float v, + const FormatConversionSpecImpl conv, + FormatSinkImpl *sink) { + return {ConvertFloatArg(v, conv, sink)}; +} +FloatingConvertResult FormatConvertImpl(double v, + const FormatConversionSpecImpl conv, + FormatSinkImpl *sink) { + return {ConvertFloatArg(v, conv, sink)}; +} +FloatingConvertResult FormatConvertImpl(long double v, + const FormatConversionSpecImpl conv, + FormatSinkImpl *sink) { + return {ConvertFloatArg(v, conv, sink)}; +} + +// ==================== Chars ==================== +IntegralConvertResult FormatConvertImpl(char v, + const FormatConversionSpecImpl conv, + FormatSinkImpl *sink) { + return {ConvertIntArg(v, conv, sink)}; +} +IntegralConvertResult FormatConvertImpl(signed char v, + const FormatConversionSpecImpl conv, + FormatSinkImpl *sink) { + return {ConvertIntArg(v, conv, sink)}; +} +IntegralConvertResult FormatConvertImpl(unsigned char v, + const FormatConversionSpecImpl conv, + FormatSinkImpl *sink) { + return {ConvertIntArg(v, conv, sink)}; +} + +// ==================== Ints ==================== +IntegralConvertResult FormatConvertImpl(short v, // NOLINT + const FormatConversionSpecImpl conv, + FormatSinkImpl *sink) { + return {ConvertIntArg(v, conv, sink)}; +} +IntegralConvertResult FormatConvertImpl(unsigned short v, // NOLINT + const FormatConversionSpecImpl conv, + FormatSinkImpl *sink) { + return {ConvertIntArg(v, conv, sink)}; +} +IntegralConvertResult FormatConvertImpl(int v, + const FormatConversionSpecImpl conv, + FormatSinkImpl *sink) { + return {ConvertIntArg(v, conv, sink)}; +} +IntegralConvertResult FormatConvertImpl(unsigned v, + const FormatConversionSpecImpl conv, + FormatSinkImpl *sink) { + return {ConvertIntArg(v, conv, sink)}; +} +IntegralConvertResult FormatConvertImpl(long v, // NOLINT + const FormatConversionSpecImpl conv, + FormatSinkImpl *sink) { + return {ConvertIntArg(v, conv, sink)}; +} +IntegralConvertResult FormatConvertImpl(unsigned long v, // NOLINT + const FormatConversionSpecImpl conv, + FormatSinkImpl *sink) { + return {ConvertIntArg(v, conv, sink)}; +} +IntegralConvertResult FormatConvertImpl(long long v, // NOLINT + const FormatConversionSpecImpl conv, + FormatSinkImpl *sink) { + return {ConvertIntArg(v, conv, sink)}; +} +IntegralConvertResult FormatConvertImpl(unsigned long long v, // NOLINT + const FormatConversionSpecImpl conv, + FormatSinkImpl *sink) { + return {ConvertIntArg(v, conv, sink)}; +} +IntegralConvertResult FormatConvertImpl(y_absl::int128 v, + const FormatConversionSpecImpl conv, + FormatSinkImpl *sink) { + return {ConvertIntArg(v, conv, sink)}; +} +IntegralConvertResult FormatConvertImpl(y_absl::uint128 v, + const FormatConversionSpecImpl conv, + FormatSinkImpl *sink) { + return {ConvertIntArg(v, conv, sink)}; +} + +ABSL_INTERNAL_FORMAT_DISPATCH_OVERLOADS_EXPAND_(); + + + +} // namespace str_format_internal + +ABSL_NAMESPACE_END +} // namespace y_absl diff --git a/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/str_format/arg.h b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/str_format/arg.h new file mode 100644 index 0000000000..59b7bcc727 --- /dev/null +++ b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/str_format/arg.h @@ -0,0 +1,528 @@ +// Copyright 2020 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ABSL_STRINGS_INTERNAL_STR_FORMAT_ARG_H_ +#define ABSL_STRINGS_INTERNAL_STR_FORMAT_ARG_H_ + +#include <string.h> +#include <wchar.h> + +#include <cstdio> +#include <iomanip> +#include <limits> +#include <memory> +#include <sstream> +#include <util/generic/string.h> +#include <util/stream/str.h> +#include <type_traits> + +#include "y_absl/base/port.h" +#include "y_absl/meta/type_traits.h" +#include "y_absl/numeric/int128.h" +#include "y_absl/strings/internal/str_format/extension.h" +#include "y_absl/strings/string_view.h" + +namespace y_absl { +ABSL_NAMESPACE_BEGIN + +class Cord; +class FormatCountCapture; +class FormatSink; + +template <y_absl::FormatConversionCharSet C> +struct FormatConvertResult; +class FormatConversionSpec; + +namespace str_format_internal { + +template <typename T, typename = void> +struct HasUserDefinedConvert : std::false_type {}; + +template <typename T> +struct HasUserDefinedConvert<T, void_t<decltype(AbslFormatConvert( + std::declval<const T&>(), + std::declval<const FormatConversionSpec&>(), + std::declval<FormatSink*>()))>> + : std::true_type {}; + +void AbslFormatConvert(); // Stops the lexical name lookup +template <typename T> +auto FormatConvertImpl(const T& v, FormatConversionSpecImpl conv, + FormatSinkImpl* sink) + -> decltype(AbslFormatConvert(v, + std::declval<const FormatConversionSpec&>(), + std::declval<FormatSink*>())) { + using FormatConversionSpecT = + y_absl::enable_if_t<sizeof(const T& (*)()) != 0, FormatConversionSpec>; + using FormatSinkT = + y_absl::enable_if_t<sizeof(const T& (*)()) != 0, FormatSink>; + auto fcs = conv.Wrap<FormatConversionSpecT>(); + auto fs = sink->Wrap<FormatSinkT>(); + return AbslFormatConvert(v, fcs, &fs); +} + +template <typename T> +class StreamedWrapper; + +// If 'v' can be converted (in the printf sense) according to 'conv', +// then convert it, appending to `sink` and return `true`. +// Otherwise fail and return `false`. + +// AbslFormatConvert(v, conv, sink) is intended to be found by ADL on 'v' +// as an extension mechanism. These FormatConvertImpl functions are the default +// implementations. +// The ADL search is augmented via the 'Sink*' parameter, which also +// serves as a disambiguator to reject possible unintended 'AbslFormatConvert' +// functions in the namespaces associated with 'v'. + +// Raw pointers. +struct VoidPtr { + VoidPtr() = default; + template <typename T, + decltype(reinterpret_cast<uintptr_t>(std::declval<T*>())) = 0> + VoidPtr(T* ptr) // NOLINT + : value(ptr ? reinterpret_cast<uintptr_t>(ptr) : 0) {} + uintptr_t value; +}; + +template <FormatConversionCharSet C> +struct ArgConvertResult { + bool value; +}; + +template <FormatConversionCharSet C> +constexpr FormatConversionCharSet ExtractCharSet(FormatConvertResult<C>) { + return C; +} + +template <FormatConversionCharSet C> +constexpr FormatConversionCharSet ExtractCharSet(ArgConvertResult<C>) { + return C; +} + +using StringConvertResult = + ArgConvertResult<FormatConversionCharSetInternal::s>; +ArgConvertResult<FormatConversionCharSetInternal::p> FormatConvertImpl( + VoidPtr v, FormatConversionSpecImpl conv, FormatSinkImpl* sink); + +// Strings. +StringConvertResult FormatConvertImpl(const TString& v, + FormatConversionSpecImpl conv, + FormatSinkImpl* sink); +StringConvertResult FormatConvertImpl(string_view v, + FormatConversionSpecImpl conv, + FormatSinkImpl* sink); +#if defined(ABSL_HAVE_STD_STRING_VIEW) && !defined(ABSL_USES_STD_STRING_VIEW) +inline StringConvertResult FormatConvertImpl(std::string_view v, + FormatConversionSpecImpl conv, + FormatSinkImpl* sink) { + return FormatConvertImpl(y_absl::string_view(v.data(), v.size()), conv, sink); +} +#endif // ABSL_HAVE_STD_STRING_VIEW && !ABSL_USES_STD_STRING_VIEW + +ArgConvertResult<FormatConversionCharSetUnion( + FormatConversionCharSetInternal::s, FormatConversionCharSetInternal::p)> +FormatConvertImpl(const char* v, const FormatConversionSpecImpl conv, + FormatSinkImpl* sink); + +template <class AbslCord, typename std::enable_if<std::is_same< + AbslCord, y_absl::Cord>::value>::type* = nullptr> +StringConvertResult FormatConvertImpl(const AbslCord& value, + FormatConversionSpecImpl conv, + FormatSinkImpl* sink) { + bool is_left = conv.has_left_flag(); + size_t space_remaining = 0; + + int width = conv.width(); + if (width >= 0) space_remaining = width; + + size_t to_write = value.size(); + + int precision = conv.precision(); + if (precision >= 0) + to_write = (std::min)(to_write, static_cast<size_t>(precision)); + + space_remaining = Excess(to_write, space_remaining); + + if (space_remaining > 0 && !is_left) sink->Append(space_remaining, ' '); + + for (string_view piece : value.Chunks()) { + if (piece.size() > to_write) { + piece.remove_suffix(piece.size() - to_write); + to_write = 0; + } else { + to_write -= piece.size(); + } + sink->Append(piece); + if (to_write == 0) { + break; + } + } + + if (space_remaining > 0 && is_left) sink->Append(space_remaining, ' '); + return {true}; +} + +using IntegralConvertResult = ArgConvertResult<FormatConversionCharSetUnion( + FormatConversionCharSetInternal::c, + FormatConversionCharSetInternal::kNumeric, + FormatConversionCharSetInternal::kStar)>; +using FloatingConvertResult = + ArgConvertResult<FormatConversionCharSetInternal::kFloating>; + +// Floats. +FloatingConvertResult FormatConvertImpl(float v, FormatConversionSpecImpl conv, + FormatSinkImpl* sink); +FloatingConvertResult FormatConvertImpl(double v, FormatConversionSpecImpl conv, + FormatSinkImpl* sink); +FloatingConvertResult FormatConvertImpl(long double v, + FormatConversionSpecImpl conv, + FormatSinkImpl* sink); + +// Chars. +IntegralConvertResult FormatConvertImpl(char v, FormatConversionSpecImpl conv, + FormatSinkImpl* sink); +IntegralConvertResult FormatConvertImpl(signed char v, + FormatConversionSpecImpl conv, + FormatSinkImpl* sink); +IntegralConvertResult FormatConvertImpl(unsigned char v, + FormatConversionSpecImpl conv, + FormatSinkImpl* sink); + +// Ints. +IntegralConvertResult FormatConvertImpl(short v, // NOLINT + FormatConversionSpecImpl conv, + FormatSinkImpl* sink); +IntegralConvertResult FormatConvertImpl(unsigned short v, // NOLINT + FormatConversionSpecImpl conv, + FormatSinkImpl* sink); +IntegralConvertResult FormatConvertImpl(int v, FormatConversionSpecImpl conv, + FormatSinkImpl* sink); +IntegralConvertResult FormatConvertImpl(unsigned v, + FormatConversionSpecImpl conv, + FormatSinkImpl* sink); +IntegralConvertResult FormatConvertImpl(long v, // NOLINT + FormatConversionSpecImpl conv, + FormatSinkImpl* sink); +IntegralConvertResult FormatConvertImpl(unsigned long v, // NOLINT + FormatConversionSpecImpl conv, + FormatSinkImpl* sink); +IntegralConvertResult FormatConvertImpl(long long v, // NOLINT + FormatConversionSpecImpl conv, + FormatSinkImpl* sink); +IntegralConvertResult FormatConvertImpl(unsigned long long v, // NOLINT + FormatConversionSpecImpl conv, + FormatSinkImpl* sink); +IntegralConvertResult FormatConvertImpl(int128 v, FormatConversionSpecImpl conv, + FormatSinkImpl* sink); +IntegralConvertResult FormatConvertImpl(uint128 v, + FormatConversionSpecImpl conv, + FormatSinkImpl* sink); +template <typename T, enable_if_t<std::is_same<T, bool>::value, int> = 0> +IntegralConvertResult FormatConvertImpl(T v, FormatConversionSpecImpl conv, + FormatSinkImpl* sink) { + return FormatConvertImpl(static_cast<int>(v), conv, sink); +} + +// We provide this function to help the checker, but it is never defined. +// FormatArgImpl will use the underlying Convert functions instead. +template <typename T> +typename std::enable_if<std::is_enum<T>::value && + !HasUserDefinedConvert<T>::value, + IntegralConvertResult>::type +FormatConvertImpl(T v, FormatConversionSpecImpl conv, FormatSinkImpl* sink); + +template <typename T> +StringConvertResult FormatConvertImpl(const StreamedWrapper<T>& v, + FormatConversionSpecImpl conv, + FormatSinkImpl* out) { + TString buf; + TStringOutput oss(buf); + oss << v.v_; + if (!buf) return {false}; + return str_format_internal::FormatConvertImpl(buf, conv, out); +} + +// Use templates and dependent types to delay evaluation of the function +// until after FormatCountCapture is fully defined. +struct FormatCountCaptureHelper { + template <class T = int> + static ArgConvertResult<FormatConversionCharSetInternal::n> ConvertHelper( + const FormatCountCapture& v, FormatConversionSpecImpl conv, + FormatSinkImpl* sink) { + const y_absl::enable_if_t<sizeof(T) != 0, FormatCountCapture>& v2 = v; + + if (conv.conversion_char() != + str_format_internal::FormatConversionCharInternal::n) { + return {false}; + } + *v2.p_ = static_cast<int>(sink->size()); + return {true}; + } +}; + +template <class T = int> +ArgConvertResult<FormatConversionCharSetInternal::n> FormatConvertImpl( + const FormatCountCapture& v, FormatConversionSpecImpl conv, + FormatSinkImpl* sink) { + return FormatCountCaptureHelper::ConvertHelper(v, conv, sink); +} + +// Helper friend struct to hide implementation details from the public API of +// FormatArgImpl. +struct FormatArgImplFriend { + template <typename Arg> + static bool ToInt(Arg arg, int* out) { + // A value initialized FormatConversionSpecImpl has a `none` conv, which + // tells the dispatcher to run the `int` conversion. + return arg.dispatcher_(arg.data_, {}, out); + } + + template <typename Arg> + static bool Convert(Arg arg, FormatConversionSpecImpl conv, + FormatSinkImpl* out) { + return arg.dispatcher_(arg.data_, conv, out); + } + + template <typename Arg> + static typename Arg::Dispatcher GetVTablePtrForTest(Arg arg) { + return arg.dispatcher_; + } +}; + +template <typename Arg> +constexpr FormatConversionCharSet ArgumentToConv() { + return y_absl::str_format_internal::ExtractCharSet( + decltype(str_format_internal::FormatConvertImpl( + std::declval<const Arg&>(), + std::declval<const FormatConversionSpecImpl&>(), + std::declval<FormatSinkImpl*>())){}); +} + +// A type-erased handle to a format argument. +class FormatArgImpl { + private: + enum { kInlinedSpace = 8 }; + + using VoidPtr = str_format_internal::VoidPtr; + + union Data { + const void* ptr; + const volatile void* volatile_ptr; + char buf[kInlinedSpace]; + }; + + using Dispatcher = bool (*)(Data, FormatConversionSpecImpl, void* out); + + template <typename T> + struct store_by_value + : std::integral_constant<bool, (sizeof(T) <= kInlinedSpace) && + (std::is_integral<T>::value || + std::is_floating_point<T>::value || + std::is_pointer<T>::value || + std::is_same<VoidPtr, T>::value)> {}; + + enum StoragePolicy { ByPointer, ByVolatilePointer, ByValue }; + template <typename T> + struct storage_policy + : std::integral_constant<StoragePolicy, + (std::is_volatile<T>::value + ? ByVolatilePointer + : (store_by_value<T>::value ? ByValue + : ByPointer))> { + }; + + // To reduce the number of vtables we will decay values before hand. + // Anything with a user-defined Convert will get its own vtable. + // For everything else: + // - Decay char* and char arrays into `const char*` + // - Decay any other pointer to `const void*` + // - Decay all enums to their underlying type. + // - Decay function pointers to void*. + template <typename T, typename = void> + struct DecayType { + static constexpr bool kHasUserDefined = + str_format_internal::HasUserDefinedConvert<T>::value; + using type = typename std::conditional< + !kHasUserDefined && std::is_convertible<T, const char*>::value, + const char*, + typename std::conditional<!kHasUserDefined && + std::is_convertible<T, VoidPtr>::value, + VoidPtr, const T&>::type>::type; + }; + template <typename T> + struct DecayType<T, + typename std::enable_if< + !str_format_internal::HasUserDefinedConvert<T>::value && + std::is_enum<T>::value>::type> { + using type = typename std::underlying_type<T>::type; + }; + + public: + template <typename T> + explicit FormatArgImpl(const T& value) { + using D = typename DecayType<T>::type; + static_assert( + std::is_same<D, const T&>::value || storage_policy<D>::value == ByValue, + "Decayed types must be stored by value"); + Init(static_cast<D>(value)); + } + + private: + friend struct str_format_internal::FormatArgImplFriend; + template <typename T, StoragePolicy = storage_policy<T>::value> + struct Manager; + + template <typename T> + struct Manager<T, ByPointer> { + static Data SetValue(const T& value) { + Data data; + data.ptr = std::addressof(value); + return data; + } + + static const T& Value(Data arg) { return *static_cast<const T*>(arg.ptr); } + }; + + template <typename T> + struct Manager<T, ByVolatilePointer> { + static Data SetValue(const T& value) { + Data data; + data.volatile_ptr = &value; + return data; + } + + static const T& Value(Data arg) { + return *static_cast<const T*>(arg.volatile_ptr); + } + }; + + template <typename T> + struct Manager<T, ByValue> { + static Data SetValue(const T& value) { + Data data; + memcpy(data.buf, &value, sizeof(value)); + return data; + } + + static T Value(Data arg) { + T value; + memcpy(&value, arg.buf, sizeof(T)); + return value; + } + }; + + template <typename T> + void Init(const T& value) { + data_ = Manager<T>::SetValue(value); + dispatcher_ = &Dispatch<T>; + } + + template <typename T> + static int ToIntVal(const T& val) { + using CommonType = typename std::conditional<std::is_signed<T>::value, + int64_t, uint64_t>::type; + if (static_cast<CommonType>(val) > + static_cast<CommonType>((std::numeric_limits<int>::max)())) { + return (std::numeric_limits<int>::max)(); + } else if (std::is_signed<T>::value && + static_cast<CommonType>(val) < + static_cast<CommonType>((std::numeric_limits<int>::min)())) { + return (std::numeric_limits<int>::min)(); + } + return static_cast<int>(val); + } + + template <typename T> + static bool ToInt(Data arg, int* out, std::true_type /* is_integral */, + std::false_type) { + *out = ToIntVal(Manager<T>::Value(arg)); + return true; + } + + template <typename T> + static bool ToInt(Data arg, int* out, std::false_type, + std::true_type /* is_enum */) { + *out = ToIntVal(static_cast<typename std::underlying_type<T>::type>( + Manager<T>::Value(arg))); + return true; + } + + template <typename T> + static bool ToInt(Data, int*, std::false_type, std::false_type) { + return false; + } + + template <typename T> + static bool Dispatch(Data arg, FormatConversionSpecImpl spec, void* out) { + // A `none` conv indicates that we want the `int` conversion. + if (ABSL_PREDICT_FALSE(spec.conversion_char() == + FormatConversionCharInternal::kNone)) { + return ToInt<T>(arg, static_cast<int*>(out), std::is_integral<T>(), + std::is_enum<T>()); + } + if (ABSL_PREDICT_FALSE(!Contains(ArgumentToConv<T>(), + spec.conversion_char()))) { + return false; + } + return str_format_internal::FormatConvertImpl( + Manager<T>::Value(arg), spec, + static_cast<FormatSinkImpl*>(out)) + .value; + } + + Data data_; + Dispatcher dispatcher_; +}; + +#define ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(T, E) \ + E template bool FormatArgImpl::Dispatch<T>(Data, FormatConversionSpecImpl, \ + void*) + +#define ABSL_INTERNAL_FORMAT_DISPATCH_OVERLOADS_EXPAND_(...) \ + ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(str_format_internal::VoidPtr, \ + __VA_ARGS__); \ + ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(bool, __VA_ARGS__); \ + ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(char, __VA_ARGS__); \ + ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(signed char, __VA_ARGS__); \ + ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(unsigned char, __VA_ARGS__); \ + ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(short, __VA_ARGS__); /* NOLINT */ \ + ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(unsigned short, /* NOLINT */ \ + __VA_ARGS__); \ + ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(int, __VA_ARGS__); \ + ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(unsigned int, __VA_ARGS__); \ + ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(long, __VA_ARGS__); /* NOLINT */ \ + ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(unsigned long, /* NOLINT */ \ + __VA_ARGS__); \ + ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(long long, /* NOLINT */ \ + __VA_ARGS__); \ + ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(unsigned long long, /* NOLINT */ \ + __VA_ARGS__); \ + ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(int128, __VA_ARGS__); \ + ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(uint128, __VA_ARGS__); \ + ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(float, __VA_ARGS__); \ + ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(double, __VA_ARGS__); \ + ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(long double, __VA_ARGS__); \ + ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(const char*, __VA_ARGS__); \ + ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(TString, __VA_ARGS__); \ + ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(string_view, __VA_ARGS__) + +ABSL_INTERNAL_FORMAT_DISPATCH_OVERLOADS_EXPAND_(extern); + + +} // namespace str_format_internal +ABSL_NAMESPACE_END +} // namespace y_absl + +#endif // ABSL_STRINGS_INTERNAL_STR_FORMAT_ARG_H_ diff --git a/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/str_format/bind.cc b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/str_format/bind.cc new file mode 100644 index 0000000000..211ce25dea --- /dev/null +++ b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/str_format/bind.cc @@ -0,0 +1,258 @@ +// Copyright 2020 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "y_absl/strings/internal/str_format/bind.h" + +#include <cerrno> +#include <limits> +#include <sstream> +#include <util/generic/string.h> + +namespace y_absl { +ABSL_NAMESPACE_BEGIN +namespace str_format_internal { + +namespace { + +inline bool BindFromPosition(int position, int* value, + y_absl::Span<const FormatArgImpl> pack) { + assert(position > 0); + if (static_cast<size_t>(position) > pack.size()) { + return false; + } + // -1 because positions are 1-based + return FormatArgImplFriend::ToInt(pack[position - 1], value); +} + +class ArgContext { + public: + explicit ArgContext(y_absl::Span<const FormatArgImpl> pack) : pack_(pack) {} + + // Fill 'bound' with the results of applying the context's argument pack + // to the specified 'unbound'. We synthesize a BoundConversion by + // lining up a UnboundConversion with a user argument. We also + // resolve any '*' specifiers for width and precision, so after + // this call, 'bound' has all the information it needs to be formatted. + // Returns false on failure. + bool Bind(const UnboundConversion* unbound, BoundConversion* bound); + + private: + y_absl::Span<const FormatArgImpl> pack_; +}; + +inline bool ArgContext::Bind(const UnboundConversion* unbound, + BoundConversion* bound) { + const FormatArgImpl* arg = nullptr; + int arg_position = unbound->arg_position; + if (static_cast<size_t>(arg_position - 1) >= pack_.size()) return false; + arg = &pack_[arg_position - 1]; // 1-based + + if (unbound->flags != Flags::kBasic) { + int width = unbound->width.value(); + bool force_left = false; + if (unbound->width.is_from_arg()) { + if (!BindFromPosition(unbound->width.get_from_arg(), &width, pack_)) + return false; + if (width < 0) { + // "A negative field width is taken as a '-' flag followed by a + // positive field width." + force_left = true; + // Make sure we don't overflow the width when negating it. + width = -std::max(width, -std::numeric_limits<int>::max()); + } + } + + int precision = unbound->precision.value(); + if (unbound->precision.is_from_arg()) { + if (!BindFromPosition(unbound->precision.get_from_arg(), &precision, + pack_)) + return false; + } + + FormatConversionSpecImplFriend::SetWidth(width, bound); + FormatConversionSpecImplFriend::SetPrecision(precision, bound); + + if (force_left) { + FormatConversionSpecImplFriend::SetFlags(unbound->flags | Flags::kLeft, + bound); + } else { + FormatConversionSpecImplFriend::SetFlags(unbound->flags, bound); + } + } else { + FormatConversionSpecImplFriend::SetFlags(unbound->flags, bound); + FormatConversionSpecImplFriend::SetWidth(-1, bound); + FormatConversionSpecImplFriend::SetPrecision(-1, bound); + } + FormatConversionSpecImplFriend::SetConversionChar(unbound->conv, bound); + bound->set_arg(arg); + return true; +} + +template <typename Converter> +class ConverterConsumer { + public: + ConverterConsumer(Converter converter, y_absl::Span<const FormatArgImpl> pack) + : converter_(converter), arg_context_(pack) {} + + bool Append(string_view s) { + converter_.Append(s); + return true; + } + bool ConvertOne(const UnboundConversion& conv, string_view conv_string) { + BoundConversion bound; + if (!arg_context_.Bind(&conv, &bound)) return false; + return converter_.ConvertOne(bound, conv_string); + } + + private: + Converter converter_; + ArgContext arg_context_; +}; + +template <typename Converter> +bool ConvertAll(const UntypedFormatSpecImpl format, + y_absl::Span<const FormatArgImpl> args, Converter converter) { + if (format.has_parsed_conversion()) { + return format.parsed_conversion()->ProcessFormat( + ConverterConsumer<Converter>(converter, args)); + } else { + return ParseFormatString(format.str(), + ConverterConsumer<Converter>(converter, args)); + } +} + +class DefaultConverter { + public: + explicit DefaultConverter(FormatSinkImpl* sink) : sink_(sink) {} + + void Append(string_view s) const { sink_->Append(s); } + + bool ConvertOne(const BoundConversion& bound, string_view /*conv*/) const { + return FormatArgImplFriend::Convert(*bound.arg(), bound, sink_); + } + + private: + FormatSinkImpl* sink_; +}; + +class SummarizingConverter { + public: + explicit SummarizingConverter(FormatSinkImpl* sink) : sink_(sink) {} + + void Append(string_view s) const { sink_->Append(s); } + + bool ConvertOne(const BoundConversion& bound, string_view /*conv*/) const { + UntypedFormatSpecImpl spec("%d"); + + std::ostringstream ss; + ss << "{" << Streamable(spec, {*bound.arg()}) << ":" + << FormatConversionSpecImplFriend::FlagsToString(bound); + if (bound.width() >= 0) ss << bound.width(); + if (bound.precision() >= 0) ss << "." << bound.precision(); + ss << bound.conversion_char() << "}"; + Append(ss.str()); + return true; + } + + private: + FormatSinkImpl* sink_; +}; + +} // namespace + +bool BindWithPack(const UnboundConversion* props, + y_absl::Span<const FormatArgImpl> pack, + BoundConversion* bound) { + return ArgContext(pack).Bind(props, bound); +} + +TString Summarize(const UntypedFormatSpecImpl format, + y_absl::Span<const FormatArgImpl> args) { + typedef SummarizingConverter Converter; + TString out; + { + // inner block to destroy sink before returning out. It ensures a last + // flush. + FormatSinkImpl sink(&out); + if (!ConvertAll(format, args, Converter(&sink))) { + return ""; + } + } + return out; +} + +bool FormatUntyped(FormatRawSinkImpl raw_sink, + const UntypedFormatSpecImpl format, + y_absl::Span<const FormatArgImpl> args) { + FormatSinkImpl sink(raw_sink); + using Converter = DefaultConverter; + return ConvertAll(format, args, Converter(&sink)); +} + +std::ostream& Streamable::Print(std::ostream& os) const { + if (!FormatUntyped(&os, format_, args_)) os.setstate(std::ios::failbit); + return os; +} + +TString& AppendPack(TString* out, const UntypedFormatSpecImpl format, + y_absl::Span<const FormatArgImpl> args) { + size_t orig = out->size(); + if (ABSL_PREDICT_FALSE(!FormatUntyped(out, format, args))) { + out->erase(orig); + } + return *out; +} + +TString FormatPack(const UntypedFormatSpecImpl format, + y_absl::Span<const FormatArgImpl> args) { + TString out; + if (ABSL_PREDICT_FALSE(!FormatUntyped(&out, format, args))) { + out.clear(); + } + return out; +} + +int FprintF(std::FILE* output, const UntypedFormatSpecImpl format, + y_absl::Span<const FormatArgImpl> args) { + FILERawSink sink(output); + if (!FormatUntyped(&sink, format, args)) { + errno = EINVAL; + return -1; + } + if (sink.error()) { + errno = sink.error(); + return -1; + } + if (sink.count() > static_cast<size_t>(std::numeric_limits<int>::max())) { + errno = EFBIG; + return -1; + } + return static_cast<int>(sink.count()); +} + +int SnprintF(char* output, size_t size, const UntypedFormatSpecImpl format, + y_absl::Span<const FormatArgImpl> args) { + BufferRawSink sink(output, size ? size - 1 : 0); + if (!FormatUntyped(&sink, format, args)) { + errno = EINVAL; + return -1; + } + size_t total = sink.total_written(); + if (size) output[std::min(total, size - 1)] = 0; + return static_cast<int>(total); +} + +} // namespace str_format_internal +ABSL_NAMESPACE_END +} // namespace y_absl diff --git a/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/str_format/bind.h b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/str_format/bind.h new file mode 100644 index 0000000000..3966610710 --- /dev/null +++ b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/str_format/bind.h @@ -0,0 +1,217 @@ +// Copyright 2020 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ABSL_STRINGS_INTERNAL_STR_FORMAT_BIND_H_ +#define ABSL_STRINGS_INTERNAL_STR_FORMAT_BIND_H_ + +#include <array> +#include <cstdio> +#include <sstream> +#include <util/generic/string.h> + +#include "y_absl/base/port.h" +#include "y_absl/strings/internal/str_format/arg.h" +#include "y_absl/strings/internal/str_format/checker.h" +#include "y_absl/strings/internal/str_format/parser.h" +#include "y_absl/types/span.h" + +namespace y_absl { +ABSL_NAMESPACE_BEGIN + +class UntypedFormatSpec; + +namespace str_format_internal { + +class BoundConversion : public FormatConversionSpecImpl { + public: + const FormatArgImpl* arg() const { return arg_; } + void set_arg(const FormatArgImpl* a) { arg_ = a; } + + private: + const FormatArgImpl* arg_; +}; + +// This is the type-erased class that the implementation uses. +class UntypedFormatSpecImpl { + public: + UntypedFormatSpecImpl() = delete; + + explicit UntypedFormatSpecImpl(string_view s) + : data_(s.data()), size_(s.size()) {} + explicit UntypedFormatSpecImpl( + const str_format_internal::ParsedFormatBase* pc) + : data_(pc), size_(~size_t{}) {} + + bool has_parsed_conversion() const { return size_ == ~size_t{}; } + + string_view str() const { + assert(!has_parsed_conversion()); + return string_view(static_cast<const char*>(data_), size_); + } + const str_format_internal::ParsedFormatBase* parsed_conversion() const { + assert(has_parsed_conversion()); + return static_cast<const str_format_internal::ParsedFormatBase*>(data_); + } + + template <typename T> + static const UntypedFormatSpecImpl& Extract(const T& s) { + return s.spec_; + } + + private: + const void* data_; + size_t size_; +}; + +template <typename T, FormatConversionCharSet...> +struct MakeDependent { + using type = T; +}; + +// Implicitly convertible from `const char*`, `string_view`, and the +// `ExtendedParsedFormat` type. This abstraction allows all format functions to +// operate on any without providing too many overloads. +template <FormatConversionCharSet... Args> +class FormatSpecTemplate + : public MakeDependent<UntypedFormatSpec, Args...>::type { + using Base = typename MakeDependent<UntypedFormatSpec, Args...>::type; + + public: +#ifdef ABSL_INTERNAL_ENABLE_FORMAT_CHECKER + + // Honeypot overload for when the string is not constexpr. + // We use the 'unavailable' attribute to give a better compiler error than + // just 'method is deleted'. + FormatSpecTemplate(...) // NOLINT + __attribute__((unavailable("Format string is not constexpr."))); + + // Honeypot overload for when the format is constexpr and invalid. + // We use the 'unavailable' attribute to give a better compiler error than + // just 'method is deleted'. + // To avoid checking the format twice, we just check that the format is + // constexpr. If it is valid, then the overload below will kick in. + // We add the template here to make this overload have lower priority. + template <typename = void> + FormatSpecTemplate(const char* s) // NOLINT + __attribute__(( + enable_if(str_format_internal::EnsureConstexpr(s), "constexpr trap"), + unavailable( + "Format specified does not match the arguments passed."))); + + template <typename T = void> + FormatSpecTemplate(string_view s) // NOLINT + __attribute__((enable_if(str_format_internal::EnsureConstexpr(s), + "constexpr trap"))) { + static_assert(sizeof(T*) == 0, + "Format specified does not match the arguments passed."); + } + + // Good format overload. + FormatSpecTemplate(const char* s) // NOLINT + __attribute__((enable_if(ValidFormatImpl<Args...>(s), "bad format trap"))) + : Base(s) {} + + FormatSpecTemplate(string_view s) // NOLINT + __attribute__((enable_if(ValidFormatImpl<Args...>(s), "bad format trap"))) + : Base(s) {} + +#else // ABSL_INTERNAL_ENABLE_FORMAT_CHECKER + + FormatSpecTemplate(const char* s) : Base(s) {} // NOLINT + FormatSpecTemplate(string_view s) : Base(s) {} // NOLINT + +#endif // ABSL_INTERNAL_ENABLE_FORMAT_CHECKER + + template < + FormatConversionCharSet... C, + typename = typename std::enable_if<sizeof...(C) == sizeof...(Args)>::type, + typename = typename std::enable_if<AllOf(Contains(Args, + C)...)>::type> + FormatSpecTemplate(const ExtendedParsedFormat<C...>& pc) // NOLINT + : Base(&pc) {} +}; + +class Streamable { + public: + Streamable(const UntypedFormatSpecImpl& format, + y_absl::Span<const FormatArgImpl> args) + : format_(format) { + if (args.size() <= ABSL_ARRAYSIZE(few_args_)) { + for (size_t i = 0; i < args.size(); ++i) { + few_args_[i] = args[i]; + } + args_ = y_absl::MakeSpan(few_args_, args.size()); + } else { + many_args_.assign(args.begin(), args.end()); + args_ = many_args_; + } + } + + std::ostream& Print(std::ostream& os) const; + + friend std::ostream& operator<<(std::ostream& os, const Streamable& l) { + return l.Print(os); + } + + private: + const UntypedFormatSpecImpl& format_; + y_absl::Span<const FormatArgImpl> args_; + // if args_.size() is 4 or less: + FormatArgImpl few_args_[4] = {FormatArgImpl(0), FormatArgImpl(0), + FormatArgImpl(0), FormatArgImpl(0)}; + // if args_.size() is more than 4: + std::vector<FormatArgImpl> many_args_; +}; + +// for testing +TString Summarize(UntypedFormatSpecImpl format, + y_absl::Span<const FormatArgImpl> args); +bool BindWithPack(const UnboundConversion* props, + y_absl::Span<const FormatArgImpl> pack, BoundConversion* bound); + +bool FormatUntyped(FormatRawSinkImpl raw_sink, + UntypedFormatSpecImpl format, + y_absl::Span<const FormatArgImpl> args); + +TString& AppendPack(TString* out, UntypedFormatSpecImpl format, + y_absl::Span<const FormatArgImpl> args); + +TString FormatPack(const UntypedFormatSpecImpl format, + y_absl::Span<const FormatArgImpl> args); + +int FprintF(std::FILE* output, UntypedFormatSpecImpl format, + y_absl::Span<const FormatArgImpl> args); +int SnprintF(char* output, size_t size, UntypedFormatSpecImpl format, + y_absl::Span<const FormatArgImpl> args); + +// Returned by Streamed(v). Converts via '%s' to the TString created +// by std::ostream << v. +template <typename T> +class StreamedWrapper { + public: + explicit StreamedWrapper(const T& v) : v_(v) { } + + private: + template <typename S> + friend ArgConvertResult<FormatConversionCharSetInternal::s> FormatConvertImpl( + const StreamedWrapper<S>& v, FormatConversionSpecImpl conv, + FormatSinkImpl* out); + const T& v_; +}; + +} // namespace str_format_internal +ABSL_NAMESPACE_END +} // namespace y_absl + +#endif // ABSL_STRINGS_INTERNAL_STR_FORMAT_BIND_H_ diff --git a/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/str_format/checker.h b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/str_format/checker.h new file mode 100644 index 0000000000..7c530d2507 --- /dev/null +++ b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/str_format/checker.h @@ -0,0 +1,333 @@ +// Copyright 2020 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ABSL_STRINGS_INTERNAL_STR_FORMAT_CHECKER_H_ +#define ABSL_STRINGS_INTERNAL_STR_FORMAT_CHECKER_H_ + +#include "y_absl/base/attributes.h" +#include "y_absl/strings/internal/str_format/arg.h" +#include "y_absl/strings/internal/str_format/extension.h" + +// Compile time check support for entry points. + +#ifndef ABSL_INTERNAL_ENABLE_FORMAT_CHECKER +#if ABSL_HAVE_ATTRIBUTE(enable_if) && !defined(__native_client__) +#define ABSL_INTERNAL_ENABLE_FORMAT_CHECKER 1 +#endif // ABSL_HAVE_ATTRIBUTE(enable_if) && !defined(__native_client__) +#endif // ABSL_INTERNAL_ENABLE_FORMAT_CHECKER + +namespace y_absl { +ABSL_NAMESPACE_BEGIN +namespace str_format_internal { + +constexpr bool AllOf() { return true; } + +template <typename... T> +constexpr bool AllOf(bool b, T... t) { + return b && AllOf(t...); +} + +#ifdef ABSL_INTERNAL_ENABLE_FORMAT_CHECKER + +constexpr bool ContainsChar(const char* chars, char c) { + return *chars == c || (*chars && ContainsChar(chars + 1, c)); +} + +// A constexpr compatible list of Convs. +struct ConvList { + const FormatConversionCharSet* array; + int count; + + // We do the bound check here to avoid having to do it on the callers. + // Returning an empty FormatConversionCharSet has the same effect as + // short circuiting because it will never match any conversion. + constexpr FormatConversionCharSet operator[](int i) const { + return i < count ? array[i] : FormatConversionCharSet{}; + } + + constexpr ConvList without_front() const { + return count != 0 ? ConvList{array + 1, count - 1} : *this; + } +}; + +template <size_t count> +struct ConvListT { + // Make sure the array has size > 0. + FormatConversionCharSet list[count ? count : 1]; +}; + +constexpr char GetChar(string_view str, size_t index) { + return index < str.size() ? str[index] : char{}; +} + +constexpr string_view ConsumeFront(string_view str, size_t len = 1) { + return len <= str.size() ? string_view(str.data() + len, str.size() - len) + : string_view(); +} + +constexpr string_view ConsumeAnyOf(string_view format, const char* chars) { + return ContainsChar(chars, GetChar(format, 0)) + ? ConsumeAnyOf(ConsumeFront(format), chars) + : format; +} + +constexpr bool IsDigit(char c) { return c >= '0' && c <= '9'; } + +// Helper class for the ParseDigits function. +// It encapsulates the two return values we need there. +struct Integer { + string_view format; + int value; + + // If the next character is a '$', consume it. + // Otherwise, make `this` an invalid positional argument. + constexpr Integer ConsumePositionalDollar() const { + return GetChar(format, 0) == '$' ? Integer{ConsumeFront(format), value} + : Integer{format, 0}; + } +}; + +constexpr Integer ParseDigits(string_view format, int value = 0) { + return IsDigit(GetChar(format, 0)) + ? ParseDigits(ConsumeFront(format), + 10 * value + GetChar(format, 0) - '0') + : Integer{format, value}; +} + +// Parse digits for a positional argument. +// The parsing also consumes the '$'. +constexpr Integer ParsePositional(string_view format) { + return ParseDigits(format).ConsumePositionalDollar(); +} + +// Parses a single conversion specifier. +// See ConvParser::Run() for post conditions. +class ConvParser { + constexpr ConvParser SetFormat(string_view format) const { + return ConvParser(format, args_, error_, arg_position_, is_positional_); + } + + constexpr ConvParser SetArgs(ConvList args) const { + return ConvParser(format_, args, error_, arg_position_, is_positional_); + } + + constexpr ConvParser SetError(bool error) const { + return ConvParser(format_, args_, error_ || error, arg_position_, + is_positional_); + } + + constexpr ConvParser SetArgPosition(int arg_position) const { + return ConvParser(format_, args_, error_, arg_position, is_positional_); + } + + // Consumes the next arg and verifies that it matches `conv`. + // `error_` is set if there is no next arg or if it doesn't match `conv`. + constexpr ConvParser ConsumeNextArg(char conv) const { + return SetArgs(args_.without_front()).SetError(!Contains(args_[0], conv)); + } + + // Verify that positional argument `i.value` matches `conv`. + // `error_` is set if `i.value` is not a valid argument or if it doesn't + // match. + constexpr ConvParser VerifyPositional(Integer i, char conv) const { + return SetFormat(i.format).SetError(!Contains(args_[i.value - 1], conv)); + } + + // Parse the position of the arg and store it in `arg_position_`. + constexpr ConvParser ParseArgPosition(Integer arg) const { + return SetFormat(arg.format).SetArgPosition(arg.value); + } + + // Consume the flags. + constexpr ConvParser ParseFlags() const { + return SetFormat(ConsumeAnyOf(format_, "-+ #0")); + } + + // Consume the width. + // If it is '*', we verify that it matches `args_`. `error_` is set if it + // doesn't match. + constexpr ConvParser ParseWidth() const { + return IsDigit(GetChar(format_, 0)) + ? SetFormat(ParseDigits(format_).format) + : GetChar(format_, 0) == '*' + ? is_positional_ + ? VerifyPositional( + ParsePositional(ConsumeFront(format_)), '*') + : SetFormat(ConsumeFront(format_)) + .ConsumeNextArg('*') + : *this; + } + + // Consume the precision. + // If it is '*', we verify that it matches `args_`. `error_` is set if it + // doesn't match. + constexpr ConvParser ParsePrecision() const { + return GetChar(format_, 0) != '.' + ? *this + : GetChar(format_, 1) == '*' + ? is_positional_ + ? VerifyPositional( + ParsePositional(ConsumeFront(format_, 2)), '*') + : SetFormat(ConsumeFront(format_, 2)) + .ConsumeNextArg('*') + : SetFormat(ParseDigits(ConsumeFront(format_)).format); + } + + // Consume the length characters. + constexpr ConvParser ParseLength() const { + return SetFormat(ConsumeAnyOf(format_, "lLhjztq")); + } + + // Consume the conversion character and verify that it matches `args_`. + // `error_` is set if it doesn't match. + constexpr ConvParser ParseConversion() const { + return is_positional_ + ? VerifyPositional({ConsumeFront(format_), arg_position_}, + GetChar(format_, 0)) + : ConsumeNextArg(GetChar(format_, 0)) + .SetFormat(ConsumeFront(format_)); + } + + constexpr ConvParser(string_view format, ConvList args, bool error, + int arg_position, bool is_positional) + : format_(format), + args_(args), + error_(error), + arg_position_(arg_position), + is_positional_(is_positional) {} + + public: + constexpr ConvParser(string_view format, ConvList args, bool is_positional) + : format_(format), + args_(args), + error_(false), + arg_position_(0), + is_positional_(is_positional) {} + + // Consume the whole conversion specifier. + // `format()` will be set to the character after the conversion character. + // `error()` will be set if any of the arguments do not match. + constexpr ConvParser Run() const { + return (is_positional_ ? ParseArgPosition(ParsePositional(format_)) : *this) + .ParseFlags() + .ParseWidth() + .ParsePrecision() + .ParseLength() + .ParseConversion(); + } + + constexpr string_view format() const { return format_; } + constexpr ConvList args() const { return args_; } + constexpr bool error() const { return error_; } + constexpr bool is_positional() const { return is_positional_; } + + private: + string_view format_; + // Current list of arguments. If we are not in positional mode we will consume + // from the front. + ConvList args_; + bool error_; + // Holds the argument position of the conversion character, if we are in + // positional mode. Otherwise, it is unspecified. + int arg_position_; + // Whether we are in positional mode. + // It changes the behavior of '*' and where to find the converted argument. + bool is_positional_; +}; + +// Parses a whole format expression. +// See FormatParser::Run(). +class FormatParser { + static constexpr bool FoundPercent(string_view format) { + return format.empty() || + (GetChar(format, 0) == '%' && GetChar(format, 1) != '%'); + } + + // We use an inner function to increase the recursion limit. + // The inner function consumes up to `limit` characters on every run. + // This increases the limit from 512 to ~512*limit. + static constexpr string_view ConsumeNonPercentInner(string_view format, + int limit = 20) { + return FoundPercent(format) || !limit + ? format + : ConsumeNonPercentInner( + ConsumeFront(format, GetChar(format, 0) == '%' && + GetChar(format, 1) == '%' + ? 2 + : 1), + limit - 1); + } + + // Consume characters until the next conversion spec %. + // It skips %%. + static constexpr string_view ConsumeNonPercent(string_view format) { + return FoundPercent(format) + ? format + : ConsumeNonPercent(ConsumeNonPercentInner(format)); + } + + static constexpr bool IsPositional(string_view format) { + return IsDigit(GetChar(format, 0)) ? IsPositional(ConsumeFront(format)) + : GetChar(format, 0) == '$'; + } + + constexpr bool RunImpl(bool is_positional) const { + // In non-positional mode we require all arguments to be consumed. + // In positional mode just reaching the end of the format without errors is + // enough. + return (format_.empty() && (is_positional || args_.count == 0)) || + (!format_.empty() && + ValidateArg( + ConvParser(ConsumeFront(format_), args_, is_positional).Run())); + } + + constexpr bool ValidateArg(ConvParser conv) const { + return !conv.error() && FormatParser(conv.format(), conv.args()) + .RunImpl(conv.is_positional()); + } + + public: + constexpr FormatParser(string_view format, ConvList args) + : format_(ConsumeNonPercent(format)), args_(args) {} + + // Runs the parser for `format` and `args`. + // It verifies that the format is valid and that all conversion specifiers + // match the arguments passed. + // In non-positional mode it also verfies that all arguments are consumed. + constexpr bool Run() const { + return RunImpl(!format_.empty() && IsPositional(ConsumeFront(format_))); + } + + private: + string_view format_; + // Current list of arguments. + // If we are not in positional mode we will consume from the front and will + // have to be empty in the end. + ConvList args_; +}; + +template <FormatConversionCharSet... C> +constexpr bool ValidFormatImpl(string_view format) { + return FormatParser(format, + {ConvListT<sizeof...(C)>{{C...}}.list, sizeof...(C)}) + .Run(); +} + +#endif // ABSL_INTERNAL_ENABLE_FORMAT_CHECKER + +} // namespace str_format_internal +ABSL_NAMESPACE_END +} // namespace y_absl + +#endif // ABSL_STRINGS_INTERNAL_STR_FORMAT_CHECKER_H_ diff --git a/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/str_format/extension.cc b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/str_format/extension.cc new file mode 100644 index 0000000000..f2a4169ae7 --- /dev/null +++ b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/str_format/extension.cc @@ -0,0 +1,75 @@ +// +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "y_absl/strings/internal/str_format/extension.h" + +#include <errno.h> +#include <algorithm> +#include <util/generic/string.h> + +namespace y_absl { +ABSL_NAMESPACE_BEGIN +namespace str_format_internal { + +TString FlagsToString(Flags v) { + TString s; + s.append(FlagsContains(v, Flags::kLeft) ? "-" : ""); + s.append(FlagsContains(v, Flags::kShowPos) ? "+" : ""); + s.append(FlagsContains(v, Flags::kSignCol) ? " " : ""); + s.append(FlagsContains(v, Flags::kAlt) ? "#" : ""); + s.append(FlagsContains(v, Flags::kZero) ? "0" : ""); + return s; +} + +#define ABSL_INTERNAL_X_VAL(id) \ + constexpr y_absl::FormatConversionChar FormatConversionCharInternal::id; +ABSL_INTERNAL_CONVERSION_CHARS_EXPAND_(ABSL_INTERNAL_X_VAL, ) +#undef ABSL_INTERNAL_X_VAL +// NOLINTNEXTLINE(readability-redundant-declaration) +constexpr y_absl::FormatConversionChar FormatConversionCharInternal::kNone; + +#define ABSL_INTERNAL_CHAR_SET_CASE(c) \ + constexpr FormatConversionCharSet FormatConversionCharSetInternal::c; +ABSL_INTERNAL_CONVERSION_CHARS_EXPAND_(ABSL_INTERNAL_CHAR_SET_CASE, ) +#undef ABSL_INTERNAL_CHAR_SET_CASE + +// NOLINTNEXTLINE(readability-redundant-declaration) +constexpr FormatConversionCharSet FormatConversionCharSetInternal::kStar; +// NOLINTNEXTLINE(readability-redundant-declaration) +constexpr FormatConversionCharSet FormatConversionCharSetInternal::kIntegral; +// NOLINTNEXTLINE(readability-redundant-declaration) +constexpr FormatConversionCharSet FormatConversionCharSetInternal::kFloating; +// NOLINTNEXTLINE(readability-redundant-declaration) +constexpr FormatConversionCharSet FormatConversionCharSetInternal::kNumeric; +// NOLINTNEXTLINE(readability-redundant-declaration) +constexpr FormatConversionCharSet FormatConversionCharSetInternal::kPointer; + +bool FormatSinkImpl::PutPaddedString(string_view value, int width, + int precision, bool left) { + size_t space_remaining = 0; + if (width >= 0) space_remaining = width; + size_t n = value.size(); + if (precision >= 0) n = std::min(n, static_cast<size_t>(precision)); + string_view shown(value.data(), n); + space_remaining = Excess(shown.size(), space_remaining); + if (!left) Append(space_remaining, ' '); + Append(shown); + if (left) Append(space_remaining, ' '); + return true; +} + +} // namespace str_format_internal +ABSL_NAMESPACE_END +} // namespace y_absl diff --git a/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/str_format/extension.h b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/str_format/extension.h new file mode 100644 index 0000000000..e5de5cb6a1 --- /dev/null +++ b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/str_format/extension.h @@ -0,0 +1,445 @@ +// +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#ifndef ABSL_STRINGS_INTERNAL_STR_FORMAT_EXTENSION_H_ +#define ABSL_STRINGS_INTERNAL_STR_FORMAT_EXTENSION_H_ + +#include <limits.h> + +#include <cstddef> +#include <cstring> +#include <ostream> + +#include "y_absl/base/config.h" +#include "y_absl/base/port.h" +#include "y_absl/meta/type_traits.h" +#include "y_absl/strings/internal/str_format/output.h" +#include "y_absl/strings/string_view.h" + +namespace y_absl { +ABSL_NAMESPACE_BEGIN + +enum class FormatConversionChar : uint8_t; +enum class FormatConversionCharSet : uint64_t; + +namespace str_format_internal { + +class FormatRawSinkImpl { + public: + // Implicitly convert from any type that provides the hook function as + // described above. + template <typename T, decltype(str_format_internal::InvokeFlush( + std::declval<T*>(), string_view()))* = nullptr> + FormatRawSinkImpl(T* raw) // NOLINT + : sink_(raw), write_(&FormatRawSinkImpl::Flush<T>) {} + + void Write(string_view s) { write_(sink_, s); } + + template <typename T> + static FormatRawSinkImpl Extract(T s) { + return s.sink_; + } + + private: + template <typename T> + static void Flush(void* r, string_view s) { + str_format_internal::InvokeFlush(static_cast<T*>(r), s); + } + + void* sink_; + void (*write_)(void*, string_view); +}; + +// An abstraction to which conversions write their string data. +class FormatSinkImpl { + public: + explicit FormatSinkImpl(FormatRawSinkImpl raw) : raw_(raw) {} + + ~FormatSinkImpl() { Flush(); } + + void Flush() { + raw_.Write(string_view(buf_, pos_ - buf_)); + pos_ = buf_; + } + + void Append(size_t n, char c) { + if (n == 0) return; + size_ += n; + auto raw_append = [&](size_t count) { + memset(pos_, c, count); + pos_ += count; + }; + while (n > Avail()) { + n -= Avail(); + if (Avail() > 0) { + raw_append(Avail()); + } + Flush(); + } + raw_append(n); + } + + void Append(string_view v) { + size_t n = v.size(); + if (n == 0) return; + size_ += n; + if (n >= Avail()) { + Flush(); + raw_.Write(v); + return; + } + memcpy(pos_, v.data(), n); + pos_ += n; + } + + size_t size() const { return size_; } + + // Put 'v' to 'sink' with specified width, precision, and left flag. + bool PutPaddedString(string_view v, int width, int precision, bool left); + + template <typename T> + T Wrap() { + return T(this); + } + + template <typename T> + static FormatSinkImpl* Extract(T* s) { + return s->sink_; + } + + private: + size_t Avail() const { return buf_ + sizeof(buf_) - pos_; } + + FormatRawSinkImpl raw_; + size_t size_ = 0; + char* pos_ = buf_; + char buf_[1024]; +}; + +enum class Flags : uint8_t { + kBasic = 0, + kLeft = 1 << 0, + kShowPos = 1 << 1, + kSignCol = 1 << 2, + kAlt = 1 << 3, + kZero = 1 << 4, + // This is not a real flag. It just exists to turn off kBasic when no other + // flags are set. This is for when width/precision are specified. + kNonBasic = 1 << 5, +}; + +constexpr Flags operator|(Flags a, Flags b) { + return static_cast<Flags>(static_cast<uint8_t>(a) | static_cast<uint8_t>(b)); +} + +constexpr bool FlagsContains(Flags haystack, Flags needle) { + return (static_cast<uint8_t>(haystack) & static_cast<uint8_t>(needle)) == + static_cast<uint8_t>(needle); +} + +TString FlagsToString(Flags v); + +inline std::ostream& operator<<(std::ostream& os, Flags v) { + return os << FlagsToString(v); +} + +// clang-format off +#define ABSL_INTERNAL_CONVERSION_CHARS_EXPAND_(X_VAL, X_SEP) \ + /* text */ \ + X_VAL(c) X_SEP X_VAL(s) X_SEP \ + /* ints */ \ + X_VAL(d) X_SEP X_VAL(i) X_SEP X_VAL(o) X_SEP \ + X_VAL(u) X_SEP X_VAL(x) X_SEP X_VAL(X) X_SEP \ + /* floats */ \ + X_VAL(f) X_SEP X_VAL(F) X_SEP X_VAL(e) X_SEP X_VAL(E) X_SEP \ + X_VAL(g) X_SEP X_VAL(G) X_SEP X_VAL(a) X_SEP X_VAL(A) X_SEP \ + /* misc */ \ + X_VAL(n) X_SEP X_VAL(p) +// clang-format on + +// This type should not be referenced, it exists only to provide labels +// internally that match the values declared in FormatConversionChar in +// str_format.h. This is meant to allow internal libraries to use the same +// declared interface type as the public interface +// (y_absl::StrFormatConversionChar) while keeping the definition in a public +// header. +// Internal libraries should use the form +// `FormatConversionCharInternal::c`, `FormatConversionCharInternal::kNone` for +// comparisons. Use in switch statements is not recommended due to a bug in how +// gcc 4.9 -Wswitch handles declared but undefined enums. +struct FormatConversionCharInternal { + FormatConversionCharInternal() = delete; + + private: + // clang-format off + enum class Enum : uint8_t { + c, s, // text + d, i, o, u, x, X, // int + f, F, e, E, g, G, a, A, // float + n, p, // misc + kNone + }; + // clang-format on + public: +#define ABSL_INTERNAL_X_VAL(id) \ + static constexpr FormatConversionChar id = \ + static_cast<FormatConversionChar>(Enum::id); + ABSL_INTERNAL_CONVERSION_CHARS_EXPAND_(ABSL_INTERNAL_X_VAL, ) +#undef ABSL_INTERNAL_X_VAL + static constexpr FormatConversionChar kNone = + static_cast<FormatConversionChar>(Enum::kNone); +}; +// clang-format on + +inline FormatConversionChar FormatConversionCharFromChar(char c) { + switch (c) { +#define ABSL_INTERNAL_X_VAL(id) \ + case #id[0]: \ + return FormatConversionCharInternal::id; + ABSL_INTERNAL_CONVERSION_CHARS_EXPAND_(ABSL_INTERNAL_X_VAL, ) +#undef ABSL_INTERNAL_X_VAL + } + return FormatConversionCharInternal::kNone; +} + +inline bool FormatConversionCharIsUpper(FormatConversionChar c) { + if (c == FormatConversionCharInternal::X || + c == FormatConversionCharInternal::F || + c == FormatConversionCharInternal::E || + c == FormatConversionCharInternal::G || + c == FormatConversionCharInternal::A) { + return true; + } else { + return false; + } +} + +inline bool FormatConversionCharIsFloat(FormatConversionChar c) { + if (c == FormatConversionCharInternal::a || + c == FormatConversionCharInternal::e || + c == FormatConversionCharInternal::f || + c == FormatConversionCharInternal::g || + c == FormatConversionCharInternal::A || + c == FormatConversionCharInternal::E || + c == FormatConversionCharInternal::F || + c == FormatConversionCharInternal::G) { + return true; + } else { + return false; + } +} + +inline char FormatConversionCharToChar(FormatConversionChar c) { + if (c == FormatConversionCharInternal::kNone) { + return '\0'; + +#define ABSL_INTERNAL_X_VAL(e) \ + } else if (c == FormatConversionCharInternal::e) { \ + return #e[0]; +#define ABSL_INTERNAL_X_SEP + ABSL_INTERNAL_CONVERSION_CHARS_EXPAND_(ABSL_INTERNAL_X_VAL, + ABSL_INTERNAL_X_SEP) + } else { + return '\0'; + } + +#undef ABSL_INTERNAL_X_VAL +#undef ABSL_INTERNAL_X_SEP +} + +// The associated char. +inline std::ostream& operator<<(std::ostream& os, FormatConversionChar v) { + char c = FormatConversionCharToChar(v); + if (!c) c = '?'; + return os << c; +} + +struct FormatConversionSpecImplFriend; + +class FormatConversionSpecImpl { + public: + // Width and precison are not specified, no flags are set. + bool is_basic() const { return flags_ == Flags::kBasic; } + bool has_left_flag() const { return FlagsContains(flags_, Flags::kLeft); } + bool has_show_pos_flag() const { + return FlagsContains(flags_, Flags::kShowPos); + } + bool has_sign_col_flag() const { + return FlagsContains(flags_, Flags::kSignCol); + } + bool has_alt_flag() const { return FlagsContains(flags_, Flags::kAlt); } + bool has_zero_flag() const { return FlagsContains(flags_, Flags::kZero); } + + FormatConversionChar conversion_char() const { + // Keep this field first in the struct . It generates better code when + // accessing it when ConversionSpec is passed by value in registers. + static_assert(offsetof(FormatConversionSpecImpl, conv_) == 0, ""); + return conv_; + } + + // Returns the specified width. If width is unspecfied, it returns a negative + // value. + int width() const { return width_; } + // Returns the specified precision. If precision is unspecfied, it returns a + // negative value. + int precision() const { return precision_; } + + template <typename T> + T Wrap() { + return T(*this); + } + + private: + friend struct str_format_internal::FormatConversionSpecImplFriend; + FormatConversionChar conv_ = FormatConversionCharInternal::kNone; + Flags flags_; + int width_; + int precision_; +}; + +struct FormatConversionSpecImplFriend final { + static void SetFlags(Flags f, FormatConversionSpecImpl* conv) { + conv->flags_ = f; + } + static void SetConversionChar(FormatConversionChar c, + FormatConversionSpecImpl* conv) { + conv->conv_ = c; + } + static void SetWidth(int w, FormatConversionSpecImpl* conv) { + conv->width_ = w; + } + static void SetPrecision(int p, FormatConversionSpecImpl* conv) { + conv->precision_ = p; + } + static TString FlagsToString(const FormatConversionSpecImpl& spec) { + return str_format_internal::FlagsToString(spec.flags_); + } +}; + +// Type safe OR operator. +// We need this for two reasons: +// 1. operator| on enums makes them decay to integers and the result is an +// integer. We need the result to stay as an enum. +// 2. We use "enum class" which would not work even if we accepted the decay. +constexpr FormatConversionCharSet FormatConversionCharSetUnion( + FormatConversionCharSet a) { + return a; +} + +template <typename... CharSet> +constexpr FormatConversionCharSet FormatConversionCharSetUnion( + FormatConversionCharSet a, CharSet... rest) { + return static_cast<FormatConversionCharSet>( + static_cast<uint64_t>(a) | + static_cast<uint64_t>(FormatConversionCharSetUnion(rest...))); +} + +constexpr uint64_t FormatConversionCharToConvInt(FormatConversionChar c) { + return uint64_t{1} << (1 + static_cast<uint8_t>(c)); +} + +constexpr uint64_t FormatConversionCharToConvInt(char conv) { + return +#define ABSL_INTERNAL_CHAR_SET_CASE(c) \ + conv == #c[0] \ + ? FormatConversionCharToConvInt(FormatConversionCharInternal::c) \ + : + ABSL_INTERNAL_CONVERSION_CHARS_EXPAND_(ABSL_INTERNAL_CHAR_SET_CASE, ) +#undef ABSL_INTERNAL_CHAR_SET_CASE + conv == '*' + ? 1 + : 0; +} + +constexpr FormatConversionCharSet FormatConversionCharToConvValue(char conv) { + return static_cast<FormatConversionCharSet>( + FormatConversionCharToConvInt(conv)); +} + +struct FormatConversionCharSetInternal { +#define ABSL_INTERNAL_CHAR_SET_CASE(c) \ + static constexpr FormatConversionCharSet c = \ + FormatConversionCharToConvValue(#c[0]); + ABSL_INTERNAL_CONVERSION_CHARS_EXPAND_(ABSL_INTERNAL_CHAR_SET_CASE, ) +#undef ABSL_INTERNAL_CHAR_SET_CASE + + // Used for width/precision '*' specification. + static constexpr FormatConversionCharSet kStar = + FormatConversionCharToConvValue('*'); + + static constexpr FormatConversionCharSet kIntegral = + FormatConversionCharSetUnion(d, i, u, o, x, X); + static constexpr FormatConversionCharSet kFloating = + FormatConversionCharSetUnion(a, e, f, g, A, E, F, G); + static constexpr FormatConversionCharSet kNumeric = + FormatConversionCharSetUnion(kIntegral, kFloating); + static constexpr FormatConversionCharSet kPointer = p; +}; + +// Type safe OR operator. +// We need this for two reasons: +// 1. operator| on enums makes them decay to integers and the result is an +// integer. We need the result to stay as an enum. +// 2. We use "enum class" which would not work even if we accepted the decay. +constexpr FormatConversionCharSet operator|(FormatConversionCharSet a, + FormatConversionCharSet b) { + return FormatConversionCharSetUnion(a, b); +} + +// Overloaded conversion functions to support y_absl::ParsedFormat. +// Get a conversion with a single character in it. +constexpr FormatConversionCharSet ToFormatConversionCharSet(char c) { + return static_cast<FormatConversionCharSet>( + FormatConversionCharToConvValue(c)); +} + +// Get a conversion with a single character in it. +constexpr FormatConversionCharSet ToFormatConversionCharSet( + FormatConversionCharSet c) { + return c; +} + +template <typename T> +void ToFormatConversionCharSet(T) = delete; + +// Checks whether `c` exists in `set`. +constexpr bool Contains(FormatConversionCharSet set, char c) { + return (static_cast<uint64_t>(set) & + static_cast<uint64_t>(FormatConversionCharToConvValue(c))) != 0; +} + +// Checks whether all the characters in `c` are contained in `set` +constexpr bool Contains(FormatConversionCharSet set, + FormatConversionCharSet c) { + return (static_cast<uint64_t>(set) & static_cast<uint64_t>(c)) == + static_cast<uint64_t>(c); +} + +// Checks whether all the characters in `c` are contained in `set` +constexpr bool Contains(FormatConversionCharSet set, FormatConversionChar c) { + return (static_cast<uint64_t>(set) & FormatConversionCharToConvInt(c)) != 0; +} + +// Return capacity - used, clipped to a minimum of 0. +inline size_t Excess(size_t used, size_t capacity) { + return used < capacity ? capacity - used : 0; +} + +} // namespace str_format_internal + +ABSL_NAMESPACE_END +} // namespace y_absl + +#endif // ABSL_STRINGS_INTERNAL_STR_FORMAT_EXTENSION_H_ diff --git a/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/str_format/float_conversion.cc b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/str_format/float_conversion.cc new file mode 100644 index 0000000000..c49062538d --- /dev/null +++ b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/str_format/float_conversion.cc @@ -0,0 +1,1423 @@ +// Copyright 2020 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "y_absl/strings/internal/str_format/float_conversion.h" + +#include <string.h> + +#include <algorithm> +#include <cassert> +#include <cmath> +#include <limits> +#include <util/generic/string.h> + +#include "y_absl/base/attributes.h" +#include "y_absl/base/config.h" +#include "y_absl/base/optimization.h" +#include "y_absl/functional/function_ref.h" +#include "y_absl/meta/type_traits.h" +#include "y_absl/numeric/bits.h" +#include "y_absl/numeric/int128.h" +#include "y_absl/numeric/internal/representation.h" +#include "y_absl/strings/numbers.h" +#include "y_absl/types/optional.h" +#include "y_absl/types/span.h" + +namespace y_absl { +ABSL_NAMESPACE_BEGIN +namespace str_format_internal { + +namespace { + +using ::y_absl::numeric_internal::IsDoubleDouble; + +// The code below wants to avoid heap allocations. +// To do so it needs to allocate memory on the stack. +// `StackArray` will allocate memory on the stack in the form of a uint32_t +// array and call the provided callback with said memory. +// It will allocate memory in increments of 512 bytes. We could allocate the +// largest needed unconditionally, but that is more than we need in most of +// cases. This way we use less stack in the common cases. +class StackArray { + using Func = y_absl::FunctionRef<void(y_absl::Span<uint32_t>)>; + static constexpr size_t kStep = 512 / sizeof(uint32_t); + // 5 steps is 2560 bytes, which is enough to hold a long double with the + // largest/smallest exponents. + // The operations below will static_assert their particular maximum. + static constexpr size_t kNumSteps = 5; + + // We do not want this function to be inlined. + // Otherwise the caller will allocate the stack space unnecessarily for all + // the variants even though it only calls one. + template <size_t steps> + ABSL_ATTRIBUTE_NOINLINE static void RunWithCapacityImpl(Func f) { + uint32_t values[steps * kStep]{}; + f(y_absl::MakeSpan(values)); + } + + public: + static constexpr size_t kMaxCapacity = kStep * kNumSteps; + + static void RunWithCapacity(size_t capacity, Func f) { + assert(capacity <= kMaxCapacity); + const size_t step = (capacity + kStep - 1) / kStep; + assert(step <= kNumSteps); + switch (step) { + case 1: + return RunWithCapacityImpl<1>(f); + case 2: + return RunWithCapacityImpl<2>(f); + case 3: + return RunWithCapacityImpl<3>(f); + case 4: + return RunWithCapacityImpl<4>(f); + case 5: + return RunWithCapacityImpl<5>(f); + } + + assert(false && "Invalid capacity"); + } +}; + +// Calculates `10 * (*v) + carry` and stores the result in `*v` and returns +// the carry. +template <typename Int> +inline Int MultiplyBy10WithCarry(Int *v, Int carry) { + using BiggerInt = y_absl::conditional_t<sizeof(Int) == 4, uint64_t, uint128>; + BiggerInt tmp = 10 * static_cast<BiggerInt>(*v) + carry; + *v = static_cast<Int>(tmp); + return static_cast<Int>(tmp >> (sizeof(Int) * 8)); +} + +// Calculates `(2^64 * carry + *v) / 10`. +// Stores the quotient in `*v` and returns the remainder. +// Requires: `0 <= carry <= 9` +inline uint64_t DivideBy10WithCarry(uint64_t *v, uint64_t carry) { + constexpr uint64_t divisor = 10; + // 2^64 / divisor = chunk_quotient + chunk_remainder / divisor + constexpr uint64_t chunk_quotient = (uint64_t{1} << 63) / (divisor / 2); + constexpr uint64_t chunk_remainder = uint64_t{} - chunk_quotient * divisor; + + const uint64_t mod = *v % divisor; + const uint64_t next_carry = chunk_remainder * carry + mod; + *v = *v / divisor + carry * chunk_quotient + next_carry / divisor; + return next_carry % divisor; +} + +using MaxFloatType = + typename std::conditional<IsDoubleDouble(), double, long double>::type; + +// Generates the decimal representation for an integer of the form `v * 2^exp`, +// where `v` and `exp` are both positive integers. +// It generates the digits from the left (ie the most significant digit first) +// to allow for direct printing into the sink. +// +// Requires `0 <= exp` and `exp <= numeric_limits<MaxFloatType>::max_exponent`. +class BinaryToDecimal { + static constexpr int ChunksNeeded(int exp) { + // We will left shift a uint128 by `exp` bits, so we need `128+exp` total + // bits. Round up to 32. + // See constructor for details about adding `10%` to the value. + return (128 + exp + 31) / 32 * 11 / 10; + } + + public: + // Run the conversion for `v * 2^exp` and call `f(binary_to_decimal)`. + // This function will allocate enough stack space to perform the conversion. + static void RunConversion(uint128 v, int exp, + y_absl::FunctionRef<void(BinaryToDecimal)> f) { + assert(exp > 0); + assert(exp <= std::numeric_limits<MaxFloatType>::max_exponent); + static_assert( + static_cast<int>(StackArray::kMaxCapacity) >= + ChunksNeeded(std::numeric_limits<MaxFloatType>::max_exponent), + ""); + + StackArray::RunWithCapacity( + ChunksNeeded(exp), + [=](y_absl::Span<uint32_t> input) { f(BinaryToDecimal(input, v, exp)); }); + } + + int TotalDigits() const { + return static_cast<int>((decimal_end_ - decimal_start_) * kDigitsPerChunk + + CurrentDigits().size()); + } + + // See the current block of digits. + y_absl::string_view CurrentDigits() const { + return y_absl::string_view(digits_ + kDigitsPerChunk - size_, size_); + } + + // Advance the current view of digits. + // Returns `false` when no more digits are available. + bool AdvanceDigits() { + if (decimal_start_ >= decimal_end_) return false; + + uint32_t w = data_[decimal_start_++]; + for (size_ = 0; size_ < kDigitsPerChunk; w /= 10) { + digits_[kDigitsPerChunk - ++size_] = w % 10 + '0'; + } + return true; + } + + private: + BinaryToDecimal(y_absl::Span<uint32_t> data, uint128 v, int exp) : data_(data) { + // We need to print the digits directly into the sink object without + // buffering them all first. To do this we need two things: + // - to know the total number of digits to do padding when necessary + // - to generate the decimal digits from the left. + // + // In order to do this, we do a two pass conversion. + // On the first pass we convert the binary representation of the value into + // a decimal representation in which each uint32_t chunk holds up to 9 + // decimal digits. In the second pass we take each decimal-holding-uint32_t + // value and generate the ascii decimal digits into `digits_`. + // + // The binary and decimal representations actually share the same memory + // region. As we go converting the chunks from binary to decimal we free + // them up and reuse them for the decimal representation. One caveat is that + // the decimal representation is around 7% less efficient in space than the + // binary one. We allocate an extra 10% memory to account for this. See + // ChunksNeeded for this calculation. + int chunk_index = exp / 32; + decimal_start_ = decimal_end_ = ChunksNeeded(exp); + const int offset = exp % 32; + // Left shift v by exp bits. + data_[chunk_index] = static_cast<uint32_t>(v << offset); + for (v >>= (32 - offset); v; v >>= 32) + data_[++chunk_index] = static_cast<uint32_t>(v); + + while (chunk_index >= 0) { + // While we have more than one chunk available, go in steps of 1e9. + // `data_[chunk_index]` holds the highest non-zero binary chunk, so keep + // the variable updated. + uint32_t carry = 0; + for (int i = chunk_index; i >= 0; --i) { + uint64_t tmp = uint64_t{data_[i]} + (uint64_t{carry} << 32); + data_[i] = static_cast<uint32_t>(tmp / uint64_t{1000000000}); + carry = static_cast<uint32_t>(tmp % uint64_t{1000000000}); + } + + // If the highest chunk is now empty, remove it from view. + if (data_[chunk_index] == 0) --chunk_index; + + --decimal_start_; + assert(decimal_start_ != chunk_index); + data_[decimal_start_] = carry; + } + + // Fill the first set of digits. The first chunk might not be complete, so + // handle differently. + for (uint32_t first = data_[decimal_start_++]; first != 0; first /= 10) { + digits_[kDigitsPerChunk - ++size_] = first % 10 + '0'; + } + } + + private: + static constexpr int kDigitsPerChunk = 9; + + int decimal_start_; + int decimal_end_; + + char digits_[kDigitsPerChunk]; + int size_ = 0; + + y_absl::Span<uint32_t> data_; +}; + +// Converts a value of the form `x * 2^-exp` into a sequence of decimal digits. +// Requires `-exp < 0` and +// `-exp >= limits<MaxFloatType>::min_exponent - limits<MaxFloatType>::digits`. +class FractionalDigitGenerator { + public: + // Run the conversion for `v * 2^exp` and call `f(generator)`. + // This function will allocate enough stack space to perform the conversion. + static void RunConversion( + uint128 v, int exp, y_absl::FunctionRef<void(FractionalDigitGenerator)> f) { + using Limits = std::numeric_limits<MaxFloatType>; + assert(-exp < 0); + assert(-exp >= Limits::min_exponent - 128); + static_assert(StackArray::kMaxCapacity >= + (Limits::digits + 128 - Limits::min_exponent + 31) / 32, + ""); + StackArray::RunWithCapacity((Limits::digits + exp + 31) / 32, + [=](y_absl::Span<uint32_t> input) { + f(FractionalDigitGenerator(input, v, exp)); + }); + } + + // Returns true if there are any more non-zero digits left. + bool HasMoreDigits() const { return next_digit_ != 0 || chunk_index_ >= 0; } + + // Returns true if the remainder digits are greater than 5000... + bool IsGreaterThanHalf() const { + return next_digit_ > 5 || (next_digit_ == 5 && chunk_index_ >= 0); + } + // Returns true if the remainder digits are exactly 5000... + bool IsExactlyHalf() const { return next_digit_ == 5 && chunk_index_ < 0; } + + struct Digits { + int digit_before_nine; + int num_nines; + }; + + // Get the next set of digits. + // They are composed by a non-9 digit followed by a runs of zero or more 9s. + Digits GetDigits() { + Digits digits{next_digit_, 0}; + + next_digit_ = GetOneDigit(); + while (next_digit_ == 9) { + ++digits.num_nines; + next_digit_ = GetOneDigit(); + } + + return digits; + } + + private: + // Return the next digit. + int GetOneDigit() { + if (chunk_index_ < 0) return 0; + + uint32_t carry = 0; + for (int i = chunk_index_; i >= 0; --i) { + carry = MultiplyBy10WithCarry(&data_[i], carry); + } + // If the lowest chunk is now empty, remove it from view. + if (data_[chunk_index_] == 0) --chunk_index_; + return carry; + } + + FractionalDigitGenerator(y_absl::Span<uint32_t> data, uint128 v, int exp) + : chunk_index_(exp / 32), data_(data) { + const int offset = exp % 32; + // Right shift `v` by `exp` bits. + data_[chunk_index_] = static_cast<uint32_t>(v << (32 - offset)); + v >>= offset; + // Make sure we don't overflow the data. We already calculated that + // non-zero bits fit, so we might not have space for leading zero bits. + for (int pos = chunk_index_; v; v >>= 32) + data_[--pos] = static_cast<uint32_t>(v); + + // Fill next_digit_, as GetDigits expects it to be populated always. + next_digit_ = GetOneDigit(); + } + + int next_digit_; + int chunk_index_; + y_absl::Span<uint32_t> data_; +}; + +// Count the number of leading zero bits. +int LeadingZeros(uint64_t v) { return countl_zero(v); } +int LeadingZeros(uint128 v) { + auto high = static_cast<uint64_t>(v >> 64); + auto low = static_cast<uint64_t>(v); + return high != 0 ? countl_zero(high) : 64 + countl_zero(low); +} + +// Round up the text digits starting at `p`. +// The buffer must have an extra digit that is known to not need rounding. +// This is done below by having an extra '0' digit on the left. +void RoundUp(char *p) { + while (*p == '9' || *p == '.') { + if (*p == '9') *p = '0'; + --p; + } + ++*p; +} + +// Check the previous digit and round up or down to follow the round-to-even +// policy. +void RoundToEven(char *p) { + if (*p == '.') --p; + if (*p % 2 == 1) RoundUp(p); +} + +// Simple integral decimal digit printing for values that fit in 64-bits. +// Returns the pointer to the last written digit. +char *PrintIntegralDigitsFromRightFast(uint64_t v, char *p) { + do { + *--p = DivideBy10WithCarry(&v, 0) + '0'; + } while (v != 0); + return p; +} + +// Simple integral decimal digit printing for values that fit in 128-bits. +// Returns the pointer to the last written digit. +char *PrintIntegralDigitsFromRightFast(uint128 v, char *p) { + auto high = static_cast<uint64_t>(v >> 64); + auto low = static_cast<uint64_t>(v); + + while (high != 0) { + uint64_t carry = DivideBy10WithCarry(&high, 0); + carry = DivideBy10WithCarry(&low, carry); + *--p = carry + '0'; + } + return PrintIntegralDigitsFromRightFast(low, p); +} + +// Simple fractional decimal digit printing for values that fir in 64-bits after +// shifting. +// Performs rounding if necessary to fit within `precision`. +// Returns the pointer to one after the last character written. +char *PrintFractionalDigitsFast(uint64_t v, char *start, int exp, + int precision) { + char *p = start; + v <<= (64 - exp); + while (precision > 0) { + if (!v) return p; + *p++ = MultiplyBy10WithCarry(&v, uint64_t{0}) + '0'; + --precision; + } + + // We need to round. + if (v < 0x8000000000000000) { + // We round down, so nothing to do. + } else if (v > 0x8000000000000000) { + // We round up. + RoundUp(p - 1); + } else { + RoundToEven(p - 1); + } + + assert(precision == 0); + // Precision can only be zero here. + return p; +} + +// Simple fractional decimal digit printing for values that fir in 128-bits +// after shifting. +// Performs rounding if necessary to fit within `precision`. +// Returns the pointer to one after the last character written. +char *PrintFractionalDigitsFast(uint128 v, char *start, int exp, + int precision) { + char *p = start; + v <<= (128 - exp); + auto high = static_cast<uint64_t>(v >> 64); + auto low = static_cast<uint64_t>(v); + + // While we have digits to print and `low` is not empty, do the long + // multiplication. + while (precision > 0 && low != 0) { + uint64_t carry = MultiplyBy10WithCarry(&low, uint64_t{0}); + carry = MultiplyBy10WithCarry(&high, carry); + + *p++ = carry + '0'; + --precision; + } + + // Now `low` is empty, so use a faster approach for the rest of the digits. + // This block is pretty much the same as the main loop for the 64-bit case + // above. + while (precision > 0) { + if (!high) return p; + *p++ = MultiplyBy10WithCarry(&high, uint64_t{0}) + '0'; + --precision; + } + + // We need to round. + if (high < 0x8000000000000000) { + // We round down, so nothing to do. + } else if (high > 0x8000000000000000 || low != 0) { + // We round up. + RoundUp(p - 1); + } else { + RoundToEven(p - 1); + } + + assert(precision == 0); + // Precision can only be zero here. + return p; +} + +struct FormatState { + char sign_char; + int precision; + const FormatConversionSpecImpl &conv; + FormatSinkImpl *sink; + + // In `alt` mode (flag #) we keep the `.` even if there are no fractional + // digits. In non-alt mode, we strip it. + bool ShouldPrintDot() const { return precision != 0 || conv.has_alt_flag(); } +}; + +struct Padding { + int left_spaces; + int zeros; + int right_spaces; +}; + +Padding ExtraWidthToPadding(size_t total_size, const FormatState &state) { + if (state.conv.width() < 0 || + static_cast<size_t>(state.conv.width()) <= total_size) { + return {0, 0, 0}; + } + int missing_chars = state.conv.width() - total_size; + if (state.conv.has_left_flag()) { + return {0, 0, missing_chars}; + } else if (state.conv.has_zero_flag()) { + return {0, missing_chars, 0}; + } else { + return {missing_chars, 0, 0}; + } +} + +void FinalPrint(const FormatState &state, y_absl::string_view data, + int padding_offset, int trailing_zeros, + y_absl::string_view data_postfix) { + if (state.conv.width() < 0) { + // No width specified. Fast-path. + if (state.sign_char != '\0') state.sink->Append(1, state.sign_char); + state.sink->Append(data); + state.sink->Append(trailing_zeros, '0'); + state.sink->Append(data_postfix); + return; + } + + auto padding = ExtraWidthToPadding((state.sign_char != '\0' ? 1 : 0) + + data.size() + data_postfix.size() + + static_cast<size_t>(trailing_zeros), + state); + + state.sink->Append(padding.left_spaces, ' '); + if (state.sign_char != '\0') state.sink->Append(1, state.sign_char); + // Padding in general needs to be inserted somewhere in the middle of `data`. + state.sink->Append(data.substr(0, padding_offset)); + state.sink->Append(padding.zeros, '0'); + state.sink->Append(data.substr(padding_offset)); + state.sink->Append(trailing_zeros, '0'); + state.sink->Append(data_postfix); + state.sink->Append(padding.right_spaces, ' '); +} + +// Fastpath %f formatter for when the shifted value fits in a simple integral +// type. +// Prints `v*2^exp` with the options from `state`. +template <typename Int> +void FormatFFast(Int v, int exp, const FormatState &state) { + constexpr int input_bits = sizeof(Int) * 8; + + static constexpr size_t integral_size = + /* in case we need to round up an extra digit */ 1 + + /* decimal digits for uint128 */ 40 + 1; + char buffer[integral_size + /* . */ 1 + /* max digits uint128 */ 128]; + buffer[integral_size] = '.'; + char *const integral_digits_end = buffer + integral_size; + char *integral_digits_start; + char *const fractional_digits_start = buffer + integral_size + 1; + char *fractional_digits_end = fractional_digits_start; + + if (exp >= 0) { + const int total_bits = input_bits - LeadingZeros(v) + exp; + integral_digits_start = + total_bits <= 64 + ? PrintIntegralDigitsFromRightFast(static_cast<uint64_t>(v) << exp, + integral_digits_end) + : PrintIntegralDigitsFromRightFast(static_cast<uint128>(v) << exp, + integral_digits_end); + } else { + exp = -exp; + + integral_digits_start = PrintIntegralDigitsFromRightFast( + exp < input_bits ? v >> exp : 0, integral_digits_end); + // PrintFractionalDigits may pull a carried 1 all the way up through the + // integral portion. + integral_digits_start[-1] = '0'; + + fractional_digits_end = + exp <= 64 ? PrintFractionalDigitsFast(v, fractional_digits_start, exp, + state.precision) + : PrintFractionalDigitsFast(static_cast<uint128>(v), + fractional_digits_start, exp, + state.precision); + // There was a carry, so include the first digit too. + if (integral_digits_start[-1] != '0') --integral_digits_start; + } + + size_t size = fractional_digits_end - integral_digits_start; + + // In `alt` mode (flag #) we keep the `.` even if there are no fractional + // digits. In non-alt mode, we strip it. + if (!state.ShouldPrintDot()) --size; + FinalPrint(state, y_absl::string_view(integral_digits_start, size), + /*padding_offset=*/0, + static_cast<int>(state.precision - (fractional_digits_end - + fractional_digits_start)), + /*data_postfix=*/""); +} + +// Slow %f formatter for when the shifted value does not fit in a uint128, and +// `exp > 0`. +// Prints `v*2^exp` with the options from `state`. +// This one is guaranteed to not have fractional digits, so we don't have to +// worry about anything after the `.`. +void FormatFPositiveExpSlow(uint128 v, int exp, const FormatState &state) { + BinaryToDecimal::RunConversion(v, exp, [&](BinaryToDecimal btd) { + const size_t total_digits = + btd.TotalDigits() + + (state.ShouldPrintDot() ? static_cast<size_t>(state.precision) + 1 : 0); + + const auto padding = ExtraWidthToPadding( + total_digits + (state.sign_char != '\0' ? 1 : 0), state); + + state.sink->Append(padding.left_spaces, ' '); + if (state.sign_char != '\0') state.sink->Append(1, state.sign_char); + state.sink->Append(padding.zeros, '0'); + + do { + state.sink->Append(btd.CurrentDigits()); + } while (btd.AdvanceDigits()); + + if (state.ShouldPrintDot()) state.sink->Append(1, '.'); + state.sink->Append(state.precision, '0'); + state.sink->Append(padding.right_spaces, ' '); + }); +} + +// Slow %f formatter for when the shifted value does not fit in a uint128, and +// `exp < 0`. +// Prints `v*2^exp` with the options from `state`. +// This one is guaranteed to be < 1.0, so we don't have to worry about integral +// digits. +void FormatFNegativeExpSlow(uint128 v, int exp, const FormatState &state) { + const size_t total_digits = + /* 0 */ 1 + + (state.ShouldPrintDot() ? static_cast<size_t>(state.precision) + 1 : 0); + auto padding = + ExtraWidthToPadding(total_digits + (state.sign_char ? 1 : 0), state); + padding.zeros += 1; + state.sink->Append(padding.left_spaces, ' '); + if (state.sign_char != '\0') state.sink->Append(1, state.sign_char); + state.sink->Append(padding.zeros, '0'); + + if (state.ShouldPrintDot()) state.sink->Append(1, '.'); + + // Print digits + int digits_to_go = state.precision; + + FractionalDigitGenerator::RunConversion( + v, exp, [&](FractionalDigitGenerator digit_gen) { + // There are no digits to print here. + if (state.precision == 0) return; + + // We go one digit at a time, while keeping track of runs of nines. + // The runs of nines are used to perform rounding when necessary. + + while (digits_to_go > 0 && digit_gen.HasMoreDigits()) { + auto digits = digit_gen.GetDigits(); + + // Now we have a digit and a run of nines. + // See if we can print them all. + if (digits.num_nines + 1 < digits_to_go) { + // We don't have to round yet, so print them. + state.sink->Append(1, digits.digit_before_nine + '0'); + state.sink->Append(digits.num_nines, '9'); + digits_to_go -= digits.num_nines + 1; + + } else { + // We can't print all the nines, see where we have to truncate. + + bool round_up = false; + if (digits.num_nines + 1 > digits_to_go) { + // We round up at a nine. No need to print them. + round_up = true; + } else { + // We can fit all the nines, but truncate just after it. + if (digit_gen.IsGreaterThanHalf()) { + round_up = true; + } else if (digit_gen.IsExactlyHalf()) { + // Round to even + round_up = + digits.num_nines != 0 || digits.digit_before_nine % 2 == 1; + } + } + + if (round_up) { + state.sink->Append(1, digits.digit_before_nine + '1'); + --digits_to_go; + // The rest will be zeros. + } else { + state.sink->Append(1, digits.digit_before_nine + '0'); + state.sink->Append(digits_to_go - 1, '9'); + digits_to_go = 0; + } + return; + } + } + }); + + state.sink->Append(digits_to_go, '0'); + state.sink->Append(padding.right_spaces, ' '); +} + +template <typename Int> +void FormatF(Int mantissa, int exp, const FormatState &state) { + if (exp >= 0) { + const int total_bits = sizeof(Int) * 8 - LeadingZeros(mantissa) + exp; + + // Fallback to the slow stack-based approach if we can't do it in a 64 or + // 128 bit state. + if (ABSL_PREDICT_FALSE(total_bits > 128)) { + return FormatFPositiveExpSlow(mantissa, exp, state); + } + } else { + // Fallback to the slow stack-based approach if we can't do it in a 64 or + // 128 bit state. + if (ABSL_PREDICT_FALSE(exp < -128)) { + return FormatFNegativeExpSlow(mantissa, -exp, state); + } + } + return FormatFFast(mantissa, exp, state); +} + +// Grab the group of four bits (nibble) from `n`. E.g., nibble 1 corresponds to +// bits 4-7. +template <typename Int> +uint8_t GetNibble(Int n, int nibble_index) { + constexpr Int mask_low_nibble = Int{0xf}; + int shift = nibble_index * 4; + n &= mask_low_nibble << shift; + return static_cast<uint8_t>((n >> shift) & 0xf); +} + +// Add one to the given nibble, applying carry to higher nibbles. Returns true +// if overflow, false otherwise. +template <typename Int> +bool IncrementNibble(int nibble_index, Int *n) { + constexpr int kShift = sizeof(Int) * 8 - 1; + constexpr int kNumNibbles = sizeof(Int) * 8 / 4; + Int before = *n >> kShift; + // Here we essentially want to take the number 1 and move it into the requsted + // nibble, then add it to *n to effectively increment the nibble. However, + // ASan will complain if we try to shift the 1 beyond the limits of the Int, + // i.e., if the nibble_index is out of range. So therefore we check for this + // and if we are out of range we just add 0 which leaves *n unchanged, which + // seems like the reasonable thing to do in that case. + *n += ((nibble_index >= kNumNibbles) ? 0 : (Int{1} << (nibble_index * 4))); + Int after = *n >> kShift; + return (before && !after) || (nibble_index >= kNumNibbles); +} + +// Return a mask with 1's in the given nibble and all lower nibbles. +template <typename Int> +Int MaskUpToNibbleInclusive(int nibble_index) { + constexpr int kNumNibbles = sizeof(Int) * 8 / 4; + static const Int ones = ~Int{0}; + return ones >> std::max(0, 4 * (kNumNibbles - nibble_index - 1)); +} + +// Return a mask with 1's below the given nibble. +template <typename Int> +Int MaskUpToNibbleExclusive(int nibble_index) { + return nibble_index <= 0 ? 0 : MaskUpToNibbleInclusive<Int>(nibble_index - 1); +} + +template <typename Int> +Int MoveToNibble(uint8_t nibble, int nibble_index) { + return Int{nibble} << (4 * nibble_index); +} + +// Given mantissa size, find optimal # of mantissa bits to put in initial digit. +// +// In the hex representation we keep a single hex digit to the left of the dot. +// However, the question as to how many bits of the mantissa should be put into +// that hex digit in theory is arbitrary, but in practice it is optimal to +// choose based on the size of the mantissa. E.g., for a `double`, there are 53 +// mantissa bits, so that means that we should put 1 bit to the left of the dot, +// thereby leaving 52 bits to the right, which is evenly divisible by four and +// thus all fractional digits represent actual precision. For a `long double`, +// on the other hand, there are 64 bits of mantissa, thus we can use all four +// bits for the initial hex digit and still have a number left over (60) that is +// a multiple of four. Once again, the goal is to have all fractional digits +// represent real precision. +template <typename Float> +constexpr int HexFloatLeadingDigitSizeInBits() { + return std::numeric_limits<Float>::digits % 4 > 0 + ? std::numeric_limits<Float>::digits % 4 + : 4; +} + +// This function captures the rounding behavior of glibc for hex float +// representations. E.g. when rounding 0x1.ab800000 to a precision of .2 +// ("%.2a") glibc will round up because it rounds toward the even number (since +// 0xb is an odd number, it will round up to 0xc). However, when rounding at a +// point that is not followed by 800000..., it disregards the parity and rounds +// up if > 8 and rounds down if < 8. +template <typename Int> +bool HexFloatNeedsRoundUp(Int mantissa, int final_nibble_displayed, + uint8_t leading) { + // If the last nibble (hex digit) to be displayed is the lowest on in the + // mantissa then that means that we don't have any further nibbles to inform + // rounding, so don't round. + if (final_nibble_displayed <= 0) { + return false; + } + int rounding_nibble_idx = final_nibble_displayed - 1; + constexpr int kTotalNibbles = sizeof(Int) * 8 / 4; + assert(final_nibble_displayed <= kTotalNibbles); + Int mantissa_up_to_rounding_nibble_inclusive = + mantissa & MaskUpToNibbleInclusive<Int>(rounding_nibble_idx); + Int eight = MoveToNibble<Int>(8, rounding_nibble_idx); + if (mantissa_up_to_rounding_nibble_inclusive != eight) { + return mantissa_up_to_rounding_nibble_inclusive > eight; + } + // Nibble in question == 8. + uint8_t round_if_odd = (final_nibble_displayed == kTotalNibbles) + ? leading + : GetNibble(mantissa, final_nibble_displayed); + return round_if_odd % 2 == 1; +} + +// Stores values associated with a Float type needed by the FormatA +// implementation in order to avoid templatizing that function by the Float +// type. +struct HexFloatTypeParams { + template <typename Float> + explicit HexFloatTypeParams(Float) + : min_exponent(std::numeric_limits<Float>::min_exponent - 1), + leading_digit_size_bits(HexFloatLeadingDigitSizeInBits<Float>()) { + assert(leading_digit_size_bits >= 1 && leading_digit_size_bits <= 4); + } + + int min_exponent; + int leading_digit_size_bits; +}; + +// Hex Float Rounding. First check if we need to round; if so, then we do that +// by manipulating (incrementing) the mantissa, that way we can later print the +// mantissa digits by iterating through them in the same way regardless of +// whether a rounding happened. +template <typename Int> +void FormatARound(bool precision_specified, const FormatState &state, + uint8_t *leading, Int *mantissa, int *exp) { + constexpr int kTotalNibbles = sizeof(Int) * 8 / 4; + // Index of the last nibble that we could display given precision. + int final_nibble_displayed = + precision_specified ? std::max(0, (kTotalNibbles - state.precision)) : 0; + if (HexFloatNeedsRoundUp(*mantissa, final_nibble_displayed, *leading)) { + // Need to round up. + bool overflow = IncrementNibble(final_nibble_displayed, mantissa); + *leading += (overflow ? 1 : 0); + if (ABSL_PREDICT_FALSE(*leading > 15)) { + // We have overflowed the leading digit. This would mean that we would + // need two hex digits to the left of the dot, which is not allowed. So + // adjust the mantissa and exponent so that the result is always 1.0eXXX. + *leading = 1; + *mantissa = 0; + *exp += 4; + } + } + // Now that we have handled a possible round-up we can go ahead and zero out + // all the nibbles of the mantissa that we won't need. + if (precision_specified) { + *mantissa &= ~MaskUpToNibbleExclusive<Int>(final_nibble_displayed); + } +} + +template <typename Int> +void FormatANormalize(const HexFloatTypeParams float_traits, uint8_t *leading, + Int *mantissa, int *exp) { + constexpr int kIntBits = sizeof(Int) * 8; + static const Int kHighIntBit = Int{1} << (kIntBits - 1); + const int kLeadDigitBitsCount = float_traits.leading_digit_size_bits; + // Normalize mantissa so that highest bit set is in MSB position, unless we + // get interrupted by the exponent threshold. + while (*mantissa && !(*mantissa & kHighIntBit)) { + if (ABSL_PREDICT_FALSE(*exp - 1 < float_traits.min_exponent)) { + *mantissa >>= (float_traits.min_exponent - *exp); + *exp = float_traits.min_exponent; + return; + } + *mantissa <<= 1; + --*exp; + } + // Extract bits for leading digit then shift them away leaving the + // fractional part. + *leading = + static_cast<uint8_t>(*mantissa >> (kIntBits - kLeadDigitBitsCount)); + *exp -= (*mantissa != 0) ? kLeadDigitBitsCount : *exp; + *mantissa <<= kLeadDigitBitsCount; +} + +template <typename Int> +void FormatA(const HexFloatTypeParams float_traits, Int mantissa, int exp, + bool uppercase, const FormatState &state) { + // Int properties. + constexpr int kIntBits = sizeof(Int) * 8; + constexpr int kTotalNibbles = sizeof(Int) * 8 / 4; + // Did the user specify a precision explicitly? + const bool precision_specified = state.conv.precision() >= 0; + + // ========== Normalize/Denormalize ========== + exp += kIntBits; // make all digits fractional digits. + // This holds the (up to four) bits of leading digit, i.e., the '1' in the + // number 0x1.e6fp+2. It's always > 0 unless number is zero or denormal. + uint8_t leading = 0; + FormatANormalize(float_traits, &leading, &mantissa, &exp); + + // =============== Rounding ================== + // Check if we need to round; if so, then we do that by manipulating + // (incrementing) the mantissa before beginning to print characters. + FormatARound(precision_specified, state, &leading, &mantissa, &exp); + + // ============= Format Result =============== + // This buffer holds the "0x1.ab1de3" portion of "0x1.ab1de3pe+2". Compute the + // size with long double which is the largest of the floats. + constexpr size_t kBufSizeForHexFloatRepr = + 2 // 0x + + std::numeric_limits<MaxFloatType>::digits / 4 // number of hex digits + + 1 // round up + + 1; // "." (dot) + char digits_buffer[kBufSizeForHexFloatRepr]; + char *digits_iter = digits_buffer; + const char *const digits = + static_cast<const char *>("0123456789ABCDEF0123456789abcdef") + + (uppercase ? 0 : 16); + + // =============== Hex Prefix ================ + *digits_iter++ = '0'; + *digits_iter++ = uppercase ? 'X' : 'x'; + + // ========== Non-Fractional Digit =========== + *digits_iter++ = digits[leading]; + + // ================== Dot ==================== + // There are three reasons we might need a dot. Keep in mind that, at this + // point, the mantissa holds only the fractional part. + if ((precision_specified && state.precision > 0) || + (!precision_specified && mantissa > 0) || state.conv.has_alt_flag()) { + *digits_iter++ = '.'; + } + + // ============ Fractional Digits ============ + int digits_emitted = 0; + while (mantissa > 0) { + *digits_iter++ = digits[GetNibble(mantissa, kTotalNibbles - 1)]; + mantissa <<= 4; + ++digits_emitted; + } + int trailing_zeros = + precision_specified ? state.precision - digits_emitted : 0; + assert(trailing_zeros >= 0); + auto digits_result = string_view(digits_buffer, digits_iter - digits_buffer); + + // =============== Exponent ================== + constexpr size_t kBufSizeForExpDecRepr = + numbers_internal::kFastToBufferSize // requred for FastIntToBuffer + + 1 // 'p' or 'P' + + 1; // '+' or '-' + char exp_buffer[kBufSizeForExpDecRepr]; + exp_buffer[0] = uppercase ? 'P' : 'p'; + exp_buffer[1] = exp >= 0 ? '+' : '-'; + numbers_internal::FastIntToBuffer(exp < 0 ? -exp : exp, exp_buffer + 2); + + // ============ Assemble Result ============== + FinalPrint(state, // + digits_result, // 0xN.NNN... + 2, // offset in `data` to start padding if needed. + trailing_zeros, // num remaining mantissa padding zeros + exp_buffer); // exponent +} + +char *CopyStringTo(y_absl::string_view v, char *out) { + std::memcpy(out, v.data(), v.size()); + return out + v.size(); +} + +template <typename Float> +bool FallbackToSnprintf(const Float v, const FormatConversionSpecImpl &conv, + FormatSinkImpl *sink) { + int w = conv.width() >= 0 ? conv.width() : 0; + int p = conv.precision() >= 0 ? conv.precision() : -1; + char fmt[32]; + { + char *fp = fmt; + *fp++ = '%'; + fp = CopyStringTo(FormatConversionSpecImplFriend::FlagsToString(conv), fp); + fp = CopyStringTo("*.*", fp); + if (std::is_same<long double, Float>()) { + *fp++ = 'L'; + } + *fp++ = FormatConversionCharToChar(conv.conversion_char()); + *fp = 0; + assert(fp < fmt + sizeof(fmt)); + } + TString space(512, '\0'); + y_absl::string_view result; + while (true) { + int n = snprintf(&space[0], space.size(), fmt, w, p, v); + if (n < 0) return false; + if (static_cast<size_t>(n) < space.size()) { + result = y_absl::string_view(space.data(), n); + break; + } + space.resize(n + 1); + } + sink->Append(result); + return true; +} + +// 128-bits in decimal: ceil(128*log(2)/log(10)) +// or std::numeric_limits<__uint128_t>::digits10 +constexpr int kMaxFixedPrecision = 39; + +constexpr int kBufferLength = /*sign*/ 1 + + /*integer*/ kMaxFixedPrecision + + /*point*/ 1 + + /*fraction*/ kMaxFixedPrecision + + /*exponent e+123*/ 5; + +struct Buffer { + void push_front(char c) { + assert(begin > data); + *--begin = c; + } + void push_back(char c) { + assert(end < data + sizeof(data)); + *end++ = c; + } + void pop_back() { + assert(begin < end); + --end; + } + + char &back() { + assert(begin < end); + return end[-1]; + } + + char last_digit() const { return end[-1] == '.' ? end[-2] : end[-1]; } + + int size() const { return static_cast<int>(end - begin); } + + char data[kBufferLength]; + char *begin; + char *end; +}; + +enum class FormatStyle { Fixed, Precision }; + +// If the value is Inf or Nan, print it and return true. +// Otherwise, return false. +template <typename Float> +bool ConvertNonNumericFloats(char sign_char, Float v, + const FormatConversionSpecImpl &conv, + FormatSinkImpl *sink) { + char text[4], *ptr = text; + if (sign_char != '\0') *ptr++ = sign_char; + if (std::isnan(v)) { + ptr = std::copy_n( + FormatConversionCharIsUpper(conv.conversion_char()) ? "NAN" : "nan", 3, + ptr); + } else if (std::isinf(v)) { + ptr = std::copy_n( + FormatConversionCharIsUpper(conv.conversion_char()) ? "INF" : "inf", 3, + ptr); + } else { + return false; + } + + return sink->PutPaddedString(string_view(text, ptr - text), conv.width(), -1, + conv.has_left_flag()); +} + +// Round up the last digit of the value. +// It will carry over and potentially overflow. 'exp' will be adjusted in that +// case. +template <FormatStyle mode> +void RoundUp(Buffer *buffer, int *exp) { + char *p = &buffer->back(); + while (p >= buffer->begin && (*p == '9' || *p == '.')) { + if (*p == '9') *p = '0'; + --p; + } + + if (p < buffer->begin) { + *p = '1'; + buffer->begin = p; + if (mode == FormatStyle::Precision) { + std::swap(p[1], p[2]); // move the . + ++*exp; + buffer->pop_back(); + } + } else { + ++*p; + } +} + +void PrintExponent(int exp, char e, Buffer *out) { + out->push_back(e); + if (exp < 0) { + out->push_back('-'); + exp = -exp; + } else { + out->push_back('+'); + } + // Exponent digits. + if (exp > 99) { + out->push_back(exp / 100 + '0'); + out->push_back(exp / 10 % 10 + '0'); + out->push_back(exp % 10 + '0'); + } else { + out->push_back(exp / 10 + '0'); + out->push_back(exp % 10 + '0'); + } +} + +template <typename Float, typename Int> +constexpr bool CanFitMantissa() { + return +#if defined(__clang__) && !defined(__SSE3__) + // Workaround for clang bug: https://bugs.llvm.org/show_bug.cgi?id=38289 + // Casting from long double to uint64_t is miscompiled and drops bits. + (!std::is_same<Float, long double>::value || + !std::is_same<Int, uint64_t>::value) && +#endif + std::numeric_limits<Float>::digits <= std::numeric_limits<Int>::digits; +} + +template <typename Float> +struct Decomposed { + using MantissaType = + y_absl::conditional_t<std::is_same<long double, Float>::value, uint128, + uint64_t>; + static_assert(std::numeric_limits<Float>::digits <= sizeof(MantissaType) * 8, + ""); + MantissaType mantissa; + int exponent; +}; + +// Decompose the double into an integer mantissa and an exponent. +template <typename Float> +Decomposed<Float> Decompose(Float v) { + int exp; + Float m = std::frexp(v, &exp); + m = std::ldexp(m, std::numeric_limits<Float>::digits); + exp -= std::numeric_limits<Float>::digits; + + return {static_cast<typename Decomposed<Float>::MantissaType>(m), exp}; +} + +// Print 'digits' as decimal. +// In Fixed mode, we add a '.' at the end. +// In Precision mode, we add a '.' after the first digit. +template <FormatStyle mode, typename Int> +int PrintIntegralDigits(Int digits, Buffer *out) { + int printed = 0; + if (digits) { + for (; digits; digits /= 10) out->push_front(digits % 10 + '0'); + printed = out->size(); + if (mode == FormatStyle::Precision) { + out->push_front(*out->begin); + out->begin[1] = '.'; + } else { + out->push_back('.'); + } + } else if (mode == FormatStyle::Fixed) { + out->push_front('0'); + out->push_back('.'); + printed = 1; + } + return printed; +} + +// Back out 'extra_digits' digits and round up if necessary. +bool RemoveExtraPrecision(int extra_digits, bool has_leftover_value, + Buffer *out, int *exp_out) { + if (extra_digits <= 0) return false; + + // Back out the extra digits + out->end -= extra_digits; + + bool needs_to_round_up = [&] { + // We look at the digit just past the end. + // There must be 'extra_digits' extra valid digits after end. + if (*out->end > '5') return true; + if (*out->end < '5') return false; + if (has_leftover_value || std::any_of(out->end + 1, out->end + extra_digits, + [](char c) { return c != '0'; })) + return true; + + // Ends in ...50*, round to even. + return out->last_digit() % 2 == 1; + }(); + + if (needs_to_round_up) { + RoundUp<FormatStyle::Precision>(out, exp_out); + } + return true; +} + +// Print the value into the buffer. +// This will not include the exponent, which will be returned in 'exp_out' for +// Precision mode. +template <typename Int, typename Float, FormatStyle mode> +bool FloatToBufferImpl(Int int_mantissa, int exp, int precision, Buffer *out, + int *exp_out) { + assert((CanFitMantissa<Float, Int>())); + + const int int_bits = std::numeric_limits<Int>::digits; + + // In precision mode, we start printing one char to the right because it will + // also include the '.' + // In fixed mode we put the dot afterwards on the right. + out->begin = out->end = + out->data + 1 + kMaxFixedPrecision + (mode == FormatStyle::Precision); + + if (exp >= 0) { + if (std::numeric_limits<Float>::digits + exp > int_bits) { + // The value will overflow the Int + return false; + } + int digits_printed = PrintIntegralDigits<mode>(int_mantissa << exp, out); + int digits_to_zero_pad = precision; + if (mode == FormatStyle::Precision) { + *exp_out = digits_printed - 1; + digits_to_zero_pad -= digits_printed - 1; + if (RemoveExtraPrecision(-digits_to_zero_pad, false, out, exp_out)) { + return true; + } + } + for (; digits_to_zero_pad-- > 0;) out->push_back('0'); + return true; + } + + exp = -exp; + // We need at least 4 empty bits for the next decimal digit. + // We will multiply by 10. + if (exp > int_bits - 4) return false; + + const Int mask = (Int{1} << exp) - 1; + + // Print the integral part first. + int digits_printed = PrintIntegralDigits<mode>(int_mantissa >> exp, out); + int_mantissa &= mask; + + int fractional_count = precision; + if (mode == FormatStyle::Precision) { + if (digits_printed == 0) { + // Find the first non-zero digit, when in Precision mode. + *exp_out = 0; + if (int_mantissa) { + while (int_mantissa <= mask) { + int_mantissa *= 10; + --*exp_out; + } + } + out->push_front(static_cast<char>(int_mantissa >> exp) + '0'); + out->push_back('.'); + int_mantissa &= mask; + } else { + // We already have a digit, and a '.' + *exp_out = digits_printed - 1; + fractional_count -= *exp_out; + if (RemoveExtraPrecision(-fractional_count, int_mantissa != 0, out, + exp_out)) { + // If we had enough digits, return right away. + // The code below will try to round again otherwise. + return true; + } + } + } + + auto get_next_digit = [&] { + int_mantissa *= 10; + int digit = static_cast<int>(int_mantissa >> exp); + int_mantissa &= mask; + return digit; + }; + + // Print fractional_count more digits, if available. + for (; fractional_count > 0; --fractional_count) { + out->push_back(get_next_digit() + '0'); + } + + int next_digit = get_next_digit(); + if (next_digit > 5 || + (next_digit == 5 && (int_mantissa || out->last_digit() % 2 == 1))) { + RoundUp<mode>(out, exp_out); + } + + return true; +} + +template <FormatStyle mode, typename Float> +bool FloatToBuffer(Decomposed<Float> decomposed, int precision, Buffer *out, + int *exp) { + if (precision > kMaxFixedPrecision) return false; + + // Try with uint64_t. + if (CanFitMantissa<Float, std::uint64_t>() && + FloatToBufferImpl<std::uint64_t, Float, mode>( + static_cast<std::uint64_t>(decomposed.mantissa), + static_cast<std::uint64_t>(decomposed.exponent), precision, out, exp)) + return true; + +#if defined(ABSL_HAVE_INTRINSIC_INT128) + // If that is not enough, try with __uint128_t. + return CanFitMantissa<Float, __uint128_t>() && + FloatToBufferImpl<__uint128_t, Float, mode>( + static_cast<__uint128_t>(decomposed.mantissa), + static_cast<__uint128_t>(decomposed.exponent), precision, out, + exp); +#endif + return false; +} + +void WriteBufferToSink(char sign_char, y_absl::string_view str, + const FormatConversionSpecImpl &conv, + FormatSinkImpl *sink) { + int left_spaces = 0, zeros = 0, right_spaces = 0; + int missing_chars = + conv.width() >= 0 ? std::max(conv.width() - static_cast<int>(str.size()) - + static_cast<int>(sign_char != 0), + 0) + : 0; + if (conv.has_left_flag()) { + right_spaces = missing_chars; + } else if (conv.has_zero_flag()) { + zeros = missing_chars; + } else { + left_spaces = missing_chars; + } + + sink->Append(left_spaces, ' '); + if (sign_char != '\0') sink->Append(1, sign_char); + sink->Append(zeros, '0'); + sink->Append(str); + sink->Append(right_spaces, ' '); +} + +template <typename Float> +bool FloatToSink(const Float v, const FormatConversionSpecImpl &conv, + FormatSinkImpl *sink) { + // Print the sign or the sign column. + Float abs_v = v; + char sign_char = 0; + if (std::signbit(abs_v)) { + sign_char = '-'; + abs_v = -abs_v; + } else if (conv.has_show_pos_flag()) { + sign_char = '+'; + } else if (conv.has_sign_col_flag()) { + sign_char = ' '; + } + + // Print nan/inf. + if (ConvertNonNumericFloats(sign_char, abs_v, conv, sink)) { + return true; + } + + int precision = conv.precision() < 0 ? 6 : conv.precision(); + + int exp = 0; + + auto decomposed = Decompose(abs_v); + + Buffer buffer; + + FormatConversionChar c = conv.conversion_char(); + + if (c == FormatConversionCharInternal::f || + c == FormatConversionCharInternal::F) { + FormatF(decomposed.mantissa, decomposed.exponent, + {sign_char, precision, conv, sink}); + return true; + } else if (c == FormatConversionCharInternal::e || + c == FormatConversionCharInternal::E) { + if (!FloatToBuffer<FormatStyle::Precision>(decomposed, precision, &buffer, + &exp)) { + return FallbackToSnprintf(v, conv, sink); + } + if (!conv.has_alt_flag() && buffer.back() == '.') buffer.pop_back(); + PrintExponent( + exp, FormatConversionCharIsUpper(conv.conversion_char()) ? 'E' : 'e', + &buffer); + } else if (c == FormatConversionCharInternal::g || + c == FormatConversionCharInternal::G) { + precision = std::max(0, precision - 1); + if (!FloatToBuffer<FormatStyle::Precision>(decomposed, precision, &buffer, + &exp)) { + return FallbackToSnprintf(v, conv, sink); + } + if (precision + 1 > exp && exp >= -4) { + if (exp < 0) { + // Have 1.23456, needs 0.00123456 + // Move the first digit + buffer.begin[1] = *buffer.begin; + // Add some zeros + for (; exp < -1; ++exp) *buffer.begin-- = '0'; + *buffer.begin-- = '.'; + *buffer.begin = '0'; + } else if (exp > 0) { + // Have 1.23456, needs 1234.56 + // Move the '.' exp positions to the right. + std::rotate(buffer.begin + 1, buffer.begin + 2, buffer.begin + exp + 2); + } + exp = 0; + } + if (!conv.has_alt_flag()) { + while (buffer.back() == '0') buffer.pop_back(); + if (buffer.back() == '.') buffer.pop_back(); + } + if (exp) { + PrintExponent( + exp, FormatConversionCharIsUpper(conv.conversion_char()) ? 'E' : 'e', + &buffer); + } + } else if (c == FormatConversionCharInternal::a || + c == FormatConversionCharInternal::A) { + bool uppercase = (c == FormatConversionCharInternal::A); + FormatA(HexFloatTypeParams(Float{}), decomposed.mantissa, + decomposed.exponent, uppercase, {sign_char, precision, conv, sink}); + return true; + } else { + return false; + } + + WriteBufferToSink(sign_char, + y_absl::string_view(buffer.begin, buffer.end - buffer.begin), + conv, sink); + + return true; +} + +} // namespace + +bool ConvertFloatImpl(long double v, const FormatConversionSpecImpl &conv, + FormatSinkImpl *sink) { + if (IsDoubleDouble()) { + // This is the `double-double` representation of `long double`. We do not + // handle it natively. Fallback to snprintf. + return FallbackToSnprintf(v, conv, sink); + } + + return FloatToSink(v, conv, sink); +} + +bool ConvertFloatImpl(float v, const FormatConversionSpecImpl &conv, + FormatSinkImpl *sink) { + return FloatToSink(static_cast<double>(v), conv, sink); +} + +bool ConvertFloatImpl(double v, const FormatConversionSpecImpl &conv, + FormatSinkImpl *sink) { + return FloatToSink(v, conv, sink); +} + +} // namespace str_format_internal +ABSL_NAMESPACE_END +} // namespace y_absl diff --git a/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/str_format/float_conversion.h b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/str_format/float_conversion.h new file mode 100644 index 0000000000..d93a415756 --- /dev/null +++ b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/str_format/float_conversion.h @@ -0,0 +1,37 @@ +// Copyright 2020 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ABSL_STRINGS_INTERNAL_STR_FORMAT_FLOAT_CONVERSION_H_ +#define ABSL_STRINGS_INTERNAL_STR_FORMAT_FLOAT_CONVERSION_H_ + +#include "y_absl/strings/internal/str_format/extension.h" + +namespace y_absl { +ABSL_NAMESPACE_BEGIN +namespace str_format_internal { + +bool ConvertFloatImpl(float v, const FormatConversionSpecImpl &conv, + FormatSinkImpl *sink); + +bool ConvertFloatImpl(double v, const FormatConversionSpecImpl &conv, + FormatSinkImpl *sink); + +bool ConvertFloatImpl(long double v, const FormatConversionSpecImpl &conv, + FormatSinkImpl *sink); + +} // namespace str_format_internal +ABSL_NAMESPACE_END +} // namespace y_absl + +#endif // ABSL_STRINGS_INTERNAL_STR_FORMAT_FLOAT_CONVERSION_H_ diff --git a/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/str_format/output.cc b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/str_format/output.cc new file mode 100644 index 0000000000..ade3f67ef2 --- /dev/null +++ b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/str_format/output.cc @@ -0,0 +1,72 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "y_absl/strings/internal/str_format/output.h" + +#include <errno.h> +#include <cstring> + +namespace y_absl { +ABSL_NAMESPACE_BEGIN +namespace str_format_internal { + +namespace { +struct ClearErrnoGuard { + ClearErrnoGuard() : old_value(errno) { errno = 0; } + ~ClearErrnoGuard() { + if (!errno) errno = old_value; + } + int old_value; +}; +} // namespace + +void BufferRawSink::Write(string_view v) { + size_t to_write = std::min(v.size(), size_); + std::memcpy(buffer_, v.data(), to_write); + buffer_ += to_write; + size_ -= to_write; + total_written_ += v.size(); +} + +void FILERawSink::Write(string_view v) { + while (!v.empty() && !error_) { + // Reset errno to zero in case the libc implementation doesn't set errno + // when a failure occurs. + ClearErrnoGuard guard; + + if (size_t result = std::fwrite(v.data(), 1, v.size(), output_)) { + // Some progress was made. + count_ += result; + v.remove_prefix(result); + } else { + if (errno == EINTR) { + continue; + } else if (errno) { + error_ = errno; + } else if (std::ferror(output_)) { + // Non-POSIX compliant libc implementations may not set errno, so we + // have check the streams error indicator. + error_ = EBADF; + } else { + // We're likely on a non-POSIX system that encountered EINTR but had no + // way of reporting it. + continue; + } + } + } +} + +} // namespace str_format_internal +ABSL_NAMESPACE_END +} // namespace y_absl diff --git a/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/str_format/output.h b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/str_format/output.h new file mode 100644 index 0000000000..8fc46fbafa --- /dev/null +++ b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/str_format/output.h @@ -0,0 +1,96 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Output extension hooks for the Format library. +// `internal::InvokeFlush` calls the appropriate flush function for the +// specified output argument. +// `BufferRawSink` is a simple output sink for a char buffer. Used by SnprintF. +// `FILERawSink` is a std::FILE* based sink. Used by PrintF and FprintF. + +#ifndef ABSL_STRINGS_INTERNAL_STR_FORMAT_OUTPUT_H_ +#define ABSL_STRINGS_INTERNAL_STR_FORMAT_OUTPUT_H_ + +#include <cstdio> +#include <ostream> +#include <util/generic/string.h> + +#include "y_absl/base/port.h" +#include "y_absl/strings/string_view.h" + +namespace y_absl { +ABSL_NAMESPACE_BEGIN +namespace str_format_internal { + +// RawSink implementation that writes into a char* buffer. +// It will not overflow the buffer, but will keep the total count of chars +// that would have been written. +class BufferRawSink { + public: + BufferRawSink(char* buffer, size_t size) : buffer_(buffer), size_(size) {} + + size_t total_written() const { return total_written_; } + void Write(string_view v); + + private: + char* buffer_; + size_t size_; + size_t total_written_ = 0; +}; + +// RawSink implementation that writes into a FILE*. +// It keeps track of the total number of bytes written and any error encountered +// during the writes. +class FILERawSink { + public: + explicit FILERawSink(std::FILE* output) : output_(output) {} + + void Write(string_view v); + + size_t count() const { return count_; } + int error() const { return error_; } + + private: + std::FILE* output_; + int error_ = 0; + size_t count_ = 0; +}; + +// Provide RawSink integration with common types from the STL. +inline void AbslFormatFlush(TString* out, string_view s) { + out->append(s.data(), s.size()); +} +inline void AbslFormatFlush(std::ostream* out, string_view s) { + out->write(s.data(), s.size()); +} + +inline void AbslFormatFlush(FILERawSink* sink, string_view v) { + sink->Write(v); +} + +inline void AbslFormatFlush(BufferRawSink* sink, string_view v) { + sink->Write(v); +} + +// This is a SFINAE to get a better compiler error message when the type +// is not supported. +template <typename T> +auto InvokeFlush(T* out, string_view s) -> decltype(AbslFormatFlush(out, s)) { + AbslFormatFlush(out, s); +} + +} // namespace str_format_internal +ABSL_NAMESPACE_END +} // namespace y_absl + +#endif // ABSL_STRINGS_INTERNAL_STR_FORMAT_OUTPUT_H_ diff --git a/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/str_format/parser.cc b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/str_format/parser.cc new file mode 100644 index 0000000000..af07e32fe5 --- /dev/null +++ b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/str_format/parser.cc @@ -0,0 +1,339 @@ +// Copyright 2020 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "y_absl/strings/internal/str_format/parser.h" + +#include <assert.h> +#include <string.h> +#include <wchar.h> +#include <cctype> +#include <cstdint> + +#include <algorithm> +#include <initializer_list> +#include <limits> +#include <ostream> +#include <util/generic/string.h> +#include <unordered_set> + +namespace y_absl { +ABSL_NAMESPACE_BEGIN +namespace str_format_internal { + +using CC = FormatConversionCharInternal; +using LM = LengthMod; + +// Abbreviations to fit in the table below. +constexpr auto f_sign = Flags::kSignCol; +constexpr auto f_alt = Flags::kAlt; +constexpr auto f_pos = Flags::kShowPos; +constexpr auto f_left = Flags::kLeft; +constexpr auto f_zero = Flags::kZero; + +ABSL_CONST_INIT const ConvTag kTags[256] = { + {}, {}, {}, {}, {}, {}, {}, {}, // 00-07 + {}, {}, {}, {}, {}, {}, {}, {}, // 08-0f + {}, {}, {}, {}, {}, {}, {}, {}, // 10-17 + {}, {}, {}, {}, {}, {}, {}, {}, // 18-1f + f_sign, {}, {}, f_alt, {}, {}, {}, {}, // !"#$%&' + {}, {}, {}, f_pos, {}, f_left, {}, {}, // ()*+,-./ + f_zero, {}, {}, {}, {}, {}, {}, {}, // 01234567 + {}, {}, {}, {}, {}, {}, {}, {}, // 89:;<=>? + {}, CC::A, {}, {}, {}, CC::E, CC::F, CC::G, // @ABCDEFG + {}, {}, {}, {}, LM::L, {}, {}, {}, // HIJKLMNO + {}, {}, {}, {}, {}, {}, {}, {}, // PQRSTUVW + CC::X, {}, {}, {}, {}, {}, {}, {}, // XYZ[\]^_ + {}, CC::a, {}, CC::c, CC::d, CC::e, CC::f, CC::g, // `abcdefg + LM::h, CC::i, LM::j, {}, LM::l, {}, CC::n, CC::o, // hijklmno + CC::p, LM::q, {}, CC::s, LM::t, CC::u, {}, {}, // pqrstuvw + CC::x, {}, LM::z, {}, {}, {}, {}, {}, // xyz{|}! + {}, {}, {}, {}, {}, {}, {}, {}, // 80-87 + {}, {}, {}, {}, {}, {}, {}, {}, // 88-8f + {}, {}, {}, {}, {}, {}, {}, {}, // 90-97 + {}, {}, {}, {}, {}, {}, {}, {}, // 98-9f + {}, {}, {}, {}, {}, {}, {}, {}, // a0-a7 + {}, {}, {}, {}, {}, {}, {}, {}, // a8-af + {}, {}, {}, {}, {}, {}, {}, {}, // b0-b7 + {}, {}, {}, {}, {}, {}, {}, {}, // b8-bf + {}, {}, {}, {}, {}, {}, {}, {}, // c0-c7 + {}, {}, {}, {}, {}, {}, {}, {}, // c8-cf + {}, {}, {}, {}, {}, {}, {}, {}, // d0-d7 + {}, {}, {}, {}, {}, {}, {}, {}, // d8-df + {}, {}, {}, {}, {}, {}, {}, {}, // e0-e7 + {}, {}, {}, {}, {}, {}, {}, {}, // e8-ef + {}, {}, {}, {}, {}, {}, {}, {}, // f0-f7 + {}, {}, {}, {}, {}, {}, {}, {}, // f8-ff +}; + +namespace { + +bool CheckFastPathSetting(const UnboundConversion& conv) { + bool width_precision_needed = + conv.width.value() >= 0 || conv.precision.value() >= 0; + if (width_precision_needed && conv.flags == Flags::kBasic) { + fprintf(stderr, + "basic=%d left=%d show_pos=%d sign_col=%d alt=%d zero=%d " + "width=%d precision=%d\n", + conv.flags == Flags::kBasic ? 1 : 0, + FlagsContains(conv.flags, Flags::kLeft) ? 1 : 0, + FlagsContains(conv.flags, Flags::kShowPos) ? 1 : 0, + FlagsContains(conv.flags, Flags::kSignCol) ? 1 : 0, + FlagsContains(conv.flags, Flags::kAlt) ? 1 : 0, + FlagsContains(conv.flags, Flags::kZero) ? 1 : 0, conv.width.value(), + conv.precision.value()); + return false; + } + return true; +} + +template <bool is_positional> +const char *ConsumeConversion(const char *pos, const char *const end, + UnboundConversion *conv, int *next_arg) { + const char* const original_pos = pos; + char c; + // Read the next char into `c` and update `pos`. Returns false if there are + // no more chars to read. +#define ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR() \ + do { \ + if (ABSL_PREDICT_FALSE(pos == end)) return nullptr; \ + c = *pos++; \ + } while (0) + + const auto parse_digits = [&] { + int digits = c - '0'; + // We do not want to overflow `digits` so we consume at most digits10 + // digits. If there are more digits the parsing will fail later on when the + // digit doesn't match the expected characters. + int num_digits = std::numeric_limits<int>::digits10; + for (;;) { + if (ABSL_PREDICT_FALSE(pos == end)) break; + c = *pos++; + if (!std::isdigit(c)) break; + --num_digits; + if (ABSL_PREDICT_FALSE(!num_digits)) break; + digits = 10 * digits + c - '0'; + } + return digits; + }; + + if (is_positional) { + ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR(); + if (ABSL_PREDICT_FALSE(c < '1' || c > '9')) return nullptr; + conv->arg_position = parse_digits(); + assert(conv->arg_position > 0); + if (ABSL_PREDICT_FALSE(c != '$')) return nullptr; + } + + ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR(); + + // We should start with the basic flag on. + assert(conv->flags == Flags::kBasic); + + // Any non alpha character makes this conversion not basic. + // This includes flags (-+ #0), width (1-9, *) or precision (.). + // All conversion characters and length modifiers are alpha characters. + if (c < 'A') { + while (c <= '0') { + auto tag = GetTagForChar(c); + if (tag.is_flags()) { + conv->flags = conv->flags | tag.as_flags(); + ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR(); + } else { + break; + } + } + + if (c <= '9') { + if (c >= '0') { + int maybe_width = parse_digits(); + if (!is_positional && c == '$') { + if (ABSL_PREDICT_FALSE(*next_arg != 0)) return nullptr; + // Positional conversion. + *next_arg = -1; + return ConsumeConversion<true>(original_pos, end, conv, next_arg); + } + conv->flags = conv->flags | Flags::kNonBasic; + conv->width.set_value(maybe_width); + } else if (c == '*') { + conv->flags = conv->flags | Flags::kNonBasic; + ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR(); + if (is_positional) { + if (ABSL_PREDICT_FALSE(c < '1' || c > '9')) return nullptr; + conv->width.set_from_arg(parse_digits()); + if (ABSL_PREDICT_FALSE(c != '$')) return nullptr; + ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR(); + } else { + conv->width.set_from_arg(++*next_arg); + } + } + } + + if (c == '.') { + conv->flags = conv->flags | Flags::kNonBasic; + ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR(); + if (std::isdigit(c)) { + conv->precision.set_value(parse_digits()); + } else if (c == '*') { + ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR(); + if (is_positional) { + if (ABSL_PREDICT_FALSE(c < '1' || c > '9')) return nullptr; + conv->precision.set_from_arg(parse_digits()); + if (c != '$') return nullptr; + ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR(); + } else { + conv->precision.set_from_arg(++*next_arg); + } + } else { + conv->precision.set_value(0); + } + } + } + + auto tag = GetTagForChar(c); + + if (ABSL_PREDICT_FALSE(!tag.is_conv())) { + if (ABSL_PREDICT_FALSE(!tag.is_length())) return nullptr; + + // It is a length modifier. + using str_format_internal::LengthMod; + LengthMod length_mod = tag.as_length(); + ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR(); + if (c == 'h' && length_mod == LengthMod::h) { + conv->length_mod = LengthMod::hh; + ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR(); + } else if (c == 'l' && length_mod == LengthMod::l) { + conv->length_mod = LengthMod::ll; + ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR(); + } else { + conv->length_mod = length_mod; + } + tag = GetTagForChar(c); + if (ABSL_PREDICT_FALSE(!tag.is_conv())) return nullptr; + } + + assert(CheckFastPathSetting(*conv)); + (void)(&CheckFastPathSetting); + + conv->conv = tag.as_conv(); + if (!is_positional) conv->arg_position = ++*next_arg; + return pos; +} + +} // namespace + +TString LengthModToString(LengthMod v) { + switch (v) { + case LengthMod::h: + return "h"; + case LengthMod::hh: + return "hh"; + case LengthMod::l: + return "l"; + case LengthMod::ll: + return "ll"; + case LengthMod::L: + return "L"; + case LengthMod::j: + return "j"; + case LengthMod::z: + return "z"; + case LengthMod::t: + return "t"; + case LengthMod::q: + return "q"; + case LengthMod::none: + return ""; + } + return ""; +} + +const char *ConsumeUnboundConversion(const char *p, const char *end, + UnboundConversion *conv, int *next_arg) { + if (*next_arg < 0) return ConsumeConversion<true>(p, end, conv, next_arg); + return ConsumeConversion<false>(p, end, conv, next_arg); +} + +struct ParsedFormatBase::ParsedFormatConsumer { + explicit ParsedFormatConsumer(ParsedFormatBase *parsedformat) + : parsed(parsedformat), data_pos(parsedformat->data_.get()) {} + + bool Append(string_view s) { + if (s.empty()) return true; + + size_t text_end = AppendText(s); + + if (!parsed->items_.empty() && !parsed->items_.back().is_conversion) { + // Let's extend the existing text run. + parsed->items_.back().text_end = text_end; + } else { + // Let's make a new text run. + parsed->items_.push_back({false, text_end, {}}); + } + return true; + } + + bool ConvertOne(const UnboundConversion &conv, string_view s) { + size_t text_end = AppendText(s); + parsed->items_.push_back({true, text_end, conv}); + return true; + } + + size_t AppendText(string_view s) { + memcpy(data_pos, s.data(), s.size()); + data_pos += s.size(); + return static_cast<size_t>(data_pos - parsed->data_.get()); + } + + ParsedFormatBase *parsed; + char* data_pos; +}; + +ParsedFormatBase::ParsedFormatBase( + string_view format, bool allow_ignored, + std::initializer_list<FormatConversionCharSet> convs) + : data_(format.empty() ? nullptr : new char[format.size()]) { + has_error_ = !ParseFormatString(format, ParsedFormatConsumer(this)) || + !MatchesConversions(allow_ignored, convs); +} + +bool ParsedFormatBase::MatchesConversions( + bool allow_ignored, + std::initializer_list<FormatConversionCharSet> convs) const { + std::unordered_set<int> used; + auto add_if_valid_conv = [&](int pos, char c) { + if (static_cast<size_t>(pos) > convs.size() || + !Contains(convs.begin()[pos - 1], c)) + return false; + used.insert(pos); + return true; + }; + for (const ConversionItem &item : items_) { + if (!item.is_conversion) continue; + auto &conv = item.conv; + if (conv.precision.is_from_arg() && + !add_if_valid_conv(conv.precision.get_from_arg(), '*')) + return false; + if (conv.width.is_from_arg() && + !add_if_valid_conv(conv.width.get_from_arg(), '*')) + return false; + if (!add_if_valid_conv(conv.arg_position, + FormatConversionCharToChar(conv.conv))) + return false; + } + return used.size() == convs.size() || allow_ignored; +} + +} // namespace str_format_internal +ABSL_NAMESPACE_END +} // namespace y_absl diff --git a/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/str_format/parser.h b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/str_format/parser.h new file mode 100644 index 0000000000..ba614bb8b4 --- /dev/null +++ b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/str_format/parser.h @@ -0,0 +1,357 @@ +// Copyright 2020 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ABSL_STRINGS_INTERNAL_STR_FORMAT_PARSER_H_ +#define ABSL_STRINGS_INTERNAL_STR_FORMAT_PARSER_H_ + +#include <limits.h> +#include <stddef.h> +#include <stdlib.h> + +#include <cassert> +#include <cstdint> +#include <initializer_list> +#include <iosfwd> +#include <iterator> +#include <memory> +#include <util/generic/string.h> +#include <vector> + +#include "y_absl/strings/internal/str_format/checker.h" +#include "y_absl/strings/internal/str_format/extension.h" + +namespace y_absl { +ABSL_NAMESPACE_BEGIN +namespace str_format_internal { + +enum class LengthMod : std::uint8_t { h, hh, l, ll, L, j, z, t, q, none }; + +TString LengthModToString(LengthMod v); + +// The analyzed properties of a single specified conversion. +struct UnboundConversion { + UnboundConversion() {} + + class InputValue { + public: + void set_value(int value) { + assert(value >= 0); + value_ = value; + } + int value() const { return value_; } + + // Marks the value as "from arg". aka the '*' format. + // Requires `value >= 1`. + // When set, is_from_arg() return true and get_from_arg() returns the + // original value. + // `value()`'s return value is unspecfied in this state. + void set_from_arg(int value) { + assert(value > 0); + value_ = -value - 1; + } + bool is_from_arg() const { return value_ < -1; } + int get_from_arg() const { + assert(is_from_arg()); + return -value_ - 1; + } + + private: + int value_ = -1; + }; + + // No need to initialize. It will always be set in the parser. + int arg_position; + + InputValue width; + InputValue precision; + + Flags flags = Flags::kBasic; + LengthMod length_mod = LengthMod::none; + FormatConversionChar conv = FormatConversionCharInternal::kNone; +}; + +// Consume conversion spec prefix (not including '%') of [p, end) if valid. +// Examples of valid specs would be e.g.: "s", "d", "-12.6f". +// If valid, it returns the first character following the conversion spec, +// and the spec part is broken down and returned in 'conv'. +// If invalid, returns nullptr. +const char* ConsumeUnboundConversion(const char* p, const char* end, + UnboundConversion* conv, int* next_arg); + +// Helper tag class for the table below. +// It allows fast `char -> ConversionChar/LengthMod/Flags` checking and +// conversions. +class ConvTag { + public: + constexpr ConvTag(FormatConversionChar conversion_char) // NOLINT + : tag_(static_cast<uint8_t>(conversion_char)) {} + constexpr ConvTag(LengthMod length_mod) // NOLINT + : tag_(0x80 | static_cast<uint8_t>(length_mod)) {} + constexpr ConvTag(Flags flags) // NOLINT + : tag_(0xc0 | static_cast<uint8_t>(flags)) {} + constexpr ConvTag() : tag_(0xFF) {} + + bool is_conv() const { return (tag_ & 0x80) == 0; } + bool is_length() const { return (tag_ & 0xC0) == 0x80; } + bool is_flags() const { return (tag_ & 0xE0) == 0xC0; } + + FormatConversionChar as_conv() const { + assert(is_conv()); + assert(!is_length()); + assert(!is_flags()); + return static_cast<FormatConversionChar>(tag_); + } + LengthMod as_length() const { + assert(!is_conv()); + assert(is_length()); + assert(!is_flags()); + return static_cast<LengthMod>(tag_ & 0x3F); + } + Flags as_flags() const { + assert(!is_conv()); + assert(!is_length()); + assert(is_flags()); + return static_cast<Flags>(tag_ & 0x1F); + } + + private: + uint8_t tag_; +}; + +extern const ConvTag kTags[256]; +// Keep a single table for all the conversion chars and length modifiers. +inline ConvTag GetTagForChar(char c) { + return kTags[static_cast<unsigned char>(c)]; +} + +// Parse the format string provided in 'src' and pass the identified items into +// 'consumer'. +// Text runs will be passed by calling +// Consumer::Append(string_view); +// ConversionItems will be passed by calling +// Consumer::ConvertOne(UnboundConversion, string_view); +// In the case of ConvertOne, the string_view that is passed is the +// portion of the format string corresponding to the conversion, not including +// the leading %. On success, it returns true. On failure, it stops and returns +// false. +template <typename Consumer> +bool ParseFormatString(string_view src, Consumer consumer) { + int next_arg = 0; + const char* p = src.data(); + const char* const end = p + src.size(); + while (p != end) { + const char* percent = static_cast<const char*>(memchr(p, '%', end - p)); + if (!percent) { + // We found the last substring. + return consumer.Append(string_view(p, end - p)); + } + // We found a percent, so push the text run then process the percent. + if (ABSL_PREDICT_FALSE(!consumer.Append(string_view(p, percent - p)))) { + return false; + } + if (ABSL_PREDICT_FALSE(percent + 1 >= end)) return false; + + auto tag = GetTagForChar(percent[1]); + if (tag.is_conv()) { + if (ABSL_PREDICT_FALSE(next_arg < 0)) { + // This indicates an error in the format string. + // The only way to get `next_arg < 0` here is to have a positional + // argument first which sets next_arg to -1 and then a non-positional + // argument. + return false; + } + p = percent + 2; + + // Keep this case separate from the one below. + // ConvertOne is more efficient when the compiler can see that the `basic` + // flag is set. + UnboundConversion conv; + conv.conv = tag.as_conv(); + conv.arg_position = ++next_arg; + if (ABSL_PREDICT_FALSE( + !consumer.ConvertOne(conv, string_view(percent + 1, 1)))) { + return false; + } + } else if (percent[1] != '%') { + UnboundConversion conv; + p = ConsumeUnboundConversion(percent + 1, end, &conv, &next_arg); + if (ABSL_PREDICT_FALSE(p == nullptr)) return false; + if (ABSL_PREDICT_FALSE(!consumer.ConvertOne( + conv, string_view(percent + 1, p - (percent + 1))))) { + return false; + } + } else { + if (ABSL_PREDICT_FALSE(!consumer.Append("%"))) return false; + p = percent + 2; + continue; + } + } + return true; +} + +// Always returns true, or fails to compile in a constexpr context if s does not +// point to a constexpr char array. +constexpr bool EnsureConstexpr(string_view s) { + return s.empty() || s[0] == s[0]; +} + +class ParsedFormatBase { + public: + explicit ParsedFormatBase( + string_view format, bool allow_ignored, + std::initializer_list<FormatConversionCharSet> convs); + + ParsedFormatBase(const ParsedFormatBase& other) { *this = other; } + + ParsedFormatBase(ParsedFormatBase&& other) { *this = std::move(other); } + + ParsedFormatBase& operator=(const ParsedFormatBase& other) { + if (this == &other) return *this; + has_error_ = other.has_error_; + items_ = other.items_; + size_t text_size = items_.empty() ? 0 : items_.back().text_end; + data_.reset(new char[text_size]); + memcpy(data_.get(), other.data_.get(), text_size); + return *this; + } + + ParsedFormatBase& operator=(ParsedFormatBase&& other) { + if (this == &other) return *this; + has_error_ = other.has_error_; + data_ = std::move(other.data_); + items_ = std::move(other.items_); + // Reset the vector to make sure the invariants hold. + other.items_.clear(); + return *this; + } + + template <typename Consumer> + bool ProcessFormat(Consumer consumer) const { + const char* const base = data_.get(); + string_view text(base, 0); + for (const auto& item : items_) { + const char* const end = text.data() + text.size(); + text = string_view(end, (base + item.text_end) - end); + if (item.is_conversion) { + if (!consumer.ConvertOne(item.conv, text)) return false; + } else { + if (!consumer.Append(text)) return false; + } + } + return !has_error_; + } + + bool has_error() const { return has_error_; } + + private: + // Returns whether the conversions match and if !allow_ignored it verifies + // that all conversions are used by the format. + bool MatchesConversions( + bool allow_ignored, + std::initializer_list<FormatConversionCharSet> convs) const; + + struct ParsedFormatConsumer; + + struct ConversionItem { + bool is_conversion; + // Points to the past-the-end location of this element in the data_ array. + size_t text_end; + UnboundConversion conv; + }; + + bool has_error_; + std::unique_ptr<char[]> data_; + std::vector<ConversionItem> items_; +}; + + +// A value type representing a preparsed format. These can be created, copied +// around, and reused to speed up formatting loops. +// The user must specify through the template arguments the conversion +// characters used in the format. This will be checked at compile time. +// +// This class uses Conv enum values to specify each argument. +// This allows for more flexibility as you can specify multiple possible +// conversion characters for each argument. +// ParsedFormat<char...> is a simplified alias for when the user only +// needs to specify a single conversion character for each argument. +// +// Example: +// // Extended format supports multiple characters per argument: +// using MyFormat = ExtendedParsedFormat<Conv::d | Conv::x>; +// MyFormat GetFormat(bool use_hex) { +// if (use_hex) return MyFormat("foo %x bar"); +// return MyFormat("foo %d bar"); +// } +// // 'format' can be used with any value that supports 'd' and 'x', +// // like `int`. +// auto format = GetFormat(use_hex); +// value = StringF(format, i); +// +// This class also supports runtime format checking with the ::New() and +// ::NewAllowIgnored() factory functions. +// This is the only API that allows the user to pass a runtime specified format +// string. These factory functions will return NULL if the format does not match +// the conversions requested by the user. +template <FormatConversionCharSet... C> +class ExtendedParsedFormat : public str_format_internal::ParsedFormatBase { + public: + explicit ExtendedParsedFormat(string_view format) +#ifdef ABSL_INTERNAL_ENABLE_FORMAT_CHECKER + __attribute__(( + enable_if(str_format_internal::EnsureConstexpr(format), + "Format string is not constexpr."), + enable_if(str_format_internal::ValidFormatImpl<C...>(format), + "Format specified does not match the template arguments."))) +#endif // ABSL_INTERNAL_ENABLE_FORMAT_CHECKER + : ExtendedParsedFormat(format, false) { + } + + // ExtendedParsedFormat factory function. + // The user still has to specify the conversion characters, but they will not + // be checked at compile time. Instead, it will be checked at runtime. + // This delays the checking to runtime, but allows the user to pass + // dynamically sourced formats. + // It returns NULL if the format does not match the conversion characters. + // The user is responsible for checking the return value before using it. + // + // The 'New' variant will check that all the specified arguments are being + // consumed by the format and return NULL if any argument is being ignored. + // The 'NewAllowIgnored' variant will not verify this and will allow formats + // that ignore arguments. + static std::unique_ptr<ExtendedParsedFormat> New(string_view format) { + return New(format, false); + } + static std::unique_ptr<ExtendedParsedFormat> NewAllowIgnored( + string_view format) { + return New(format, true); + } + + private: + static std::unique_ptr<ExtendedParsedFormat> New(string_view format, + bool allow_ignored) { + std::unique_ptr<ExtendedParsedFormat> conv( + new ExtendedParsedFormat(format, allow_ignored)); + if (conv->has_error()) return nullptr; + return conv; + } + + ExtendedParsedFormat(string_view s, bool allow_ignored) + : ParsedFormatBase(s, allow_ignored, {C...}) {} +}; +} // namespace str_format_internal +ABSL_NAMESPACE_END +} // namespace y_absl + +#endif // ABSL_STRINGS_INTERNAL_STR_FORMAT_PARSER_H_ diff --git a/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/str_format/ya.make b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/str_format/ya.make new file mode 100644 index 0000000000..ff8069cd0f --- /dev/null +++ b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/str_format/ya.make @@ -0,0 +1,40 @@ +# Generated by devtools/yamaker. + +LIBRARY() + +OWNER( + somov + g:cpp-contrib +) + +LICENSE(Apache-2.0) + +LICENSE_TEXTS(.yandex_meta/licenses.list.txt) + +PEERDIR( + contrib/restricted/abseil-cpp-tstring/y_absl/base + contrib/restricted/abseil-cpp-tstring/y_absl/base/internal/raw_logging + contrib/restricted/abseil-cpp-tstring/y_absl/base/internal/spinlock_wait + contrib/restricted/abseil-cpp-tstring/y_absl/base/internal/throw_delegate + contrib/restricted/abseil-cpp-tstring/y_absl/base/log_severity + contrib/restricted/abseil-cpp-tstring/y_absl/numeric + contrib/restricted/abseil-cpp-tstring/y_absl/strings + contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/absl_strings_internal +) + +ADDINCL( + GLOBAL contrib/restricted/abseil-cpp-tstring +) + +NO_COMPILER_WARNINGS() + +SRCS( + arg.cc + bind.cc + extension.cc + float_conversion.cc + output.cc + parser.cc +) + +END() diff --git a/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/str_join_internal.h b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/str_join_internal.h new file mode 100644 index 0000000000..0a220fa33d --- /dev/null +++ b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/str_join_internal.h @@ -0,0 +1,314 @@ +// +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +// This file declares INTERNAL parts of the Join API that are inlined/templated +// or otherwise need to be available at compile time. The main abstractions +// defined in this file are: +// +// - A handful of default Formatters +// - JoinAlgorithm() overloads +// - JoinRange() overloads +// - JoinTuple() +// +// DO NOT INCLUDE THIS FILE DIRECTLY. Use this file by including +// y_absl/strings/str_join.h +// +// IWYU pragma: private, include "y_absl/strings/str_join.h" + +#ifndef ABSL_STRINGS_INTERNAL_STR_JOIN_INTERNAL_H_ +#define ABSL_STRINGS_INTERNAL_STR_JOIN_INTERNAL_H_ + +#include <cstring> +#include <iterator> +#include <memory> +#include <util/generic/string.h> +#include <type_traits> +#include <utility> + +#include "y_absl/strings/internal/ostringstream.h" +#include "y_absl/strings/internal/resize_uninitialized.h" +#include "y_absl/strings/str_cat.h" + +namespace y_absl { +ABSL_NAMESPACE_BEGIN +namespace strings_internal { + +// +// Formatter objects +// +// The following are implementation classes for standard Formatter objects. The +// factory functions that users will call to create and use these formatters are +// defined and documented in strings/join.h. +// + +// The default formatter. Converts alpha-numeric types to strings. +struct AlphaNumFormatterImpl { + // This template is needed in order to support passing in a dereferenced + // vector<bool>::iterator + template <typename T> + void operator()(TString* out, const T& t) const { + StrAppend(out, AlphaNum(t)); + } + + void operator()(TString* out, const AlphaNum& t) const { + StrAppend(out, t); + } +}; + +// A type that's used to overload the JoinAlgorithm() function (defined below) +// for ranges that do not require additional formatting (e.g., a range of +// strings). + +struct NoFormatter : public AlphaNumFormatterImpl {}; + +// Formats types to strings using the << operator. +class StreamFormatterImpl { + public: + // The method isn't const because it mutates state. Making it const will + // render StreamFormatterImpl thread-hostile. + template <typename T> + void operator()(TString* out, const T& t) { + // The stream is created lazily to avoid paying the relatively high cost + // of its construction when joining an empty range. + if (strm_) { + strm_->clear(); // clear the bad, fail and eof bits in case they were set + strm_->str(out); + } else { + strm_.reset(new strings_internal::OStringStream(out)); + } + *strm_ << t; + } + + private: + std::unique_ptr<strings_internal::OStringStream> strm_; +}; + +// Formats a std::pair<>. The 'first' member is formatted using f1_ and the +// 'second' member is formatted using f2_. sep_ is the separator. +template <typename F1, typename F2> +class PairFormatterImpl { + public: + PairFormatterImpl(F1 f1, y_absl::string_view sep, F2 f2) + : f1_(std::move(f1)), sep_(sep), f2_(std::move(f2)) {} + + template <typename T> + void operator()(TString* out, const T& p) { + f1_(out, p.first); + out->append(sep_); + f2_(out, p.second); + } + + template <typename T> + void operator()(TString* out, const T& p) const { + f1_(out, p.first); + out->append(sep_); + f2_(out, p.second); + } + + private: + F1 f1_; + TString sep_; + F2 f2_; +}; + +// Wraps another formatter and dereferences the argument to operator() then +// passes the dereferenced argument to the wrapped formatter. This can be +// useful, for example, to join a std::vector<int*>. +template <typename Formatter> +class DereferenceFormatterImpl { + public: + DereferenceFormatterImpl() : f_() {} + explicit DereferenceFormatterImpl(Formatter&& f) + : f_(std::forward<Formatter>(f)) {} + + template <typename T> + void operator()(TString* out, const T& t) { + f_(out, *t); + } + + template <typename T> + void operator()(TString* out, const T& t) const { + f_(out, *t); + } + + private: + Formatter f_; +}; + +// DefaultFormatter<T> is a traits class that selects a default Formatter to use +// for the given type T. The ::Type member names the Formatter to use. This is +// used by the strings::Join() functions that do NOT take a Formatter argument, +// in which case a default Formatter must be chosen. +// +// AlphaNumFormatterImpl is the default in the base template, followed by +// specializations for other types. +template <typename ValueType> +struct DefaultFormatter { + typedef AlphaNumFormatterImpl Type; +}; +template <> +struct DefaultFormatter<const char*> { + typedef AlphaNumFormatterImpl Type; +}; +template <> +struct DefaultFormatter<char*> { + typedef AlphaNumFormatterImpl Type; +}; +template <> +struct DefaultFormatter<TString> { + typedef NoFormatter Type; +}; +template <> +struct DefaultFormatter<y_absl::string_view> { + typedef NoFormatter Type; +}; +template <typename ValueType> +struct DefaultFormatter<ValueType*> { + typedef DereferenceFormatterImpl<typename DefaultFormatter<ValueType>::Type> + Type; +}; + +template <typename ValueType> +struct DefaultFormatter<std::unique_ptr<ValueType>> + : public DefaultFormatter<ValueType*> {}; + +// +// JoinAlgorithm() functions +// + +// The main joining algorithm. This simply joins the elements in the given +// iterator range, each separated by the given separator, into an output string, +// and formats each element using the provided Formatter object. +template <typename Iterator, typename Formatter> +TString JoinAlgorithm(Iterator start, Iterator end, y_absl::string_view s, + Formatter&& f) { + TString result; + y_absl::string_view sep(""); + for (Iterator it = start; it != end; ++it) { + result.append(sep.data(), sep.size()); + f(&result, *it); + sep = s; + } + return result; +} + +// A joining algorithm that's optimized for a forward iterator range of +// string-like objects that do not need any additional formatting. This is to +// optimize the common case of joining, say, a std::vector<string> or a +// std::vector<y_absl::string_view>. +// +// This is an overload of the previous JoinAlgorithm() function. Here the +// Formatter argument is of type NoFormatter. Since NoFormatter is an internal +// type, this overload is only invoked when strings::Join() is called with a +// range of string-like objects (e.g., TString, y_absl::string_view), and an +// explicit Formatter argument was NOT specified. +// +// The optimization is that the needed space will be reserved in the output +// string to avoid the need to resize while appending. To do this, the iterator +// range will be traversed twice: once to calculate the total needed size, and +// then again to copy the elements and delimiters to the output string. +template <typename Iterator, + typename = typename std::enable_if<std::is_convertible< + typename std::iterator_traits<Iterator>::iterator_category, + std::forward_iterator_tag>::value>::type> +TString JoinAlgorithm(Iterator start, Iterator end, y_absl::string_view s, + NoFormatter) { + TString result; + if (start != end) { + // Sums size + size_t result_size = start->size(); + for (Iterator it = start; ++it != end;) { + result_size += s.size(); + result_size += it->size(); + } + + if (result_size > 0) { + STLStringResizeUninitialized(&result, result_size); + + // Joins strings + char* result_buf = &*result.begin(); + memcpy(result_buf, start->data(), start->size()); + result_buf += start->size(); + for (Iterator it = start; ++it != end;) { + memcpy(result_buf, s.data(), s.size()); + result_buf += s.size(); + memcpy(result_buf, it->data(), it->size()); + result_buf += it->size(); + } + } + } + + return result; +} + +// JoinTupleLoop implements a loop over the elements of a std::tuple, which +// are heterogeneous. The primary template matches the tuple interior case. It +// continues the iteration after appending a separator (for nonzero indices) +// and formatting an element of the tuple. The specialization for the I=N case +// matches the end-of-tuple, and terminates the iteration. +template <size_t I, size_t N> +struct JoinTupleLoop { + template <typename Tup, typename Formatter> + void operator()(TString* out, const Tup& tup, y_absl::string_view sep, + Formatter&& fmt) { + if (I > 0) out->append(sep.data(), sep.size()); + fmt(out, std::get<I>(tup)); + JoinTupleLoop<I + 1, N>()(out, tup, sep, fmt); + } +}; +template <size_t N> +struct JoinTupleLoop<N, N> { + template <typename Tup, typename Formatter> + void operator()(TString*, const Tup&, y_absl::string_view, Formatter&&) {} +}; + +template <typename... T, typename Formatter> +TString JoinAlgorithm(const std::tuple<T...>& tup, y_absl::string_view sep, + Formatter&& fmt) { + TString result; + JoinTupleLoop<0, sizeof...(T)>()(&result, tup, sep, fmt); + return result; +} + +template <typename Iterator> +TString JoinRange(Iterator first, Iterator last, + y_absl::string_view separator) { + // No formatter was explicitly given, so a default must be chosen. + typedef typename std::iterator_traits<Iterator>::value_type ValueType; + typedef typename DefaultFormatter<ValueType>::Type Formatter; + return JoinAlgorithm(first, last, separator, Formatter()); +} + +template <typename Range, typename Formatter> +TString JoinRange(const Range& range, y_absl::string_view separator, + Formatter&& fmt) { + using std::begin; + using std::end; + return JoinAlgorithm(begin(range), end(range), separator, fmt); +} + +template <typename Range> +TString JoinRange(const Range& range, y_absl::string_view separator) { + using std::begin; + using std::end; + return JoinRange(begin(range), end(range), separator); +} + +} // namespace strings_internal +ABSL_NAMESPACE_END +} // namespace y_absl + +#endif // ABSL_STRINGS_INTERNAL_STR_JOIN_INTERNAL_H_ diff --git a/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/str_split_internal.h b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/str_split_internal.h new file mode 100644 index 0000000000..237864c0ed --- /dev/null +++ b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/str_split_internal.h @@ -0,0 +1,430 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +// This file declares INTERNAL parts of the Split API that are inline/templated +// or otherwise need to be available at compile time. The main abstractions +// defined in here are +// +// - ConvertibleToStringView +// - SplitIterator<> +// - Splitter<> +// +// DO NOT INCLUDE THIS FILE DIRECTLY. Use this file by including +// y_absl/strings/str_split.h. +// +// IWYU pragma: private, include "y_absl/strings/str_split.h" + +#ifndef ABSL_STRINGS_INTERNAL_STR_SPLIT_INTERNAL_H_ +#define ABSL_STRINGS_INTERNAL_STR_SPLIT_INTERNAL_H_ + +#include <array> +#include <initializer_list> +#include <iterator> +#include <tuple> +#include <type_traits> +#include <utility> +#include <vector> + +#include "y_absl/base/macros.h" +#include "y_absl/base/port.h" +#include "y_absl/meta/type_traits.h" +#include "y_absl/strings/string_view.h" + +#ifdef _GLIBCXX_DEBUG +#include "y_absl/strings/internal/stl_type_traits.h" +#endif // _GLIBCXX_DEBUG + +namespace y_absl { +ABSL_NAMESPACE_BEGIN +namespace strings_internal { + +// This class is implicitly constructible from everything that y_absl::string_view +// is implicitly constructible from, except for rvalue strings. This means it +// can be used as a function parameter in places where passing a temporary +// string might cause memory lifetime issues. +class ConvertibleToStringView { + public: + ConvertibleToStringView(const char* s) // NOLINT(runtime/explicit) + : value_(s) {} + ConvertibleToStringView(char* s) : value_(s) {} // NOLINT(runtime/explicit) + ConvertibleToStringView(y_absl::string_view s) // NOLINT(runtime/explicit) + : value_(s) {} + ConvertibleToStringView(const TString& s) // NOLINT(runtime/explicit) + : value_(s) {} + + // Disable conversion from rvalue strings. + ConvertibleToStringView(TString&& s) = delete; + ConvertibleToStringView(const TString&& s) = delete; + + y_absl::string_view value() const { return value_; } + + private: + y_absl::string_view value_; +}; + +// An iterator that enumerates the parts of a string from a Splitter. The text +// to be split, the Delimiter, and the Predicate are all taken from the given +// Splitter object. Iterators may only be compared if they refer to the same +// Splitter instance. +// +// This class is NOT part of the public splitting API. +template <typename Splitter> +class SplitIterator { + public: + using iterator_category = std::input_iterator_tag; + using value_type = y_absl::string_view; + using difference_type = ptrdiff_t; + using pointer = const value_type*; + using reference = const value_type&; + + enum State { kInitState, kLastState, kEndState }; + SplitIterator(State state, const Splitter* splitter) + : pos_(0), + state_(state), + splitter_(splitter), + delimiter_(splitter->delimiter()), + predicate_(splitter->predicate()) { + // Hack to maintain backward compatibility. This one block makes it so an + // empty y_absl::string_view whose .data() happens to be nullptr behaves + // *differently* from an otherwise empty y_absl::string_view whose .data() is + // not nullptr. This is an undesirable difference in general, but this + // behavior is maintained to avoid breaking existing code that happens to + // depend on this old behavior/bug. Perhaps it will be fixed one day. The + // difference in behavior is as follows: + // Split(y_absl::string_view(""), '-'); // {""} + // Split(y_absl::string_view(), '-'); // {} + if (splitter_->text().data() == nullptr) { + state_ = kEndState; + pos_ = splitter_->text().size(); + return; + } + + if (state_ == kEndState) { + pos_ = splitter_->text().size(); + } else { + ++(*this); + } + } + + bool at_end() const { return state_ == kEndState; } + + reference operator*() const { return curr_; } + pointer operator->() const { return &curr_; } + + SplitIterator& operator++() { + do { + if (state_ == kLastState) { + state_ = kEndState; + return *this; + } + const y_absl::string_view text = splitter_->text(); + const y_absl::string_view d = delimiter_.Find(text, pos_); + if (d.data() == text.data() + text.size()) state_ = kLastState; + curr_ = text.substr(pos_, d.data() - (text.data() + pos_)); + pos_ += curr_.size() + d.size(); + } while (!predicate_(curr_)); + return *this; + } + + SplitIterator operator++(int) { + SplitIterator old(*this); + ++(*this); + return old; + } + + friend bool operator==(const SplitIterator& a, const SplitIterator& b) { + return a.state_ == b.state_ && a.pos_ == b.pos_; + } + + friend bool operator!=(const SplitIterator& a, const SplitIterator& b) { + return !(a == b); + } + + private: + size_t pos_; + State state_; + y_absl::string_view curr_; + const Splitter* splitter_; + typename Splitter::DelimiterType delimiter_; + typename Splitter::PredicateType predicate_; +}; + +// HasMappedType<T>::value is true iff there exists a type T::mapped_type. +template <typename T, typename = void> +struct HasMappedType : std::false_type {}; +template <typename T> +struct HasMappedType<T, y_absl::void_t<typename T::mapped_type>> + : std::true_type {}; + +// HasValueType<T>::value is true iff there exists a type T::value_type. +template <typename T, typename = void> +struct HasValueType : std::false_type {}; +template <typename T> +struct HasValueType<T, y_absl::void_t<typename T::value_type>> : std::true_type { +}; + +// HasConstIterator<T>::value is true iff there exists a type T::const_iterator. +template <typename T, typename = void> +struct HasConstIterator : std::false_type {}; +template <typename T> +struct HasConstIterator<T, y_absl::void_t<typename T::const_iterator>> + : std::true_type {}; + +// HasEmplace<T>::value is true iff there exists a method T::emplace(). +template <typename T, typename = void> +struct HasEmplace : std::false_type {}; +template <typename T> +struct HasEmplace<T, y_absl::void_t<decltype(std::declval<T>().emplace())>> + : std::true_type {}; + +// IsInitializerList<T>::value is true iff T is an std::initializer_list. More +// details below in Splitter<> where this is used. +std::false_type IsInitializerListDispatch(...); // default: No +template <typename T> +std::true_type IsInitializerListDispatch(std::initializer_list<T>*); +template <typename T> +struct IsInitializerList + : decltype(IsInitializerListDispatch(static_cast<T*>(nullptr))) {}; + +// A SplitterIsConvertibleTo<C>::type alias exists iff the specified condition +// is true for type 'C'. +// +// Restricts conversion to container-like types (by testing for the presence of +// a const_iterator member type) and also to disable conversion to an +// std::initializer_list (which also has a const_iterator). Otherwise, code +// compiled in C++11 will get an error due to ambiguous conversion paths (in +// C++11 std::vector<T>::operator= is overloaded to take either a std::vector<T> +// or an std::initializer_list<T>). + +template <typename C, bool has_value_type, bool has_mapped_type> +struct SplitterIsConvertibleToImpl : std::false_type {}; + +template <typename C> +struct SplitterIsConvertibleToImpl<C, true, false> + : std::is_constructible<typename C::value_type, y_absl::string_view> {}; + +template <typename C> +struct SplitterIsConvertibleToImpl<C, true, true> + : y_absl::conjunction< + std::is_constructible<typename C::key_type, y_absl::string_view>, + std::is_constructible<typename C::mapped_type, y_absl::string_view>> {}; + +template <typename C> +struct SplitterIsConvertibleTo + : SplitterIsConvertibleToImpl< + C, +#ifdef _GLIBCXX_DEBUG + !IsStrictlyBaseOfAndConvertibleToSTLContainer<C>::value && +#endif // _GLIBCXX_DEBUG + !IsInitializerList< + typename std::remove_reference<C>::type>::value && + HasValueType<C>::value && HasConstIterator<C>::value, + HasMappedType<C>::value> { +}; + +// This class implements the range that is returned by y_absl::StrSplit(). This +// class has templated conversion operators that allow it to be implicitly +// converted to a variety of types that the caller may have specified on the +// left-hand side of an assignment. +// +// The main interface for interacting with this class is through its implicit +// conversion operators. However, this class may also be used like a container +// in that it has .begin() and .end() member functions. It may also be used +// within a range-for loop. +// +// Output containers can be collections of any type that is constructible from +// an y_absl::string_view. +// +// An Predicate functor may be supplied. This predicate will be used to filter +// the split strings: only strings for which the predicate returns true will be +// kept. A Predicate object is any unary functor that takes an y_absl::string_view +// and returns bool. +// +// The StringType parameter can be either string_view or string, depending on +// whether the Splitter refers to a string stored elsewhere, or if the string +// resides inside the Splitter itself. +template <typename Delimiter, typename Predicate, typename StringType> +class Splitter { + public: + using DelimiterType = Delimiter; + using PredicateType = Predicate; + using const_iterator = strings_internal::SplitIterator<Splitter>; + using value_type = typename std::iterator_traits<const_iterator>::value_type; + + Splitter(StringType input_text, Delimiter d, Predicate p) + : text_(std::move(input_text)), + delimiter_(std::move(d)), + predicate_(std::move(p)) {} + + y_absl::string_view text() const { return text_; } + const Delimiter& delimiter() const { return delimiter_; } + const Predicate& predicate() const { return predicate_; } + + // Range functions that iterate the split substrings as y_absl::string_view + // objects. These methods enable a Splitter to be used in a range-based for + // loop. + const_iterator begin() const { return {const_iterator::kInitState, this}; } + const_iterator end() const { return {const_iterator::kEndState, this}; } + + // An implicit conversion operator that is restricted to only those containers + // that the splitter is convertible to. + template <typename Container, + typename = typename std::enable_if< + SplitterIsConvertibleTo<Container>::value>::type> + operator Container() const { // NOLINT(runtime/explicit) + return ConvertToContainer<Container, typename Container::value_type, + HasMappedType<Container>::value>()(*this); + } + + // Returns a pair with its .first and .second members set to the first two + // strings returned by the begin() iterator. Either/both of .first and .second + // will be constructed with empty strings if the iterator doesn't have a + // corresponding value. + template <typename First, typename Second> + operator std::pair<First, Second>() const { // NOLINT(runtime/explicit) + y_absl::string_view first, second; + auto it = begin(); + if (it != end()) { + first = *it; + if (++it != end()) { + second = *it; + } + } + return {First(first), Second(second)}; + } + + private: + // ConvertToContainer is a functor converting a Splitter to the requested + // Container of ValueType. It is specialized below to optimize splitting to + // certain combinations of Container and ValueType. + // + // This base template handles the generic case of storing the split results in + // the requested non-map-like container and converting the split substrings to + // the requested type. + template <typename Container, typename ValueType, bool is_map = false> + struct ConvertToContainer { + Container operator()(const Splitter& splitter) const { + Container c; + auto it = std::inserter(c, c.end()); + for (const auto& sp : splitter) { + *it++ = ValueType(sp); + } + return c; + } + }; + + // Partial specialization for a std::vector<y_absl::string_view>. + // + // Optimized for the common case of splitting to a + // std::vector<y_absl::string_view>. In this case we first split the results to + // a small array of y_absl::string_view on the stack, to reduce reallocations. + template <typename A> + struct ConvertToContainer<std::vector<y_absl::string_view, A>, + y_absl::string_view, false> { + std::vector<y_absl::string_view, A> operator()( + const Splitter& splitter) const { + struct raw_view { + const char* data; + size_t size; + operator y_absl::string_view() const { // NOLINT(runtime/explicit) + return {data, size}; + } + }; + std::vector<y_absl::string_view, A> v; + std::array<raw_view, 16> ar; + for (auto it = splitter.begin(); !it.at_end();) { + size_t index = 0; + do { + ar[index].data = it->data(); + ar[index].size = it->size(); + ++it; + } while (++index != ar.size() && !it.at_end()); + v.insert(v.end(), ar.begin(), ar.begin() + index); + } + return v; + } + }; + + // Partial specialization for a std::vector<TString>. + // + // Optimized for the common case of splitting to a std::vector<TString>. + // In this case we first split the results to a std::vector<y_absl::string_view> + // so the returned std::vector<TString> can have space reserved to avoid + // TString moves. + template <typename A> + struct ConvertToContainer<std::vector<TString, A>, TString, false> { + std::vector<TString, A> operator()(const Splitter& splitter) const { + const std::vector<y_absl::string_view> v = splitter; + return std::vector<TString, A>(v.begin(), v.end()); + } + }; + + // Partial specialization for containers of pairs (e.g., maps). + // + // The algorithm is to insert a new pair into the map for each even-numbered + // item, with the even-numbered item as the key with a default-constructed + // value. Each odd-numbered item will then be assigned to the last pair's + // value. + template <typename Container, typename First, typename Second> + struct ConvertToContainer<Container, std::pair<const First, Second>, true> { + using iterator = typename Container::iterator; + + Container operator()(const Splitter& splitter) const { + Container m; + iterator it; + bool insert = true; + for (const y_absl::string_view sv : splitter) { + if (insert) { + it = InsertOrEmplace(&m, sv); + } else { + it->second = Second(sv); + } + insert = !insert; + } + return m; + } + + // Inserts the key and an empty value into the map, returning an iterator to + // the inserted item. We use emplace() if available, otherwise insert(). + template <typename M> + static y_absl::enable_if_t<HasEmplace<M>::value, iterator> InsertOrEmplace( + M* m, y_absl::string_view key) { + // Use piecewise_construct to support old versions of gcc in which pair + // constructor can't otherwise construct string from string_view. + return ToIter(m->emplace(std::piecewise_construct, std::make_tuple(key), + std::tuple<>())); + } + template <typename M> + static y_absl::enable_if_t<!HasEmplace<M>::value, iterator> InsertOrEmplace( + M* m, y_absl::string_view key) { + return ToIter(m->insert(std::make_pair(First(key), Second("")))); + } + + static iterator ToIter(std::pair<iterator, bool> pair) { + return pair.first; + } + static iterator ToIter(iterator iter) { return iter; } + }; + + StringType text_; + Delimiter delimiter_; + Predicate predicate_; +}; + +} // namespace strings_internal +ABSL_NAMESPACE_END +} // namespace y_absl + +#endif // ABSL_STRINGS_INTERNAL_STR_SPLIT_INTERNAL_H_ diff --git a/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/string_constant.h b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/string_constant.h new file mode 100644 index 0000000000..b18e821b49 --- /dev/null +++ b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/string_constant.h @@ -0,0 +1,64 @@ +// Copyright 2020 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ABSL_STRINGS_INTERNAL_STRING_CONSTANT_H_ +#define ABSL_STRINGS_INTERNAL_STRING_CONSTANT_H_ + +#include "y_absl/meta/type_traits.h" +#include "y_absl/strings/string_view.h" + +namespace y_absl { +ABSL_NAMESPACE_BEGIN +namespace strings_internal { + +// StringConstant<T> represents a compile time string constant. +// It can be accessed via its `y_absl::string_view value` static member. +// It is guaranteed that the `string_view` returned has constant `.data()`, +// constant `.size()` and constant `value[i]` for all `0 <= i < .size()` +// +// The `T` is an opaque type. It is guaranteed that different string constants +// will have different values of `T`. This allows users to associate the string +// constant with other static state at compile time. +// +// Instances should be made using the `MakeStringConstant()` factory function +// below. +template <typename T> +struct StringConstant { + static constexpr y_absl::string_view value = T{}(); + constexpr y_absl::string_view operator()() const { return value; } + + // Check to be sure `view` points to constant data. + // Otherwise, it can't be constant evaluated. + static_assert(value.empty() || 2 * value[0] != 1, + "The input string_view must point to constant data."); +}; + +template <typename T> +constexpr y_absl::string_view StringConstant<T>::value; // NOLINT + +// Factory function for `StringConstant` instances. +// It supports callables that have a constexpr default constructor and a +// constexpr operator(). +// It must return an `y_absl::string_view` or `const char*` pointing to constant +// data. This is validated at compile time. +template <typename T> +constexpr StringConstant<T> MakeStringConstant(T) { + return {}; +} + +} // namespace strings_internal +ABSL_NAMESPACE_END +} // namespace y_absl + +#endif // ABSL_STRINGS_INTERNAL_STRING_CONSTANT_H_ diff --git a/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/utf8.cc b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/utf8.cc new file mode 100644 index 0000000000..06b1cae79d --- /dev/null +++ b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/utf8.cc @@ -0,0 +1,53 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// UTF8 utilities, implemented to reduce dependencies. + +#include "y_absl/strings/internal/utf8.h" + +namespace y_absl { +ABSL_NAMESPACE_BEGIN +namespace strings_internal { + +size_t EncodeUTF8Char(char *buffer, char32_t utf8_char) { + if (utf8_char <= 0x7F) { + *buffer = static_cast<char>(utf8_char); + return 1; + } else if (utf8_char <= 0x7FF) { + buffer[1] = 0x80 | (utf8_char & 0x3F); + utf8_char >>= 6; + buffer[0] = 0xC0 | utf8_char; + return 2; + } else if (utf8_char <= 0xFFFF) { + buffer[2] = 0x80 | (utf8_char & 0x3F); + utf8_char >>= 6; + buffer[1] = 0x80 | (utf8_char & 0x3F); + utf8_char >>= 6; + buffer[0] = 0xE0 | utf8_char; + return 3; + } else { + buffer[3] = 0x80 | (utf8_char & 0x3F); + utf8_char >>= 6; + buffer[2] = 0x80 | (utf8_char & 0x3F); + utf8_char >>= 6; + buffer[1] = 0x80 | (utf8_char & 0x3F); + utf8_char >>= 6; + buffer[0] = 0xF0 | utf8_char; + return 4; + } +} + +} // namespace strings_internal +ABSL_NAMESPACE_END +} // namespace y_absl diff --git a/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/utf8.h b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/utf8.h new file mode 100644 index 0000000000..1b2d6abd51 --- /dev/null +++ b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/utf8.h @@ -0,0 +1,50 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// UTF8 utilities, implemented to reduce dependencies. + +#ifndef ABSL_STRINGS_INTERNAL_UTF8_H_ +#define ABSL_STRINGS_INTERNAL_UTF8_H_ + +#include <cstddef> +#include <cstdint> + +#include "y_absl/base/config.h" + +namespace y_absl { +ABSL_NAMESPACE_BEGIN +namespace strings_internal { + +// For Unicode code points 0 through 0x10FFFF, EncodeUTF8Char writes +// out the UTF-8 encoding into buffer, and returns the number of chars +// it wrote. +// +// As described in https://tools.ietf.org/html/rfc3629#section-3 , the encodings +// are: +// 00 - 7F : 0xxxxxxx +// 80 - 7FF : 110xxxxx 10xxxxxx +// 800 - FFFF : 1110xxxx 10xxxxxx 10xxxxxx +// 10000 - 10FFFF : 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx +// +// Values greater than 0x10FFFF are not supported and may or may not write +// characters into buffer, however never will more than kMaxEncodedUTF8Size +// bytes be written, regardless of the value of utf8_char. +enum { kMaxEncodedUTF8Size = 4 }; +size_t EncodeUTF8Char(char *buffer, char32_t utf8_char); + +} // namespace strings_internal +ABSL_NAMESPACE_END +} // namespace y_absl + +#endif // ABSL_STRINGS_INTERNAL_UTF8_H_ diff --git a/contrib/restricted/abseil-cpp-tstring/y_absl/strings/match.cc b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/match.cc new file mode 100644 index 0000000000..3197bdf432 --- /dev/null +++ b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/match.cc @@ -0,0 +1,43 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "y_absl/strings/match.h" + +#include "y_absl/strings/internal/memutil.h" + +namespace y_absl { +ABSL_NAMESPACE_BEGIN + +bool EqualsIgnoreCase(y_absl::string_view piece1, + y_absl::string_view piece2) noexcept { + return (piece1.size() == piece2.size() && + 0 == y_absl::strings_internal::memcasecmp(piece1.data(), piece2.data(), + piece1.size())); + // memcasecmp uses y_absl::ascii_tolower(). +} + +bool StartsWithIgnoreCase(y_absl::string_view text, + y_absl::string_view prefix) noexcept { + return (text.size() >= prefix.size()) && + EqualsIgnoreCase(text.substr(0, prefix.size()), prefix); +} + +bool EndsWithIgnoreCase(y_absl::string_view text, + y_absl::string_view suffix) noexcept { + return (text.size() >= suffix.size()) && + EqualsIgnoreCase(text.substr(text.size() - suffix.size()), suffix); +} + +ABSL_NAMESPACE_END +} // namespace y_absl diff --git a/contrib/restricted/abseil-cpp-tstring/y_absl/strings/match.h b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/match.h new file mode 100644 index 0000000000..4709abc93f --- /dev/null +++ b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/match.h @@ -0,0 +1,100 @@ +// +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// ----------------------------------------------------------------------------- +// File: match.h +// ----------------------------------------------------------------------------- +// +// This file contains simple utilities for performing string matching checks. +// All of these function parameters are specified as `y_absl::string_view`, +// meaning that these functions can accept `TString`, `y_absl::string_view` or +// NUL-terminated C-style strings. +// +// Examples: +// TString s = "foo"; +// y_absl::string_view sv = "f"; +// assert(y_absl::StrContains(s, sv)); +// +// Note: The order of parameters in these functions is designed to mimic the +// order an equivalent member function would exhibit; +// e.g. `s.Contains(x)` ==> `y_absl::StrContains(s, x). +#ifndef ABSL_STRINGS_MATCH_H_ +#define ABSL_STRINGS_MATCH_H_ + +#include <cstring> + +#include "y_absl/strings/string_view.h" + +namespace y_absl { +ABSL_NAMESPACE_BEGIN + +// StrContains() +// +// Returns whether a given string `haystack` contains the substring `needle`. +inline bool StrContains(y_absl::string_view haystack, + y_absl::string_view needle) noexcept { + return haystack.find(needle, 0) != haystack.npos; +} + +inline bool StrContains(y_absl::string_view haystack, char needle) noexcept { + return haystack.find(needle) != haystack.npos; +} + +// StartsWith() +// +// Returns whether a given string `text` begins with `prefix`. +inline bool StartsWith(y_absl::string_view text, + y_absl::string_view prefix) noexcept { + return prefix.empty() || + (text.size() >= prefix.size() && + memcmp(text.data(), prefix.data(), prefix.size()) == 0); +} + +// EndsWith() +// +// Returns whether a given string `text` ends with `suffix`. +inline bool EndsWith(y_absl::string_view text, + y_absl::string_view suffix) noexcept { + return suffix.empty() || + (text.size() >= suffix.size() && + memcmp(text.data() + (text.size() - suffix.size()), suffix.data(), + suffix.size()) == 0); +} + +// EqualsIgnoreCase() +// +// Returns whether given ASCII strings `piece1` and `piece2` are equal, ignoring +// case in the comparison. +bool EqualsIgnoreCase(y_absl::string_view piece1, + y_absl::string_view piece2) noexcept; + +// StartsWithIgnoreCase() +// +// Returns whether a given ASCII string `text` starts with `prefix`, +// ignoring case in the comparison. +bool StartsWithIgnoreCase(y_absl::string_view text, + y_absl::string_view prefix) noexcept; + +// EndsWithIgnoreCase() +// +// Returns whether a given ASCII string `text` ends with `suffix`, ignoring +// case in the comparison. +bool EndsWithIgnoreCase(y_absl::string_view text, + y_absl::string_view suffix) noexcept; + +ABSL_NAMESPACE_END +} // namespace y_absl + +#endif // ABSL_STRINGS_MATCH_H_ diff --git a/contrib/restricted/abseil-cpp-tstring/y_absl/strings/numbers.cc b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/numbers.cc new file mode 100644 index 0000000000..528d044fa6 --- /dev/null +++ b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/numbers.cc @@ -0,0 +1,1093 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// This file contains string processing functions related to +// numeric values. + +#include "y_absl/strings/numbers.h" + +#include <algorithm> +#include <cassert> +#include <cfloat> // for DBL_DIG and FLT_DIG +#include <cmath> // for HUGE_VAL +#include <cstdint> +#include <cstdio> +#include <cstdlib> +#include <cstring> +#include <iterator> +#include <limits> +#include <memory> +#include <utility> + +#include "y_absl/base/attributes.h" +#include "y_absl/base/internal/raw_logging.h" +#include "y_absl/numeric/bits.h" +#include "y_absl/strings/ascii.h" +#include "y_absl/strings/charconv.h" +#include "y_absl/strings/escaping.h" +#include "y_absl/strings/internal/memutil.h" +#include "y_absl/strings/match.h" +#include "y_absl/strings/str_cat.h" + +namespace y_absl { +ABSL_NAMESPACE_BEGIN + +bool SimpleAtof(y_absl::string_view str, float* out) { + *out = 0.0; + str = StripAsciiWhitespace(str); + // std::from_chars doesn't accept an initial +, but SimpleAtof does, so if one + // is present, skip it, while avoiding accepting "+-0" as valid. + if (!str.empty() && str[0] == '+') { + str.remove_prefix(1); + if (!str.empty() && str[0] == '-') { + return false; + } + } + auto result = y_absl::from_chars(str.data(), str.data() + str.size(), *out); + if (result.ec == std::errc::invalid_argument) { + return false; + } + if (result.ptr != str.data() + str.size()) { + // not all non-whitespace characters consumed + return false; + } + // from_chars() with DR 3081's current wording will return max() on + // overflow. SimpleAtof returns infinity instead. + if (result.ec == std::errc::result_out_of_range) { + if (*out > 1.0) { + *out = std::numeric_limits<float>::infinity(); + } else if (*out < -1.0) { + *out = -std::numeric_limits<float>::infinity(); + } + } + return true; +} + +bool SimpleAtod(y_absl::string_view str, double* out) { + *out = 0.0; + str = StripAsciiWhitespace(str); + // std::from_chars doesn't accept an initial +, but SimpleAtod does, so if one + // is present, skip it, while avoiding accepting "+-0" as valid. + if (!str.empty() && str[0] == '+') { + str.remove_prefix(1); + if (!str.empty() && str[0] == '-') { + return false; + } + } + auto result = y_absl::from_chars(str.data(), str.data() + str.size(), *out); + if (result.ec == std::errc::invalid_argument) { + return false; + } + if (result.ptr != str.data() + str.size()) { + // not all non-whitespace characters consumed + return false; + } + // from_chars() with DR 3081's current wording will return max() on + // overflow. SimpleAtod returns infinity instead. + if (result.ec == std::errc::result_out_of_range) { + if (*out > 1.0) { + *out = std::numeric_limits<double>::infinity(); + } else if (*out < -1.0) { + *out = -std::numeric_limits<double>::infinity(); + } + } + return true; +} + +bool SimpleAtob(y_absl::string_view str, bool* out) { + ABSL_RAW_CHECK(out != nullptr, "Output pointer must not be nullptr."); + if (EqualsIgnoreCase(str, "true") || EqualsIgnoreCase(str, "t") || + EqualsIgnoreCase(str, "yes") || EqualsIgnoreCase(str, "y") || + EqualsIgnoreCase(str, "1")) { + *out = true; + return true; + } + if (EqualsIgnoreCase(str, "false") || EqualsIgnoreCase(str, "f") || + EqualsIgnoreCase(str, "no") || EqualsIgnoreCase(str, "n") || + EqualsIgnoreCase(str, "0")) { + *out = false; + return true; + } + return false; +} + +// ---------------------------------------------------------------------- +// FastIntToBuffer() overloads +// +// Like the Fast*ToBuffer() functions above, these are intended for speed. +// Unlike the Fast*ToBuffer() functions, however, these functions write +// their output to the beginning of the buffer. The caller is responsible +// for ensuring that the buffer has enough space to hold the output. +// +// Returns a pointer to the end of the string (i.e. the null character +// terminating the string). +// ---------------------------------------------------------------------- + +namespace { + +// Used to optimize printing a decimal number's final digit. +const char one_ASCII_final_digits[10][2] { + {'0', 0}, {'1', 0}, {'2', 0}, {'3', 0}, {'4', 0}, + {'5', 0}, {'6', 0}, {'7', 0}, {'8', 0}, {'9', 0}, +}; + +} // namespace + +char* numbers_internal::FastIntToBuffer(uint32_t i, char* buffer) { + uint32_t digits; + // The idea of this implementation is to trim the number of divides to as few + // as possible, and also reducing memory stores and branches, by going in + // steps of two digits at a time rather than one whenever possible. + // The huge-number case is first, in the hopes that the compiler will output + // that case in one branch-free block of code, and only output conditional + // branches into it from below. + if (i >= 1000000000) { // >= 1,000,000,000 + digits = i / 100000000; // 100,000,000 + i -= digits * 100000000; + PutTwoDigits(digits, buffer); + buffer += 2; + lt100_000_000: + digits = i / 1000000; // 1,000,000 + i -= digits * 1000000; + PutTwoDigits(digits, buffer); + buffer += 2; + lt1_000_000: + digits = i / 10000; // 10,000 + i -= digits * 10000; + PutTwoDigits(digits, buffer); + buffer += 2; + lt10_000: + digits = i / 100; + i -= digits * 100; + PutTwoDigits(digits, buffer); + buffer += 2; + lt100: + digits = i; + PutTwoDigits(digits, buffer); + buffer += 2; + *buffer = 0; + return buffer; + } + + if (i < 100) { + digits = i; + if (i >= 10) goto lt100; + memcpy(buffer, one_ASCII_final_digits[i], 2); + return buffer + 1; + } + if (i < 10000) { // 10,000 + if (i >= 1000) goto lt10_000; + digits = i / 100; + i -= digits * 100; + *buffer++ = '0' + digits; + goto lt100; + } + if (i < 1000000) { // 1,000,000 + if (i >= 100000) goto lt1_000_000; + digits = i / 10000; // 10,000 + i -= digits * 10000; + *buffer++ = '0' + digits; + goto lt10_000; + } + if (i < 100000000) { // 100,000,000 + if (i >= 10000000) goto lt100_000_000; + digits = i / 1000000; // 1,000,000 + i -= digits * 1000000; + *buffer++ = '0' + digits; + goto lt1_000_000; + } + // we already know that i < 1,000,000,000 + digits = i / 100000000; // 100,000,000 + i -= digits * 100000000; + *buffer++ = '0' + digits; + goto lt100_000_000; +} + +char* numbers_internal::FastIntToBuffer(int32_t i, char* buffer) { + uint32_t u = i; + if (i < 0) { + *buffer++ = '-'; + // We need to do the negation in modular (i.e., "unsigned") + // arithmetic; MSVC++ apprently warns for plain "-u", so + // we write the equivalent expression "0 - u" instead. + u = 0 - u; + } + return numbers_internal::FastIntToBuffer(u, buffer); +} + +char* numbers_internal::FastIntToBuffer(uint64_t i, char* buffer) { + uint32_t u32 = static_cast<uint32_t>(i); + if (u32 == i) return numbers_internal::FastIntToBuffer(u32, buffer); + + // Here we know i has at least 10 decimal digits. + uint64_t top_1to11 = i / 1000000000; + u32 = static_cast<uint32_t>(i - top_1to11 * 1000000000); + uint32_t top_1to11_32 = static_cast<uint32_t>(top_1to11); + + if (top_1to11_32 == top_1to11) { + buffer = numbers_internal::FastIntToBuffer(top_1to11_32, buffer); + } else { + // top_1to11 has more than 32 bits too; print it in two steps. + uint32_t top_8to9 = static_cast<uint32_t>(top_1to11 / 100); + uint32_t mid_2 = static_cast<uint32_t>(top_1to11 - top_8to9 * 100); + buffer = numbers_internal::FastIntToBuffer(top_8to9, buffer); + PutTwoDigits(mid_2, buffer); + buffer += 2; + } + + // We have only 9 digits now, again the maximum uint32_t can handle fully. + uint32_t digits = u32 / 10000000; // 10,000,000 + u32 -= digits * 10000000; + PutTwoDigits(digits, buffer); + buffer += 2; + digits = u32 / 100000; // 100,000 + u32 -= digits * 100000; + PutTwoDigits(digits, buffer); + buffer += 2; + digits = u32 / 1000; // 1,000 + u32 -= digits * 1000; + PutTwoDigits(digits, buffer); + buffer += 2; + digits = u32 / 10; + u32 -= digits * 10; + PutTwoDigits(digits, buffer); + buffer += 2; + memcpy(buffer, one_ASCII_final_digits[u32], 2); + return buffer + 1; +} + +char* numbers_internal::FastIntToBuffer(int64_t i, char* buffer) { + uint64_t u = i; + if (i < 0) { + *buffer++ = '-'; + u = 0 - u; + } + return numbers_internal::FastIntToBuffer(u, buffer); +} + +// Given a 128-bit number expressed as a pair of uint64_t, high half first, +// return that number multiplied by the given 32-bit value. If the result is +// too large to fit in a 128-bit number, divide it by 2 until it fits. +static std::pair<uint64_t, uint64_t> Mul32(std::pair<uint64_t, uint64_t> num, + uint32_t mul) { + uint64_t bits0_31 = num.second & 0xFFFFFFFF; + uint64_t bits32_63 = num.second >> 32; + uint64_t bits64_95 = num.first & 0xFFFFFFFF; + uint64_t bits96_127 = num.first >> 32; + + // The picture so far: each of these 64-bit values has only the lower 32 bits + // filled in. + // bits96_127: [ 00000000 xxxxxxxx ] + // bits64_95: [ 00000000 xxxxxxxx ] + // bits32_63: [ 00000000 xxxxxxxx ] + // bits0_31: [ 00000000 xxxxxxxx ] + + bits0_31 *= mul; + bits32_63 *= mul; + bits64_95 *= mul; + bits96_127 *= mul; + + // Now the top halves may also have value, though all 64 of their bits will + // never be set at the same time, since they are a result of a 32x32 bit + // multiply. This makes the carry calculation slightly easier. + // bits96_127: [ mmmmmmmm | mmmmmmmm ] + // bits64_95: [ | mmmmmmmm mmmmmmmm | ] + // bits32_63: | [ mmmmmmmm | mmmmmmmm ] + // bits0_31: | [ | mmmmmmmm mmmmmmmm ] + // eventually: [ bits128_up | ...bits64_127.... | ..bits0_63... ] + + uint64_t bits0_63 = bits0_31 + (bits32_63 << 32); + uint64_t bits64_127 = bits64_95 + (bits96_127 << 32) + (bits32_63 >> 32) + + (bits0_63 < bits0_31); + uint64_t bits128_up = (bits96_127 >> 32) + (bits64_127 < bits64_95); + if (bits128_up == 0) return {bits64_127, bits0_63}; + + auto shift = static_cast<unsigned>(bit_width(bits128_up)); + uint64_t lo = (bits0_63 >> shift) + (bits64_127 << (64 - shift)); + uint64_t hi = (bits64_127 >> shift) + (bits128_up << (64 - shift)); + return {hi, lo}; +} + +// Compute num * 5 ^ expfive, and return the first 128 bits of the result, +// where the first bit is always a one. So PowFive(1, 0) starts 0b100000, +// PowFive(1, 1) starts 0b101000, PowFive(1, 2) starts 0b110010, etc. +static std::pair<uint64_t, uint64_t> PowFive(uint64_t num, int expfive) { + std::pair<uint64_t, uint64_t> result = {num, 0}; + while (expfive >= 13) { + // 5^13 is the highest power of five that will fit in a 32-bit integer. + result = Mul32(result, 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5); + expfive -= 13; + } + constexpr int powers_of_five[13] = { + 1, + 5, + 5 * 5, + 5 * 5 * 5, + 5 * 5 * 5 * 5, + 5 * 5 * 5 * 5 * 5, + 5 * 5 * 5 * 5 * 5 * 5, + 5 * 5 * 5 * 5 * 5 * 5 * 5, + 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5, + 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5, + 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5, + 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5, + 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5}; + result = Mul32(result, powers_of_five[expfive & 15]); + int shift = countl_zero(result.first); + if (shift != 0) { + result.first = (result.first << shift) + (result.second >> (64 - shift)); + result.second = (result.second << shift); + } + return result; +} + +struct ExpDigits { + int32_t exponent; + char digits[6]; +}; + +// SplitToSix converts value, a positive double-precision floating-point number, +// into a base-10 exponent and 6 ASCII digits, where the first digit is never +// zero. For example, SplitToSix(1) returns an exponent of zero and a digits +// array of {'1', '0', '0', '0', '0', '0'}. If value is exactly halfway between +// two possible representations, e.g. value = 100000.5, then "round to even" is +// performed. +static ExpDigits SplitToSix(const double value) { + ExpDigits exp_dig; + int exp = 5; + double d = value; + // First step: calculate a close approximation of the output, where the + // value d will be between 100,000 and 999,999, representing the digits + // in the output ASCII array, and exp is the base-10 exponent. It would be + // faster to use a table here, and to look up the base-2 exponent of value, + // however value is an IEEE-754 64-bit number, so the table would have 2,000 + // entries, which is not cache-friendly. + if (d >= 999999.5) { + if (d >= 1e+261) exp += 256, d *= 1e-256; + if (d >= 1e+133) exp += 128, d *= 1e-128; + if (d >= 1e+69) exp += 64, d *= 1e-64; + if (d >= 1e+37) exp += 32, d *= 1e-32; + if (d >= 1e+21) exp += 16, d *= 1e-16; + if (d >= 1e+13) exp += 8, d *= 1e-8; + if (d >= 1e+9) exp += 4, d *= 1e-4; + if (d >= 1e+7) exp += 2, d *= 1e-2; + if (d >= 1e+6) exp += 1, d *= 1e-1; + } else { + if (d < 1e-250) exp -= 256, d *= 1e256; + if (d < 1e-122) exp -= 128, d *= 1e128; + if (d < 1e-58) exp -= 64, d *= 1e64; + if (d < 1e-26) exp -= 32, d *= 1e32; + if (d < 1e-10) exp -= 16, d *= 1e16; + if (d < 1e-2) exp -= 8, d *= 1e8; + if (d < 1e+2) exp -= 4, d *= 1e4; + if (d < 1e+4) exp -= 2, d *= 1e2; + if (d < 1e+5) exp -= 1, d *= 1e1; + } + // At this point, d is in the range [99999.5..999999.5) and exp is in the + // range [-324..308]. Since we need to round d up, we want to add a half + // and truncate. + // However, the technique above may have lost some precision, due to its + // repeated multiplication by constants that each may be off by half a bit + // of precision. This only matters if we're close to the edge though. + // Since we'd like to know if the fractional part of d is close to a half, + // we multiply it by 65536 and see if the fractional part is close to 32768. + // (The number doesn't have to be a power of two,but powers of two are faster) + uint64_t d64k = d * 65536; + int dddddd; // A 6-digit decimal integer. + if ((d64k % 65536) == 32767 || (d64k % 65536) == 32768) { + // OK, it's fairly likely that precision was lost above, which is + // not a surprise given only 52 mantissa bits are available. Therefore + // redo the calculation using 128-bit numbers. (64 bits are not enough). + + // Start out with digits rounded down; maybe add one below. + dddddd = static_cast<int>(d64k / 65536); + + // mantissa is a 64-bit integer representing M.mmm... * 2^63. The actual + // value we're representing, of course, is M.mmm... * 2^exp2. + int exp2; + double m = std::frexp(value, &exp2); + uint64_t mantissa = m * (32768.0 * 65536.0 * 65536.0 * 65536.0); + // std::frexp returns an m value in the range [0.5, 1.0), however we + // can't multiply it by 2^64 and convert to an integer because some FPUs + // throw an exception when converting an number higher than 2^63 into an + // integer - even an unsigned 64-bit integer! Fortunately it doesn't matter + // since m only has 52 significant bits anyway. + mantissa <<= 1; + exp2 -= 64; // not needed, but nice for debugging + + // OK, we are here to compare: + // (dddddd + 0.5) * 10^(exp-5) vs. mantissa * 2^exp2 + // so we can round up dddddd if appropriate. Those values span the full + // range of 600 orders of magnitude of IEE 64-bit floating-point. + // Fortunately, we already know they are very close, so we don't need to + // track the base-2 exponent of both sides. This greatly simplifies the + // the math since the 2^exp2 calculation is unnecessary and the power-of-10 + // calculation can become a power-of-5 instead. + + std::pair<uint64_t, uint64_t> edge, val; + if (exp >= 6) { + // Compare (dddddd + 0.5) * 5 ^ (exp - 5) to mantissa + // Since we're tossing powers of two, 2 * dddddd + 1 is the + // same as dddddd + 0.5 + edge = PowFive(2 * dddddd + 1, exp - 5); + + val.first = mantissa; + val.second = 0; + } else { + // We can't compare (dddddd + 0.5) * 5 ^ (exp - 5) to mantissa as we did + // above because (exp - 5) is negative. So we compare (dddddd + 0.5) to + // mantissa * 5 ^ (5 - exp) + edge = PowFive(2 * dddddd + 1, 0); + + val = PowFive(mantissa, 5 - exp); + } + // printf("exp=%d %016lx %016lx vs %016lx %016lx\n", exp, val.first, + // val.second, edge.first, edge.second); + if (val > edge) { + dddddd++; + } else if (val == edge) { + dddddd += (dddddd & 1); + } + } else { + // Here, we are not close to the edge. + dddddd = static_cast<int>((d64k + 32768) / 65536); + } + if (dddddd == 1000000) { + dddddd = 100000; + exp += 1; + } + exp_dig.exponent = exp; + + int two_digits = dddddd / 10000; + dddddd -= two_digits * 10000; + numbers_internal::PutTwoDigits(two_digits, &exp_dig.digits[0]); + + two_digits = dddddd / 100; + dddddd -= two_digits * 100; + numbers_internal::PutTwoDigits(two_digits, &exp_dig.digits[2]); + + numbers_internal::PutTwoDigits(dddddd, &exp_dig.digits[4]); + return exp_dig; +} + +// Helper function for fast formatting of floating-point. +// The result is the same as "%g", a.k.a. "%.6g". +size_t numbers_internal::SixDigitsToBuffer(double d, char* const buffer) { + static_assert(std::numeric_limits<float>::is_iec559, + "IEEE-754/IEC-559 support only"); + + char* out = buffer; // we write data to out, incrementing as we go, but + // FloatToBuffer always returns the address of the buffer + // passed in. + + if (std::isnan(d)) { + strcpy(out, "nan"); // NOLINT(runtime/printf) + return 3; + } + if (d == 0) { // +0 and -0 are handled here + if (std::signbit(d)) *out++ = '-'; + *out++ = '0'; + *out = 0; + return out - buffer; + } + if (d < 0) { + *out++ = '-'; + d = -d; + } + if (d > std::numeric_limits<double>::max()) { + strcpy(out, "inf"); // NOLINT(runtime/printf) + return out + 3 - buffer; + } + + auto exp_dig = SplitToSix(d); + int exp = exp_dig.exponent; + const char* digits = exp_dig.digits; + out[0] = '0'; + out[1] = '.'; + switch (exp) { + case 5: + memcpy(out, &digits[0], 6), out += 6; + *out = 0; + return out - buffer; + case 4: + memcpy(out, &digits[0], 5), out += 5; + if (digits[5] != '0') { + *out++ = '.'; + *out++ = digits[5]; + } + *out = 0; + return out - buffer; + case 3: + memcpy(out, &digits[0], 4), out += 4; + if ((digits[5] | digits[4]) != '0') { + *out++ = '.'; + *out++ = digits[4]; + if (digits[5] != '0') *out++ = digits[5]; + } + *out = 0; + return out - buffer; + case 2: + memcpy(out, &digits[0], 3), out += 3; + *out++ = '.'; + memcpy(out, &digits[3], 3); + out += 3; + while (out[-1] == '0') --out; + if (out[-1] == '.') --out; + *out = 0; + return out - buffer; + case 1: + memcpy(out, &digits[0], 2), out += 2; + *out++ = '.'; + memcpy(out, &digits[2], 4); + out += 4; + while (out[-1] == '0') --out; + if (out[-1] == '.') --out; + *out = 0; + return out - buffer; + case 0: + memcpy(out, &digits[0], 1), out += 1; + *out++ = '.'; + memcpy(out, &digits[1], 5); + out += 5; + while (out[-1] == '0') --out; + if (out[-1] == '.') --out; + *out = 0; + return out - buffer; + case -4: + out[2] = '0'; + ++out; + ABSL_FALLTHROUGH_INTENDED; + case -3: + out[2] = '0'; + ++out; + ABSL_FALLTHROUGH_INTENDED; + case -2: + out[2] = '0'; + ++out; + ABSL_FALLTHROUGH_INTENDED; + case -1: + out += 2; + memcpy(out, &digits[0], 6); + out += 6; + while (out[-1] == '0') --out; + *out = 0; + return out - buffer; + } + assert(exp < -4 || exp >= 6); + out[0] = digits[0]; + assert(out[1] == '.'); + out += 2; + memcpy(out, &digits[1], 5), out += 5; + while (out[-1] == '0') --out; + if (out[-1] == '.') --out; + *out++ = 'e'; + if (exp > 0) { + *out++ = '+'; + } else { + *out++ = '-'; + exp = -exp; + } + if (exp > 99) { + int dig1 = exp / 100; + exp -= dig1 * 100; + *out++ = '0' + dig1; + } + PutTwoDigits(exp, out); + out += 2; + *out = 0; + return out - buffer; +} + +namespace { +// Represents integer values of digits. +// Uses 36 to indicate an invalid character since we support +// bases up to 36. +static const int8_t kAsciiToInt[256] = { + 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, // 16 36s. + 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 0, 1, 2, 3, 4, 5, + 6, 7, 8, 9, 36, 36, 36, 36, 36, 36, 36, 10, 11, 12, 13, 14, 15, 16, 17, + 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, + 36, 36, 36, 36, 36, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36}; + +// Parse the sign and optional hex or oct prefix in text. +inline bool safe_parse_sign_and_base(y_absl::string_view* text /*inout*/, + int* base_ptr /*inout*/, + bool* negative_ptr /*output*/) { + if (text->data() == nullptr) { + return false; + } + + const char* start = text->data(); + const char* end = start + text->size(); + int base = *base_ptr; + + // Consume whitespace. + while (start < end && y_absl::ascii_isspace(start[0])) { + ++start; + } + while (start < end && y_absl::ascii_isspace(end[-1])) { + --end; + } + if (start >= end) { + return false; + } + + // Consume sign. + *negative_ptr = (start[0] == '-'); + if (*negative_ptr || start[0] == '+') { + ++start; + if (start >= end) { + return false; + } + } + + // Consume base-dependent prefix. + // base 0: "0x" -> base 16, "0" -> base 8, default -> base 10 + // base 16: "0x" -> base 16 + // Also validate the base. + if (base == 0) { + if (end - start >= 2 && start[0] == '0' && + (start[1] == 'x' || start[1] == 'X')) { + base = 16; + start += 2; + if (start >= end) { + // "0x" with no digits after is invalid. + return false; + } + } else if (end - start >= 1 && start[0] == '0') { + base = 8; + start += 1; + } else { + base = 10; + } + } else if (base == 16) { + if (end - start >= 2 && start[0] == '0' && + (start[1] == 'x' || start[1] == 'X')) { + start += 2; + if (start >= end) { + // "0x" with no digits after is invalid. + return false; + } + } + } else if (base >= 2 && base <= 36) { + // okay + } else { + return false; + } + *text = y_absl::string_view(start, end - start); + *base_ptr = base; + return true; +} + +// Consume digits. +// +// The classic loop: +// +// for each digit +// value = value * base + digit +// value *= sign +// +// The classic loop needs overflow checking. It also fails on the most +// negative integer, -2147483648 in 32-bit two's complement representation. +// +// My improved loop: +// +// if (!negative) +// for each digit +// value = value * base +// value = value + digit +// else +// for each digit +// value = value * base +// value = value - digit +// +// Overflow checking becomes simple. + +// Lookup tables per IntType: +// vmax/base and vmin/base are precomputed because division costs at least 8ns. +// TODO(junyer): Doing this per base instead (i.e. an array of structs, not a +// struct of arrays) would probably be better in terms of d-cache for the most +// commonly used bases. +template <typename IntType> +struct LookupTables { + ABSL_CONST_INIT static const IntType kVmaxOverBase[]; + ABSL_CONST_INIT static const IntType kVminOverBase[]; +}; + +// An array initializer macro for X/base where base in [0, 36]. +// However, note that lookups for base in [0, 1] should never happen because +// base has been validated to be in [2, 36] by safe_parse_sign_and_base(). +#define X_OVER_BASE_INITIALIZER(X) \ + { \ + 0, 0, X / 2, X / 3, X / 4, X / 5, X / 6, X / 7, X / 8, X / 9, X / 10, \ + X / 11, X / 12, X / 13, X / 14, X / 15, X / 16, X / 17, X / 18, \ + X / 19, X / 20, X / 21, X / 22, X / 23, X / 24, X / 25, X / 26, \ + X / 27, X / 28, X / 29, X / 30, X / 31, X / 32, X / 33, X / 34, \ + X / 35, X / 36, \ + } + +// This kVmaxOverBase is generated with +// for (int base = 2; base < 37; ++base) { +// y_absl::uint128 max = std::numeric_limits<y_absl::uint128>::max(); +// auto result = max / base; +// std::cout << " MakeUint128(" << y_absl::Uint128High64(result) << "u, " +// << y_absl::Uint128Low64(result) << "u),\n"; +// } +// See https://godbolt.org/z/aneYsb +// +// uint128& operator/=(uint128) is not constexpr, so hardcode the resulting +// array to avoid a static initializer. +template<> +const uint128 LookupTables<uint128>::kVmaxOverBase[] = { + 0, + 0, + MakeUint128(9223372036854775807u, 18446744073709551615u), + MakeUint128(6148914691236517205u, 6148914691236517205u), + MakeUint128(4611686018427387903u, 18446744073709551615u), + MakeUint128(3689348814741910323u, 3689348814741910323u), + MakeUint128(3074457345618258602u, 12297829382473034410u), + MakeUint128(2635249153387078802u, 5270498306774157604u), + MakeUint128(2305843009213693951u, 18446744073709551615u), + MakeUint128(2049638230412172401u, 14347467612885206812u), + MakeUint128(1844674407370955161u, 11068046444225730969u), + MakeUint128(1676976733973595601u, 8384883669867978007u), + MakeUint128(1537228672809129301u, 6148914691236517205u), + MakeUint128(1418980313362273201u, 4256940940086819603u), + MakeUint128(1317624576693539401u, 2635249153387078802u), + MakeUint128(1229782938247303441u, 1229782938247303441u), + MakeUint128(1152921504606846975u, 18446744073709551615u), + MakeUint128(1085102592571150095u, 1085102592571150095u), + MakeUint128(1024819115206086200u, 16397105843297379214u), + MakeUint128(970881267037344821u, 16504981539634861972u), + MakeUint128(922337203685477580u, 14757395258967641292u), + MakeUint128(878416384462359600u, 14054662151397753612u), + MakeUint128(838488366986797800u, 13415813871788764811u), + MakeUint128(802032351030850070u, 4812194106185100421u), + MakeUint128(768614336404564650u, 12297829382473034410u), + MakeUint128(737869762948382064u, 11805916207174113034u), + MakeUint128(709490156681136600u, 11351842506898185609u), + MakeUint128(683212743470724133u, 17080318586768103348u), + MakeUint128(658812288346769700u, 10540996613548315209u), + MakeUint128(636094623231363848u, 15266270957552732371u), + MakeUint128(614891469123651720u, 9838263505978427528u), + MakeUint128(595056260442243600u, 9520900167075897608u), + MakeUint128(576460752303423487u, 18446744073709551615u), + MakeUint128(558992244657865200u, 8943875914525843207u), + MakeUint128(542551296285575047u, 9765923333140350855u), + MakeUint128(527049830677415760u, 8432797290838652167u), + MakeUint128(512409557603043100u, 8198552921648689607u), +}; + +// This kVmaxOverBase generated with +// for (int base = 2; base < 37; ++base) { +// y_absl::int128 max = std::numeric_limits<y_absl::int128>::max(); +// auto result = max / base; +// std::cout << "\tMakeInt128(" << y_absl::Int128High64(result) << ", " +// << y_absl::Int128Low64(result) << "u),\n"; +// } +// See https://godbolt.org/z/7djYWz +// +// int128& operator/=(int128) is not constexpr, so hardcode the resulting array +// to avoid a static initializer. +template<> +const int128 LookupTables<int128>::kVmaxOverBase[] = { + 0, + 0, + MakeInt128(4611686018427387903, 18446744073709551615u), + MakeInt128(3074457345618258602, 12297829382473034410u), + MakeInt128(2305843009213693951, 18446744073709551615u), + MakeInt128(1844674407370955161, 11068046444225730969u), + MakeInt128(1537228672809129301, 6148914691236517205u), + MakeInt128(1317624576693539401, 2635249153387078802u), + MakeInt128(1152921504606846975, 18446744073709551615u), + MakeInt128(1024819115206086200, 16397105843297379214u), + MakeInt128(922337203685477580, 14757395258967641292u), + MakeInt128(838488366986797800, 13415813871788764811u), + MakeInt128(768614336404564650, 12297829382473034410u), + MakeInt128(709490156681136600, 11351842506898185609u), + MakeInt128(658812288346769700, 10540996613548315209u), + MakeInt128(614891469123651720, 9838263505978427528u), + MakeInt128(576460752303423487, 18446744073709551615u), + MakeInt128(542551296285575047, 9765923333140350855u), + MakeInt128(512409557603043100, 8198552921648689607u), + MakeInt128(485440633518672410, 17475862806672206794u), + MakeInt128(461168601842738790, 7378697629483820646u), + MakeInt128(439208192231179800, 7027331075698876806u), + MakeInt128(419244183493398900, 6707906935894382405u), + MakeInt128(401016175515425035, 2406097053092550210u), + MakeInt128(384307168202282325, 6148914691236517205u), + MakeInt128(368934881474191032, 5902958103587056517u), + MakeInt128(354745078340568300, 5675921253449092804u), + MakeInt128(341606371735362066, 17763531330238827482u), + MakeInt128(329406144173384850, 5270498306774157604u), + MakeInt128(318047311615681924, 7633135478776366185u), + MakeInt128(307445734561825860, 4919131752989213764u), + MakeInt128(297528130221121800, 4760450083537948804u), + MakeInt128(288230376151711743, 18446744073709551615u), + MakeInt128(279496122328932600, 4471937957262921603u), + MakeInt128(271275648142787523, 14106333703424951235u), + MakeInt128(263524915338707880, 4216398645419326083u), + MakeInt128(256204778801521550, 4099276460824344803u), +}; + +// This kVminOverBase generated with +// for (int base = 2; base < 37; ++base) { +// y_absl::int128 min = std::numeric_limits<y_absl::int128>::min(); +// auto result = min / base; +// std::cout << "\tMakeInt128(" << y_absl::Int128High64(result) << ", " +// << y_absl::Int128Low64(result) << "u),\n"; +// } +// +// See https://godbolt.org/z/7djYWz +// +// int128& operator/=(int128) is not constexpr, so hardcode the resulting array +// to avoid a static initializer. +template<> +const int128 LookupTables<int128>::kVminOverBase[] = { + 0, + 0, + MakeInt128(-4611686018427387904, 0u), + MakeInt128(-3074457345618258603, 6148914691236517206u), + MakeInt128(-2305843009213693952, 0u), + MakeInt128(-1844674407370955162, 7378697629483820647u), + MakeInt128(-1537228672809129302, 12297829382473034411u), + MakeInt128(-1317624576693539402, 15811494920322472814u), + MakeInt128(-1152921504606846976, 0u), + MakeInt128(-1024819115206086201, 2049638230412172402u), + MakeInt128(-922337203685477581, 3689348814741910324u), + MakeInt128(-838488366986797801, 5030930201920786805u), + MakeInt128(-768614336404564651, 6148914691236517206u), + MakeInt128(-709490156681136601, 7094901566811366007u), + MakeInt128(-658812288346769701, 7905747460161236407u), + MakeInt128(-614891469123651721, 8608480567731124088u), + MakeInt128(-576460752303423488, 0u), + MakeInt128(-542551296285575048, 8680820740569200761u), + MakeInt128(-512409557603043101, 10248191152060862009u), + MakeInt128(-485440633518672411, 970881267037344822u), + MakeInt128(-461168601842738791, 11068046444225730970u), + MakeInt128(-439208192231179801, 11419412998010674810u), + MakeInt128(-419244183493398901, 11738837137815169211u), + MakeInt128(-401016175515425036, 16040647020617001406u), + MakeInt128(-384307168202282326, 12297829382473034411u), + MakeInt128(-368934881474191033, 12543785970122495099u), + MakeInt128(-354745078340568301, 12770822820260458812u), + MakeInt128(-341606371735362067, 683212743470724134u), + MakeInt128(-329406144173384851, 13176245766935394012u), + MakeInt128(-318047311615681925, 10813608594933185431u), + MakeInt128(-307445734561825861, 13527612320720337852u), + MakeInt128(-297528130221121801, 13686293990171602812u), + MakeInt128(-288230376151711744, 0u), + MakeInt128(-279496122328932601, 13974806116446630013u), + MakeInt128(-271275648142787524, 4340410370284600381u), + MakeInt128(-263524915338707881, 14230345428290225533u), + MakeInt128(-256204778801521551, 14347467612885206813u), +}; + +template <typename IntType> +const IntType LookupTables<IntType>::kVmaxOverBase[] = + X_OVER_BASE_INITIALIZER(std::numeric_limits<IntType>::max()); + +template <typename IntType> +const IntType LookupTables<IntType>::kVminOverBase[] = + X_OVER_BASE_INITIALIZER(std::numeric_limits<IntType>::min()); + +#undef X_OVER_BASE_INITIALIZER + +template <typename IntType> +inline bool safe_parse_positive_int(y_absl::string_view text, int base, + IntType* value_p) { + IntType value = 0; + const IntType vmax = std::numeric_limits<IntType>::max(); + assert(vmax > 0); + assert(base >= 0); + assert(vmax >= static_cast<IntType>(base)); + const IntType vmax_over_base = LookupTables<IntType>::kVmaxOverBase[base]; + assert(base < 2 || + std::numeric_limits<IntType>::max() / base == vmax_over_base); + const char* start = text.data(); + const char* end = start + text.size(); + // loop over digits + for (; start < end; ++start) { + unsigned char c = static_cast<unsigned char>(start[0]); + int digit = kAsciiToInt[c]; + if (digit >= base) { + *value_p = value; + return false; + } + if (value > vmax_over_base) { + *value_p = vmax; + return false; + } + value *= base; + if (value > vmax - digit) { + *value_p = vmax; + return false; + } + value += digit; + } + *value_p = value; + return true; +} + +template <typename IntType> +inline bool safe_parse_negative_int(y_absl::string_view text, int base, + IntType* value_p) { + IntType value = 0; + const IntType vmin = std::numeric_limits<IntType>::min(); + assert(vmin < 0); + assert(vmin <= 0 - base); + IntType vmin_over_base = LookupTables<IntType>::kVminOverBase[base]; + assert(base < 2 || + std::numeric_limits<IntType>::min() / base == vmin_over_base); + // 2003 c++ standard [expr.mul] + // "... the sign of the remainder is implementation-defined." + // Although (vmin/base)*base + vmin%base is always vmin. + // 2011 c++ standard tightens the spec but we cannot rely on it. + // TODO(junyer): Handle this in the lookup table generation. + if (vmin % base > 0) { + vmin_over_base += 1; + } + const char* start = text.data(); + const char* end = start + text.size(); + // loop over digits + for (; start < end; ++start) { + unsigned char c = static_cast<unsigned char>(start[0]); + int digit = kAsciiToInt[c]; + if (digit >= base) { + *value_p = value; + return false; + } + if (value < vmin_over_base) { + *value_p = vmin; + return false; + } + value *= base; + if (value < vmin + digit) { + *value_p = vmin; + return false; + } + value -= digit; + } + *value_p = value; + return true; +} + +// Input format based on POSIX.1-2008 strtol +// http://pubs.opengroup.org/onlinepubs/9699919799/functions/strtol.html +template <typename IntType> +inline bool safe_int_internal(y_absl::string_view text, IntType* value_p, + int base) { + *value_p = 0; + bool negative; + if (!safe_parse_sign_and_base(&text, &base, &negative)) { + return false; + } + if (!negative) { + return safe_parse_positive_int(text, base, value_p); + } else { + return safe_parse_negative_int(text, base, value_p); + } +} + +template <typename IntType> +inline bool safe_uint_internal(y_absl::string_view text, IntType* value_p, + int base) { + *value_p = 0; + bool negative; + if (!safe_parse_sign_and_base(&text, &base, &negative) || negative) { + return false; + } + return safe_parse_positive_int(text, base, value_p); +} +} // anonymous namespace + +namespace numbers_internal { + +// Digit conversion. +ABSL_CONST_INIT ABSL_DLL const char kHexChar[] = + "0123456789abcdef"; + +ABSL_CONST_INIT ABSL_DLL const char kHexTable[513] = + "000102030405060708090a0b0c0d0e0f" + "101112131415161718191a1b1c1d1e1f" + "202122232425262728292a2b2c2d2e2f" + "303132333435363738393a3b3c3d3e3f" + "404142434445464748494a4b4c4d4e4f" + "505152535455565758595a5b5c5d5e5f" + "606162636465666768696a6b6c6d6e6f" + "707172737475767778797a7b7c7d7e7f" + "808182838485868788898a8b8c8d8e8f" + "909192939495969798999a9b9c9d9e9f" + "a0a1a2a3a4a5a6a7a8a9aaabacadaeaf" + "b0b1b2b3b4b5b6b7b8b9babbbcbdbebf" + "c0c1c2c3c4c5c6c7c8c9cacbcccdcecf" + "d0d1d2d3d4d5d6d7d8d9dadbdcdddedf" + "e0e1e2e3e4e5e6e7e8e9eaebecedeeef" + "f0f1f2f3f4f5f6f7f8f9fafbfcfdfeff"; + +ABSL_CONST_INIT ABSL_DLL const char two_ASCII_digits[100][2] = { + {'0', '0'}, {'0', '1'}, {'0', '2'}, {'0', '3'}, {'0', '4'}, {'0', '5'}, + {'0', '6'}, {'0', '7'}, {'0', '8'}, {'0', '9'}, {'1', '0'}, {'1', '1'}, + {'1', '2'}, {'1', '3'}, {'1', '4'}, {'1', '5'}, {'1', '6'}, {'1', '7'}, + {'1', '8'}, {'1', '9'}, {'2', '0'}, {'2', '1'}, {'2', '2'}, {'2', '3'}, + {'2', '4'}, {'2', '5'}, {'2', '6'}, {'2', '7'}, {'2', '8'}, {'2', '9'}, + {'3', '0'}, {'3', '1'}, {'3', '2'}, {'3', '3'}, {'3', '4'}, {'3', '5'}, + {'3', '6'}, {'3', '7'}, {'3', '8'}, {'3', '9'}, {'4', '0'}, {'4', '1'}, + {'4', '2'}, {'4', '3'}, {'4', '4'}, {'4', '5'}, {'4', '6'}, {'4', '7'}, + {'4', '8'}, {'4', '9'}, {'5', '0'}, {'5', '1'}, {'5', '2'}, {'5', '3'}, + {'5', '4'}, {'5', '5'}, {'5', '6'}, {'5', '7'}, {'5', '8'}, {'5', '9'}, + {'6', '0'}, {'6', '1'}, {'6', '2'}, {'6', '3'}, {'6', '4'}, {'6', '5'}, + {'6', '6'}, {'6', '7'}, {'6', '8'}, {'6', '9'}, {'7', '0'}, {'7', '1'}, + {'7', '2'}, {'7', '3'}, {'7', '4'}, {'7', '5'}, {'7', '6'}, {'7', '7'}, + {'7', '8'}, {'7', '9'}, {'8', '0'}, {'8', '1'}, {'8', '2'}, {'8', '3'}, + {'8', '4'}, {'8', '5'}, {'8', '6'}, {'8', '7'}, {'8', '8'}, {'8', '9'}, + {'9', '0'}, {'9', '1'}, {'9', '2'}, {'9', '3'}, {'9', '4'}, {'9', '5'}, + {'9', '6'}, {'9', '7'}, {'9', '8'}, {'9', '9'}}; + +bool safe_strto32_base(y_absl::string_view text, int32_t* value, int base) { + return safe_int_internal<int32_t>(text, value, base); +} + +bool safe_strto64_base(y_absl::string_view text, int64_t* value, int base) { + return safe_int_internal<int64_t>(text, value, base); +} + +bool safe_strto128_base(y_absl::string_view text, int128* value, int base) { + return safe_int_internal<y_absl::int128>(text, value, base); +} + +bool safe_strtou32_base(y_absl::string_view text, uint32_t* value, int base) { + return safe_uint_internal<uint32_t>(text, value, base); +} + +bool safe_strtou64_base(y_absl::string_view text, uint64_t* value, int base) { + return safe_uint_internal<uint64_t>(text, value, base); +} + +bool safe_strtou128_base(y_absl::string_view text, uint128* value, int base) { + return safe_uint_internal<y_absl::uint128>(text, value, base); +} + +} // namespace numbers_internal +ABSL_NAMESPACE_END +} // namespace y_absl diff --git a/contrib/restricted/abseil-cpp-tstring/y_absl/strings/numbers.h b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/numbers.h new file mode 100644 index 0000000000..ce181d8eb1 --- /dev/null +++ b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/numbers.h @@ -0,0 +1,308 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// ----------------------------------------------------------------------------- +// File: numbers.h +// ----------------------------------------------------------------------------- +// +// This package contains functions for converting strings to numbers. For +// converting numbers to strings, use `StrCat()` or `StrAppend()` in str_cat.h, +// which automatically detect and convert most number values appropriately. + +#ifndef ABSL_STRINGS_NUMBERS_H_ +#define ABSL_STRINGS_NUMBERS_H_ + +#if defined(__SSE4_2__) && !defined(__CUDACC__) +#define _Y__SSE4_2__ +#endif + +#ifdef _Y__SSE4_2__ +#ifdef _MSC_VER +#include <intrin.h> +#else +#include <x86intrin.h> +#endif +#endif + +#include <cstddef> +#include <cstdlib> +#include <cstring> +#include <ctime> +#include <limits> +#include <util/generic/string.h> +#include <type_traits> + +#include "y_absl/base/config.h" +#ifdef _Y__SSE4_2__ +// TODO(jorg): Remove this when we figure out the right way +// to swap bytes on SSE 4.2 that works with the compilers +// we claim to support. Also, add tests for the compiler +// that doesn't support the Intel _bswap64 intrinsic but +// does support all the SSE 4.2 intrinsics +#include "y_absl/base/internal/endian.h" +#endif +#include "y_absl/base/macros.h" +#include "y_absl/base/port.h" +#include "y_absl/numeric/bits.h" +#include "y_absl/numeric/int128.h" +#include "y_absl/strings/string_view.h" + +namespace y_absl { +ABSL_NAMESPACE_BEGIN + +// SimpleAtoi() +// +// Converts the given string (optionally followed or preceded by ASCII +// whitespace) into an integer value, returning `true` if successful. The string +// must reflect a base-10 integer whose value falls within the range of the +// integer type (optionally preceded by a `+` or `-`). If any errors are +// encountered, this function returns `false`, leaving `out` in an unspecified +// state. +template <typename int_type> +ABSL_MUST_USE_RESULT bool SimpleAtoi(y_absl::string_view str, int_type* out); + +// SimpleAtof() +// +// Converts the given string (optionally followed or preceded by ASCII +// whitespace) into a float, which may be rounded on overflow or underflow, +// returning `true` if successful. +// See https://en.cppreference.com/w/c/string/byte/strtof for details about the +// allowed formats for `str`, except SimpleAtof() is locale-independent and will +// always use the "C" locale. If any errors are encountered, this function +// returns `false`, leaving `out` in an unspecified state. +ABSL_MUST_USE_RESULT bool SimpleAtof(y_absl::string_view str, float* out); + +// SimpleAtod() +// +// Converts the given string (optionally followed or preceded by ASCII +// whitespace) into a double, which may be rounded on overflow or underflow, +// returning `true` if successful. +// See https://en.cppreference.com/w/c/string/byte/strtof for details about the +// allowed formats for `str`, except SimpleAtod is locale-independent and will +// always use the "C" locale. If any errors are encountered, this function +// returns `false`, leaving `out` in an unspecified state. +ABSL_MUST_USE_RESULT bool SimpleAtod(y_absl::string_view str, double* out); + +// SimpleAtob() +// +// Converts the given string into a boolean, returning `true` if successful. +// The following case-insensitive strings are interpreted as boolean `true`: +// "true", "t", "yes", "y", "1". The following case-insensitive strings +// are interpreted as boolean `false`: "false", "f", "no", "n", "0". If any +// errors are encountered, this function returns `false`, leaving `out` in an +// unspecified state. +ABSL_MUST_USE_RESULT bool SimpleAtob(y_absl::string_view str, bool* out); + +// SimpleHexAtoi() +// +// Converts a hexadecimal string (optionally followed or preceded by ASCII +// whitespace) to an integer, returning `true` if successful. Only valid base-16 +// hexadecimal integers whose value falls within the range of the integer type +// (optionally preceded by a `+` or `-`) can be converted. A valid hexadecimal +// value may include both upper and lowercase character symbols, and may +// optionally include a leading "0x" (or "0X") number prefix, which is ignored +// by this function. If any errors are encountered, this function returns +// `false`, leaving `out` in an unspecified state. +template <typename int_type> +ABSL_MUST_USE_RESULT bool SimpleHexAtoi(y_absl::string_view str, int_type* out); + +// Overloads of SimpleHexAtoi() for 128 bit integers. +ABSL_MUST_USE_RESULT inline bool SimpleHexAtoi(y_absl::string_view str, + y_absl::int128* out); +ABSL_MUST_USE_RESULT inline bool SimpleHexAtoi(y_absl::string_view str, + y_absl::uint128* out); + +ABSL_NAMESPACE_END +} // namespace y_absl + +// End of public API. Implementation details follow. + +namespace y_absl { +ABSL_NAMESPACE_BEGIN +namespace numbers_internal { + +// Digit conversion. +ABSL_DLL extern const char kHexChar[17]; // 0123456789abcdef +ABSL_DLL extern const char + kHexTable[513]; // 000102030405060708090a0b0c0d0e0f1011... +ABSL_DLL extern const char + two_ASCII_digits[100][2]; // 00, 01, 02, 03... + +// Writes a two-character representation of 'i' to 'buf'. 'i' must be in the +// range 0 <= i < 100, and buf must have space for two characters. Example: +// char buf[2]; +// PutTwoDigits(42, buf); +// // buf[0] == '4' +// // buf[1] == '2' +inline void PutTwoDigits(size_t i, char* buf) { + assert(i < 100); + memcpy(buf, two_ASCII_digits[i], 2); +} + +// safe_strto?() functions for implementing SimpleAtoi() + +bool safe_strto32_base(y_absl::string_view text, int32_t* value, int base); +bool safe_strto64_base(y_absl::string_view text, int64_t* value, int base); +bool safe_strto128_base(y_absl::string_view text, y_absl::int128* value, + int base); +bool safe_strtou32_base(y_absl::string_view text, uint32_t* value, int base); +bool safe_strtou64_base(y_absl::string_view text, uint64_t* value, int base); +bool safe_strtou128_base(y_absl::string_view text, y_absl::uint128* value, + int base); + +static const int kFastToBufferSize = 32; +static const int kSixDigitsToBufferSize = 16; + +// Helper function for fast formatting of floating-point values. +// The result is the same as printf's "%g", a.k.a. "%.6g"; that is, six +// significant digits are returned, trailing zeros are removed, and numbers +// outside the range 0.0001-999999 are output using scientific notation +// (1.23456e+06). This routine is heavily optimized. +// Required buffer size is `kSixDigitsToBufferSize`. +size_t SixDigitsToBuffer(double d, char* buffer); + +// These functions are intended for speed. All functions take an output buffer +// as an argument and return a pointer to the last byte they wrote, which is the +// terminating '\0'. At most `kFastToBufferSize` bytes are written. +char* FastIntToBuffer(int32_t, char*); +char* FastIntToBuffer(uint32_t, char*); +char* FastIntToBuffer(int64_t, char*); +char* FastIntToBuffer(uint64_t, char*); + +// For enums and integer types that are not an exact match for the types above, +// use templates to call the appropriate one of the four overloads above. +template <typename int_type> +char* FastIntToBuffer(int_type i, char* buffer) { + static_assert(sizeof(i) <= 64 / 8, + "FastIntToBuffer works only with 64-bit-or-less integers."); + // TODO(jorg): This signed-ness check is used because it works correctly + // with enums, and it also serves to check that int_type is not a pointer. + // If one day something like std::is_signed<enum E> works, switch to it. + if (static_cast<int_type>(1) - 2 < 0) { // Signed + if (sizeof(i) > 32 / 8) { // 33-bit to 64-bit + return FastIntToBuffer(static_cast<int64_t>(i), buffer); + } else { // 32-bit or less + return FastIntToBuffer(static_cast<int32_t>(i), buffer); + } + } else { // Unsigned + if (sizeof(i) > 32 / 8) { // 33-bit to 64-bit + return FastIntToBuffer(static_cast<uint64_t>(i), buffer); + } else { // 32-bit or less + return FastIntToBuffer(static_cast<uint32_t>(i), buffer); + } + } +} + +// Implementation of SimpleAtoi, generalized to support arbitrary base (used +// with base different from 10 elsewhere in Abseil implementation). +template <typename int_type> +ABSL_MUST_USE_RESULT bool safe_strtoi_base(y_absl::string_view s, int_type* out, + int base) { + static_assert(sizeof(*out) == 4 || sizeof(*out) == 8, + "SimpleAtoi works only with 32-bit or 64-bit integers."); + static_assert(!std::is_floating_point<int_type>::value, + "Use SimpleAtof or SimpleAtod instead."); + bool parsed; + // TODO(jorg): This signed-ness check is used because it works correctly + // with enums, and it also serves to check that int_type is not a pointer. + // If one day something like std::is_signed<enum E> works, switch to it. + if (static_cast<int_type>(1) - 2 < 0) { // Signed + if (sizeof(*out) == 64 / 8) { // 64-bit + int64_t val; + parsed = numbers_internal::safe_strto64_base(s, &val, base); + *out = static_cast<int_type>(val); + } else { // 32-bit + int32_t val; + parsed = numbers_internal::safe_strto32_base(s, &val, base); + *out = static_cast<int_type>(val); + } + } else { // Unsigned + if (sizeof(*out) == 64 / 8) { // 64-bit + uint64_t val; + parsed = numbers_internal::safe_strtou64_base(s, &val, base); + *out = static_cast<int_type>(val); + } else { // 32-bit + uint32_t val; + parsed = numbers_internal::safe_strtou32_base(s, &val, base); + *out = static_cast<int_type>(val); + } + } + return parsed; +} + +// FastHexToBufferZeroPad16() +// +// Outputs `val` into `out` as if by `snprintf(out, 17, "%016x", val)` but +// without the terminating null character. Thus `out` must be of length >= 16. +// Returns the number of non-pad digits of the output (it can never be zero +// since 0 has one digit). +inline size_t FastHexToBufferZeroPad16(uint64_t val, char* out) { +#ifdef _Y__SSE4_2__ + uint64_t be = y_absl::big_endian::FromHost64(val); + const auto kNibbleMask = _mm_set1_epi8(0xf); + const auto kHexDigits = _mm_setr_epi8('0', '1', '2', '3', '4', '5', '6', '7', + '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'); + auto v = _mm_loadl_epi64(reinterpret_cast<__m128i*>(&be)); // load lo dword + auto v4 = _mm_srli_epi64(v, 4); // shift 4 right + auto il = _mm_unpacklo_epi8(v4, v); // interleave bytes + auto m = _mm_and_si128(il, kNibbleMask); // mask out nibbles + auto hexchars = _mm_shuffle_epi8(kHexDigits, m); // hex chars + _mm_storeu_si128(reinterpret_cast<__m128i*>(out), hexchars); +#else + for (int i = 0; i < 8; ++i) { + auto byte = (val >> (56 - 8 * i)) & 0xFF; + auto* hex = &y_absl::numbers_internal::kHexTable[byte * 2]; + std::memcpy(out + 2 * i, hex, 2); + } +#endif + // | 0x1 so that even 0 has 1 digit. + return 16 - countl_zero(val | 0x1) / 4; +} + +} // namespace numbers_internal + +template <typename int_type> +ABSL_MUST_USE_RESULT bool SimpleAtoi(y_absl::string_view str, int_type* out) { + return numbers_internal::safe_strtoi_base(str, out, 10); +} + +ABSL_MUST_USE_RESULT inline bool SimpleAtoi(y_absl::string_view str, + y_absl::int128* out) { + return numbers_internal::safe_strto128_base(str, out, 10); +} + +ABSL_MUST_USE_RESULT inline bool SimpleAtoi(y_absl::string_view str, + y_absl::uint128* out) { + return numbers_internal::safe_strtou128_base(str, out, 10); +} + +template <typename int_type> +ABSL_MUST_USE_RESULT bool SimpleHexAtoi(y_absl::string_view str, int_type* out) { + return numbers_internal::safe_strtoi_base(str, out, 16); +} + +ABSL_MUST_USE_RESULT inline bool SimpleHexAtoi(y_absl::string_view str, + y_absl::int128* out) { + return numbers_internal::safe_strto128_base(str, out, 16); +} + +ABSL_MUST_USE_RESULT inline bool SimpleHexAtoi(y_absl::string_view str, + y_absl::uint128* out) { + return numbers_internal::safe_strtou128_base(str, out, 16); +} + +ABSL_NAMESPACE_END +} // namespace y_absl + +#endif // ABSL_STRINGS_NUMBERS_H_ diff --git a/contrib/restricted/abseil-cpp-tstring/y_absl/strings/str_cat.cc b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/str_cat.cc new file mode 100644 index 0000000000..9e11702eae --- /dev/null +++ b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/str_cat.cc @@ -0,0 +1,246 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "y_absl/strings/str_cat.h" + +#include <assert.h> + +#include <algorithm> +#include <cstdint> +#include <cstring> + +#include "y_absl/strings/ascii.h" +#include "y_absl/strings/internal/resize_uninitialized.h" +#include "y_absl/strings/numbers.h" + +namespace y_absl { +ABSL_NAMESPACE_BEGIN + +AlphaNum::AlphaNum(Hex hex) { + static_assert(numbers_internal::kFastToBufferSize >= 32, + "This function only works when output buffer >= 32 bytes long"); + char* const end = &digits_[numbers_internal::kFastToBufferSize]; + auto real_width = + y_absl::numbers_internal::FastHexToBufferZeroPad16(hex.value, end - 16); + if (real_width >= hex.width) { + piece_ = y_absl::string_view(end - real_width, real_width); + } else { + // Pad first 16 chars because FastHexToBufferZeroPad16 pads only to 16 and + // max pad width can be up to 20. + std::memset(end - 32, hex.fill, 16); + // Patch up everything else up to the real_width. + std::memset(end - real_width - 16, hex.fill, 16); + piece_ = y_absl::string_view(end - hex.width, hex.width); + } +} + +AlphaNum::AlphaNum(Dec dec) { + assert(dec.width <= numbers_internal::kFastToBufferSize); + char* const end = &digits_[numbers_internal::kFastToBufferSize]; + char* const minfill = end - dec.width; + char* writer = end; + uint64_t value = dec.value; + bool neg = dec.neg; + while (value > 9) { + *--writer = '0' + (value % 10); + value /= 10; + } + *--writer = '0' + value; + if (neg) *--writer = '-'; + + ptrdiff_t fillers = writer - minfill; + if (fillers > 0) { + // Tricky: if the fill character is ' ', then it's <fill><+/-><digits> + // But...: if the fill character is '0', then it's <+/-><fill><digits> + bool add_sign_again = false; + if (neg && dec.fill == '0') { // If filling with '0', + ++writer; // ignore the sign we just added + add_sign_again = true; // and re-add the sign later. + } + writer -= fillers; + std::fill_n(writer, fillers, dec.fill); + if (add_sign_again) *--writer = '-'; + } + + piece_ = y_absl::string_view(writer, end - writer); +} + +// ---------------------------------------------------------------------- +// StrCat() +// This merges the given strings or integers, with no delimiter. This +// is designed to be the fastest possible way to construct a string out +// of a mix of raw C strings, string_views, strings, and integer values. +// ---------------------------------------------------------------------- + +// Append is merely a version of memcpy that returns the address of the byte +// after the area just overwritten. +static char* Append(char* out, const AlphaNum& x) { + // memcpy is allowed to overwrite arbitrary memory, so doing this after the + // call would force an extra fetch of x.size(). + char* after = out + x.size(); + if (x.size() != 0) { + memcpy(out, x.data(), x.size()); + } + return after; +} + +TString StrCat(const AlphaNum& a, const AlphaNum& b) { + TString result; + y_absl::strings_internal::STLStringResizeUninitialized(&result, + a.size() + b.size()); + char* const begin = &result[0]; + char* out = begin; + out = Append(out, a); + out = Append(out, b); + assert(out == begin + result.size()); + return result; +} + +TString StrCat(const AlphaNum& a, const AlphaNum& b, const AlphaNum& c) { + TString result; + strings_internal::STLStringResizeUninitialized( + &result, a.size() + b.size() + c.size()); + char* const begin = &result[0]; + char* out = begin; + out = Append(out, a); + out = Append(out, b); + out = Append(out, c); + assert(out == begin + result.size()); + return result; +} + +TString StrCat(const AlphaNum& a, const AlphaNum& b, const AlphaNum& c, + const AlphaNum& d) { + TString result; + strings_internal::STLStringResizeUninitialized( + &result, a.size() + b.size() + c.size() + d.size()); + char* const begin = &result[0]; + char* out = begin; + out = Append(out, a); + out = Append(out, b); + out = Append(out, c); + out = Append(out, d); + assert(out == begin + result.size()); + return result; +} + +namespace strings_internal { + +// Do not call directly - these are not part of the public API. +TString CatPieces(std::initializer_list<y_absl::string_view> pieces) { + TString result; + size_t total_size = 0; + for (const y_absl::string_view& piece : pieces) total_size += piece.size(); + strings_internal::STLStringResizeUninitialized(&result, total_size); + + char* const begin = &result[0]; + char* out = begin; + for (const y_absl::string_view& piece : pieces) { + const size_t this_size = piece.size(); + if (this_size != 0) { + memcpy(out, piece.data(), this_size); + out += this_size; + } + } + assert(out == begin + result.size()); + return result; +} + +// It's possible to call StrAppend with an y_absl::string_view that is itself a +// fragment of the string we're appending to. However the results of this are +// random. Therefore, check for this in debug mode. Use unsigned math so we +// only have to do one comparison. Note, there's an exception case: appending an +// empty string is always allowed. +#define ASSERT_NO_OVERLAP(dest, src) \ + assert(((src).size() == 0) || \ + (uintptr_t((src).data() - (dest).data()) > uintptr_t((dest).size()))) + +void AppendPieces(TString* dest, + std::initializer_list<y_absl::string_view> pieces) { + size_t old_size = dest->size(); + size_t total_size = old_size; + for (const y_absl::string_view& piece : pieces) { + ASSERT_NO_OVERLAP(*dest, piece); + total_size += piece.size(); + } + strings_internal::STLStringResizeUninitializedAmortized(dest, total_size); + + char* const begin = &(*dest)[0]; + char* out = begin + old_size; + for (const y_absl::string_view& piece : pieces) { + const size_t this_size = piece.size(); + if (this_size != 0) { + memcpy(out, piece.data(), this_size); + out += this_size; + } + } + assert(out == begin + dest->size()); +} + +} // namespace strings_internal + +void StrAppend(TString* dest, const AlphaNum& a) { + ASSERT_NO_OVERLAP(*dest, a); + dest->append(a.data(), a.size()); +} + +void StrAppend(TString* dest, const AlphaNum& a, const AlphaNum& b) { + ASSERT_NO_OVERLAP(*dest, a); + ASSERT_NO_OVERLAP(*dest, b); + TString::size_type old_size = dest->size(); + strings_internal::STLStringResizeUninitializedAmortized( + dest, old_size + a.size() + b.size()); + char* const begin = &(*dest)[0]; + char* out = begin + old_size; + out = Append(out, a); + out = Append(out, b); + assert(out == begin + dest->size()); +} + +void StrAppend(TString* dest, const AlphaNum& a, const AlphaNum& b, + const AlphaNum& c) { + ASSERT_NO_OVERLAP(*dest, a); + ASSERT_NO_OVERLAP(*dest, b); + ASSERT_NO_OVERLAP(*dest, c); + TString::size_type old_size = dest->size(); + strings_internal::STLStringResizeUninitializedAmortized( + dest, old_size + a.size() + b.size() + c.size()); + char* const begin = &(*dest)[0]; + char* out = begin + old_size; + out = Append(out, a); + out = Append(out, b); + out = Append(out, c); + assert(out == begin + dest->size()); +} + +void StrAppend(TString* dest, const AlphaNum& a, const AlphaNum& b, + const AlphaNum& c, const AlphaNum& d) { + ASSERT_NO_OVERLAP(*dest, a); + ASSERT_NO_OVERLAP(*dest, b); + ASSERT_NO_OVERLAP(*dest, c); + ASSERT_NO_OVERLAP(*dest, d); + TString::size_type old_size = dest->size(); + strings_internal::STLStringResizeUninitializedAmortized( + dest, old_size + a.size() + b.size() + c.size() + d.size()); + char* const begin = &(*dest)[0]; + char* out = begin + old_size; + out = Append(out, a); + out = Append(out, b); + out = Append(out, c); + out = Append(out, d); + assert(out == begin + dest->size()); +} + +ABSL_NAMESPACE_END +} // namespace y_absl diff --git a/contrib/restricted/abseil-cpp-tstring/y_absl/strings/str_cat.h b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/str_cat.h new file mode 100644 index 0000000000..a77c9ae906 --- /dev/null +++ b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/str_cat.h @@ -0,0 +1,411 @@ +// +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// ----------------------------------------------------------------------------- +// File: str_cat.h +// ----------------------------------------------------------------------------- +// +// This package contains functions for efficiently concatenating and appending +// strings: `StrCat()` and `StrAppend()`. Most of the work within these routines +// is actually handled through use of a special AlphaNum type, which was +// designed to be used as a parameter type that efficiently manages conversion +// to strings and avoids copies in the above operations. +// +// Any routine accepting either a string or a number may accept `AlphaNum`. +// The basic idea is that by accepting a `const AlphaNum &` as an argument +// to your function, your callers will automagically convert bools, integers, +// and floating point values to strings for you. +// +// NOTE: Use of `AlphaNum` outside of the //y_absl/strings package is unsupported +// except for the specific case of function parameters of type `AlphaNum` or +// `const AlphaNum &`. In particular, instantiating `AlphaNum` directly as a +// stack variable is not supported. +// +// Conversion from 8-bit values is not accepted because, if it were, then an +// attempt to pass ':' instead of ":" might result in a 58 ending up in your +// result. +// +// Bools convert to "0" or "1". Pointers to types other than `char *` are not +// valid inputs. No output is generated for null `char *` pointers. +// +// Floating point numbers are formatted with six-digit precision, which is +// the default for "std::cout <<" or printf "%g" (the same as "%.6g"). +// +// You can convert to hexadecimal output rather than decimal output using the +// `Hex` type contained here. To do so, pass `Hex(my_int)` as a parameter to +// `StrCat()` or `StrAppend()`. You may specify a minimum hex field width using +// a `PadSpec` enum. +// +// ----------------------------------------------------------------------------- + +#ifndef ABSL_STRINGS_STR_CAT_H_ +#define ABSL_STRINGS_STR_CAT_H_ + +#include <array> +#include <cstdint> +#include <util/generic/string.h> +#include <type_traits> +#include <vector> + +#include "y_absl/base/port.h" +#include "y_absl/strings/numbers.h" +#include "y_absl/strings/string_view.h" + +namespace y_absl { +ABSL_NAMESPACE_BEGIN + +namespace strings_internal { +// AlphaNumBuffer allows a way to pass a string to StrCat without having to do +// memory allocation. It is simply a pair of a fixed-size character array, and +// a size. Please don't use outside of y_absl, yet. +template <size_t max_size> +struct AlphaNumBuffer { + std::array<char, max_size> data; + size_t size; +}; + +} // namespace strings_internal + +// Enum that specifies the number of significant digits to return in a `Hex` or +// `Dec` conversion and fill character to use. A `kZeroPad2` value, for example, +// would produce hexadecimal strings such as "0a","0f" and a 'kSpacePad5' value +// would produce hexadecimal strings such as " a"," f". +enum PadSpec : uint8_t { + kNoPad = 1, + kZeroPad2, + kZeroPad3, + kZeroPad4, + kZeroPad5, + kZeroPad6, + kZeroPad7, + kZeroPad8, + kZeroPad9, + kZeroPad10, + kZeroPad11, + kZeroPad12, + kZeroPad13, + kZeroPad14, + kZeroPad15, + kZeroPad16, + kZeroPad17, + kZeroPad18, + kZeroPad19, + kZeroPad20, + + kSpacePad2 = kZeroPad2 + 64, + kSpacePad3, + kSpacePad4, + kSpacePad5, + kSpacePad6, + kSpacePad7, + kSpacePad8, + kSpacePad9, + kSpacePad10, + kSpacePad11, + kSpacePad12, + kSpacePad13, + kSpacePad14, + kSpacePad15, + kSpacePad16, + kSpacePad17, + kSpacePad18, + kSpacePad19, + kSpacePad20, +}; + +// ----------------------------------------------------------------------------- +// Hex +// ----------------------------------------------------------------------------- +// +// `Hex` stores a set of hexadecimal string conversion parameters for use +// within `AlphaNum` string conversions. +struct Hex { + uint64_t value; + uint8_t width; + char fill; + + template <typename Int> + explicit Hex( + Int v, PadSpec spec = y_absl::kNoPad, + typename std::enable_if<sizeof(Int) == 1 && + !std::is_pointer<Int>::value>::type* = nullptr) + : Hex(spec, static_cast<uint8_t>(v)) {} + template <typename Int> + explicit Hex( + Int v, PadSpec spec = y_absl::kNoPad, + typename std::enable_if<sizeof(Int) == 2 && + !std::is_pointer<Int>::value>::type* = nullptr) + : Hex(spec, static_cast<uint16_t>(v)) {} + template <typename Int> + explicit Hex( + Int v, PadSpec spec = y_absl::kNoPad, + typename std::enable_if<sizeof(Int) == 4 && + !std::is_pointer<Int>::value>::type* = nullptr) + : Hex(spec, static_cast<uint32_t>(v)) {} + template <typename Int> + explicit Hex( + Int v, PadSpec spec = y_absl::kNoPad, + typename std::enable_if<sizeof(Int) == 8 && + !std::is_pointer<Int>::value>::type* = nullptr) + : Hex(spec, static_cast<uint64_t>(v)) {} + template <typename Pointee> + explicit Hex(Pointee* v, PadSpec spec = y_absl::kNoPad) + : Hex(spec, reinterpret_cast<uintptr_t>(v)) {} + + private: + Hex(PadSpec spec, uint64_t v) + : value(v), + width(spec == y_absl::kNoPad + ? 1 + : spec >= y_absl::kSpacePad2 ? spec - y_absl::kSpacePad2 + 2 + : spec - y_absl::kZeroPad2 + 2), + fill(spec >= y_absl::kSpacePad2 ? ' ' : '0') {} +}; + +// ----------------------------------------------------------------------------- +// Dec +// ----------------------------------------------------------------------------- +// +// `Dec` stores a set of decimal string conversion parameters for use +// within `AlphaNum` string conversions. Dec is slower than the default +// integer conversion, so use it only if you need padding. +struct Dec { + uint64_t value; + uint8_t width; + char fill; + bool neg; + + template <typename Int> + explicit Dec(Int v, PadSpec spec = y_absl::kNoPad, + typename std::enable_if<(sizeof(Int) <= 8)>::type* = nullptr) + : value(v >= 0 ? static_cast<uint64_t>(v) + : uint64_t{0} - static_cast<uint64_t>(v)), + width(spec == y_absl::kNoPad + ? 1 + : spec >= y_absl::kSpacePad2 ? spec - y_absl::kSpacePad2 + 2 + : spec - y_absl::kZeroPad2 + 2), + fill(spec >= y_absl::kSpacePad2 ? ' ' : '0'), + neg(v < 0) {} +}; + +// ----------------------------------------------------------------------------- +// AlphaNum +// ----------------------------------------------------------------------------- +// +// The `AlphaNum` class acts as the main parameter type for `StrCat()` and +// `StrAppend()`, providing efficient conversion of numeric, boolean, and +// hexadecimal values (through the `Hex` type) into strings. + +class AlphaNum { + public: + // No bool ctor -- bools convert to an integral type. + // A bool ctor would also convert incoming pointers (bletch). + + AlphaNum(int x) // NOLINT(runtime/explicit) + : piece_(digits_, + numbers_internal::FastIntToBuffer(x, digits_) - &digits_[0]) {} + AlphaNum(unsigned int x) // NOLINT(runtime/explicit) + : piece_(digits_, + numbers_internal::FastIntToBuffer(x, digits_) - &digits_[0]) {} + AlphaNum(long x) // NOLINT(*) + : piece_(digits_, + numbers_internal::FastIntToBuffer(x, digits_) - &digits_[0]) {} + AlphaNum(unsigned long x) // NOLINT(*) + : piece_(digits_, + numbers_internal::FastIntToBuffer(x, digits_) - &digits_[0]) {} + AlphaNum(long long x) // NOLINT(*) + : piece_(digits_, + numbers_internal::FastIntToBuffer(x, digits_) - &digits_[0]) {} + AlphaNum(unsigned long long x) // NOLINT(*) + : piece_(digits_, + numbers_internal::FastIntToBuffer(x, digits_) - &digits_[0]) {} + + AlphaNum(float f) // NOLINT(runtime/explicit) + : piece_(digits_, numbers_internal::SixDigitsToBuffer(f, digits_)) {} + AlphaNum(double f) // NOLINT(runtime/explicit) + : piece_(digits_, numbers_internal::SixDigitsToBuffer(f, digits_)) {} + + AlphaNum(Hex hex); // NOLINT(runtime/explicit) + AlphaNum(Dec dec); // NOLINT(runtime/explicit) + + template <size_t size> + AlphaNum( // NOLINT(runtime/explicit) + const strings_internal::AlphaNumBuffer<size>& buf) + : piece_(&buf.data[0], buf.size) {} + + AlphaNum(const char* c_str) : piece_(c_str) {} // NOLINT(runtime/explicit) + AlphaNum(y_absl::string_view pc) : piece_(pc) {} // NOLINT(runtime/explicit) + + template <typename Allocator> + AlphaNum( // NOLINT(runtime/explicit) + const std::basic_string<char, std::char_traits<char>, Allocator>& str) + : piece_(str) {} + + AlphaNum(const TString& str) + : piece_(str.data(), str.size()) {} + + // Use string literals ":" instead of character literals ':'. + AlphaNum(char c) = delete; // NOLINT(runtime/explicit) + + AlphaNum(const AlphaNum&) = delete; + AlphaNum& operator=(const AlphaNum&) = delete; + + y_absl::string_view::size_type size() const { return piece_.size(); } + const char* data() const { return piece_.data(); } + y_absl::string_view Piece() const { return piece_; } + + // Normal enums are already handled by the integer formatters. + // This overload matches only scoped enums. + template <typename T, + typename = typename std::enable_if< + std::is_enum<T>{} && !std::is_convertible<T, int>{}>::type> + AlphaNum(T e) // NOLINT(runtime/explicit) + : AlphaNum(static_cast<typename std::underlying_type<T>::type>(e)) {} + + // vector<bool>::reference and const_reference require special help to + // convert to `AlphaNum` because it requires two user defined conversions. + template < + typename T, + typename std::enable_if< + std::is_class<T>::value && + (std::is_same<T, std::vector<bool>::reference>::value || + std::is_same<T, std::vector<bool>::const_reference>::value)>::type* = + nullptr> + AlphaNum(T e) : AlphaNum(static_cast<bool>(e)) {} // NOLINT(runtime/explicit) + + private: + y_absl::string_view piece_; + char digits_[numbers_internal::kFastToBufferSize]; +}; + +// ----------------------------------------------------------------------------- +// StrCat() +// ----------------------------------------------------------------------------- +// +// Merges given strings or numbers, using no delimiter(s), returning the merged +// result as a string. +// +// `StrCat()` is designed to be the fastest possible way to construct a string +// out of a mix of raw C strings, string_views, strings, bool values, +// and numeric values. +// +// Don't use `StrCat()` for user-visible strings. The localization process +// works poorly on strings built up out of fragments. +// +// For clarity and performance, don't use `StrCat()` when appending to a +// string. Use `StrAppend()` instead. In particular, avoid using any of these +// (anti-)patterns: +// +// str.append(StrCat(...)) +// str += StrCat(...) +// str = StrCat(str, ...) +// +// The last case is the worst, with a potential to change a loop +// from a linear time operation with O(1) dynamic allocations into a +// quadratic time operation with O(n) dynamic allocations. +// +// See `StrAppend()` below for more information. + +namespace strings_internal { + +// Do not call directly - this is not part of the public API. +TString CatPieces(std::initializer_list<y_absl::string_view> pieces); +void AppendPieces(TString* dest, + std::initializer_list<y_absl::string_view> pieces); + +} // namespace strings_internal + +ABSL_MUST_USE_RESULT inline TString StrCat() { return TString(); } + +ABSL_MUST_USE_RESULT inline TString StrCat(const AlphaNum& a) { + return TString(a.data(), a.size()); +} + +ABSL_MUST_USE_RESULT TString StrCat(const AlphaNum& a, const AlphaNum& b); +ABSL_MUST_USE_RESULT TString StrCat(const AlphaNum& a, const AlphaNum& b, + const AlphaNum& c); +ABSL_MUST_USE_RESULT TString StrCat(const AlphaNum& a, const AlphaNum& b, + const AlphaNum& c, const AlphaNum& d); + +// Support 5 or more arguments +template <typename... AV> +ABSL_MUST_USE_RESULT inline TString StrCat( + const AlphaNum& a, const AlphaNum& b, const AlphaNum& c, const AlphaNum& d, + const AlphaNum& e, const AV&... args) { + return strings_internal::CatPieces( + {a.Piece(), b.Piece(), c.Piece(), d.Piece(), e.Piece(), + static_cast<const AlphaNum&>(args).Piece()...}); +} + +// ----------------------------------------------------------------------------- +// StrAppend() +// ----------------------------------------------------------------------------- +// +// Appends a string or set of strings to an existing string, in a similar +// fashion to `StrCat()`. +// +// WARNING: `StrAppend(&str, a, b, c, ...)` requires that none of the +// a, b, c, parameters be a reference into str. For speed, `StrAppend()` does +// not try to check each of its input arguments to be sure that they are not +// a subset of the string being appended to. That is, while this will work: +// +// TString s = "foo"; +// s += s; +// +// This output is undefined: +// +// TString s = "foo"; +// StrAppend(&s, s); +// +// This output is undefined as well, since `y_absl::string_view` does not own its +// data: +// +// TString s = "foobar"; +// y_absl::string_view p = s; +// StrAppend(&s, p); + +inline void StrAppend(TString*) {} +void StrAppend(TString* dest, const AlphaNum& a); +void StrAppend(TString* dest, const AlphaNum& a, const AlphaNum& b); +void StrAppend(TString* dest, const AlphaNum& a, const AlphaNum& b, + const AlphaNum& c); +void StrAppend(TString* dest, const AlphaNum& a, const AlphaNum& b, + const AlphaNum& c, const AlphaNum& d); + +// Support 5 or more arguments +template <typename... AV> +inline void StrAppend(TString* dest, const AlphaNum& a, const AlphaNum& b, + const AlphaNum& c, const AlphaNum& d, const AlphaNum& e, + const AV&... args) { + strings_internal::AppendPieces( + dest, {a.Piece(), b.Piece(), c.Piece(), d.Piece(), e.Piece(), + static_cast<const AlphaNum&>(args).Piece()...}); +} + +// Helper function for the future StrCat default floating-point format, %.6g +// This is fast. +inline strings_internal::AlphaNumBuffer< + numbers_internal::kSixDigitsToBufferSize> +SixDigits(double d) { + strings_internal::AlphaNumBuffer<numbers_internal::kSixDigitsToBufferSize> + result; + result.size = numbers_internal::SixDigitsToBuffer(d, &result.data[0]); + return result; +} + +ABSL_NAMESPACE_END +} // namespace y_absl + +#endif // ABSL_STRINGS_STR_CAT_H_ diff --git a/contrib/restricted/abseil-cpp-tstring/y_absl/strings/str_format.h b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/str_format.h new file mode 100644 index 0000000000..4079f38fb4 --- /dev/null +++ b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/str_format.h @@ -0,0 +1,812 @@ +// +// Copyright 2018 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// ----------------------------------------------------------------------------- +// File: str_format.h +// ----------------------------------------------------------------------------- +// +// The `str_format` library is a typesafe replacement for the family of +// `printf()` string formatting routines within the `<cstdio>` standard library +// header. Like the `printf` family, `str_format` uses a "format string" to +// perform argument substitutions based on types. See the `FormatSpec` section +// below for format string documentation. +// +// Example: +// +// TString s = y_absl::StrFormat( +// "%s %s You have $%d!", "Hello", name, dollars); +// +// The library consists of the following basic utilities: +// +// * `y_absl::StrFormat()`, a type-safe replacement for `std::sprintf()`, to +// write a format string to a `string` value. +// * `y_absl::StrAppendFormat()` to append a format string to a `string` +// * `y_absl::StreamFormat()` to more efficiently write a format string to a +// stream, such as`std::cout`. +// * `y_absl::PrintF()`, `y_absl::FPrintF()` and `y_absl::SNPrintF()` as +// replacements for `std::printf()`, `std::fprintf()` and `std::snprintf()`. +// +// Note: a version of `std::sprintf()` is not supported as it is +// generally unsafe due to buffer overflows. +// +// Additionally, you can provide a format string (and its associated arguments) +// using one of the following abstractions: +// +// * A `FormatSpec` class template fully encapsulates a format string and its +// type arguments and is usually provided to `str_format` functions as a +// variadic argument of type `FormatSpec<Arg...>`. The `FormatSpec<Args...>` +// template is evaluated at compile-time, providing type safety. +// * A `ParsedFormat` instance, which encapsulates a specific, pre-compiled +// format string for a specific set of type(s), and which can be passed +// between API boundaries. (The `FormatSpec` type should not be used +// directly except as an argument type for wrapper functions.) +// +// The `str_format` library provides the ability to output its format strings to +// arbitrary sink types: +// +// * A generic `Format()` function to write outputs to arbitrary sink types, +// which must implement a `FormatRawSink` interface. +// +// * A `FormatUntyped()` function that is similar to `Format()` except it is +// loosely typed. `FormatUntyped()` is not a template and does not perform +// any compile-time checking of the format string; instead, it returns a +// boolean from a runtime check. +// +// In addition, the `str_format` library provides extension points for +// augmenting formatting to new types. See "StrFormat Extensions" below. + +#ifndef ABSL_STRINGS_STR_FORMAT_H_ +#define ABSL_STRINGS_STR_FORMAT_H_ + +#include <cstdio> +#include <util/generic/string.h> + +#include "y_absl/strings/internal/str_format/arg.h" // IWYU pragma: export +#include "y_absl/strings/internal/str_format/bind.h" // IWYU pragma: export +#include "y_absl/strings/internal/str_format/checker.h" // IWYU pragma: export +#include "y_absl/strings/internal/str_format/extension.h" // IWYU pragma: export +#include "y_absl/strings/internal/str_format/parser.h" // IWYU pragma: export + +namespace y_absl { +ABSL_NAMESPACE_BEGIN + +// UntypedFormatSpec +// +// A type-erased class that can be used directly within untyped API entry +// points. An `UntypedFormatSpec` is specifically used as an argument to +// `FormatUntyped()`. +// +// Example: +// +// y_absl::UntypedFormatSpec format("%d"); +// TString out; +// CHECK(y_absl::FormatUntyped(&out, format, {y_absl::FormatArg(1)})); +class UntypedFormatSpec { + public: + UntypedFormatSpec() = delete; + UntypedFormatSpec(const UntypedFormatSpec&) = delete; + UntypedFormatSpec& operator=(const UntypedFormatSpec&) = delete; + + explicit UntypedFormatSpec(string_view s) : spec_(s) {} + + protected: + explicit UntypedFormatSpec(const str_format_internal::ParsedFormatBase* pc) + : spec_(pc) {} + + private: + friend str_format_internal::UntypedFormatSpecImpl; + str_format_internal::UntypedFormatSpecImpl spec_; +}; + +// FormatStreamed() +// +// Takes a streamable argument and returns an object that can print it +// with '%s'. Allows printing of types that have an `operator<<` but no +// intrinsic type support within `StrFormat()` itself. +// +// Example: +// +// y_absl::StrFormat("%s", y_absl::FormatStreamed(obj)); +template <typename T> +str_format_internal::StreamedWrapper<T> FormatStreamed(const T& v) { + return str_format_internal::StreamedWrapper<T>(v); +} + +// FormatCountCapture +// +// This class provides a way to safely wrap `StrFormat()` captures of `%n` +// conversions, which denote the number of characters written by a formatting +// operation to this point, into an integer value. +// +// This wrapper is designed to allow safe usage of `%n` within `StrFormat(); in +// the `printf()` family of functions, `%n` is not safe to use, as the `int *` +// buffer can be used to capture arbitrary data. +// +// Example: +// +// int n = 0; +// TString s = y_absl::StrFormat("%s%d%n", "hello", 123, +// y_absl::FormatCountCapture(&n)); +// EXPECT_EQ(8, n); +class FormatCountCapture { + public: + explicit FormatCountCapture(int* p) : p_(p) {} + + private: + // FormatCountCaptureHelper is used to define FormatConvertImpl() for this + // class. + friend struct str_format_internal::FormatCountCaptureHelper; + // Unused() is here because of the false positive from -Wunused-private-field + // p_ is used in the templated function of the friend FormatCountCaptureHelper + // class. + int* Unused() { return p_; } + int* p_; +}; + +// FormatSpec +// +// The `FormatSpec` type defines the makeup of a format string within the +// `str_format` library. It is a variadic class template that is evaluated at +// compile-time, according to the format string and arguments that are passed to +// it. +// +// You should not need to manipulate this type directly. You should only name it +// if you are writing wrapper functions which accept format arguments that will +// be provided unmodified to functions in this library. Such a wrapper function +// might be a class method that provides format arguments and/or internally uses +// the result of formatting. +// +// For a `FormatSpec` to be valid at compile-time, it must be provided as +// either: +// +// * A `constexpr` literal or `y_absl::string_view`, which is how it most often +// used. +// * A `ParsedFormat` instantiation, which ensures the format string is +// valid before use. (See below.) +// +// Example: +// +// // Provided as a string literal. +// y_absl::StrFormat("Welcome to %s, Number %d!", "The Village", 6); +// +// // Provided as a constexpr y_absl::string_view. +// constexpr y_absl::string_view formatString = "Welcome to %s, Number %d!"; +// y_absl::StrFormat(formatString, "The Village", 6); +// +// // Provided as a pre-compiled ParsedFormat object. +// // Note that this example is useful only for illustration purposes. +// y_absl::ParsedFormat<'s', 'd'> formatString("Welcome to %s, Number %d!"); +// y_absl::StrFormat(formatString, "TheVillage", 6); +// +// A format string generally follows the POSIX syntax as used within the POSIX +// `printf` specification. +// +// (See http://pubs.opengroup.org/onlinepubs/9699919799/functions/fprintf.html.) +// +// In specific, the `FormatSpec` supports the following type specifiers: +// * `c` for characters +// * `s` for strings +// * `d` or `i` for integers +// * `o` for unsigned integer conversions into octal +// * `x` or `X` for unsigned integer conversions into hex +// * `u` for unsigned integers +// * `f` or `F` for floating point values into decimal notation +// * `e` or `E` for floating point values into exponential notation +// * `a` or `A` for floating point values into hex exponential notation +// * `g` or `G` for floating point values into decimal or exponential +// notation based on their precision +// * `p` for pointer address values +// * `n` for the special case of writing out the number of characters +// written to this point. The resulting value must be captured within an +// `y_absl::FormatCountCapture` type. +// +// Implementation-defined behavior: +// * A null pointer provided to "%s" or "%p" is output as "(nil)". +// * A non-null pointer provided to "%p" is output in hex as if by %#x or +// %#lx. +// +// NOTE: `o`, `x\X` and `u` will convert signed values to their unsigned +// counterpart before formatting. +// +// Examples: +// "%c", 'a' -> "a" +// "%c", 32 -> " " +// "%s", "C" -> "C" +// "%s", TString("C++") -> "C++" +// "%d", -10 -> "-10" +// "%o", 10 -> "12" +// "%x", 16 -> "10" +// "%f", 123456789 -> "123456789.000000" +// "%e", .01 -> "1.00000e-2" +// "%a", -3.0 -> "-0x1.8p+1" +// "%g", .01 -> "1e-2" +// "%p", (void*)&value -> "0x7ffdeb6ad2a4" +// +// int n = 0; +// TString s = y_absl::StrFormat( +// "%s%d%n", "hello", 123, y_absl::FormatCountCapture(&n)); +// EXPECT_EQ(8, n); +// +// The `FormatSpec` intrinsically supports all of these fundamental C++ types: +// +// * Characters: `char`, `signed char`, `unsigned char` +// * Integers: `int`, `short`, `unsigned short`, `unsigned`, `long`, +// `unsigned long`, `long long`, `unsigned long long` +// * Floating-point: `float`, `double`, `long double` +// +// However, in the `str_format` library, a format conversion specifies a broader +// C++ conceptual category instead of an exact type. For example, `%s` binds to +// any string-like argument, so `TString`, `y_absl::string_view`, and +// `const char*` are all accepted. Likewise, `%d` accepts any integer-like +// argument, etc. + +template <typename... Args> +using FormatSpec = str_format_internal::FormatSpecTemplate< + str_format_internal::ArgumentToConv<Args>()...>; + +// ParsedFormat +// +// A `ParsedFormat` is a class template representing a preparsed `FormatSpec`, +// with template arguments specifying the conversion characters used within the +// format string. Such characters must be valid format type specifiers, and +// these type specifiers are checked at compile-time. +// +// Instances of `ParsedFormat` can be created, copied, and reused to speed up +// formatting loops. A `ParsedFormat` may either be constructed statically, or +// dynamically through its `New()` factory function, which only constructs a +// runtime object if the format is valid at that time. +// +// Example: +// +// // Verified at compile time. +// y_absl::ParsedFormat<'s', 'd'> formatString("Welcome to %s, Number %d!"); +// y_absl::StrFormat(formatString, "TheVillage", 6); +// +// // Verified at runtime. +// auto format_runtime = y_absl::ParsedFormat<'d'>::New(format_string); +// if (format_runtime) { +// value = y_absl::StrFormat(*format_runtime, i); +// } else { +// ... error case ... +// } + +#if defined(__cpp_nontype_template_parameter_auto) +// If C++17 is available, an 'extended' format is also allowed that can specify +// multiple conversion characters per format argument, using a combination of +// `y_absl::FormatConversionCharSet` enum values (logically a set union) +// via the `|` operator. (Single character-based arguments are still accepted, +// but cannot be combined). Some common conversions also have predefined enum +// values, such as `y_absl::FormatConversionCharSet::kIntegral`. +// +// Example: +// // Extended format supports multiple conversion characters per argument, +// // specified via a combination of `FormatConversionCharSet` enums. +// using MyFormat = y_absl::ParsedFormat<y_absl::FormatConversionCharSet::d | +// y_absl::FormatConversionCharSet::x>; +// MyFormat GetFormat(bool use_hex) { +// if (use_hex) return MyFormat("foo %x bar"); +// return MyFormat("foo %d bar"); +// } +// // `format` can be used with any value that supports 'd' and 'x', +// // like `int`. +// auto format = GetFormat(use_hex); +// value = StringF(format, i); +template <auto... Conv> +using ParsedFormat = y_absl::str_format_internal::ExtendedParsedFormat< + y_absl::str_format_internal::ToFormatConversionCharSet(Conv)...>; +#else +template <char... Conv> +using ParsedFormat = str_format_internal::ExtendedParsedFormat< + y_absl::str_format_internal::ToFormatConversionCharSet(Conv)...>; +#endif // defined(__cpp_nontype_template_parameter_auto) + +// StrFormat() +// +// Returns a `string` given a `printf()`-style format string and zero or more +// additional arguments. Use it as you would `sprintf()`. `StrFormat()` is the +// primary formatting function within the `str_format` library, and should be +// used in most cases where you need type-safe conversion of types into +// formatted strings. +// +// The format string generally consists of ordinary character data along with +// one or more format conversion specifiers (denoted by the `%` character). +// Ordinary character data is returned unchanged into the result string, while +// each conversion specification performs a type substitution from +// `StrFormat()`'s other arguments. See the comments for `FormatSpec` for full +// information on the makeup of this format string. +// +// Example: +// +// TString s = y_absl::StrFormat( +// "Welcome to %s, Number %d!", "The Village", 6); +// EXPECT_EQ("Welcome to The Village, Number 6!", s); +// +// Returns an empty string in case of error. +template <typename... Args> +ABSL_MUST_USE_RESULT TString StrFormat(const FormatSpec<Args...>& format, + const Args&... args) { + return str_format_internal::FormatPack( + str_format_internal::UntypedFormatSpecImpl::Extract(format), + {str_format_internal::FormatArgImpl(args)...}); +} + +// StrAppendFormat() +// +// Appends to a `dst` string given a format string, and zero or more additional +// arguments, returning `*dst` as a convenience for chaining purposes. Appends +// nothing in case of error (but possibly alters its capacity). +// +// Example: +// +// TString orig("For example PI is approximately "); +// std::cout << StrAppendFormat(&orig, "%12.6f", 3.14); +template <typename... Args> +TString& StrAppendFormat(TString* dst, + const FormatSpec<Args...>& format, + const Args&... args) { + return str_format_internal::AppendPack( + dst, str_format_internal::UntypedFormatSpecImpl::Extract(format), + {str_format_internal::FormatArgImpl(args)...}); +} + +// StreamFormat() +// +// Writes to an output stream given a format string and zero or more arguments, +// generally in a manner that is more efficient than streaming the result of +// `y_absl:: StrFormat()`. The returned object must be streamed before the full +// expression ends. +// +// Example: +// +// std::cout << StreamFormat("%12.6f", 3.14); +template <typename... Args> +ABSL_MUST_USE_RESULT str_format_internal::Streamable StreamFormat( + const FormatSpec<Args...>& format, const Args&... args) { + return str_format_internal::Streamable( + str_format_internal::UntypedFormatSpecImpl::Extract(format), + {str_format_internal::FormatArgImpl(args)...}); +} + +// PrintF() +// +// Writes to stdout given a format string and zero or more arguments. This +// function is functionally equivalent to `std::printf()` (and type-safe); +// prefer `y_absl::PrintF()` over `std::printf()`. +// +// Example: +// +// std::string_view s = "Ulaanbaatar"; +// y_absl::PrintF("The capital of Mongolia is %s", s); +// +// Outputs: "The capital of Mongolia is Ulaanbaatar" +// +template <typename... Args> +int PrintF(const FormatSpec<Args...>& format, const Args&... args) { + return str_format_internal::FprintF( + stdout, str_format_internal::UntypedFormatSpecImpl::Extract(format), + {str_format_internal::FormatArgImpl(args)...}); +} + +// FPrintF() +// +// Writes to a file given a format string and zero or more arguments. This +// function is functionally equivalent to `std::fprintf()` (and type-safe); +// prefer `y_absl::FPrintF()` over `std::fprintf()`. +// +// Example: +// +// std::string_view s = "Ulaanbaatar"; +// y_absl::FPrintF(stdout, "The capital of Mongolia is %s", s); +// +// Outputs: "The capital of Mongolia is Ulaanbaatar" +// +template <typename... Args> +int FPrintF(std::FILE* output, const FormatSpec<Args...>& format, + const Args&... args) { + return str_format_internal::FprintF( + output, str_format_internal::UntypedFormatSpecImpl::Extract(format), + {str_format_internal::FormatArgImpl(args)...}); +} + +// SNPrintF() +// +// Writes to a sized buffer given a format string and zero or more arguments. +// This function is functionally equivalent to `std::snprintf()` (and +// type-safe); prefer `y_absl::SNPrintF()` over `std::snprintf()`. +// +// In particular, a successful call to `y_absl::SNPrintF()` writes at most `size` +// bytes of the formatted output to `output`, including a NUL-terminator, and +// returns the number of bytes that would have been written if truncation did +// not occur. In the event of an error, a negative value is returned and `errno` +// is set. +// +// Example: +// +// std::string_view s = "Ulaanbaatar"; +// char output[128]; +// y_absl::SNPrintF(output, sizeof(output), +// "The capital of Mongolia is %s", s); +// +// Post-condition: output == "The capital of Mongolia is Ulaanbaatar" +// +template <typename... Args> +int SNPrintF(char* output, std::size_t size, const FormatSpec<Args...>& format, + const Args&... args) { + return str_format_internal::SnprintF( + output, size, str_format_internal::UntypedFormatSpecImpl::Extract(format), + {str_format_internal::FormatArgImpl(args)...}); +} + +// ----------------------------------------------------------------------------- +// Custom Output Formatting Functions +// ----------------------------------------------------------------------------- + +// FormatRawSink +// +// FormatRawSink is a type erased wrapper around arbitrary sink objects +// specifically used as an argument to `Format()`. +// +// All the object has to do define an overload of `AbslFormatFlush()` for the +// sink, usually by adding a ADL-based free function in the same namespace as +// the sink: +// +// void AbslFormatFlush(MySink* dest, y_absl::string_view part); +// +// where `dest` is the pointer passed to `y_absl::Format()`. The function should +// append `part` to `dest`. +// +// FormatRawSink does not own the passed sink object. The passed object must +// outlive the FormatRawSink. +class FormatRawSink { + public: + // Implicitly convert from any type that provides the hook function as + // described above. + template <typename T, + typename = typename std::enable_if<std::is_constructible< + str_format_internal::FormatRawSinkImpl, T*>::value>::type> + FormatRawSink(T* raw) // NOLINT + : sink_(raw) {} + + private: + friend str_format_internal::FormatRawSinkImpl; + str_format_internal::FormatRawSinkImpl sink_; +}; + +// Format() +// +// Writes a formatted string to an arbitrary sink object (implementing the +// `y_absl::FormatRawSink` interface), using a format string and zero or more +// additional arguments. +// +// By default, `TString`, `std::ostream`, and `y_absl::Cord` are supported as +// destination objects. If a `TString` is used the formatted string is +// appended to it. +// +// `y_absl::Format()` is a generic version of `y_absl::StrAppendFormat()`, for +// custom sinks. The format string, like format strings for `StrFormat()`, is +// checked at compile-time. +// +// On failure, this function returns `false` and the state of the sink is +// unspecified. +template <typename... Args> +bool Format(FormatRawSink raw_sink, const FormatSpec<Args...>& format, + const Args&... args) { + return str_format_internal::FormatUntyped( + str_format_internal::FormatRawSinkImpl::Extract(raw_sink), + str_format_internal::UntypedFormatSpecImpl::Extract(format), + {str_format_internal::FormatArgImpl(args)...}); +} + +// FormatArg +// +// A type-erased handle to a format argument specifically used as an argument to +// `FormatUntyped()`. You may construct `FormatArg` by passing +// reference-to-const of any printable type. `FormatArg` is both copyable and +// assignable. The source data must outlive the `FormatArg` instance. See +// example below. +// +using FormatArg = str_format_internal::FormatArgImpl; + +// FormatUntyped() +// +// Writes a formatted string to an arbitrary sink object (implementing the +// `y_absl::FormatRawSink` interface), using an `UntypedFormatSpec` and zero or +// more additional arguments. +// +// This function acts as the most generic formatting function in the +// `str_format` library. The caller provides a raw sink, an unchecked format +// string, and (usually) a runtime specified list of arguments; no compile-time +// checking of formatting is performed within this function. As a result, a +// caller should check the return value to verify that no error occurred. +// On failure, this function returns `false` and the state of the sink is +// unspecified. +// +// The arguments are provided in an `y_absl::Span<const y_absl::FormatArg>`. +// Each `y_absl::FormatArg` object binds to a single argument and keeps a +// reference to it. The values used to create the `FormatArg` objects must +// outlive this function call. +// +// Example: +// +// std::optional<TString> FormatDynamic( +// const TString& in_format, +// const vector<TString>& in_args) { +// TString out; +// std::vector<y_absl::FormatArg> args; +// for (const auto& v : in_args) { +// // It is important that 'v' is a reference to the objects in in_args. +// // The values we pass to FormatArg must outlive the call to +// // FormatUntyped. +// args.emplace_back(v); +// } +// y_absl::UntypedFormatSpec format(in_format); +// if (!y_absl::FormatUntyped(&out, format, args)) { +// return std::nullopt; +// } +// return std::move(out); +// } +// +ABSL_MUST_USE_RESULT inline bool FormatUntyped( + FormatRawSink raw_sink, const UntypedFormatSpec& format, + y_absl::Span<const FormatArg> args) { + return str_format_internal::FormatUntyped( + str_format_internal::FormatRawSinkImpl::Extract(raw_sink), + str_format_internal::UntypedFormatSpecImpl::Extract(format), args); +} + +//------------------------------------------------------------------------------ +// StrFormat Extensions +//------------------------------------------------------------------------------ +// +// AbslFormatConvert() +// +// The StrFormat library provides a customization API for formatting +// user-defined types using y_absl::StrFormat(). The API relies on detecting an +// overload in the user-defined type's namespace of a free (non-member) +// `AbslFormatConvert()` function, usually as a friend definition with the +// following signature: +// +// y_absl::FormatConvertResult<...> AbslFormatConvert( +// const X& value, +// const y_absl::FormatConversionSpec& spec, +// y_absl::FormatSink *sink); +// +// An `AbslFormatConvert()` overload for a type should only be declared in the +// same file and namespace as said type. +// +// The abstractions within this definition include: +// +// * An `y_absl::FormatConversionSpec` to specify the fields to pull from a +// user-defined type's format string +// * An `y_absl::FormatSink` to hold the converted string data during the +// conversion process. +// * An `y_absl::FormatConvertResult` to hold the status of the returned +// formatting operation +// +// The return type encodes all the conversion characters that your +// AbslFormatConvert() routine accepts. The return value should be {true}. +// A return value of {false} will result in `StrFormat()` returning +// an empty string. This result will be propagated to the result of +// `FormatUntyped`. +// +// Example: +// +// struct Point { +// // To add formatting support to `Point`, we simply need to add a free +// // (non-member) function `AbslFormatConvert()`. This method interprets +// // `spec` to print in the request format. The allowed conversion characters +// // can be restricted via the type of the result, in this example +// // string and integral formatting are allowed (but not, for instance +// // floating point characters like "%f"). You can add such a free function +// // using a friend declaration within the body of the class: +// friend y_absl::FormatConvertResult<y_absl::FormatConversionCharSet::kString | +// y_absl::FormatConversionCharSet::kIntegral> +// AbslFormatConvert(const Point& p, const y_absl::FormatConversionSpec& spec, +// y_absl::FormatSink* s) { +// if (spec.conversion_char() == y_absl::FormatConversionChar::s) { +// s->Append(y_absl::StrCat("x=", p.x, " y=", p.y)); +// } else { +// s->Append(y_absl::StrCat(p.x, ",", p.y)); +// } +// return {true}; +// } +// +// int x; +// int y; +// }; + +// clang-format off + +// FormatConversionChar +// +// Specifies the formatting character provided in the format string +// passed to `StrFormat()`. +enum class FormatConversionChar : uint8_t { + c, s, // text + d, i, o, u, x, X, // int + f, F, e, E, g, G, a, A, // float + n, p // misc +}; +// clang-format on + +// FormatConversionSpec +// +// Specifies modifications to the conversion of the format string, through use +// of one or more format flags in the source format string. +class FormatConversionSpec { + public: + // FormatConversionSpec::is_basic() + // + // Indicates that width and precision are not specified, and no additional + // flags are set for this conversion character in the format string. + bool is_basic() const { return impl_.is_basic(); } + + // FormatConversionSpec::has_left_flag() + // + // Indicates whether the result should be left justified for this conversion + // character in the format string. This flag is set through use of a '-' + // character in the format string. E.g. "%-s" + bool has_left_flag() const { return impl_.has_left_flag(); } + + // FormatConversionSpec::has_show_pos_flag() + // + // Indicates whether a sign column is prepended to the result for this + // conversion character in the format string, even if the result is positive. + // This flag is set through use of a '+' character in the format string. + // E.g. "%+d" + bool has_show_pos_flag() const { return impl_.has_show_pos_flag(); } + + // FormatConversionSpec::has_sign_col_flag() + // + // Indicates whether a mandatory sign column is added to the result for this + // conversion character. This flag is set through use of a space character + // (' ') in the format string. E.g. "% i" + bool has_sign_col_flag() const { return impl_.has_sign_col_flag(); } + + // FormatConversionSpec::has_alt_flag() + // + // Indicates whether an "alternate" format is applied to the result for this + // conversion character. Alternative forms depend on the type of conversion + // character, and unallowed alternatives are undefined. This flag is set + // through use of a '#' character in the format string. E.g. "%#h" + bool has_alt_flag() const { return impl_.has_alt_flag(); } + + // FormatConversionSpec::has_zero_flag() + // + // Indicates whether zeroes should be prepended to the result for this + // conversion character instead of spaces. This flag is set through use of the + // '0' character in the format string. E.g. "%0f" + bool has_zero_flag() const { return impl_.has_zero_flag(); } + + // FormatConversionSpec::conversion_char() + // + // Returns the underlying conversion character. + FormatConversionChar conversion_char() const { + return impl_.conversion_char(); + } + + // FormatConversionSpec::width() + // + // Returns the specified width (indicated through use of a non-zero integer + // value or '*' character) of the conversion character. If width is + // unspecified, it returns a negative value. + int width() const { return impl_.width(); } + + // FormatConversionSpec::precision() + // + // Returns the specified precision (through use of the '.' character followed + // by a non-zero integer value or '*' character) of the conversion character. + // If precision is unspecified, it returns a negative value. + int precision() const { return impl_.precision(); } + + private: + explicit FormatConversionSpec( + str_format_internal::FormatConversionSpecImpl impl) + : impl_(impl) {} + + friend str_format_internal::FormatConversionSpecImpl; + + y_absl::str_format_internal::FormatConversionSpecImpl impl_; +}; + +// Type safe OR operator for FormatConversionCharSet to allow accepting multiple +// conversion chars in custom format converters. +constexpr FormatConversionCharSet operator|(FormatConversionCharSet a, + FormatConversionCharSet b) { + return static_cast<FormatConversionCharSet>(static_cast<uint64_t>(a) | + static_cast<uint64_t>(b)); +} + +// FormatConversionCharSet +// +// Specifies the _accepted_ conversion types as a template parameter to +// FormatConvertResult for custom implementations of `AbslFormatConvert`. +// Note the helper predefined alias definitions (kIntegral, etc.) below. +enum class FormatConversionCharSet : uint64_t { + // text + c = str_format_internal::FormatConversionCharToConvInt('c'), + s = str_format_internal::FormatConversionCharToConvInt('s'), + // integer + d = str_format_internal::FormatConversionCharToConvInt('d'), + i = str_format_internal::FormatConversionCharToConvInt('i'), + o = str_format_internal::FormatConversionCharToConvInt('o'), + u = str_format_internal::FormatConversionCharToConvInt('u'), + x = str_format_internal::FormatConversionCharToConvInt('x'), + X = str_format_internal::FormatConversionCharToConvInt('X'), + // Float + f = str_format_internal::FormatConversionCharToConvInt('f'), + F = str_format_internal::FormatConversionCharToConvInt('F'), + e = str_format_internal::FormatConversionCharToConvInt('e'), + E = str_format_internal::FormatConversionCharToConvInt('E'), + g = str_format_internal::FormatConversionCharToConvInt('g'), + G = str_format_internal::FormatConversionCharToConvInt('G'), + a = str_format_internal::FormatConversionCharToConvInt('a'), + A = str_format_internal::FormatConversionCharToConvInt('A'), + // misc + n = str_format_internal::FormatConversionCharToConvInt('n'), + p = str_format_internal::FormatConversionCharToConvInt('p'), + + // Used for width/precision '*' specification. + kStar = static_cast<uint64_t>( + y_absl::str_format_internal::FormatConversionCharSetInternal::kStar), + // Some predefined values: + kIntegral = d | i | u | o | x | X, + kFloating = a | e | f | g | A | E | F | G, + kNumeric = kIntegral | kFloating, + kString = s, + kPointer = p, +}; + +// FormatSink +// +// An abstraction to which conversions write their string data. +// +class FormatSink { + public: + // Appends `count` copies of `ch`. + void Append(size_t count, char ch) { sink_->Append(count, ch); } + + void Append(string_view v) { sink_->Append(v); } + + // Appends the first `precision` bytes of `v`. If this is less than + // `width`, spaces will be appended first (if `left` is false), or + // after (if `left` is true) to ensure the total amount appended is + // at least `width`. + bool PutPaddedString(string_view v, int width, int precision, bool left) { + return sink_->PutPaddedString(v, width, precision, left); + } + + private: + friend str_format_internal::FormatSinkImpl; + explicit FormatSink(str_format_internal::FormatSinkImpl* s) : sink_(s) {} + str_format_internal::FormatSinkImpl* sink_; +}; + +// FormatConvertResult +// +// Indicates whether a call to AbslFormatConvert() was successful. +// This return type informs the StrFormat extension framework (through +// ADL but using the return type) of what conversion characters are supported. +// It is strongly discouraged to return {false}, as this will result in an +// empty string in StrFormat. +template <FormatConversionCharSet C> +struct FormatConvertResult { + bool value; +}; + +ABSL_NAMESPACE_END +} // namespace y_absl + +#endif // ABSL_STRINGS_STR_FORMAT_H_ diff --git a/contrib/restricted/abseil-cpp-tstring/y_absl/strings/str_join.h b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/str_join.h new file mode 100644 index 0000000000..46a0323c6e --- /dev/null +++ b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/str_join.h @@ -0,0 +1,293 @@ +// +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// ----------------------------------------------------------------------------- +// File: str_join.h +// ----------------------------------------------------------------------------- +// +// This header file contains functions for joining a range of elements and +// returning the result as a TString. StrJoin operations are specified by +// passing a range, a separator string to use between the elements joined, and +// an optional Formatter responsible for converting each argument in the range +// to a string. If omitted, a default `AlphaNumFormatter()` is called on the +// elements to be joined, using the same formatting that `y_absl::StrCat()` uses. +// This package defines a number of default formatters, and you can define your +// own implementations. +// +// Ranges are specified by passing a container with `std::begin()` and +// `std::end()` iterators, container-specific `begin()` and `end()` iterators, a +// brace-initialized `std::initializer_list`, or a `std::tuple` of heterogeneous +// objects. The separator string is specified as an `y_absl::string_view`. +// +// Because the default formatter uses the `y_absl::AlphaNum` class, +// `y_absl::StrJoin()`, like `y_absl::StrCat()`, will work out-of-the-box on +// collections of strings, ints, floats, doubles, etc. +// +// Example: +// +// std::vector<TString> v = {"foo", "bar", "baz"}; +// TString s = y_absl::StrJoin(v, "-"); +// EXPECT_EQ("foo-bar-baz", s); +// +// See comments on the `y_absl::StrJoin()` function for more examples. + +#ifndef ABSL_STRINGS_STR_JOIN_H_ +#define ABSL_STRINGS_STR_JOIN_H_ + +#include <cstdio> +#include <cstring> +#include <initializer_list> +#include <iterator> +#include <util/generic/string.h> +#include <tuple> +#include <type_traits> +#include <utility> + +#include "y_absl/base/macros.h" +#include "y_absl/strings/internal/str_join_internal.h" +#include "y_absl/strings/string_view.h" + +namespace y_absl { +ABSL_NAMESPACE_BEGIN + +// ----------------------------------------------------------------------------- +// Concept: Formatter +// ----------------------------------------------------------------------------- +// +// A Formatter is a function object that is responsible for formatting its +// argument as a string and appending it to a given output TString. +// Formatters may be implemented as function objects, lambdas, or normal +// functions. You may provide your own Formatter to enable `y_absl::StrJoin()` to +// work with arbitrary types. +// +// The following is an example of a custom Formatter that simply uses +// `std::to_string()` to format an integer as a TString. +// +// struct MyFormatter { +// void operator()(TString* out, int i) const { +// out->append(std::to_string(i)); +// } +// }; +// +// You would use the above formatter by passing an instance of it as the final +// argument to `y_absl::StrJoin()`: +// +// std::vector<int> v = {1, 2, 3, 4}; +// TString s = y_absl::StrJoin(v, "-", MyFormatter()); +// EXPECT_EQ("1-2-3-4", s); +// +// The following standard formatters are provided within this file: +// +// - `AlphaNumFormatter()` (the default) +// - `StreamFormatter()` +// - `PairFormatter()` +// - `DereferenceFormatter()` + +// AlphaNumFormatter() +// +// Default formatter used if none is specified. Uses `y_absl::AlphaNum` to convert +// numeric arguments to strings. +inline strings_internal::AlphaNumFormatterImpl AlphaNumFormatter() { + return strings_internal::AlphaNumFormatterImpl(); +} + +// StreamFormatter() +// +// Formats its argument using the << operator. +inline strings_internal::StreamFormatterImpl StreamFormatter() { + return strings_internal::StreamFormatterImpl(); +} + +// Function Template: PairFormatter(Formatter, y_absl::string_view, Formatter) +// +// Formats a `std::pair` by putting a given separator between the pair's +// `.first` and `.second` members. This formatter allows you to specify +// custom Formatters for both the first and second member of each pair. +template <typename FirstFormatter, typename SecondFormatter> +inline strings_internal::PairFormatterImpl<FirstFormatter, SecondFormatter> +PairFormatter(FirstFormatter f1, y_absl::string_view sep, SecondFormatter f2) { + return strings_internal::PairFormatterImpl<FirstFormatter, SecondFormatter>( + std::move(f1), sep, std::move(f2)); +} + +// Function overload of PairFormatter() for using a default +// `AlphaNumFormatter()` for each Formatter in the pair. +inline strings_internal::PairFormatterImpl< + strings_internal::AlphaNumFormatterImpl, + strings_internal::AlphaNumFormatterImpl> +PairFormatter(y_absl::string_view sep) { + return PairFormatter(AlphaNumFormatter(), sep, AlphaNumFormatter()); +} + +// Function Template: DereferenceFormatter(Formatter) +// +// Formats its argument by dereferencing it and then applying the given +// formatter. This formatter is useful for formatting a container of +// pointer-to-T. This pattern often shows up when joining repeated fields in +// protocol buffers. +template <typename Formatter> +strings_internal::DereferenceFormatterImpl<Formatter> DereferenceFormatter( + Formatter&& f) { + return strings_internal::DereferenceFormatterImpl<Formatter>( + std::forward<Formatter>(f)); +} + +// Function overload of `DereferenceFormatter()` for using a default +// `AlphaNumFormatter()`. +inline strings_internal::DereferenceFormatterImpl< + strings_internal::AlphaNumFormatterImpl> +DereferenceFormatter() { + return strings_internal::DereferenceFormatterImpl< + strings_internal::AlphaNumFormatterImpl>(AlphaNumFormatter()); +} + +// ----------------------------------------------------------------------------- +// StrJoin() +// ----------------------------------------------------------------------------- +// +// Joins a range of elements and returns the result as a TString. +// `y_absl::StrJoin()` takes a range, a separator string to use between the +// elements joined, and an optional Formatter responsible for converting each +// argument in the range to a string. +// +// If omitted, the default `AlphaNumFormatter()` is called on the elements to be +// joined. +// +// Example 1: +// // Joins a collection of strings. This pattern also works with a collection +// // of `y_absl::string_view` or even `const char*`. +// std::vector<TString> v = {"foo", "bar", "baz"}; +// TString s = y_absl::StrJoin(v, "-"); +// EXPECT_EQ("foo-bar-baz", s); +// +// Example 2: +// // Joins the values in the given `std::initializer_list<>` specified using +// // brace initialization. This pattern also works with an initializer_list +// // of ints or `y_absl::string_view` -- any `AlphaNum`-compatible type. +// TString s = y_absl::StrJoin({"foo", "bar", "baz"}, "-"); +// EXPECT_EQ("foo-bar-baz", s); +// +// Example 3: +// // Joins a collection of ints. This pattern also works with floats, +// // doubles, int64s -- any `StrCat()`-compatible type. +// std::vector<int> v = {1, 2, 3, -4}; +// TString s = y_absl::StrJoin(v, "-"); +// EXPECT_EQ("1-2-3--4", s); +// +// Example 4: +// // Joins a collection of pointer-to-int. By default, pointers are +// // dereferenced and the pointee is formatted using the default format for +// // that type; such dereferencing occurs for all levels of indirection, so +// // this pattern works just as well for `std::vector<int**>` as for +// // `std::vector<int*>`. +// int x = 1, y = 2, z = 3; +// std::vector<int*> v = {&x, &y, &z}; +// TString s = y_absl::StrJoin(v, "-"); +// EXPECT_EQ("1-2-3", s); +// +// Example 5: +// // Dereferencing of `std::unique_ptr<>` is also supported: +// std::vector<std::unique_ptr<int>> v +// v.emplace_back(new int(1)); +// v.emplace_back(new int(2)); +// v.emplace_back(new int(3)); +// TString s = y_absl::StrJoin(v, "-"); +// EXPECT_EQ("1-2-3", s); +// +// Example 6: +// // Joins a `std::map`, with each key-value pair separated by an equals +// // sign. This pattern would also work with, say, a +// // `std::vector<std::pair<>>`. +// std::map<TString, int> m = { +// std::make_pair("a", 1), +// std::make_pair("b", 2), +// std::make_pair("c", 3)}; +// TString s = y_absl::StrJoin(m, ",", y_absl::PairFormatter("=")); +// EXPECT_EQ("a=1,b=2,c=3", s); +// +// Example 7: +// // These examples show how `y_absl::StrJoin()` handles a few common edge +// // cases: +// std::vector<TString> v_empty; +// EXPECT_EQ("", y_absl::StrJoin(v_empty, "-")); +// +// std::vector<TString> v_one_item = {"foo"}; +// EXPECT_EQ("foo", y_absl::StrJoin(v_one_item, "-")); +// +// std::vector<TString> v_empty_string = {""}; +// EXPECT_EQ("", y_absl::StrJoin(v_empty_string, "-")); +// +// std::vector<TString> v_one_item_empty_string = {"a", ""}; +// EXPECT_EQ("a-", y_absl::StrJoin(v_one_item_empty_string, "-")); +// +// std::vector<TString> v_two_empty_string = {"", ""}; +// EXPECT_EQ("-", y_absl::StrJoin(v_two_empty_string, "-")); +// +// Example 8: +// // Joins a `std::tuple<T...>` of heterogeneous types, converting each to +// // a TString using the `y_absl::AlphaNum` class. +// TString s = y_absl::StrJoin(std::make_tuple(123, "abc", 0.456), "-"); +// EXPECT_EQ("123-abc-0.456", s); + +template <typename Iterator, typename Formatter> +TString StrJoin(Iterator start, Iterator end, y_absl::string_view sep, + Formatter&& fmt) { + return strings_internal::JoinAlgorithm(start, end, sep, fmt); +} + +template <typename Range, typename Formatter> +TString StrJoin(const Range& range, y_absl::string_view separator, + Formatter&& fmt) { + return strings_internal::JoinRange(range, separator, fmt); +} + +template <typename T, typename Formatter> +TString StrJoin(std::initializer_list<T> il, y_absl::string_view separator, + Formatter&& fmt) { + return strings_internal::JoinRange(il, separator, fmt); +} + +template <typename... T, typename Formatter> +TString StrJoin(const std::tuple<T...>& value, y_absl::string_view separator, + Formatter&& fmt) { + return strings_internal::JoinAlgorithm(value, separator, fmt); +} + +template <typename Iterator> +TString StrJoin(Iterator start, Iterator end, y_absl::string_view separator) { + return strings_internal::JoinRange(start, end, separator); +} + +template <typename Range> +TString StrJoin(const Range& range, y_absl::string_view separator) { + return strings_internal::JoinRange(range, separator); +} + +template <typename T> +TString StrJoin(std::initializer_list<T> il, + y_absl::string_view separator) { + return strings_internal::JoinRange(il, separator); +} + +template <typename... T> +TString StrJoin(const std::tuple<T...>& value, + y_absl::string_view separator) { + return strings_internal::JoinAlgorithm(value, separator, AlphaNumFormatter()); +} + +ABSL_NAMESPACE_END +} // namespace y_absl + +#endif // ABSL_STRINGS_STR_JOIN_H_ diff --git a/contrib/restricted/abseil-cpp-tstring/y_absl/strings/str_replace.cc b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/str_replace.cc new file mode 100644 index 0000000000..77b78c6c16 --- /dev/null +++ b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/str_replace.cc @@ -0,0 +1,82 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "y_absl/strings/str_replace.h" + +#include "y_absl/strings/str_cat.h" + +namespace y_absl { +ABSL_NAMESPACE_BEGIN +namespace strings_internal { + +using FixedMapping = + std::initializer_list<std::pair<y_absl::string_view, y_absl::string_view>>; + +// Applies the ViableSubstitutions in subs_ptr to the y_absl::string_view s, and +// stores the result in *result_ptr. Returns the number of substitutions that +// occurred. +int ApplySubstitutions( + y_absl::string_view s, + std::vector<strings_internal::ViableSubstitution>* subs_ptr, + TString* result_ptr) { + auto& subs = *subs_ptr; + int substitutions = 0; + size_t pos = 0; + while (!subs.empty()) { + auto& sub = subs.back(); + if (sub.offset >= pos) { + if (pos <= s.size()) { + StrAppend(result_ptr, s.substr(pos, sub.offset - pos), sub.replacement); + } + pos = sub.offset + sub.old.size(); + substitutions += 1; + } + sub.offset = s.find(sub.old, pos); + if (sub.offset == s.npos) { + subs.pop_back(); + } else { + // Insertion sort to ensure the last ViableSubstitution continues to be + // before all the others. + size_t index = subs.size(); + while (--index && subs[index - 1].OccursBefore(subs[index])) { + std::swap(subs[index], subs[index - 1]); + } + } + } + result_ptr->append(s.data() + pos, s.size() - pos); + return substitutions; +} + +} // namespace strings_internal + +// We can implement this in terms of the generic StrReplaceAll, but +// we must specify the template overload because C++ cannot deduce the type +// of an initializer_list parameter to a function, and also if we don't specify +// the type, we just call ourselves. +// +// Note that we implement them here, rather than in the header, so that they +// aren't inlined. + +TString StrReplaceAll(y_absl::string_view s, + strings_internal::FixedMapping replacements) { + return StrReplaceAll<strings_internal::FixedMapping>(s, replacements); +} + +int StrReplaceAll(strings_internal::FixedMapping replacements, + TString* target) { + return StrReplaceAll<strings_internal::FixedMapping>(replacements, target); +} + +ABSL_NAMESPACE_END +} // namespace y_absl diff --git a/contrib/restricted/abseil-cpp-tstring/y_absl/strings/str_replace.h b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/str_replace.h new file mode 100644 index 0000000000..42c85616a0 --- /dev/null +++ b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/str_replace.h @@ -0,0 +1,219 @@ +// +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// ----------------------------------------------------------------------------- +// File: str_replace.h +// ----------------------------------------------------------------------------- +// +// This file defines `y_absl::StrReplaceAll()`, a general-purpose string +// replacement function designed for large, arbitrary text substitutions, +// especially on strings which you are receiving from some other system for +// further processing (e.g. processing regular expressions, escaping HTML +// entities, etc.). `StrReplaceAll` is designed to be efficient even when only +// one substitution is being performed, or when substitution is rare. +// +// If the string being modified is known at compile-time, and the substitutions +// vary, `y_absl::Substitute()` may be a better choice. +// +// Example: +// +// TString html_escaped = y_absl::StrReplaceAll(user_input, { +// {"&", "&"}, +// {"<", "<"}, +// {">", ">"}, +// {"\"", """}, +// {"'", "'"}}); +#ifndef ABSL_STRINGS_STR_REPLACE_H_ +#define ABSL_STRINGS_STR_REPLACE_H_ + +#include <util/generic/string.h> +#include <utility> +#include <vector> + +#include "y_absl/base/attributes.h" +#include "y_absl/strings/string_view.h" + +namespace y_absl { +ABSL_NAMESPACE_BEGIN + +// StrReplaceAll() +// +// Replaces character sequences within a given string with replacements provided +// within an initializer list of key/value pairs. Candidate replacements are +// considered in order as they occur within the string, with earlier matches +// taking precedence, and longer matches taking precedence for candidates +// starting at the same position in the string. Once a substitution is made, the +// replaced text is not considered for any further substitutions. +// +// Example: +// +// TString s = y_absl::StrReplaceAll( +// "$who bought $count #Noun. Thanks $who!", +// {{"$count", y_absl::StrCat(5)}, +// {"$who", "Bob"}, +// {"#Noun", "Apples"}}); +// EXPECT_EQ("Bob bought 5 Apples. Thanks Bob!", s); +ABSL_MUST_USE_RESULT TString StrReplaceAll( + y_absl::string_view s, + std::initializer_list<std::pair<y_absl::string_view, y_absl::string_view>> + replacements); + +// Overload of `StrReplaceAll()` to accept a container of key/value replacement +// pairs (typically either an associative map or a `std::vector` of `std::pair` +// elements). A vector of pairs is generally more efficient. +// +// Examples: +// +// std::map<const y_absl::string_view, const y_absl::string_view> replacements; +// replacements["$who"] = "Bob"; +// replacements["$count"] = "5"; +// replacements["#Noun"] = "Apples"; +// TString s = y_absl::StrReplaceAll( +// "$who bought $count #Noun. Thanks $who!", +// replacements); +// EXPECT_EQ("Bob bought 5 Apples. Thanks Bob!", s); +// +// // A std::vector of std::pair elements can be more efficient. +// std::vector<std::pair<const y_absl::string_view, TString>> replacements; +// replacements.push_back({"&", "&"}); +// replacements.push_back({"<", "<"}); +// replacements.push_back({">", ">"}); +// TString s = y_absl::StrReplaceAll("if (ptr < &foo)", +// replacements); +// EXPECT_EQ("if (ptr < &foo)", s); +template <typename StrToStrMapping> +TString StrReplaceAll(y_absl::string_view s, + const StrToStrMapping& replacements); + +// Overload of `StrReplaceAll()` to replace character sequences within a given +// output string *in place* with replacements provided within an initializer +// list of key/value pairs, returning the number of substitutions that occurred. +// +// Example: +// +// TString s = TString("$who bought $count #Noun. Thanks $who!"); +// int count; +// count = y_absl::StrReplaceAll({{"$count", y_absl::StrCat(5)}, +// {"$who", "Bob"}, +// {"#Noun", "Apples"}}, &s); +// EXPECT_EQ(count, 4); +// EXPECT_EQ("Bob bought 5 Apples. Thanks Bob!", s); +int StrReplaceAll( + std::initializer_list<std::pair<y_absl::string_view, y_absl::string_view>> + replacements, + TString* target); + +// Overload of `StrReplaceAll()` to replace patterns within a given output +// string *in place* with replacements provided within a container of key/value +// pairs. +// +// Example: +// +// TString s = TString("if (ptr < &foo)"); +// int count = y_absl::StrReplaceAll({{"&", "&"}, +// {"<", "<"}, +// {">", ">"}}, &s); +// EXPECT_EQ(count, 2); +// EXPECT_EQ("if (ptr < &foo)", s); +template <typename StrToStrMapping> +int StrReplaceAll(const StrToStrMapping& replacements, TString* target); + +// Implementation details only, past this point. +namespace strings_internal { + +struct ViableSubstitution { + y_absl::string_view old; + y_absl::string_view replacement; + size_t offset; + + ViableSubstitution(y_absl::string_view old_str, + y_absl::string_view replacement_str, size_t offset_val) + : old(old_str), replacement(replacement_str), offset(offset_val) {} + + // One substitution occurs "before" another (takes priority) if either + // it has the lowest offset, or it has the same offset but a larger size. + bool OccursBefore(const ViableSubstitution& y) const { + if (offset != y.offset) return offset < y.offset; + return old.size() > y.old.size(); + } +}; + +// Build a vector of ViableSubstitutions based on the given list of +// replacements. subs can be implemented as a priority_queue. However, it turns +// out that most callers have small enough a list of substitutions that the +// overhead of such a queue isn't worth it. +template <typename StrToStrMapping> +std::vector<ViableSubstitution> FindSubstitutions( + y_absl::string_view s, const StrToStrMapping& replacements) { + std::vector<ViableSubstitution> subs; + subs.reserve(replacements.size()); + + for (const auto& rep : replacements) { + using std::get; + y_absl::string_view old(get<0>(rep)); + + size_t pos = s.find(old); + if (pos == s.npos) continue; + + // Ignore attempts to replace "". This condition is almost never true, + // but above condition is frequently true. That's why we test for this + // now and not before. + if (old.empty()) continue; + + subs.emplace_back(old, get<1>(rep), pos); + + // Insertion sort to ensure the last ViableSubstitution comes before + // all the others. + size_t index = subs.size(); + while (--index && subs[index - 1].OccursBefore(subs[index])) { + std::swap(subs[index], subs[index - 1]); + } + } + return subs; +} + +int ApplySubstitutions(y_absl::string_view s, + std::vector<ViableSubstitution>* subs_ptr, + TString* result_ptr); + +} // namespace strings_internal + +template <typename StrToStrMapping> +TString StrReplaceAll(y_absl::string_view s, + const StrToStrMapping& replacements) { + auto subs = strings_internal::FindSubstitutions(s, replacements); + TString result; + result.reserve(s.size()); + strings_internal::ApplySubstitutions(s, &subs, &result); + return result; +} + +template <typename StrToStrMapping> +int StrReplaceAll(const StrToStrMapping& replacements, TString* target) { + auto subs = strings_internal::FindSubstitutions(*target, replacements); + if (subs.empty()) return 0; + + TString result; + result.reserve(target->size()); + int substitutions = + strings_internal::ApplySubstitutions(*target, &subs, &result); + target->swap(result); + return substitutions; +} + +ABSL_NAMESPACE_END +} // namespace y_absl + +#endif // ABSL_STRINGS_STR_REPLACE_H_ diff --git a/contrib/restricted/abseil-cpp-tstring/y_absl/strings/str_split.cc b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/str_split.cc new file mode 100644 index 0000000000..5f9193e6ba --- /dev/null +++ b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/str_split.cc @@ -0,0 +1,139 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "y_absl/strings/str_split.h" + +#include <algorithm> +#include <cassert> +#include <cstdint> +#include <cstdlib> +#include <cstring> +#include <iterator> +#include <limits> +#include <memory> + +#include "y_absl/base/internal/raw_logging.h" +#include "y_absl/strings/ascii.h" + +namespace y_absl { +ABSL_NAMESPACE_BEGIN + +namespace { + +// This GenericFind() template function encapsulates the finding algorithm +// shared between the ByString and ByAnyChar delimiters. The FindPolicy +// template parameter allows each delimiter to customize the actual find +// function to use and the length of the found delimiter. For example, the +// Literal delimiter will ultimately use y_absl::string_view::find(), and the +// AnyOf delimiter will use y_absl::string_view::find_first_of(). +template <typename FindPolicy> +y_absl::string_view GenericFind(y_absl::string_view text, + y_absl::string_view delimiter, size_t pos, + FindPolicy find_policy) { + if (delimiter.empty() && text.length() > 0) { + // Special case for empty string delimiters: always return a zero-length + // y_absl::string_view referring to the item at position 1 past pos. + return y_absl::string_view(text.data() + pos + 1, 0); + } + size_t found_pos = y_absl::string_view::npos; + y_absl::string_view found(text.data() + text.size(), + 0); // By default, not found + found_pos = find_policy.Find(text, delimiter, pos); + if (found_pos != y_absl::string_view::npos) { + found = y_absl::string_view(text.data() + found_pos, + find_policy.Length(delimiter)); + } + return found; +} + +// Finds using y_absl::string_view::find(), therefore the length of the found +// delimiter is delimiter.length(). +struct LiteralPolicy { + size_t Find(y_absl::string_view text, y_absl::string_view delimiter, size_t pos) { + return text.find(delimiter, pos); + } + size_t Length(y_absl::string_view delimiter) { return delimiter.length(); } +}; + +// Finds using y_absl::string_view::find_first_of(), therefore the length of the +// found delimiter is 1. +struct AnyOfPolicy { + size_t Find(y_absl::string_view text, y_absl::string_view delimiter, size_t pos) { + return text.find_first_of(delimiter, pos); + } + size_t Length(y_absl::string_view /* delimiter */) { return 1; } +}; + +} // namespace + +// +// ByString +// + +ByString::ByString(y_absl::string_view sp) : delimiter_(sp) {} + +y_absl::string_view ByString::Find(y_absl::string_view text, size_t pos) const { + if (delimiter_.length() == 1) { + // Much faster to call find on a single character than on an + // y_absl::string_view. + size_t found_pos = text.find(delimiter_[0], pos); + if (found_pos == y_absl::string_view::npos) + return y_absl::string_view(text.data() + text.size(), 0); + return text.substr(found_pos, 1); + } + return GenericFind(text, delimiter_, pos, LiteralPolicy()); +} + +// +// ByChar +// + +y_absl::string_view ByChar::Find(y_absl::string_view text, size_t pos) const { + size_t found_pos = text.find(c_, pos); + if (found_pos == y_absl::string_view::npos) + return y_absl::string_view(text.data() + text.size(), 0); + return text.substr(found_pos, 1); +} + +// +// ByAnyChar +// + +ByAnyChar::ByAnyChar(y_absl::string_view sp) : delimiters_(sp) {} + +y_absl::string_view ByAnyChar::Find(y_absl::string_view text, size_t pos) const { + return GenericFind(text, delimiters_, pos, AnyOfPolicy()); +} + +// +// ByLength +// +ByLength::ByLength(ptrdiff_t length) : length_(length) { + ABSL_RAW_CHECK(length > 0, ""); +} + +y_absl::string_view ByLength::Find(y_absl::string_view text, + size_t pos) const { + pos = std::min(pos, text.size()); // truncate `pos` + y_absl::string_view substr = text.substr(pos); + // If the string is shorter than the chunk size we say we + // "can't find the delimiter" so this will be the last chunk. + if (substr.length() <= static_cast<size_t>(length_)) + return y_absl::string_view(text.data() + text.size(), 0); + + return y_absl::string_view(substr.data() + length_, 0); +} + +ABSL_NAMESPACE_END +} // namespace y_absl diff --git a/contrib/restricted/abseil-cpp-tstring/y_absl/strings/str_split.h b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/str_split.h new file mode 100644 index 0000000000..d32d54813e --- /dev/null +++ b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/str_split.h @@ -0,0 +1,548 @@ +// +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// ----------------------------------------------------------------------------- +// File: str_split.h +// ----------------------------------------------------------------------------- +// +// This file contains functions for splitting strings. It defines the main +// `StrSplit()` function, several delimiters for determining the boundaries on +// which to split the string, and predicates for filtering delimited results. +// `StrSplit()` adapts the returned collection to the type specified by the +// caller. +// +// Example: +// +// // Splits the given string on commas. Returns the results in a +// // vector of strings. +// std::vector<TString> v = y_absl::StrSplit("a,b,c", ','); +// // Can also use "," +// // v[0] == "a", v[1] == "b", v[2] == "c" +// +// See StrSplit() below for more information. +#ifndef ABSL_STRINGS_STR_SPLIT_H_ +#define ABSL_STRINGS_STR_SPLIT_H_ + +#include <algorithm> +#include <cstddef> +#include <map> +#include <set> +#include <util/generic/string.h> +#include <utility> +#include <vector> + +#include "y_absl/base/internal/raw_logging.h" +#include "y_absl/base/macros.h" +#include "y_absl/strings/internal/str_split_internal.h" +#include "y_absl/strings/string_view.h" +#include "y_absl/strings/strip.h" + +namespace y_absl { +ABSL_NAMESPACE_BEGIN + +//------------------------------------------------------------------------------ +// Delimiters +//------------------------------------------------------------------------------ +// +// `StrSplit()` uses delimiters to define the boundaries between elements in the +// provided input. Several `Delimiter` types are defined below. If a string +// (`const char*`, `TString`, or `y_absl::string_view`) is passed in place of +// an explicit `Delimiter` object, `StrSplit()` treats it the same way as if it +// were passed a `ByString` delimiter. +// +// A `Delimiter` is an object with a `Find()` function that knows how to find +// the first occurrence of itself in a given `y_absl::string_view`. +// +// The following `Delimiter` types are available for use within `StrSplit()`: +// +// - `ByString` (default for string arguments) +// - `ByChar` (default for a char argument) +// - `ByAnyChar` +// - `ByLength` +// - `MaxSplits` +// +// A Delimiter's `Find()` member function will be passed an input `text` that is +// to be split and a position (`pos`) to begin searching for the next delimiter +// in `text`. The returned y_absl::string_view should refer to the next occurrence +// (after `pos`) of the represented delimiter; this returned y_absl::string_view +// represents the next location where the input `text` should be broken. +// +// The returned y_absl::string_view may be zero-length if the Delimiter does not +// represent a part of the string (e.g., a fixed-length delimiter). If no +// delimiter is found in the input `text`, a zero-length y_absl::string_view +// referring to `text.end()` should be returned (e.g., +// `text.substr(text.size())`). It is important that the returned +// y_absl::string_view always be within the bounds of the input `text` given as an +// argument--it must not refer to a string that is physically located outside of +// the given string. +// +// The following example is a simple Delimiter object that is created with a +// single char and will look for that char in the text passed to the `Find()` +// function: +// +// struct SimpleDelimiter { +// const char c_; +// explicit SimpleDelimiter(char c) : c_(c) {} +// y_absl::string_view Find(y_absl::string_view text, size_t pos) { +// auto found = text.find(c_, pos); +// if (found == y_absl::string_view::npos) +// return text.substr(text.size()); +// +// return text.substr(found, 1); +// } +// }; + +// ByString +// +// A sub-string delimiter. If `StrSplit()` is passed a string in place of a +// `Delimiter` object, the string will be implicitly converted into a +// `ByString` delimiter. +// +// Example: +// +// // Because a string literal is converted to an `y_absl::ByString`, +// // the following two splits are equivalent. +// +// std::vector<TString> v1 = y_absl::StrSplit("a, b, c", ", "); +// +// using y_absl::ByString; +// std::vector<TString> v2 = y_absl::StrSplit("a, b, c", +// ByString(", ")); +// // v[0] == "a", v[1] == "b", v[2] == "c" +class ByString { + public: + explicit ByString(y_absl::string_view sp); + y_absl::string_view Find(y_absl::string_view text, size_t pos) const; + + private: + const TString delimiter_; +}; + +// ByChar +// +// A single character delimiter. `ByChar` is functionally equivalent to a +// 1-char string within a `ByString` delimiter, but slightly more efficient. +// +// Example: +// +// // Because a char literal is converted to a y_absl::ByChar, +// // the following two splits are equivalent. +// std::vector<TString> v1 = y_absl::StrSplit("a,b,c", ','); +// using y_absl::ByChar; +// std::vector<TString> v2 = y_absl::StrSplit("a,b,c", ByChar(',')); +// // v[0] == "a", v[1] == "b", v[2] == "c" +// +// `ByChar` is also the default delimiter if a single character is given +// as the delimiter to `StrSplit()`. For example, the following calls are +// equivalent: +// +// std::vector<TString> v = y_absl::StrSplit("a-b", '-'); +// +// using y_absl::ByChar; +// std::vector<TString> v = y_absl::StrSplit("a-b", ByChar('-')); +// +class ByChar { + public: + explicit ByChar(char c) : c_(c) {} + y_absl::string_view Find(y_absl::string_view text, size_t pos) const; + + private: + char c_; +}; + +// ByAnyChar +// +// A delimiter that will match any of the given byte-sized characters within +// its provided string. +// +// Note: this delimiter works with single-byte string data, but does not work +// with variable-width encodings, such as UTF-8. +// +// Example: +// +// using y_absl::ByAnyChar; +// std::vector<TString> v = y_absl::StrSplit("a,b=c", ByAnyChar(",=")); +// // v[0] == "a", v[1] == "b", v[2] == "c" +// +// If `ByAnyChar` is given the empty string, it behaves exactly like +// `ByString` and matches each individual character in the input string. +// +class ByAnyChar { + public: + explicit ByAnyChar(y_absl::string_view sp); + y_absl::string_view Find(y_absl::string_view text, size_t pos) const; + + private: + const TString delimiters_; +}; + +// ByLength +// +// A delimiter for splitting into equal-length strings. The length argument to +// the constructor must be greater than 0. +// +// Note: this delimiter works with single-byte string data, but does not work +// with variable-width encodings, such as UTF-8. +// +// Example: +// +// using y_absl::ByLength; +// std::vector<TString> v = y_absl::StrSplit("123456789", ByLength(3)); + +// // v[0] == "123", v[1] == "456", v[2] == "789" +// +// Note that the string does not have to be a multiple of the fixed split +// length. In such a case, the last substring will be shorter. +// +// using y_absl::ByLength; +// std::vector<TString> v = y_absl::StrSplit("12345", ByLength(2)); +// +// // v[0] == "12", v[1] == "34", v[2] == "5" +class ByLength { + public: + explicit ByLength(ptrdiff_t length); + y_absl::string_view Find(y_absl::string_view text, size_t pos) const; + + private: + const ptrdiff_t length_; +}; + +namespace strings_internal { + +// A traits-like metafunction for selecting the default Delimiter object type +// for a particular Delimiter type. The base case simply exposes type Delimiter +// itself as the delimiter's Type. However, there are specializations for +// string-like objects that map them to the ByString delimiter object. +// This allows functions like y_absl::StrSplit() and y_absl::MaxSplits() to accept +// string-like objects (e.g., ',') as delimiter arguments but they will be +// treated as if a ByString delimiter was given. +template <typename Delimiter> +struct SelectDelimiter { + using type = Delimiter; +}; + +template <> +struct SelectDelimiter<char> { + using type = ByChar; +}; +template <> +struct SelectDelimiter<char*> { + using type = ByString; +}; +template <> +struct SelectDelimiter<const char*> { + using type = ByString; +}; +template <> +struct SelectDelimiter<y_absl::string_view> { + using type = ByString; +}; +template <> +struct SelectDelimiter<TString> { + using type = ByString; +}; + +// Wraps another delimiter and sets a max number of matches for that delimiter. +template <typename Delimiter> +class MaxSplitsImpl { + public: + MaxSplitsImpl(Delimiter delimiter, int limit) + : delimiter_(delimiter), limit_(limit), count_(0) {} + y_absl::string_view Find(y_absl::string_view text, size_t pos) { + if (count_++ == limit_) { + return y_absl::string_view(text.data() + text.size(), + 0); // No more matches. + } + return delimiter_.Find(text, pos); + } + + private: + Delimiter delimiter_; + const int limit_; + int count_; +}; + +} // namespace strings_internal + +// MaxSplits() +// +// A delimiter that limits the number of matches which can occur to the passed +// `limit`. The last element in the returned collection will contain all +// remaining unsplit pieces, which may contain instances of the delimiter. +// The collection will contain at most `limit` + 1 elements. +// Example: +// +// using y_absl::MaxSplits; +// std::vector<TString> v = y_absl::StrSplit("a,b,c", MaxSplits(',', 1)); +// +// // v[0] == "a", v[1] == "b,c" +template <typename Delimiter> +inline strings_internal::MaxSplitsImpl< + typename strings_internal::SelectDelimiter<Delimiter>::type> +MaxSplits(Delimiter delimiter, int limit) { + typedef + typename strings_internal::SelectDelimiter<Delimiter>::type DelimiterType; + return strings_internal::MaxSplitsImpl<DelimiterType>( + DelimiterType(delimiter), limit); +} + +//------------------------------------------------------------------------------ +// Predicates +//------------------------------------------------------------------------------ +// +// Predicates filter the results of a `StrSplit()` by determining whether or not +// a resultant element is included in the result set. A predicate may be passed +// as an optional third argument to the `StrSplit()` function. +// +// Predicates are unary functions (or functors) that take a single +// `y_absl::string_view` argument and return a bool indicating whether the +// argument should be included (`true`) or excluded (`false`). +// +// Predicates are useful when filtering out empty substrings. By default, empty +// substrings may be returned by `StrSplit()`, which is similar to the way split +// functions work in other programming languages. + +// AllowEmpty() +// +// Always returns `true`, indicating that all strings--including empty +// strings--should be included in the split output. This predicate is not +// strictly needed because this is the default behavior of `StrSplit()`; +// however, it might be useful at some call sites to make the intent explicit. +// +// Example: +// +// std::vector<TString> v = y_absl::StrSplit(" a , ,,b,", ',', AllowEmpty()); +// +// // v[0] == " a ", v[1] == " ", v[2] == "", v[3] = "b", v[4] == "" +struct AllowEmpty { + bool operator()(y_absl::string_view) const { return true; } +}; + +// SkipEmpty() +// +// Returns `false` if the given `y_absl::string_view` is empty, indicating that +// `StrSplit()` should omit the empty string. +// +// Example: +// +// std::vector<TString> v = y_absl::StrSplit(",a,,b,", ',', SkipEmpty()); +// +// // v[0] == "a", v[1] == "b" +// +// Note: `SkipEmpty()` does not consider a string containing only whitespace +// to be empty. To skip such whitespace as well, use the `SkipWhitespace()` +// predicate. +struct SkipEmpty { + bool operator()(y_absl::string_view sp) const { return !sp.empty(); } +}; + +// SkipWhitespace() +// +// Returns `false` if the given `y_absl::string_view` is empty *or* contains only +// whitespace, indicating that `StrSplit()` should omit the string. +// +// Example: +// +// std::vector<TString> v = y_absl::StrSplit(" a , ,,b,", +// ',', SkipWhitespace()); +// // v[0] == " a ", v[1] == "b" +// +// // SkipEmpty() would return whitespace elements +// std::vector<TString> v = y_absl::StrSplit(" a , ,,b,", ',', SkipEmpty()); +// // v[0] == " a ", v[1] == " ", v[2] == "b" +struct SkipWhitespace { + bool operator()(y_absl::string_view sp) const { + sp = y_absl::StripAsciiWhitespace(sp); + return !sp.empty(); + } +}; + +template <typename T> +using EnableSplitIfString = + typename std::enable_if<std::is_same<T, TString>::value || + std::is_same<T, const TString>::value, + int>::type; + +//------------------------------------------------------------------------------ +// StrSplit() +//------------------------------------------------------------------------------ + +// StrSplit() +// +// Splits a given string based on the provided `Delimiter` object, returning the +// elements within the type specified by the caller. Optionally, you may pass a +// `Predicate` to `StrSplit()` indicating whether to include or exclude the +// resulting element within the final result set. (See the overviews for +// Delimiters and Predicates above.) +// +// Example: +// +// std::vector<TString> v = y_absl::StrSplit("a,b,c,d", ','); +// // v[0] == "a", v[1] == "b", v[2] == "c", v[3] == "d" +// +// You can also provide an explicit `Delimiter` object: +// +// Example: +// +// using y_absl::ByAnyChar; +// std::vector<TString> v = y_absl::StrSplit("a,b=c", ByAnyChar(",=")); +// // v[0] == "a", v[1] == "b", v[2] == "c" +// +// See above for more information on delimiters. +// +// By default, empty strings are included in the result set. You can optionally +// include a third `Predicate` argument to apply a test for whether the +// resultant element should be included in the result set: +// +// Example: +// +// std::vector<TString> v = y_absl::StrSplit(" a , ,,b,", +// ',', SkipWhitespace()); +// // v[0] == " a ", v[1] == "b" +// +// See above for more information on predicates. +// +//------------------------------------------------------------------------------ +// StrSplit() Return Types +//------------------------------------------------------------------------------ +// +// The `StrSplit()` function adapts the returned collection to the collection +// specified by the caller (e.g. `std::vector` above). The returned collections +// may contain `TString`, `y_absl::string_view` (in which case the original +// string being split must ensure that it outlives the collection), or any +// object that can be explicitly created from an `y_absl::string_view`. This +// behavior works for: +// +// 1) All standard STL containers including `std::vector`, `std::list`, +// `std::deque`, `std::set`,`std::multiset`, 'std::map`, and `std::multimap` +// 2) `std::pair` (which is not actually a container). See below. +// +// Example: +// +// // The results are returned as `y_absl::string_view` objects. Note that we +// // have to ensure that the input string outlives any results. +// std::vector<y_absl::string_view> v = y_absl::StrSplit("a,b,c", ','); +// +// // Stores results in a std::set<TString>, which also performs +// // de-duplication and orders the elements in ascending order. +// std::set<TString> a = y_absl::StrSplit("b,a,c,a,b", ','); +// // v[0] == "a", v[1] == "b", v[2] = "c" +// +// // `StrSplit()` can be used within a range-based for loop, in which case +// // each element will be of type `y_absl::string_view`. +// std::vector<TString> v; +// for (const auto sv : y_absl::StrSplit("a,b,c", ',')) { +// if (sv != "b") v.emplace_back(sv); +// } +// // v[0] == "a", v[1] == "c" +// +// // Stores results in a map. The map implementation assumes that the input +// // is provided as a series of key/value pairs. For example, the 0th element +// // resulting from the split will be stored as a key to the 1st element. If +// // an odd number of elements are resolved, the last element is paired with +// // a default-constructed value (e.g., empty string). +// std::map<TString, TString> m = y_absl::StrSplit("a,b,c", ','); +// // m["a"] == "b", m["c"] == "" // last component value equals "" +// +// Splitting to `std::pair` is an interesting case because it can hold only two +// elements and is not a collection type. When splitting to a `std::pair` the +// first two split strings become the `std::pair` `.first` and `.second` +// members, respectively. The remaining split substrings are discarded. If there +// are less than two split substrings, the empty string is used for the +// corresponding +// `std::pair` member. +// +// Example: +// +// // Stores first two split strings as the members in a std::pair. +// std::pair<TString, TString> p = y_absl::StrSplit("a,b,c", ','); +// // p.first == "a", p.second == "b" // "c" is omitted. +// +// The `StrSplit()` function can be used multiple times to perform more +// complicated splitting logic, such as intelligently parsing key-value pairs. +// +// Example: +// +// // The input string "a=b=c,d=e,f=,g" becomes +// // { "a" => "b=c", "d" => "e", "f" => "", "g" => "" } +// std::map<TString, TString> m; +// for (y_absl::string_view sp : y_absl::StrSplit("a=b=c,d=e,f=,g", ',')) { +// m.insert(y_absl::StrSplit(sp, y_absl::MaxSplits('=', 1))); +// } +// EXPECT_EQ("b=c", m.find("a")->second); +// EXPECT_EQ("e", m.find("d")->second); +// EXPECT_EQ("", m.find("f")->second); +// EXPECT_EQ("", m.find("g")->second); +// +// WARNING: Due to a legacy bug that is maintained for backward compatibility, +// splitting the following empty string_views produces different results: +// +// y_absl::StrSplit(y_absl::string_view(""), '-'); // {""} +// y_absl::StrSplit(y_absl::string_view(), '-'); // {}, but should be {""} +// +// Try not to depend on this distinction because the bug may one day be fixed. +template <typename Delimiter> +strings_internal::Splitter< + typename strings_internal::SelectDelimiter<Delimiter>::type, AllowEmpty, + y_absl::string_view> +StrSplit(strings_internal::ConvertibleToStringView text, Delimiter d) { + using DelimiterType = + typename strings_internal::SelectDelimiter<Delimiter>::type; + return strings_internal::Splitter<DelimiterType, AllowEmpty, + y_absl::string_view>( + text.value(), DelimiterType(d), AllowEmpty()); +} + +template <typename Delimiter, typename StringType, + EnableSplitIfString<StringType> = 0> +strings_internal::Splitter< + typename strings_internal::SelectDelimiter<Delimiter>::type, AllowEmpty, + TString> +StrSplit(StringType&& text, Delimiter d) { + using DelimiterType = + typename strings_internal::SelectDelimiter<Delimiter>::type; + return strings_internal::Splitter<DelimiterType, AllowEmpty, TString>( + std::move(text), DelimiterType(d), AllowEmpty()); +} + +template <typename Delimiter, typename Predicate> +strings_internal::Splitter< + typename strings_internal::SelectDelimiter<Delimiter>::type, Predicate, + y_absl::string_view> +StrSplit(strings_internal::ConvertibleToStringView text, Delimiter d, + Predicate p) { + using DelimiterType = + typename strings_internal::SelectDelimiter<Delimiter>::type; + return strings_internal::Splitter<DelimiterType, Predicate, + y_absl::string_view>( + text.value(), DelimiterType(d), std::move(p)); +} + +template <typename Delimiter, typename Predicate, typename StringType, + EnableSplitIfString<StringType> = 0> +strings_internal::Splitter< + typename strings_internal::SelectDelimiter<Delimiter>::type, Predicate, + TString> +StrSplit(StringType&& text, Delimiter d, Predicate p) { + using DelimiterType = + typename strings_internal::SelectDelimiter<Delimiter>::type; + return strings_internal::Splitter<DelimiterType, Predicate, TString>( + std::move(text), DelimiterType(d), std::move(p)); +} + +ABSL_NAMESPACE_END +} // namespace y_absl + +#endif // ABSL_STRINGS_STR_SPLIT_H_ diff --git a/contrib/restricted/abseil-cpp-tstring/y_absl/strings/string_view.cc b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/string_view.cc new file mode 100644 index 0000000000..9893c7ab99 --- /dev/null +++ b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/string_view.cc @@ -0,0 +1,230 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "y_absl/strings/string_view.h" + +#ifndef ABSL_USES_STD_STRING_VIEW + +#include <algorithm> +#include <climits> +#include <cstring> +#include <ostream> + +#include "y_absl/strings/internal/memutil.h" + +namespace y_absl { +ABSL_NAMESPACE_BEGIN + +namespace { +void WritePadding(std::ostream& o, size_t pad) { + char fill_buf[32]; + memset(fill_buf, o.fill(), sizeof(fill_buf)); + while (pad) { + size_t n = std::min(pad, sizeof(fill_buf)); + o.write(fill_buf, n); + pad -= n; + } +} + +class LookupTable { + public: + // For each character in wanted, sets the index corresponding + // to the ASCII code of that character. This is used by + // the find_.*_of methods below to tell whether or not a character is in + // the lookup table in constant time. + explicit LookupTable(string_view wanted) { + for (char c : wanted) { + table_[Index(c)] = true; + } + } + bool operator[](char c) const { return table_[Index(c)]; } + + private: + static unsigned char Index(char c) { return static_cast<unsigned char>(c); } + bool table_[UCHAR_MAX + 1] = {}; +}; + +} // namespace + +std::ostream& operator<<(std::ostream& o, string_view piece) { + std::ostream::sentry sentry(o); + if (sentry) { + size_t lpad = 0; + size_t rpad = 0; + if (static_cast<size_t>(o.width()) > piece.size()) { + size_t pad = o.width() - piece.size(); + if ((o.flags() & o.adjustfield) == o.left) { + rpad = pad; + } else { + lpad = pad; + } + } + if (lpad) WritePadding(o, lpad); + o.write(piece.data(), piece.size()); + if (rpad) WritePadding(o, rpad); + o.width(0); + } + return o; +} + +string_view::size_type string_view::find(string_view s, + size_type pos) const noexcept { + if (empty() || pos > length_) { + if (empty() && pos == 0 && s.empty()) return 0; + return npos; + } + const char* result = + strings_internal::memmatch(ptr_ + pos, length_ - pos, s.ptr_, s.length_); + return result ? result - ptr_ : npos; +} + +string_view::size_type string_view::find(char c, size_type pos) const noexcept { + if (empty() || pos >= length_) { + return npos; + } + const char* result = + static_cast<const char*>(memchr(ptr_ + pos, c, length_ - pos)); + return result != nullptr ? result - ptr_ : npos; +} + +string_view::size_type string_view::rfind(string_view s, + size_type pos) const noexcept { + if (length_ < s.length_) return npos; + if (s.empty()) return std::min(length_, pos); + const char* last = ptr_ + std::min(length_ - s.length_, pos) + s.length_; + const char* result = std::find_end(ptr_, last, s.ptr_, s.ptr_ + s.length_); + return result != last ? result - ptr_ : npos; +} + +// Search range is [0..pos] inclusive. If pos == npos, search everything. +string_view::size_type string_view::rfind(char c, + size_type pos) const noexcept { + // Note: memrchr() is not available on Windows. + if (empty()) return npos; + for (size_type i = std::min(pos, length_ - 1);; --i) { + if (ptr_[i] == c) { + return i; + } + if (i == 0) break; + } + return npos; +} + +string_view::size_type string_view::find_first_of( + string_view s, size_type pos) const noexcept { + if (empty() || s.empty()) { + return npos; + } + // Avoid the cost of LookupTable() for a single-character search. + if (s.length_ == 1) return find_first_of(s.ptr_[0], pos); + LookupTable tbl(s); + for (size_type i = pos; i < length_; ++i) { + if (tbl[ptr_[i]]) { + return i; + } + } + return npos; +} + +string_view::size_type string_view::find_first_not_of( + string_view s, size_type pos) const noexcept { + if (empty()) return npos; + // Avoid the cost of LookupTable() for a single-character search. + if (s.length_ == 1) return find_first_not_of(s.ptr_[0], pos); + LookupTable tbl(s); + for (size_type i = pos; i < length_; ++i) { + if (!tbl[ptr_[i]]) { + return i; + } + } + return npos; +} + +string_view::size_type string_view::find_first_not_of( + char c, size_type pos) const noexcept { + if (empty()) return npos; + for (; pos < length_; ++pos) { + if (ptr_[pos] != c) { + return pos; + } + } + return npos; +} + +string_view::size_type string_view::find_last_of(string_view s, + size_type pos) const noexcept { + if (empty() || s.empty()) return npos; + // Avoid the cost of LookupTable() for a single-character search. + if (s.length_ == 1) return find_last_of(s.ptr_[0], pos); + LookupTable tbl(s); + for (size_type i = std::min(pos, length_ - 1);; --i) { + if (tbl[ptr_[i]]) { + return i; + } + if (i == 0) break; + } + return npos; +} + +string_view::size_type string_view::find_last_not_of( + string_view s, size_type pos) const noexcept { + if (empty()) return npos; + size_type i = std::min(pos, length_ - 1); + if (s.empty()) return i; + // Avoid the cost of LookupTable() for a single-character search. + if (s.length_ == 1) return find_last_not_of(s.ptr_[0], pos); + LookupTable tbl(s); + for (;; --i) { + if (!tbl[ptr_[i]]) { + return i; + } + if (i == 0) break; + } + return npos; +} + +string_view::size_type string_view::find_last_not_of( + char c, size_type pos) const noexcept { + if (empty()) return npos; + size_type i = std::min(pos, length_ - 1); + for (;; --i) { + if (ptr_[i] != c) { + return i; + } + if (i == 0) break; + } + return npos; +} + +// MSVC has non-standard behavior that implicitly creates definitions for static +// const members. These implicit definitions conflict with explicit out-of-class +// member definitions that are required by the C++ standard, resulting in +// LNK1169 "multiply defined" errors at link time. __declspec(selectany) asks +// MSVC to choose only one definition for the symbol it decorates. See details +// at https://msdn.microsoft.com/en-us/library/34h23df8(v=vs.100).aspx +#ifdef _MSC_VER +#define ABSL_STRING_VIEW_SELECTANY __declspec(selectany) +#else +#define ABSL_STRING_VIEW_SELECTANY +#endif + +ABSL_STRING_VIEW_SELECTANY +constexpr string_view::size_type string_view::npos; +ABSL_STRING_VIEW_SELECTANY +constexpr string_view::size_type string_view::kMaxSize; + +ABSL_NAMESPACE_END +} // namespace y_absl + +#endif // ABSL_USES_STD_STRING_VIEW diff --git a/contrib/restricted/abseil-cpp-tstring/y_absl/strings/string_view.h b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/string_view.h new file mode 100644 index 0000000000..c3906fe1c5 --- /dev/null +++ b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/string_view.h @@ -0,0 +1,712 @@ +// +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// ----------------------------------------------------------------------------- +// File: string_view.h +// ----------------------------------------------------------------------------- +// +// This file contains the definition of the `y_absl::string_view` class. A +// `string_view` points to a contiguous span of characters, often part or all of +// another `TString`, double-quoted string literal, character array, or even +// another `string_view`. +// +// This `y_absl::string_view` abstraction is designed to be a drop-in +// replacement for the C++17 `std::string_view` abstraction. +#ifndef ABSL_STRINGS_STRING_VIEW_H_ +#define ABSL_STRINGS_STRING_VIEW_H_ + +#include <algorithm> +#include <cassert> +#include <cstddef> +#include <cstring> +#include <iosfwd> +#include <iterator> +#include <limits> +#include <util/generic/string.h> + +#include "y_absl/base/attributes.h" +#include "y_absl/base/config.h" +#include "y_absl/base/internal/throw_delegate.h" +#include "y_absl/base/macros.h" +#include "y_absl/base/optimization.h" +#include "y_absl/base/port.h" + +#ifdef ABSL_USES_STD_STRING_VIEW + +#include <string_view> // IWYU pragma: export + +namespace y_absl { +ABSL_NAMESPACE_BEGIN +using string_view = std::string_view; +ABSL_NAMESPACE_END +} // namespace y_absl + +#else // ABSL_USES_STD_STRING_VIEW + +#error "std::string_view should be used in all configurations" + +#if ABSL_HAVE_BUILTIN(__builtin_memcmp) || \ + (defined(__GNUC__) && !defined(__clang__)) +#define ABSL_INTERNAL_STRING_VIEW_MEMCMP __builtin_memcmp +#else // ABSL_HAVE_BUILTIN(__builtin_memcmp) +#define ABSL_INTERNAL_STRING_VIEW_MEMCMP memcmp +#endif // ABSL_HAVE_BUILTIN(__builtin_memcmp) + +#if defined(__cplusplus) && __cplusplus >= 201402L +#define ABSL_INTERNAL_STRING_VIEW_CXX14_CONSTEXPR constexpr +#else +#define ABSL_INTERNAL_STRING_VIEW_CXX14_CONSTEXPR +#endif + +namespace y_absl { +ABSL_NAMESPACE_BEGIN + +// y_absl::string_view +// +// A `string_view` provides a lightweight view into the string data provided by +// a `TString`, double-quoted string literal, character array, or even +// another `string_view`. A `string_view` does *not* own the string to which it +// points, and that data cannot be modified through the view. +// +// You can use `string_view` as a function or method parameter anywhere a +// parameter can receive a double-quoted string literal, `const char*`, +// `TString`, or another `y_absl::string_view` argument with no need to copy +// the string data. Systematic use of `string_view` within function arguments +// reduces data copies and `strlen()` calls. +// +// Because of its small size, prefer passing `string_view` by value: +// +// void MyFunction(y_absl::string_view arg); +// +// If circumstances require, you may also pass one by const reference: +// +// void MyFunction(const y_absl::string_view& arg); // not preferred +// +// Passing by value generates slightly smaller code for many architectures. +// +// In either case, the source data of the `string_view` must outlive the +// `string_view` itself. +// +// A `string_view` is also suitable for local variables if you know that the +// lifetime of the underlying object is longer than the lifetime of your +// `string_view` variable. However, beware of binding a `string_view` to a +// temporary value: +// +// // BAD use of string_view: lifetime problem +// y_absl::string_view sv = obj.ReturnAString(); +// +// // GOOD use of string_view: str outlives sv +// TString str = obj.ReturnAString(); +// y_absl::string_view sv = str; +// +// Due to lifetime issues, a `string_view` is sometimes a poor choice for a +// return value and usually a poor choice for a data member. If you do use a +// `string_view` this way, it is your responsibility to ensure that the object +// pointed to by the `string_view` outlives the `string_view`. +// +// A `string_view` may represent a whole string or just part of a string. For +// example, when splitting a string, `std::vector<y_absl::string_view>` is a +// natural data type for the output. +// +// For another example, a Cord is a non-contiguous, potentially very +// long string-like object. The Cord class has an interface that iteratively +// provides string_view objects that point to the successive pieces of a Cord +// object. +// +// When constructed from a source which is NUL-terminated, the `string_view` +// itself will not include the NUL-terminator unless a specific size (including +// the NUL) is passed to the constructor. As a result, common idioms that work +// on NUL-terminated strings do not work on `string_view` objects. If you write +// code that scans a `string_view`, you must check its length rather than test +// for nul, for example. Note, however, that nuls may still be embedded within +// a `string_view` explicitly. +// +// You may create a null `string_view` in two ways: +// +// y_absl::string_view sv; +// y_absl::string_view sv(nullptr, 0); +// +// For the above, `sv.data() == nullptr`, `sv.length() == 0`, and +// `sv.empty() == true`. Also, if you create a `string_view` with a non-null +// pointer then `sv.data() != nullptr`. Thus, you can use `string_view()` to +// signal an undefined value that is different from other `string_view` values +// in a similar fashion to how `const char* p1 = nullptr;` is different from +// `const char* p2 = "";`. However, in practice, it is not recommended to rely +// on this behavior. +// +// Be careful not to confuse a null `string_view` with an empty one. A null +// `string_view` is an empty `string_view`, but some empty `string_view`s are +// not null. Prefer checking for emptiness over checking for null. +// +// There are many ways to create an empty string_view: +// +// const char* nullcp = nullptr; +// // string_view.size() will return 0 in all cases. +// y_absl::string_view(); +// y_absl::string_view(nullcp, 0); +// y_absl::string_view(""); +// y_absl::string_view("", 0); +// y_absl::string_view("abcdef", 0); +// y_absl::string_view("abcdef" + 6, 0); +// +// All empty `string_view` objects whether null or not, are equal: +// +// y_absl::string_view() == y_absl::string_view("", 0) +// y_absl::string_view(nullptr, 0) == y_absl::string_view("abcdef"+6, 0) +class string_view { + public: + using traits_type = std::char_traits<char>; + using value_type = char; + using pointer = char*; + using const_pointer = const char*; + using reference = char&; + using const_reference = const char&; + using const_iterator = const char*; + using iterator = const_iterator; + using const_reverse_iterator = std::reverse_iterator<const_iterator>; + using reverse_iterator = const_reverse_iterator; + using size_type = size_t; + using difference_type = std::ptrdiff_t; + + static constexpr size_type npos = static_cast<size_type>(-1); + + // Null `string_view` constructor + constexpr string_view() noexcept : ptr_(nullptr), length_(0) {} + + // Implicit constructors + + template <typename Allocator> + string_view( // NOLINT(runtime/explicit) + const std::basic_string<char, std::char_traits<char>, Allocator>& str + ABSL_ATTRIBUTE_LIFETIME_BOUND) noexcept + // This is implemented in terms of `string_view(p, n)` so `str.size()` + // doesn't need to be reevaluated after `ptr_` is set. + // The length check is also skipped since it is unnecessary and causes + // code bloat. + : string_view(str.data(), str.size(), SkipCheckLengthTag{}) {} + + // Implicit constructor of a `string_view` from NUL-terminated `str`. When + // accepting possibly null strings, use `y_absl::NullSafeStringView(str)` + // instead (see below). + // The length check is skipped since it is unnecessary and causes code bloat. + constexpr string_view(const char* str) // NOLINT(runtime/explicit) + : ptr_(str), length_(str ? StrlenInternal(str) : 0) {} + + // Implicit constructor of a `string_view` from a `const char*` and length. + constexpr string_view(const char* data, size_type len) + : ptr_(data), length_(CheckLengthInternal(len)) {} + + // NOTE: Harmlessly omitted to work around gdb bug. + // constexpr string_view(const string_view&) noexcept = default; + // string_view& operator=(const string_view&) noexcept = default; + + // Iterators + + // string_view::begin() + // + // Returns an iterator pointing to the first character at the beginning of the + // `string_view`, or `end()` if the `string_view` is empty. + constexpr const_iterator begin() const noexcept { return ptr_; } + + // string_view::end() + // + // Returns an iterator pointing just beyond the last character at the end of + // the `string_view`. This iterator acts as a placeholder; attempting to + // access it results in undefined behavior. + constexpr const_iterator end() const noexcept { return ptr_ + length_; } + + // string_view::cbegin() + // + // Returns a const iterator pointing to the first character at the beginning + // of the `string_view`, or `end()` if the `string_view` is empty. + constexpr const_iterator cbegin() const noexcept { return begin(); } + + // string_view::cend() + // + // Returns a const iterator pointing just beyond the last character at the end + // of the `string_view`. This pointer acts as a placeholder; attempting to + // access its element results in undefined behavior. + constexpr const_iterator cend() const noexcept { return end(); } + + // string_view::rbegin() + // + // Returns a reverse iterator pointing to the last character at the end of the + // `string_view`, or `rend()` if the `string_view` is empty. + const_reverse_iterator rbegin() const noexcept { + return const_reverse_iterator(end()); + } + + // string_view::rend() + // + // Returns a reverse iterator pointing just before the first character at the + // beginning of the `string_view`. This pointer acts as a placeholder; + // attempting to access its element results in undefined behavior. + const_reverse_iterator rend() const noexcept { + return const_reverse_iterator(begin()); + } + + // string_view::crbegin() + // + // Returns a const reverse iterator pointing to the last character at the end + // of the `string_view`, or `crend()` if the `string_view` is empty. + const_reverse_iterator crbegin() const noexcept { return rbegin(); } + + // string_view::crend() + // + // Returns a const reverse iterator pointing just before the first character + // at the beginning of the `string_view`. This pointer acts as a placeholder; + // attempting to access its element results in undefined behavior. + const_reverse_iterator crend() const noexcept { return rend(); } + + // Capacity Utilities + + // string_view::size() + // + // Returns the number of characters in the `string_view`. + constexpr size_type size() const noexcept { return length_; } + + // string_view::length() + // + // Returns the number of characters in the `string_view`. Alias for `size()`. + constexpr size_type length() const noexcept { return size(); } + + // string_view::max_size() + // + // Returns the maximum number of characters the `string_view` can hold. + constexpr size_type max_size() const noexcept { return kMaxSize; } + + // string_view::empty() + // + // Checks if the `string_view` is empty (refers to no characters). + constexpr bool empty() const noexcept { return length_ == 0; } + + // string_view::operator[] + // + // Returns the ith element of the `string_view` using the array operator. + // Note that this operator does not perform any bounds checking. + constexpr const_reference operator[](size_type i) const { + return ABSL_HARDENING_ASSERT(i < size()), ptr_[i]; + } + + // string_view::at() + // + // Returns the ith element of the `string_view`. Bounds checking is performed, + // and an exception of type `std::out_of_range` will be thrown on invalid + // access. + constexpr const_reference at(size_type i) const { + return ABSL_PREDICT_TRUE(i < size()) + ? ptr_[i] + : ((void)base_internal::ThrowStdOutOfRange( + "y_absl::string_view::at"), + ptr_[i]); + } + + // string_view::front() + // + // Returns the first element of a `string_view`. + constexpr const_reference front() const { + return ABSL_HARDENING_ASSERT(!empty()), ptr_[0]; + } + + // string_view::back() + // + // Returns the last element of a `string_view`. + constexpr const_reference back() const { + return ABSL_HARDENING_ASSERT(!empty()), ptr_[size() - 1]; + } + + // string_view::data() + // + // Returns a pointer to the underlying character array (which is of course + // stored elsewhere). Note that `string_view::data()` may contain embedded nul + // characters, but the returned buffer may or may not be NUL-terminated; + // therefore, do not pass `data()` to a routine that expects a NUL-terminated + // string. + constexpr const_pointer data() const noexcept { return ptr_; } + + // Modifiers + + // string_view::remove_prefix() + // + // Removes the first `n` characters from the `string_view`. Note that the + // underlying string is not changed, only the view. + ABSL_INTERNAL_STRING_VIEW_CXX14_CONSTEXPR void remove_prefix(size_type n) { + ABSL_HARDENING_ASSERT(n <= length_); + ptr_ += n; + length_ -= n; + } + + // string_view::remove_suffix() + // + // Removes the last `n` characters from the `string_view`. Note that the + // underlying string is not changed, only the view. + ABSL_INTERNAL_STRING_VIEW_CXX14_CONSTEXPR void remove_suffix(size_type n) { + ABSL_HARDENING_ASSERT(n <= length_); + length_ -= n; + } + + // string_view::swap() + // + // Swaps this `string_view` with another `string_view`. + ABSL_INTERNAL_STRING_VIEW_CXX14_CONSTEXPR void swap(string_view& s) noexcept { + auto t = *this; + *this = s; + s = t; + } + + // Explicit conversion operators + + // Converts to `std::basic_string`. + template <typename A> + explicit operator std::basic_string<char, traits_type, A>() const { + if (!data()) return {}; + return std::basic_string<char, traits_type, A>(data(), size()); + } + + // string_view::copy() + // + // Copies the contents of the `string_view` at offset `pos` and length `n` + // into `buf`. + size_type copy(char* buf, size_type n, size_type pos = 0) const { + if (ABSL_PREDICT_FALSE(pos > length_)) { + base_internal::ThrowStdOutOfRange("y_absl::string_view::copy"); + } + size_type rlen = (std::min)(length_ - pos, n); + if (rlen > 0) { + const char* start = ptr_ + pos; + traits_type::copy(buf, start, rlen); + } + return rlen; + } + + // string_view::substr() + // + // Returns a "substring" of the `string_view` (at offset `pos` and length + // `n`) as another string_view. This function throws `std::out_of_bounds` if + // `pos > size`. + // Use y_absl::ClippedSubstr if you need a truncating substr operation. + constexpr string_view substr(size_type pos = 0, size_type n = npos) const { + return ABSL_PREDICT_FALSE(pos > length_) + ? (base_internal::ThrowStdOutOfRange( + "y_absl::string_view::substr"), + string_view()) + : string_view(ptr_ + pos, Min(n, length_ - pos)); + } + + // string_view::compare() + // + // Performs a lexicographical comparison between this `string_view` and + // another `string_view` `x`, returning a negative value if `*this` is less + // than `x`, 0 if `*this` is equal to `x`, and a positive value if `*this` + // is greater than `x`. + constexpr int compare(string_view x) const noexcept { + return CompareImpl(length_, x.length_, + Min(length_, x.length_) == 0 + ? 0 + : ABSL_INTERNAL_STRING_VIEW_MEMCMP( + ptr_, x.ptr_, Min(length_, x.length_))); + } + + // Overload of `string_view::compare()` for comparing a substring of the + // 'string_view` and another `y_absl::string_view`. + constexpr int compare(size_type pos1, size_type count1, string_view v) const { + return substr(pos1, count1).compare(v); + } + + // Overload of `string_view::compare()` for comparing a substring of the + // `string_view` and a substring of another `y_absl::string_view`. + constexpr int compare(size_type pos1, size_type count1, string_view v, + size_type pos2, size_type count2) const { + return substr(pos1, count1).compare(v.substr(pos2, count2)); + } + + // Overload of `string_view::compare()` for comparing a `string_view` and a + // a different C-style string `s`. + constexpr int compare(const char* s) const { return compare(string_view(s)); } + + // Overload of `string_view::compare()` for comparing a substring of the + // `string_view` and a different string C-style string `s`. + constexpr int compare(size_type pos1, size_type count1, const char* s) const { + return substr(pos1, count1).compare(string_view(s)); + } + + // Overload of `string_view::compare()` for comparing a substring of the + // `string_view` and a substring of a different C-style string `s`. + constexpr int compare(size_type pos1, size_type count1, const char* s, + size_type count2) const { + return substr(pos1, count1).compare(string_view(s, count2)); + } + + // Find Utilities + + // string_view::find() + // + // Finds the first occurrence of the substring `s` within the `string_view`, + // returning the position of the first character's match, or `npos` if no + // match was found. + size_type find(string_view s, size_type pos = 0) const noexcept; + + // Overload of `string_view::find()` for finding the given character `c` + // within the `string_view`. + size_type find(char c, size_type pos = 0) const noexcept; + + // Overload of `string_view::find()` for finding a substring of a different + // C-style string `s` within the `string_view`. + size_type find(const char* s, size_type pos, size_type count) const { + return find(string_view(s, count), pos); + } + + // Overload of `string_view::find()` for finding a different C-style string + // `s` within the `string_view`. + size_type find(const char* s, size_type pos = 0) const { + return find(string_view(s), pos); + } + + // string_view::rfind() + // + // Finds the last occurrence of a substring `s` within the `string_view`, + // returning the position of the first character's match, or `npos` if no + // match was found. + size_type rfind(string_view s, size_type pos = npos) const noexcept; + + // Overload of `string_view::rfind()` for finding the last given character `c` + // within the `string_view`. + size_type rfind(char c, size_type pos = npos) const noexcept; + + // Overload of `string_view::rfind()` for finding a substring of a different + // C-style string `s` within the `string_view`. + size_type rfind(const char* s, size_type pos, size_type count) const { + return rfind(string_view(s, count), pos); + } + + // Overload of `string_view::rfind()` for finding a different C-style string + // `s` within the `string_view`. + size_type rfind(const char* s, size_type pos = npos) const { + return rfind(string_view(s), pos); + } + + // string_view::find_first_of() + // + // Finds the first occurrence of any of the characters in `s` within the + // `string_view`, returning the start position of the match, or `npos` if no + // match was found. + size_type find_first_of(string_view s, size_type pos = 0) const noexcept; + + // Overload of `string_view::find_first_of()` for finding a character `c` + // within the `string_view`. + size_type find_first_of(char c, size_type pos = 0) const noexcept { + return find(c, pos); + } + + // Overload of `string_view::find_first_of()` for finding a substring of a + // different C-style string `s` within the `string_view`. + size_type find_first_of(const char* s, size_type pos, + size_type count) const { + return find_first_of(string_view(s, count), pos); + } + + // Overload of `string_view::find_first_of()` for finding a different C-style + // string `s` within the `string_view`. + size_type find_first_of(const char* s, size_type pos = 0) const { + return find_first_of(string_view(s), pos); + } + + // string_view::find_last_of() + // + // Finds the last occurrence of any of the characters in `s` within the + // `string_view`, returning the start position of the match, or `npos` if no + // match was found. + size_type find_last_of(string_view s, size_type pos = npos) const noexcept; + + // Overload of `string_view::find_last_of()` for finding a character `c` + // within the `string_view`. + size_type find_last_of(char c, size_type pos = npos) const noexcept { + return rfind(c, pos); + } + + // Overload of `string_view::find_last_of()` for finding a substring of a + // different C-style string `s` within the `string_view`. + size_type find_last_of(const char* s, size_type pos, size_type count) const { + return find_last_of(string_view(s, count), pos); + } + + // Overload of `string_view::find_last_of()` for finding a different C-style + // string `s` within the `string_view`. + size_type find_last_of(const char* s, size_type pos = npos) const { + return find_last_of(string_view(s), pos); + } + + // string_view::find_first_not_of() + // + // Finds the first occurrence of any of the characters not in `s` within the + // `string_view`, returning the start position of the first non-match, or + // `npos` if no non-match was found. + size_type find_first_not_of(string_view s, size_type pos = 0) const noexcept; + + // Overload of `string_view::find_first_not_of()` for finding a character + // that is not `c` within the `string_view`. + size_type find_first_not_of(char c, size_type pos = 0) const noexcept; + + // Overload of `string_view::find_first_not_of()` for finding a substring of a + // different C-style string `s` within the `string_view`. + size_type find_first_not_of(const char* s, size_type pos, + size_type count) const { + return find_first_not_of(string_view(s, count), pos); + } + + // Overload of `string_view::find_first_not_of()` for finding a different + // C-style string `s` within the `string_view`. + size_type find_first_not_of(const char* s, size_type pos = 0) const { + return find_first_not_of(string_view(s), pos); + } + + // string_view::find_last_not_of() + // + // Finds the last occurrence of any of the characters not in `s` within the + // `string_view`, returning the start position of the last non-match, or + // `npos` if no non-match was found. + size_type find_last_not_of(string_view s, + size_type pos = npos) const noexcept; + + // Overload of `string_view::find_last_not_of()` for finding a character + // that is not `c` within the `string_view`. + size_type find_last_not_of(char c, size_type pos = npos) const noexcept; + + // Overload of `string_view::find_last_not_of()` for finding a substring of a + // different C-style string `s` within the `string_view`. + size_type find_last_not_of(const char* s, size_type pos, + size_type count) const { + return find_last_not_of(string_view(s, count), pos); + } + + // Overload of `string_view::find_last_not_of()` for finding a different + // C-style string `s` within the `string_view`. + size_type find_last_not_of(const char* s, size_type pos = npos) const { + return find_last_not_of(string_view(s), pos); + } + + private: + // The constructor from TString delegates to this constructor. + // See the comment on that constructor for the rationale. + struct SkipCheckLengthTag {}; + string_view(const char* data, size_type len, SkipCheckLengthTag) noexcept + : ptr_(data), length_(len) {} + + static constexpr size_type kMaxSize = + (std::numeric_limits<difference_type>::max)(); + + static constexpr size_type CheckLengthInternal(size_type len) { + return ABSL_HARDENING_ASSERT(len <= kMaxSize), len; + } + + static constexpr size_type StrlenInternal(const char* str) { +#if defined(_MSC_VER) && _MSC_VER >= 1910 && !defined(__clang__) + // MSVC 2017+ can evaluate this at compile-time. + const char* begin = str; + while (*str != '\0') ++str; + return str - begin; +#elif ABSL_HAVE_BUILTIN(__builtin_strlen) || \ + (defined(__GNUC__) && !defined(__clang__)) + // GCC has __builtin_strlen according to + // https://gcc.gnu.org/onlinedocs/gcc-4.7.0/gcc/Other-Builtins.html, but + // ABSL_HAVE_BUILTIN doesn't detect that, so we use the extra checks above. + // __builtin_strlen is constexpr. + return __builtin_strlen(str); +#else + return str ? strlen(str) : 0; +#endif + } + + static constexpr size_t Min(size_type length_a, size_type length_b) { + return length_a < length_b ? length_a : length_b; + } + + static constexpr int CompareImpl(size_type length_a, size_type length_b, + int compare_result) { + return compare_result == 0 ? static_cast<int>(length_a > length_b) - + static_cast<int>(length_a < length_b) + : (compare_result < 0 ? -1 : 1); + } + + const char* ptr_; + size_type length_; +}; + +// This large function is defined inline so that in a fairly common case where +// one of the arguments is a literal, the compiler can elide a lot of the +// following comparisons. +constexpr bool operator==(string_view x, string_view y) noexcept { + return x.size() == y.size() && + (x.empty() || + ABSL_INTERNAL_STRING_VIEW_MEMCMP(x.data(), y.data(), x.size()) == 0); +} + +constexpr bool operator!=(string_view x, string_view y) noexcept { + return !(x == y); +} + +constexpr bool operator<(string_view x, string_view y) noexcept { + return x.compare(y) < 0; +} + +constexpr bool operator>(string_view x, string_view y) noexcept { + return y < x; +} + +constexpr bool operator<=(string_view x, string_view y) noexcept { + return !(y < x); +} + +constexpr bool operator>=(string_view x, string_view y) noexcept { + return !(x < y); +} + +// IO Insertion Operator +std::ostream& operator<<(std::ostream& o, string_view piece); + +ABSL_NAMESPACE_END +} // namespace y_absl + +#undef ABSL_INTERNAL_STRING_VIEW_CXX14_CONSTEXPR +#undef ABSL_INTERNAL_STRING_VIEW_MEMCMP + +#endif // ABSL_USES_STD_STRING_VIEW + +namespace y_absl { +ABSL_NAMESPACE_BEGIN + +// ClippedSubstr() +// +// Like `s.substr(pos, n)`, but clips `pos` to an upper bound of `s.size()`. +// Provided because std::string_view::substr throws if `pos > size()` +inline string_view ClippedSubstr(string_view s, size_t pos, + size_t n = string_view::npos) { + pos = (std::min)(pos, static_cast<size_t>(s.size())); + return s.substr(pos, n); +} + +// NullSafeStringView() +// +// Creates an `y_absl::string_view` from a pointer `p` even if it's null-valued. +// This function should be used where an `y_absl::string_view` can be created from +// a possibly-null pointer. +constexpr string_view NullSafeStringView(const char* p) { + return p ? string_view(p) : string_view(); +} + +ABSL_NAMESPACE_END +} // namespace y_absl + +#endif // ABSL_STRINGS_STRING_VIEW_H_ diff --git a/contrib/restricted/abseil-cpp-tstring/y_absl/strings/strip.h b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/strip.h new file mode 100644 index 0000000000..3164ff1ebc --- /dev/null +++ b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/strip.h @@ -0,0 +1,91 @@ +// +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// ----------------------------------------------------------------------------- +// File: strip.h +// ----------------------------------------------------------------------------- +// +// This file contains various functions for stripping substrings from a string. +#ifndef ABSL_STRINGS_STRIP_H_ +#define ABSL_STRINGS_STRIP_H_ + +#include <cstddef> +#include <util/generic/string.h> + +#include "y_absl/base/macros.h" +#include "y_absl/strings/ascii.h" +#include "y_absl/strings/match.h" +#include "y_absl/strings/string_view.h" + +namespace y_absl { +ABSL_NAMESPACE_BEGIN + +// ConsumePrefix() +// +// Strips the `expected` prefix from the start of the given string, returning +// `true` if the strip operation succeeded or false otherwise. +// +// Example: +// +// y_absl::string_view input("abc"); +// EXPECT_TRUE(y_absl::ConsumePrefix(&input, "a")); +// EXPECT_EQ(input, "bc"); +inline bool ConsumePrefix(y_absl::string_view* str, y_absl::string_view expected) { + if (!y_absl::StartsWith(*str, expected)) return false; + str->remove_prefix(expected.size()); + return true; +} +// ConsumeSuffix() +// +// Strips the `expected` suffix from the end of the given string, returning +// `true` if the strip operation succeeded or false otherwise. +// +// Example: +// +// y_absl::string_view input("abcdef"); +// EXPECT_TRUE(y_absl::ConsumeSuffix(&input, "def")); +// EXPECT_EQ(input, "abc"); +inline bool ConsumeSuffix(y_absl::string_view* str, y_absl::string_view expected) { + if (!y_absl::EndsWith(*str, expected)) return false; + str->remove_suffix(expected.size()); + return true; +} + +// StripPrefix() +// +// Returns a view into the input string 'str' with the given 'prefix' removed, +// but leaving the original string intact. If the prefix does not match at the +// start of the string, returns the original string instead. +ABSL_MUST_USE_RESULT inline y_absl::string_view StripPrefix( + y_absl::string_view str, y_absl::string_view prefix) { + if (y_absl::StartsWith(str, prefix)) str.remove_prefix(prefix.size()); + return str; +} + +// StripSuffix() +// +// Returns a view into the input string 'str' with the given 'suffix' removed, +// but leaving the original string intact. If the suffix does not match at the +// end of the string, returns the original string instead. +ABSL_MUST_USE_RESULT inline y_absl::string_view StripSuffix( + y_absl::string_view str, y_absl::string_view suffix) { + if (y_absl::EndsWith(str, suffix)) str.remove_suffix(suffix.size()); + return str; +} + +ABSL_NAMESPACE_END +} // namespace y_absl + +#endif // ABSL_STRINGS_STRIP_H_ diff --git a/contrib/restricted/abseil-cpp-tstring/y_absl/strings/substitute.cc b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/substitute.cc new file mode 100644 index 0000000000..177fba8cbe --- /dev/null +++ b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/substitute.cc @@ -0,0 +1,172 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "y_absl/strings/substitute.h" + +#include <algorithm> + +#include "y_absl/base/internal/raw_logging.h" +#include "y_absl/strings/ascii.h" +#include "y_absl/strings/escaping.h" +#include "y_absl/strings/internal/resize_uninitialized.h" +#include "y_absl/strings/string_view.h" + +namespace y_absl { +ABSL_NAMESPACE_BEGIN +namespace substitute_internal { + +void SubstituteAndAppendArray(TString* output, y_absl::string_view format, + const y_absl::string_view* args_array, + size_t num_args) { + // Determine total size needed. + size_t size = 0; + for (size_t i = 0; i < format.size(); i++) { + if (format[i] == '$') { + if (i + 1 >= format.size()) { +#ifndef NDEBUG + ABSL_RAW_LOG(FATAL, + "Invalid y_absl::Substitute() format string: \"%s\".", + y_absl::CEscape(format).c_str()); +#endif + return; + } else if (y_absl::ascii_isdigit(format[i + 1])) { + int index = format[i + 1] - '0'; + if (static_cast<size_t>(index) >= num_args) { +#ifndef NDEBUG + ABSL_RAW_LOG( + FATAL, + "Invalid y_absl::Substitute() format string: asked for \"$" + "%d\", but only %d args were given. Full format string was: " + "\"%s\".", + index, static_cast<int>(num_args), y_absl::CEscape(format).c_str()); +#endif + return; + } + size += args_array[index].size(); + ++i; // Skip next char. + } else if (format[i + 1] == '$') { + ++size; + ++i; // Skip next char. + } else { +#ifndef NDEBUG + ABSL_RAW_LOG(FATAL, + "Invalid y_absl::Substitute() format string: \"%s\".", + y_absl::CEscape(format).c_str()); +#endif + return; + } + } else { + ++size; + } + } + + if (size == 0) return; + + // Build the string. + size_t original_size = output->size(); + strings_internal::STLStringResizeUninitializedAmortized(output, + original_size + size); + char* target = &(*output)[original_size]; + for (size_t i = 0; i < format.size(); i++) { + if (format[i] == '$') { + if (y_absl::ascii_isdigit(format[i + 1])) { + const y_absl::string_view src = args_array[format[i + 1] - '0']; + target = std::copy(src.begin(), src.end(), target); + ++i; // Skip next char. + } else if (format[i + 1] == '$') { + *target++ = '$'; + ++i; // Skip next char. + } + } else { + *target++ = format[i]; + } + } + + assert(target == output->data() + output->size()); +} + +Arg::Arg(const void* value) { + static_assert(sizeof(scratch_) >= sizeof(value) * 2 + 2, + "fix sizeof(scratch_)"); + if (value == nullptr) { + piece_ = "NULL"; + } else { + char* ptr = scratch_ + sizeof(scratch_); + uintptr_t num = reinterpret_cast<uintptr_t>(value); + do { + *--ptr = y_absl::numbers_internal::kHexChar[num & 0xf]; + num >>= 4; + } while (num != 0); + *--ptr = 'x'; + *--ptr = '0'; + piece_ = y_absl::string_view(ptr, scratch_ + sizeof(scratch_) - ptr); + } +} + +// TODO(jorg): Don't duplicate so much code between here and str_cat.cc +Arg::Arg(Hex hex) { + char* const end = &scratch_[numbers_internal::kFastToBufferSize]; + char* writer = end; + uint64_t value = hex.value; + do { + *--writer = y_absl::numbers_internal::kHexChar[value & 0xF]; + value >>= 4; + } while (value != 0); + + char* beg; + if (end - writer < hex.width) { + beg = end - hex.width; + std::fill_n(beg, writer - beg, hex.fill); + } else { + beg = writer; + } + + piece_ = y_absl::string_view(beg, end - beg); +} + +// TODO(jorg): Don't duplicate so much code between here and str_cat.cc +Arg::Arg(Dec dec) { + assert(dec.width <= numbers_internal::kFastToBufferSize); + char* const end = &scratch_[numbers_internal::kFastToBufferSize]; + char* const minfill = end - dec.width; + char* writer = end; + uint64_t value = dec.value; + bool neg = dec.neg; + while (value > 9) { + *--writer = '0' + (value % 10); + value /= 10; + } + *--writer = '0' + value; + if (neg) *--writer = '-'; + + ptrdiff_t fillers = writer - minfill; + if (fillers > 0) { + // Tricky: if the fill character is ' ', then it's <fill><+/-><digits> + // But...: if the fill character is '0', then it's <+/-><fill><digits> + bool add_sign_again = false; + if (neg && dec.fill == '0') { // If filling with '0', + ++writer; // ignore the sign we just added + add_sign_again = true; // and re-add the sign later. + } + writer -= fillers; + std::fill_n(writer, fillers, dec.fill); + if (add_sign_again) *--writer = '-'; + } + + piece_ = y_absl::string_view(writer, end - writer); +} + +} // namespace substitute_internal +ABSL_NAMESPACE_END +} // namespace y_absl diff --git a/contrib/restricted/abseil-cpp-tstring/y_absl/strings/substitute.h b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/substitute.h new file mode 100644 index 0000000000..c31191fbda --- /dev/null +++ b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/substitute.h @@ -0,0 +1,723 @@ +// +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// ----------------------------------------------------------------------------- +// File: substitute.h +// ----------------------------------------------------------------------------- +// +// This package contains functions for efficiently performing string +// substitutions using a format string with positional notation: +// `Substitute()` and `SubstituteAndAppend()`. +// +// Unlike printf-style format specifiers, `Substitute()` functions do not need +// to specify the type of the substitution arguments. Supported arguments +// following the format string, such as strings, string_views, ints, +// floats, and bools, are automatically converted to strings during the +// substitution process. (See below for a full list of supported types.) +// +// `Substitute()` does not allow you to specify *how* to format a value, beyond +// the default conversion to string. For example, you cannot format an integer +// in hex. +// +// The format string uses positional identifiers indicated by a dollar sign ($) +// and single digit positional ids to indicate which substitution arguments to +// use at that location within the format string. +// +// A '$$' sequence in the format string causes a literal '$' character to be +// output. +// +// Example 1: +// TString s = Substitute("$1 purchased $0 $2 for $$10. Thanks $1!", +// 5, "Bob", "Apples"); +// EXPECT_EQ("Bob purchased 5 Apples for $10. Thanks Bob!", s); +// +// Example 2: +// TString s = "Hi. "; +// SubstituteAndAppend(&s, "My name is $0 and I am $1 years old.", "Bob", 5); +// EXPECT_EQ("Hi. My name is Bob and I am 5 years old.", s); +// +// Supported types: +// * y_absl::string_view, TString, const char* (null is equivalent to "") +// * int32_t, int64_t, uint32_t, uint64_t +// * float, double +// * bool (Printed as "true" or "false") +// * pointer types other than char* (Printed as "0x<lower case hex string>", +// except that null is printed as "NULL") +// +// If an invalid format string is provided, Substitute returns an empty string +// and SubstituteAndAppend does not change the provided output string. +// A format string is invalid if it: +// * ends in an unescaped $ character, +// e.g. "Hello $", or +// * calls for a position argument which is not provided, +// e.g. Substitute("Hello $2", "world"), or +// * specifies a non-digit, non-$ character after an unescaped $ character, +// e.g. "Hello $f". +// In debug mode, i.e. #ifndef NDEBUG, such errors terminate the program. + +#ifndef ABSL_STRINGS_SUBSTITUTE_H_ +#define ABSL_STRINGS_SUBSTITUTE_H_ + +#include <cstring> +#include <util/generic/string.h> +#include <type_traits> +#include <vector> + +#include "y_absl/base/macros.h" +#include "y_absl/base/port.h" +#include "y_absl/strings/ascii.h" +#include "y_absl/strings/escaping.h" +#include "y_absl/strings/numbers.h" +#include "y_absl/strings/str_cat.h" +#include "y_absl/strings/str_split.h" +#include "y_absl/strings/string_view.h" +#include "y_absl/strings/strip.h" + +namespace y_absl { +ABSL_NAMESPACE_BEGIN +namespace substitute_internal { + +// Arg +// +// This class provides an argument type for `y_absl::Substitute()` and +// `y_absl::SubstituteAndAppend()`. `Arg` handles implicit conversion of various +// types to a string. (`Arg` is very similar to the `AlphaNum` class in +// `StrCat()`.) +// +// This class has implicit constructors. +class Arg { + public: + // Overloads for string-y things + // + // Explicitly overload `const char*` so the compiler doesn't cast to `bool`. + Arg(const char* value) // NOLINT(runtime/explicit) + : piece_(y_absl::NullSafeStringView(value)) {} + template <typename Allocator> + Arg( // NOLINT + const std::basic_string<char, std::char_traits<char>, Allocator>& + value) noexcept + : piece_(value) {} + Arg(y_absl::string_view value) // NOLINT(runtime/explicit) + : piece_(value) {} + Arg(const TString& s) + : piece_(s.data(), s.size()) {} + + // Overloads for primitives + // + // No overloads are available for signed and unsigned char because if people + // are explicitly declaring their chars as signed or unsigned then they are + // probably using them as 8-bit integers and would probably prefer an integer + // representation. However, we can't really know, so we make the caller decide + // what to do. + Arg(char value) // NOLINT(runtime/explicit) + : piece_(scratch_, 1) { + scratch_[0] = value; + } + Arg(short value) // NOLINT(*) + : piece_(scratch_, + numbers_internal::FastIntToBuffer(value, scratch_) - scratch_) {} + Arg(unsigned short value) // NOLINT(*) + : piece_(scratch_, + numbers_internal::FastIntToBuffer(value, scratch_) - scratch_) {} + Arg(int value) // NOLINT(runtime/explicit) + : piece_(scratch_, + numbers_internal::FastIntToBuffer(value, scratch_) - scratch_) {} + Arg(unsigned int value) // NOLINT(runtime/explicit) + : piece_(scratch_, + numbers_internal::FastIntToBuffer(value, scratch_) - scratch_) {} + Arg(long value) // NOLINT(*) + : piece_(scratch_, + numbers_internal::FastIntToBuffer(value, scratch_) - scratch_) {} + Arg(unsigned long value) // NOLINT(*) + : piece_(scratch_, + numbers_internal::FastIntToBuffer(value, scratch_) - scratch_) {} + Arg(long long value) // NOLINT(*) + : piece_(scratch_, + numbers_internal::FastIntToBuffer(value, scratch_) - scratch_) {} + Arg(unsigned long long value) // NOLINT(*) + : piece_(scratch_, + numbers_internal::FastIntToBuffer(value, scratch_) - scratch_) {} + Arg(float value) // NOLINT(runtime/explicit) + : piece_(scratch_, numbers_internal::SixDigitsToBuffer(value, scratch_)) { + } + Arg(double value) // NOLINT(runtime/explicit) + : piece_(scratch_, numbers_internal::SixDigitsToBuffer(value, scratch_)) { + } + Arg(bool value) // NOLINT(runtime/explicit) + : piece_(value ? "true" : "false") {} + + Arg(Hex hex); // NOLINT(runtime/explicit) + Arg(Dec dec); // NOLINT(runtime/explicit) + + // vector<bool>::reference and const_reference require special help to + // convert to `AlphaNum` because it requires two user defined conversions. + template <typename T, + y_absl::enable_if_t< + std::is_class<T>::value && + (std::is_same<T, std::vector<bool>::reference>::value || + std::is_same<T, std::vector<bool>::const_reference>::value)>* = + nullptr> + Arg(T value) // NOLINT(google-explicit-constructor) + : Arg(static_cast<bool>(value)) {} + + // `void*` values, with the exception of `char*`, are printed as + // "0x<hex value>". However, in the case of `nullptr`, "NULL" is printed. + Arg(const void* value); // NOLINT(runtime/explicit) + + Arg(const Arg&) = delete; + Arg& operator=(const Arg&) = delete; + + y_absl::string_view piece() const { return piece_; } + + private: + y_absl::string_view piece_; + char scratch_[numbers_internal::kFastToBufferSize]; +}; + +// Internal helper function. Don't call this from outside this implementation. +// This interface may change without notice. +void SubstituteAndAppendArray(TString* output, y_absl::string_view format, + const y_absl::string_view* args_array, + size_t num_args); + +#if defined(ABSL_BAD_CALL_IF) +constexpr int CalculateOneBit(const char* format) { + // Returns: + // * 2^N for '$N' when N is in [0-9] + // * 0 for correct '$' escaping: '$$'. + // * -1 otherwise. + return (*format < '0' || *format > '9') ? (*format == '$' ? 0 : -1) + : (1 << (*format - '0')); +} + +constexpr const char* SkipNumber(const char* format) { + return !*format ? format : (format + 1); +} + +constexpr int PlaceholderBitmask(const char* format) { + return !*format + ? 0 + : *format != '$' ? PlaceholderBitmask(format + 1) + : (CalculateOneBit(format + 1) | + PlaceholderBitmask(SkipNumber(format + 1))); +} +#endif // ABSL_BAD_CALL_IF + +} // namespace substitute_internal + +// +// PUBLIC API +// + +// SubstituteAndAppend() +// +// Substitutes variables into a given format string and appends to a given +// output string. See file comments above for usage. +// +// The declarations of `SubstituteAndAppend()` below consist of overloads +// for passing 0 to 10 arguments, respectively. +// +// NOTE: A zero-argument `SubstituteAndAppend()` may be used within variadic +// templates to allow a variable number of arguments. +// +// Example: +// template <typename... Args> +// void VarMsg(TString* boilerplate, y_absl::string_view format, +// const Args&... args) { +// y_absl::SubstituteAndAppend(boilerplate, format, args...); +// } +// +inline void SubstituteAndAppend(TString* output, y_absl::string_view format) { + substitute_internal::SubstituteAndAppendArray(output, format, nullptr, 0); +} + +inline void SubstituteAndAppend(TString* output, y_absl::string_view format, + const substitute_internal::Arg& a0) { + const y_absl::string_view args[] = {a0.piece()}; + substitute_internal::SubstituteAndAppendArray(output, format, args, + ABSL_ARRAYSIZE(args)); +} + +inline void SubstituteAndAppend(TString* output, y_absl::string_view format, + const substitute_internal::Arg& a0, + const substitute_internal::Arg& a1) { + const y_absl::string_view args[] = {a0.piece(), a1.piece()}; + substitute_internal::SubstituteAndAppendArray(output, format, args, + ABSL_ARRAYSIZE(args)); +} + +inline void SubstituteAndAppend(TString* output, y_absl::string_view format, + const substitute_internal::Arg& a0, + const substitute_internal::Arg& a1, + const substitute_internal::Arg& a2) { + const y_absl::string_view args[] = {a0.piece(), a1.piece(), a2.piece()}; + substitute_internal::SubstituteAndAppendArray(output, format, args, + ABSL_ARRAYSIZE(args)); +} + +inline void SubstituteAndAppend(TString* output, y_absl::string_view format, + const substitute_internal::Arg& a0, + const substitute_internal::Arg& a1, + const substitute_internal::Arg& a2, + const substitute_internal::Arg& a3) { + const y_absl::string_view args[] = {a0.piece(), a1.piece(), a2.piece(), + a3.piece()}; + substitute_internal::SubstituteAndAppendArray(output, format, args, + ABSL_ARRAYSIZE(args)); +} + +inline void SubstituteAndAppend(TString* output, y_absl::string_view format, + const substitute_internal::Arg& a0, + const substitute_internal::Arg& a1, + const substitute_internal::Arg& a2, + const substitute_internal::Arg& a3, + const substitute_internal::Arg& a4) { + const y_absl::string_view args[] = {a0.piece(), a1.piece(), a2.piece(), + a3.piece(), a4.piece()}; + substitute_internal::SubstituteAndAppendArray(output, format, args, + ABSL_ARRAYSIZE(args)); +} + +inline void SubstituteAndAppend(TString* output, y_absl::string_view format, + const substitute_internal::Arg& a0, + const substitute_internal::Arg& a1, + const substitute_internal::Arg& a2, + const substitute_internal::Arg& a3, + const substitute_internal::Arg& a4, + const substitute_internal::Arg& a5) { + const y_absl::string_view args[] = {a0.piece(), a1.piece(), a2.piece(), + a3.piece(), a4.piece(), a5.piece()}; + substitute_internal::SubstituteAndAppendArray(output, format, args, + ABSL_ARRAYSIZE(args)); +} + +inline void SubstituteAndAppend(TString* output, y_absl::string_view format, + const substitute_internal::Arg& a0, + const substitute_internal::Arg& a1, + const substitute_internal::Arg& a2, + const substitute_internal::Arg& a3, + const substitute_internal::Arg& a4, + const substitute_internal::Arg& a5, + const substitute_internal::Arg& a6) { + const y_absl::string_view args[] = {a0.piece(), a1.piece(), a2.piece(), + a3.piece(), a4.piece(), a5.piece(), + a6.piece()}; + substitute_internal::SubstituteAndAppendArray(output, format, args, + ABSL_ARRAYSIZE(args)); +} + +inline void SubstituteAndAppend( + TString* output, y_absl::string_view format, + const substitute_internal::Arg& a0, const substitute_internal::Arg& a1, + const substitute_internal::Arg& a2, const substitute_internal::Arg& a3, + const substitute_internal::Arg& a4, const substitute_internal::Arg& a5, + const substitute_internal::Arg& a6, const substitute_internal::Arg& a7) { + const y_absl::string_view args[] = {a0.piece(), a1.piece(), a2.piece(), + a3.piece(), a4.piece(), a5.piece(), + a6.piece(), a7.piece()}; + substitute_internal::SubstituteAndAppendArray(output, format, args, + ABSL_ARRAYSIZE(args)); +} + +inline void SubstituteAndAppend( + TString* output, y_absl::string_view format, + const substitute_internal::Arg& a0, const substitute_internal::Arg& a1, + const substitute_internal::Arg& a2, const substitute_internal::Arg& a3, + const substitute_internal::Arg& a4, const substitute_internal::Arg& a5, + const substitute_internal::Arg& a6, const substitute_internal::Arg& a7, + const substitute_internal::Arg& a8) { + const y_absl::string_view args[] = {a0.piece(), a1.piece(), a2.piece(), + a3.piece(), a4.piece(), a5.piece(), + a6.piece(), a7.piece(), a8.piece()}; + substitute_internal::SubstituteAndAppendArray(output, format, args, + ABSL_ARRAYSIZE(args)); +} + +inline void SubstituteAndAppend( + TString* output, y_absl::string_view format, + const substitute_internal::Arg& a0, const substitute_internal::Arg& a1, + const substitute_internal::Arg& a2, const substitute_internal::Arg& a3, + const substitute_internal::Arg& a4, const substitute_internal::Arg& a5, + const substitute_internal::Arg& a6, const substitute_internal::Arg& a7, + const substitute_internal::Arg& a8, const substitute_internal::Arg& a9) { + const y_absl::string_view args[] = { + a0.piece(), a1.piece(), a2.piece(), a3.piece(), a4.piece(), + a5.piece(), a6.piece(), a7.piece(), a8.piece(), a9.piece()}; + substitute_internal::SubstituteAndAppendArray(output, format, args, + ABSL_ARRAYSIZE(args)); +} + +#if defined(ABSL_BAD_CALL_IF) +// This body of functions catches cases where the number of placeholders +// doesn't match the number of data arguments. +void SubstituteAndAppend(TString* output, const char* format) + ABSL_BAD_CALL_IF( + substitute_internal::PlaceholderBitmask(format) != 0, + "There were no substitution arguments " + "but this format string either has a $[0-9] in it or contains " + "an unescaped $ character (use $$ instead)"); + +void SubstituteAndAppend(TString* output, const char* format, + const substitute_internal::Arg& a0) + ABSL_BAD_CALL_IF(substitute_internal::PlaceholderBitmask(format) != 1, + "There was 1 substitution argument given, but " + "this format string is missing its $0, contains " + "one of $1-$9, or contains an unescaped $ character (use " + "$$ instead)"); + +void SubstituteAndAppend(TString* output, const char* format, + const substitute_internal::Arg& a0, + const substitute_internal::Arg& a1) + ABSL_BAD_CALL_IF( + substitute_internal::PlaceholderBitmask(format) != 3, + "There were 2 substitution arguments given, but this format string is " + "missing its $0/$1, contains one of $2-$9, or contains an " + "unescaped $ character (use $$ instead)"); + +void SubstituteAndAppend(TString* output, const char* format, + const substitute_internal::Arg& a0, + const substitute_internal::Arg& a1, + const substitute_internal::Arg& a2) + ABSL_BAD_CALL_IF( + substitute_internal::PlaceholderBitmask(format) != 7, + "There were 3 substitution arguments given, but " + "this format string is missing its $0/$1/$2, contains one of " + "$3-$9, or contains an unescaped $ character (use $$ instead)"); + +void SubstituteAndAppend(TString* output, const char* format, + const substitute_internal::Arg& a0, + const substitute_internal::Arg& a1, + const substitute_internal::Arg& a2, + const substitute_internal::Arg& a3) + ABSL_BAD_CALL_IF( + substitute_internal::PlaceholderBitmask(format) != 15, + "There were 4 substitution arguments given, but " + "this format string is missing its $0-$3, contains one of " + "$4-$9, or contains an unescaped $ character (use $$ instead)"); + +void SubstituteAndAppend(TString* output, const char* format, + const substitute_internal::Arg& a0, + const substitute_internal::Arg& a1, + const substitute_internal::Arg& a2, + const substitute_internal::Arg& a3, + const substitute_internal::Arg& a4) + ABSL_BAD_CALL_IF( + substitute_internal::PlaceholderBitmask(format) != 31, + "There were 5 substitution arguments given, but " + "this format string is missing its $0-$4, contains one of " + "$5-$9, or contains an unescaped $ character (use $$ instead)"); + +void SubstituteAndAppend(TString* output, const char* format, + const substitute_internal::Arg& a0, + const substitute_internal::Arg& a1, + const substitute_internal::Arg& a2, + const substitute_internal::Arg& a3, + const substitute_internal::Arg& a4, + const substitute_internal::Arg& a5) + ABSL_BAD_CALL_IF( + substitute_internal::PlaceholderBitmask(format) != 63, + "There were 6 substitution arguments given, but " + "this format string is missing its $0-$5, contains one of " + "$6-$9, or contains an unescaped $ character (use $$ instead)"); + +void SubstituteAndAppend( + TString* output, const char* format, const substitute_internal::Arg& a0, + const substitute_internal::Arg& a1, const substitute_internal::Arg& a2, + const substitute_internal::Arg& a3, const substitute_internal::Arg& a4, + const substitute_internal::Arg& a5, const substitute_internal::Arg& a6) + ABSL_BAD_CALL_IF( + substitute_internal::PlaceholderBitmask(format) != 127, + "There were 7 substitution arguments given, but " + "this format string is missing its $0-$6, contains one of " + "$7-$9, or contains an unescaped $ character (use $$ instead)"); + +void SubstituteAndAppend( + TString* output, const char* format, const substitute_internal::Arg& a0, + const substitute_internal::Arg& a1, const substitute_internal::Arg& a2, + const substitute_internal::Arg& a3, const substitute_internal::Arg& a4, + const substitute_internal::Arg& a5, const substitute_internal::Arg& a6, + const substitute_internal::Arg& a7) + ABSL_BAD_CALL_IF( + substitute_internal::PlaceholderBitmask(format) != 255, + "There were 8 substitution arguments given, but " + "this format string is missing its $0-$7, contains one of " + "$8-$9, or contains an unescaped $ character (use $$ instead)"); + +void SubstituteAndAppend( + TString* output, const char* format, const substitute_internal::Arg& a0, + const substitute_internal::Arg& a1, const substitute_internal::Arg& a2, + const substitute_internal::Arg& a3, const substitute_internal::Arg& a4, + const substitute_internal::Arg& a5, const substitute_internal::Arg& a6, + const substitute_internal::Arg& a7, const substitute_internal::Arg& a8) + ABSL_BAD_CALL_IF( + substitute_internal::PlaceholderBitmask(format) != 511, + "There were 9 substitution arguments given, but " + "this format string is missing its $0-$8, contains a $9, or " + "contains an unescaped $ character (use $$ instead)"); + +void SubstituteAndAppend( + TString* output, const char* format, const substitute_internal::Arg& a0, + const substitute_internal::Arg& a1, const substitute_internal::Arg& a2, + const substitute_internal::Arg& a3, const substitute_internal::Arg& a4, + const substitute_internal::Arg& a5, const substitute_internal::Arg& a6, + const substitute_internal::Arg& a7, const substitute_internal::Arg& a8, + const substitute_internal::Arg& a9) + ABSL_BAD_CALL_IF( + substitute_internal::PlaceholderBitmask(format) != 1023, + "There were 10 substitution arguments given, but this " + "format string either doesn't contain all of $0 through $9 or " + "contains an unescaped $ character (use $$ instead)"); +#endif // ABSL_BAD_CALL_IF + +// Substitute() +// +// Substitutes variables into a given format string. See file comments above +// for usage. +// +// The declarations of `Substitute()` below consist of overloads for passing 0 +// to 10 arguments, respectively. +// +// NOTE: A zero-argument `Substitute()` may be used within variadic templates to +// allow a variable number of arguments. +// +// Example: +// template <typename... Args> +// void VarMsg(y_absl::string_view format, const Args&... args) { +// TString s = y_absl::Substitute(format, args...); + +ABSL_MUST_USE_RESULT inline TString Substitute(y_absl::string_view format) { + TString result; + SubstituteAndAppend(&result, format); + return result; +} + +ABSL_MUST_USE_RESULT inline TString Substitute( + y_absl::string_view format, const substitute_internal::Arg& a0) { + TString result; + SubstituteAndAppend(&result, format, a0); + return result; +} + +ABSL_MUST_USE_RESULT inline TString Substitute( + y_absl::string_view format, const substitute_internal::Arg& a0, + const substitute_internal::Arg& a1) { + TString result; + SubstituteAndAppend(&result, format, a0, a1); + return result; +} + +ABSL_MUST_USE_RESULT inline TString Substitute( + y_absl::string_view format, const substitute_internal::Arg& a0, + const substitute_internal::Arg& a1, const substitute_internal::Arg& a2) { + TString result; + SubstituteAndAppend(&result, format, a0, a1, a2); + return result; +} + +ABSL_MUST_USE_RESULT inline TString Substitute( + y_absl::string_view format, const substitute_internal::Arg& a0, + const substitute_internal::Arg& a1, const substitute_internal::Arg& a2, + const substitute_internal::Arg& a3) { + TString result; + SubstituteAndAppend(&result, format, a0, a1, a2, a3); + return result; +} + +ABSL_MUST_USE_RESULT inline TString Substitute( + y_absl::string_view format, const substitute_internal::Arg& a0, + const substitute_internal::Arg& a1, const substitute_internal::Arg& a2, + const substitute_internal::Arg& a3, const substitute_internal::Arg& a4) { + TString result; + SubstituteAndAppend(&result, format, a0, a1, a2, a3, a4); + return result; +} + +ABSL_MUST_USE_RESULT inline TString Substitute( + y_absl::string_view format, const substitute_internal::Arg& a0, + const substitute_internal::Arg& a1, const substitute_internal::Arg& a2, + const substitute_internal::Arg& a3, const substitute_internal::Arg& a4, + const substitute_internal::Arg& a5) { + TString result; + SubstituteAndAppend(&result, format, a0, a1, a2, a3, a4, a5); + return result; +} + +ABSL_MUST_USE_RESULT inline TString Substitute( + y_absl::string_view format, const substitute_internal::Arg& a0, + const substitute_internal::Arg& a1, const substitute_internal::Arg& a2, + const substitute_internal::Arg& a3, const substitute_internal::Arg& a4, + const substitute_internal::Arg& a5, const substitute_internal::Arg& a6) { + TString result; + SubstituteAndAppend(&result, format, a0, a1, a2, a3, a4, a5, a6); + return result; +} + +ABSL_MUST_USE_RESULT inline TString Substitute( + y_absl::string_view format, const substitute_internal::Arg& a0, + const substitute_internal::Arg& a1, const substitute_internal::Arg& a2, + const substitute_internal::Arg& a3, const substitute_internal::Arg& a4, + const substitute_internal::Arg& a5, const substitute_internal::Arg& a6, + const substitute_internal::Arg& a7) { + TString result; + SubstituteAndAppend(&result, format, a0, a1, a2, a3, a4, a5, a6, a7); + return result; +} + +ABSL_MUST_USE_RESULT inline TString Substitute( + y_absl::string_view format, const substitute_internal::Arg& a0, + const substitute_internal::Arg& a1, const substitute_internal::Arg& a2, + const substitute_internal::Arg& a3, const substitute_internal::Arg& a4, + const substitute_internal::Arg& a5, const substitute_internal::Arg& a6, + const substitute_internal::Arg& a7, const substitute_internal::Arg& a8) { + TString result; + SubstituteAndAppend(&result, format, a0, a1, a2, a3, a4, a5, a6, a7, a8); + return result; +} + +ABSL_MUST_USE_RESULT inline TString Substitute( + y_absl::string_view format, const substitute_internal::Arg& a0, + const substitute_internal::Arg& a1, const substitute_internal::Arg& a2, + const substitute_internal::Arg& a3, const substitute_internal::Arg& a4, + const substitute_internal::Arg& a5, const substitute_internal::Arg& a6, + const substitute_internal::Arg& a7, const substitute_internal::Arg& a8, + const substitute_internal::Arg& a9) { + TString result; + SubstituteAndAppend(&result, format, a0, a1, a2, a3, a4, a5, a6, a7, a8, a9); + return result; +} + +#if defined(ABSL_BAD_CALL_IF) +// This body of functions catches cases where the number of placeholders +// doesn't match the number of data arguments. +TString Substitute(const char* format) + ABSL_BAD_CALL_IF(substitute_internal::PlaceholderBitmask(format) != 0, + "There were no substitution arguments " + "but this format string either has a $[0-9] in it or " + "contains an unescaped $ character (use $$ instead)"); + +TString Substitute(const char* format, const substitute_internal::Arg& a0) + ABSL_BAD_CALL_IF( + substitute_internal::PlaceholderBitmask(format) != 1, + "There was 1 substitution argument given, but " + "this format string is missing its $0, contains one of $1-$9, " + "or contains an unescaped $ character (use $$ instead)"); + +TString Substitute(const char* format, const substitute_internal::Arg& a0, + const substitute_internal::Arg& a1) + ABSL_BAD_CALL_IF( + substitute_internal::PlaceholderBitmask(format) != 3, + "There were 2 substitution arguments given, but " + "this format string is missing its $0/$1, contains one of " + "$2-$9, or contains an unescaped $ character (use $$ instead)"); + +TString Substitute(const char* format, const substitute_internal::Arg& a0, + const substitute_internal::Arg& a1, + const substitute_internal::Arg& a2) + ABSL_BAD_CALL_IF( + substitute_internal::PlaceholderBitmask(format) != 7, + "There were 3 substitution arguments given, but " + "this format string is missing its $0/$1/$2, contains one of " + "$3-$9, or contains an unescaped $ character (use $$ instead)"); + +TString Substitute(const char* format, const substitute_internal::Arg& a0, + const substitute_internal::Arg& a1, + const substitute_internal::Arg& a2, + const substitute_internal::Arg& a3) + ABSL_BAD_CALL_IF( + substitute_internal::PlaceholderBitmask(format) != 15, + "There were 4 substitution arguments given, but " + "this format string is missing its $0-$3, contains one of " + "$4-$9, or contains an unescaped $ character (use $$ instead)"); + +TString Substitute(const char* format, const substitute_internal::Arg& a0, + const substitute_internal::Arg& a1, + const substitute_internal::Arg& a2, + const substitute_internal::Arg& a3, + const substitute_internal::Arg& a4) + ABSL_BAD_CALL_IF( + substitute_internal::PlaceholderBitmask(format) != 31, + "There were 5 substitution arguments given, but " + "this format string is missing its $0-$4, contains one of " + "$5-$9, or contains an unescaped $ character (use $$ instead)"); + +TString Substitute(const char* format, const substitute_internal::Arg& a0, + const substitute_internal::Arg& a1, + const substitute_internal::Arg& a2, + const substitute_internal::Arg& a3, + const substitute_internal::Arg& a4, + const substitute_internal::Arg& a5) + ABSL_BAD_CALL_IF( + substitute_internal::PlaceholderBitmask(format) != 63, + "There were 6 substitution arguments given, but " + "this format string is missing its $0-$5, contains one of " + "$6-$9, or contains an unescaped $ character (use $$ instead)"); + +TString Substitute(const char* format, const substitute_internal::Arg& a0, + const substitute_internal::Arg& a1, + const substitute_internal::Arg& a2, + const substitute_internal::Arg& a3, + const substitute_internal::Arg& a4, + const substitute_internal::Arg& a5, + const substitute_internal::Arg& a6) + ABSL_BAD_CALL_IF( + substitute_internal::PlaceholderBitmask(format) != 127, + "There were 7 substitution arguments given, but " + "this format string is missing its $0-$6, contains one of " + "$7-$9, or contains an unescaped $ character (use $$ instead)"); + +TString Substitute(const char* format, const substitute_internal::Arg& a0, + const substitute_internal::Arg& a1, + const substitute_internal::Arg& a2, + const substitute_internal::Arg& a3, + const substitute_internal::Arg& a4, + const substitute_internal::Arg& a5, + const substitute_internal::Arg& a6, + const substitute_internal::Arg& a7) + ABSL_BAD_CALL_IF( + substitute_internal::PlaceholderBitmask(format) != 255, + "There were 8 substitution arguments given, but " + "this format string is missing its $0-$7, contains one of " + "$8-$9, or contains an unescaped $ character (use $$ instead)"); + +TString Substitute( + const char* format, const substitute_internal::Arg& a0, + const substitute_internal::Arg& a1, const substitute_internal::Arg& a2, + const substitute_internal::Arg& a3, const substitute_internal::Arg& a4, + const substitute_internal::Arg& a5, const substitute_internal::Arg& a6, + const substitute_internal::Arg& a7, const substitute_internal::Arg& a8) + ABSL_BAD_CALL_IF( + substitute_internal::PlaceholderBitmask(format) != 511, + "There were 9 substitution arguments given, but " + "this format string is missing its $0-$8, contains a $9, or " + "contains an unescaped $ character (use $$ instead)"); + +TString Substitute( + const char* format, const substitute_internal::Arg& a0, + const substitute_internal::Arg& a1, const substitute_internal::Arg& a2, + const substitute_internal::Arg& a3, const substitute_internal::Arg& a4, + const substitute_internal::Arg& a5, const substitute_internal::Arg& a6, + const substitute_internal::Arg& a7, const substitute_internal::Arg& a8, + const substitute_internal::Arg& a9) + ABSL_BAD_CALL_IF( + substitute_internal::PlaceholderBitmask(format) != 1023, + "There were 10 substitution arguments given, but this " + "format string either doesn't contain all of $0 through $9 or " + "contains an unescaped $ character (use $$ instead)"); +#endif // ABSL_BAD_CALL_IF + +ABSL_NAMESPACE_END +} // namespace y_absl + +#endif // ABSL_STRINGS_SUBSTITUTE_H_ diff --git a/contrib/restricted/abseil-cpp-tstring/y_absl/strings/ya.make b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/ya.make new file mode 100644 index 0000000000..77c5a47dc9 --- /dev/null +++ b/contrib/restricted/abseil-cpp-tstring/y_absl/strings/ya.make @@ -0,0 +1,46 @@ +# Generated by devtools/yamaker. + +LIBRARY() + +OWNER( + somov + g:cpp-contrib +) + +LICENSE(Apache-2.0) + +LICENSE_TEXTS(.yandex_meta/licenses.list.txt) + +PEERDIR( + contrib/restricted/abseil-cpp-tstring/y_absl/base + contrib/restricted/abseil-cpp-tstring/y_absl/base/internal/raw_logging + contrib/restricted/abseil-cpp-tstring/y_absl/base/internal/spinlock_wait + contrib/restricted/abseil-cpp-tstring/y_absl/base/internal/throw_delegate + contrib/restricted/abseil-cpp-tstring/y_absl/base/log_severity + contrib/restricted/abseil-cpp-tstring/y_absl/numeric + contrib/restricted/abseil-cpp-tstring/y_absl/strings/internal/absl_strings_internal +) + +ADDINCL( + GLOBAL contrib/restricted/abseil-cpp-tstring +) + +NO_COMPILER_WARNINGS() + +SRCS( + ascii.cc + charconv.cc + escaping.cc + internal/charconv_bigint.cc + internal/charconv_parse.cc + internal/memutil.cc + match.cc + numbers.cc + str_cat.cc + str_replace.cc + str_split.cc + string_view.cc + substitute.cc +) + +END() |