diff options
author | Andrey Khalyavin <halyavin@gmail.com> | 2022-02-10 16:46:30 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:46:30 +0300 |
commit | 4b839d0704ee9be1dabb0310a1f03af24963637b (patch) | |
tree | 1a2c5ffcf89eb53ecd79dbc9bc0a195c27404d0c /contrib/libs/cxxsupp/libcxx/src/regex.cpp | |
parent | f773626848a7c7456803654292e716b83d69cc12 (diff) | |
download | ydb-4b839d0704ee9be1dabb0310a1f03af24963637b.tar.gz |
Restoring authorship annotation for Andrey Khalyavin <halyavin@gmail.com>. Commit 2 of 2.
Diffstat (limited to 'contrib/libs/cxxsupp/libcxx/src/regex.cpp')
-rw-r--r-- | contrib/libs/cxxsupp/libcxx/src/regex.cpp | 624 |
1 files changed, 312 insertions, 312 deletions
diff --git a/contrib/libs/cxxsupp/libcxx/src/regex.cpp b/contrib/libs/cxxsupp/libcxx/src/regex.cpp index e7c23deea4..d31e494874 100644 --- a/contrib/libs/cxxsupp/libcxx/src/regex.cpp +++ b/contrib/libs/cxxsupp/libcxx/src/regex.cpp @@ -1,316 +1,316 @@ -//===-------------------------- regex.cpp ---------------------------------===// -// +//===-------------------------- regex.cpp ---------------------------------===// +// // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "regex" -#include "algorithm" -#include "iterator" - -_LIBCPP_BEGIN_NAMESPACE_STD - -static -const char* -make_error_type_string(regex_constants::error_type ecode) -{ - switch (ecode) - { - case regex_constants::error_collate: - return "The expression contained an invalid collating element name."; - case regex_constants::error_ctype: - return "The expression contained an invalid character class name."; - case regex_constants::error_escape: - return "The expression contained an invalid escaped character, or a " - "trailing escape."; - case regex_constants::error_backref: - return "The expression contained an invalid back reference."; - case regex_constants::error_brack: - return "The expression contained mismatched [ and ]."; - case regex_constants::error_paren: - return "The expression contained mismatched ( and )."; - case regex_constants::error_brace: - return "The expression contained mismatched { and }."; - case regex_constants::error_badbrace: - return "The expression contained an invalid range in a {} expression."; - case regex_constants::error_range: - return "The expression contained an invalid character range, " - "such as [b-a] in most encodings."; - case regex_constants::error_space: - return "There was insufficient memory to convert the expression into " - "a finite state machine."; - case regex_constants::error_badrepeat: - return "One of *?+{ was not preceded by a valid regular expression."; - case regex_constants::error_complexity: - return "The complexity of an attempted match against a regular " - "expression exceeded a pre-set level."; - case regex_constants::error_stack: - return "There was insufficient memory to determine whether the regular " - "expression could match the specified character sequence."; - case regex_constants::__re_err_grammar: - return "An invalid regex grammar has been requested."; - case regex_constants::__re_err_empty: - return "An empty regex is not allowed in the POSIX grammar."; - case regex_constants::__re_err_parse: - return "The parser did not consume the entire regular expression."; - default: - break; - } - return "Unknown error type"; -} - -regex_error::regex_error(regex_constants::error_type ecode) - : runtime_error(make_error_type_string(ecode)), - __code_(ecode) -{} - +// +//===----------------------------------------------------------------------===// + +#include "regex" +#include "algorithm" +#include "iterator" + +_LIBCPP_BEGIN_NAMESPACE_STD + +static +const char* +make_error_type_string(regex_constants::error_type ecode) +{ + switch (ecode) + { + case regex_constants::error_collate: + return "The expression contained an invalid collating element name."; + case regex_constants::error_ctype: + return "The expression contained an invalid character class name."; + case regex_constants::error_escape: + return "The expression contained an invalid escaped character, or a " + "trailing escape."; + case regex_constants::error_backref: + return "The expression contained an invalid back reference."; + case regex_constants::error_brack: + return "The expression contained mismatched [ and ]."; + case regex_constants::error_paren: + return "The expression contained mismatched ( and )."; + case regex_constants::error_brace: + return "The expression contained mismatched { and }."; + case regex_constants::error_badbrace: + return "The expression contained an invalid range in a {} expression."; + case regex_constants::error_range: + return "The expression contained an invalid character range, " + "such as [b-a] in most encodings."; + case regex_constants::error_space: + return "There was insufficient memory to convert the expression into " + "a finite state machine."; + case regex_constants::error_badrepeat: + return "One of *?+{ was not preceded by a valid regular expression."; + case regex_constants::error_complexity: + return "The complexity of an attempted match against a regular " + "expression exceeded a pre-set level."; + case regex_constants::error_stack: + return "There was insufficient memory to determine whether the regular " + "expression could match the specified character sequence."; + case regex_constants::__re_err_grammar: + return "An invalid regex grammar has been requested."; + case regex_constants::__re_err_empty: + return "An empty regex is not allowed in the POSIX grammar."; + case regex_constants::__re_err_parse: + return "The parser did not consume the entire regular expression."; + default: + break; + } + return "Unknown error type"; +} + +regex_error::regex_error(regex_constants::error_type ecode) + : runtime_error(make_error_type_string(ecode)), + __code_(ecode) +{} + regex_error::~regex_error() throw() {} - -namespace { - -struct collationnames -{ - const char* elem_; - char char_; -}; - -const collationnames collatenames[] = -{ - {"A", 0x41}, - {"B", 0x42}, - {"C", 0x43}, - {"D", 0x44}, - {"E", 0x45}, - {"F", 0x46}, - {"G", 0x47}, - {"H", 0x48}, - {"I", 0x49}, - {"J", 0x4a}, - {"K", 0x4b}, - {"L", 0x4c}, - {"M", 0x4d}, - {"N", 0x4e}, - {"NUL", 0x00}, - {"O", 0x4f}, - {"P", 0x50}, - {"Q", 0x51}, - {"R", 0x52}, - {"S", 0x53}, - {"T", 0x54}, - {"U", 0x55}, - {"V", 0x56}, - {"W", 0x57}, - {"X", 0x58}, - {"Y", 0x59}, - {"Z", 0x5a}, - {"a", 0x61}, - {"alert", 0x07}, - {"ampersand", 0x26}, - {"apostrophe", 0x27}, - {"asterisk", 0x2a}, - {"b", 0x62}, - {"backslash", 0x5c}, - {"backspace", 0x08}, - {"c", 0x63}, - {"carriage-return", 0x0d}, - {"circumflex", 0x5e}, - {"circumflex-accent", 0x5e}, - {"colon", 0x3a}, - {"comma", 0x2c}, - {"commercial-at", 0x40}, - {"d", 0x64}, - {"dollar-sign", 0x24}, - {"e", 0x65}, - {"eight", 0x38}, - {"equals-sign", 0x3d}, - {"exclamation-mark", 0x21}, - {"f", 0x66}, - {"five", 0x35}, - {"form-feed", 0x0c}, - {"four", 0x34}, - {"full-stop", 0x2e}, - {"g", 0x67}, - {"grave-accent", 0x60}, - {"greater-than-sign", 0x3e}, - {"h", 0x68}, - {"hyphen", 0x2d}, - {"hyphen-minus", 0x2d}, - {"i", 0x69}, - {"j", 0x6a}, - {"k", 0x6b}, - {"l", 0x6c}, - {"left-brace", 0x7b}, - {"left-curly-bracket", 0x7b}, - {"left-parenthesis", 0x28}, - {"left-square-bracket", 0x5b}, - {"less-than-sign", 0x3c}, - {"low-line", 0x5f}, - {"m", 0x6d}, - {"n", 0x6e}, - {"newline", 0x0a}, - {"nine", 0x39}, - {"number-sign", 0x23}, - {"o", 0x6f}, - {"one", 0x31}, - {"p", 0x70}, - {"percent-sign", 0x25}, - {"period", 0x2e}, - {"plus-sign", 0x2b}, - {"q", 0x71}, - {"question-mark", 0x3f}, - {"quotation-mark", 0x22}, - {"r", 0x72}, - {"reverse-solidus", 0x5c}, - {"right-brace", 0x7d}, - {"right-curly-bracket", 0x7d}, - {"right-parenthesis", 0x29}, - {"right-square-bracket", 0x5d}, - {"s", 0x73}, - {"semicolon", 0x3b}, - {"seven", 0x37}, - {"six", 0x36}, - {"slash", 0x2f}, - {"solidus", 0x2f}, - {"space", 0x20}, - {"t", 0x74}, - {"tab", 0x09}, - {"three", 0x33}, - {"tilde", 0x7e}, - {"two", 0x32}, - {"u", 0x75}, - {"underscore", 0x5f}, - {"v", 0x76}, - {"vertical-line", 0x7c}, - {"vertical-tab", 0x0b}, - {"w", 0x77}, - {"x", 0x78}, - {"y", 0x79}, - {"z", 0x7a}, - {"zero", 0x30} -}; - -struct classnames -{ - const char* elem_; - regex_traits<char>::char_class_type mask_; -}; - -const classnames ClassNames[] = -{ - {"alnum", ctype_base::alnum}, - {"alpha", ctype_base::alpha}, - {"blank", ctype_base::blank}, - {"cntrl", ctype_base::cntrl}, - {"d", ctype_base::digit}, - {"digit", ctype_base::digit}, - {"graph", ctype_base::graph}, - {"lower", ctype_base::lower}, - {"print", ctype_base::print}, - {"punct", ctype_base::punct}, - {"s", ctype_base::space}, - {"space", ctype_base::space}, - {"upper", ctype_base::upper}, - {"w", regex_traits<char>::__regex_word}, - {"xdigit", ctype_base::xdigit} -}; - -struct use_strcmp -{ - bool operator()(const collationnames& x, const char* y) - {return strcmp(x.elem_, y) < 0;} - bool operator()(const classnames& x, const char* y) - {return strcmp(x.elem_, y) < 0;} -}; - -} - -string -__get_collation_name(const char* s) -{ - const collationnames* i = - _VSTD::lower_bound(begin(collatenames), end(collatenames), s, use_strcmp()); - string r; - if (i != end(collatenames) && strcmp(s, i->elem_) == 0) - r = char(i->char_); - return r; -} - -regex_traits<char>::char_class_type -__get_classname(const char* s, bool __icase) -{ - const classnames* i = - _VSTD::lower_bound(begin(ClassNames), end(ClassNames), s, use_strcmp()); - regex_traits<char>::char_class_type r = 0; - if (i != end(ClassNames) && strcmp(s, i->elem_) == 0) - { - r = i->mask_; - if (r == regex_traits<char>::__regex_word) - r |= ctype_base::alnum | ctype_base::upper | ctype_base::lower; - else if (__icase) - { - if (r & (ctype_base::lower | ctype_base::upper)) - r |= ctype_base::alpha; - } - } - return r; -} - -template <> -void -__match_any_but_newline<char>::__exec(__state& __s) const -{ - if (__s.__current_ != __s.__last_) - { - switch (*__s.__current_) - { - case '\r': - case '\n': - __s.__do_ = __state::__reject; - __s.__node_ = nullptr; - break; - default: - __s.__do_ = __state::__accept_and_consume; - ++__s.__current_; - __s.__node_ = this->first(); - break; - } - } - else - { - __s.__do_ = __state::__reject; - __s.__node_ = nullptr; - } -} - -template <> -void -__match_any_but_newline<wchar_t>::__exec(__state& __s) const -{ - if (__s.__current_ != __s.__last_) - { - switch (*__s.__current_) - { - case '\r': - case '\n': - case 0x2028: - case 0x2029: - __s.__do_ = __state::__reject; - __s.__node_ = nullptr; - break; - default: - __s.__do_ = __state::__accept_and_consume; - ++__s.__current_; - __s.__node_ = this->first(); - break; - } - } - else - { - __s.__do_ = __state::__reject; - __s.__node_ = nullptr; - } -} - -_LIBCPP_END_NAMESPACE_STD + +namespace { + +struct collationnames +{ + const char* elem_; + char char_; +}; + +const collationnames collatenames[] = +{ + {"A", 0x41}, + {"B", 0x42}, + {"C", 0x43}, + {"D", 0x44}, + {"E", 0x45}, + {"F", 0x46}, + {"G", 0x47}, + {"H", 0x48}, + {"I", 0x49}, + {"J", 0x4a}, + {"K", 0x4b}, + {"L", 0x4c}, + {"M", 0x4d}, + {"N", 0x4e}, + {"NUL", 0x00}, + {"O", 0x4f}, + {"P", 0x50}, + {"Q", 0x51}, + {"R", 0x52}, + {"S", 0x53}, + {"T", 0x54}, + {"U", 0x55}, + {"V", 0x56}, + {"W", 0x57}, + {"X", 0x58}, + {"Y", 0x59}, + {"Z", 0x5a}, + {"a", 0x61}, + {"alert", 0x07}, + {"ampersand", 0x26}, + {"apostrophe", 0x27}, + {"asterisk", 0x2a}, + {"b", 0x62}, + {"backslash", 0x5c}, + {"backspace", 0x08}, + {"c", 0x63}, + {"carriage-return", 0x0d}, + {"circumflex", 0x5e}, + {"circumflex-accent", 0x5e}, + {"colon", 0x3a}, + {"comma", 0x2c}, + {"commercial-at", 0x40}, + {"d", 0x64}, + {"dollar-sign", 0x24}, + {"e", 0x65}, + {"eight", 0x38}, + {"equals-sign", 0x3d}, + {"exclamation-mark", 0x21}, + {"f", 0x66}, + {"five", 0x35}, + {"form-feed", 0x0c}, + {"four", 0x34}, + {"full-stop", 0x2e}, + {"g", 0x67}, + {"grave-accent", 0x60}, + {"greater-than-sign", 0x3e}, + {"h", 0x68}, + {"hyphen", 0x2d}, + {"hyphen-minus", 0x2d}, + {"i", 0x69}, + {"j", 0x6a}, + {"k", 0x6b}, + {"l", 0x6c}, + {"left-brace", 0x7b}, + {"left-curly-bracket", 0x7b}, + {"left-parenthesis", 0x28}, + {"left-square-bracket", 0x5b}, + {"less-than-sign", 0x3c}, + {"low-line", 0x5f}, + {"m", 0x6d}, + {"n", 0x6e}, + {"newline", 0x0a}, + {"nine", 0x39}, + {"number-sign", 0x23}, + {"o", 0x6f}, + {"one", 0x31}, + {"p", 0x70}, + {"percent-sign", 0x25}, + {"period", 0x2e}, + {"plus-sign", 0x2b}, + {"q", 0x71}, + {"question-mark", 0x3f}, + {"quotation-mark", 0x22}, + {"r", 0x72}, + {"reverse-solidus", 0x5c}, + {"right-brace", 0x7d}, + {"right-curly-bracket", 0x7d}, + {"right-parenthesis", 0x29}, + {"right-square-bracket", 0x5d}, + {"s", 0x73}, + {"semicolon", 0x3b}, + {"seven", 0x37}, + {"six", 0x36}, + {"slash", 0x2f}, + {"solidus", 0x2f}, + {"space", 0x20}, + {"t", 0x74}, + {"tab", 0x09}, + {"three", 0x33}, + {"tilde", 0x7e}, + {"two", 0x32}, + {"u", 0x75}, + {"underscore", 0x5f}, + {"v", 0x76}, + {"vertical-line", 0x7c}, + {"vertical-tab", 0x0b}, + {"w", 0x77}, + {"x", 0x78}, + {"y", 0x79}, + {"z", 0x7a}, + {"zero", 0x30} +}; + +struct classnames +{ + const char* elem_; + regex_traits<char>::char_class_type mask_; +}; + +const classnames ClassNames[] = +{ + {"alnum", ctype_base::alnum}, + {"alpha", ctype_base::alpha}, + {"blank", ctype_base::blank}, + {"cntrl", ctype_base::cntrl}, + {"d", ctype_base::digit}, + {"digit", ctype_base::digit}, + {"graph", ctype_base::graph}, + {"lower", ctype_base::lower}, + {"print", ctype_base::print}, + {"punct", ctype_base::punct}, + {"s", ctype_base::space}, + {"space", ctype_base::space}, + {"upper", ctype_base::upper}, + {"w", regex_traits<char>::__regex_word}, + {"xdigit", ctype_base::xdigit} +}; + +struct use_strcmp +{ + bool operator()(const collationnames& x, const char* y) + {return strcmp(x.elem_, y) < 0;} + bool operator()(const classnames& x, const char* y) + {return strcmp(x.elem_, y) < 0;} +}; + +} + +string +__get_collation_name(const char* s) +{ + const collationnames* i = + _VSTD::lower_bound(begin(collatenames), end(collatenames), s, use_strcmp()); + string r; + if (i != end(collatenames) && strcmp(s, i->elem_) == 0) + r = char(i->char_); + return r; +} + +regex_traits<char>::char_class_type +__get_classname(const char* s, bool __icase) +{ + const classnames* i = + _VSTD::lower_bound(begin(ClassNames), end(ClassNames), s, use_strcmp()); + regex_traits<char>::char_class_type r = 0; + if (i != end(ClassNames) && strcmp(s, i->elem_) == 0) + { + r = i->mask_; + if (r == regex_traits<char>::__regex_word) + r |= ctype_base::alnum | ctype_base::upper | ctype_base::lower; + else if (__icase) + { + if (r & (ctype_base::lower | ctype_base::upper)) + r |= ctype_base::alpha; + } + } + return r; +} + +template <> +void +__match_any_but_newline<char>::__exec(__state& __s) const +{ + if (__s.__current_ != __s.__last_) + { + switch (*__s.__current_) + { + case '\r': + case '\n': + __s.__do_ = __state::__reject; + __s.__node_ = nullptr; + break; + default: + __s.__do_ = __state::__accept_and_consume; + ++__s.__current_; + __s.__node_ = this->first(); + break; + } + } + else + { + __s.__do_ = __state::__reject; + __s.__node_ = nullptr; + } +} + +template <> +void +__match_any_but_newline<wchar_t>::__exec(__state& __s) const +{ + if (__s.__current_ != __s.__last_) + { + switch (*__s.__current_) + { + case '\r': + case '\n': + case 0x2028: + case 0x2029: + __s.__do_ = __state::__reject; + __s.__node_ = nullptr; + break; + default: + __s.__do_ = __state::__accept_and_consume; + ++__s.__current_; + __s.__node_ = this->first(); + break; + } + } + else + { + __s.__do_ = __state::__reject; + __s.__node_ = nullptr; + } +} + +_LIBCPP_END_NAMESPACE_STD |